In [1]:
# Import required libraries
import json
import os
import sys

import dotenv
import pandas as pd
import psycopg2
import requests

# Load environment variables (for API keys)
dotenv.load_dotenv()


True

## 4. Create a Job Vacancy Description

Load a job vacancy, including its full description and requirements, from the spreadsheet export and serialize it to text for the matching API.


In [68]:
import pandas as pd

# Read the vacancy export and pick a single row to work with.
vacancies = pd.read_excel("4 вакансии на 05.05.2025.xlsx")
selected_vacancy = vacancies.iloc[2]  # third row of the sheet (positional index 2)

# Keep the id separately and stringify the whole record as the vacancy text.
vacancy_id = selected_vacancy["id"]
vacancy_text = str(selected_vacancy.to_dict())
vacancy_text

"{'id': 6432, 'title': 'Fullstack engineer (Python/Angular)', 'description': '**Company Overview:**  \\r\\nJoin an international team at a company that has pioneered the records and information management industry. With over 225,000 businesses in our client base, including 95% of the Fortune 1000, we focus on building secure and scalable solutions to manage and protect corporate information.  \\r\\n  \\r\\n**Role Overview:**  \\r\\nWe are looking for a FullStack Engineer with expertise in Python and Angular to help solve business and technology challenges through our engineering and IT consulting services.   \\r\\n  \\r\\n**Tech Stack:**   \\r\\n\\\\- Python;  \\r\\n\\\\- Angular;  \\r\\n\\\\- JavaScript;  \\r\\n\\\\- SQL.  \\r\\n  \\r\\n**Responsibilities:**  \\r\\n\\\\- Develop and maintain scalable fullstack applications using Python and Angular.  \\r\\n\\\\- Optimize SQL queries and manage database interactions.  \\r\\n\\\\- Collaborate with cross-functional teams, including DevOps

## 5. Send Data to the API

Submit the vacancy to the matching service; this starts a batch job that matches candidates against it.


In [73]:

# Request body for the batch-matching endpoint: the vacancy id and its text.
# int() turns the id read from the spreadsheet into a plain Python int
# (presumably a NumPy integer otherwise, which the JSON encoder rejects).
payload = {
    "vacancy_id": int(vacancy_id),
    "vacancy_text": vacancy_text,
}

# Send the data to the matching endpoint.
# A timeout keeps the cell from hanging forever if the server is unreachable.
response = requests.post(
    "http://93.127.132.57:8910/api/v1/matching/match_candidates_batch",
    json=payload,
    timeout=30,
)

# Print the raw body first so an error payload is still visible if the
# status check below raises.
print(f"API Response: {response.text}")

# Fail loudly on 4xx/5xx instead of hitting a confusing KeyError on "batch_id".
response.raise_for_status()

# Get the batch job ID from the response
batch_id = response.json()["batch_id"]
print(f"Job started with ID: {batch_id}")


API Response: {"batch_id":"batch_6820967867648190a06cd2c0f7d25932","status":"validating"}
Job started with ID: batch_6820967867648190a06cd2c0f7d25932


In [76]:
# Step 2: Check the job status
# A timeout keeps the cell from blocking indefinitely on an unresponsive server.
response = requests.get(
    f"http://93.127.132.57:8910/api/v1/matching/batch_job/{batch_id}",
    timeout=30,
)
print(f"Current status: {response.text}")

# Surface HTTP errors (e.g. an unknown batch id) instead of silently
# treating an error body as a status.
response.raise_for_status()

# Note: The job may take some time to complete; re-run this cell to poll again.


Current status: {"batch_id":"batch_6820967867648190a06cd2c0f7d25932","status":"in_progress"}


## 6. Process the Results

When complete, load and analyze the match results.


In [None]:
# Load and analyze the results
import pandas as pd
# Point this at the scores file produced by your own batch run
results = pd.read_json("data/candidate_scores_20250506_191347.json")

# Flatten the nested candidate records and rank them best-first by score
df_candidates = (
    pd.json_normalize(results["candidates"])
    .sort_values(by="info.score", ascending=False)
)

print(f"Found {len(df_candidates)} matching candidates")
df_candidates


In [None]:
# Write the candidate table to CSV without the DataFrame index column.
# NOTE(review): the output file name is hard-coded; confirm it matches the
# vacancy these scores were produced for before re-running.
df_candidates.to_csv("data/candidates_NodeJS_06.05.2025.csv", index=False)

In [26]:
# Endpoint that accepts search definitions via a `querystring` URL parameter.
url = "http://93.127.132.57:8911/querystring"

headers = {"Content-Type": "application/json"}

# One search definition per vacancy; all values are sent as strings.
payload = [
    {
        "vacancy_id": "6436",
        "keywords": ["Node.js", "TypeScript", "AWS", "Node.js Developer"],
        "start": "0",
        "geo": "",
    }
]

# The payload travels JSON-encoded inside the query parameter, not in the body.
params = {"querystring": json.dumps(payload)}

try:
    # A timeout keeps the cell from hanging until it is interrupted by hand;
    # a timeout error is a RequestException and is reported by the handler below.
    response = requests.post(url, headers=headers, params=params, timeout=60)
except requests.exceptions.RequestException as exc:
    # `sys` comes from the notebook's import cell.
    print(f"Network problem: {exc}", file=sys.stderr)
    sys.exit(1)

# Print status and body in a readable way
print("Status:", response.status_code)
print("Body:", response.json())


KeyboardInterrupt: 

In [19]:
# Show the "message" field of the JSON body held in `response`.
# NOTE(review): `response` is reassigned by several cells; this expects it to
# be a `requests` response whose JSON body has a "message" key — confirm which
# cell ran last.
response.json()["message"]

'Vacancies were handled'

In [67]:
from openai import OpenAI
import os
from enum import Enum
from typing import List
from pydantic import BaseModel, Field




# Structured-output schema passed as `response_format` to the chat completion
# call below. Documented with comments rather than a class docstring so the
# schema generated from this model is left untouched.
class KeywordResponse(BaseModel):
    # Search keywords extracted from the vacancy text (required field).
    keywords: List[str]
    # Location codes as plain strings; defaults to an empty list when omitted.
    # NOTE(review): the description refers to a `LocationCode` enum, but the
    # field is typed List[str] and no such enum is defined in this cell, so
    # nothing here validates the codes — confirm whether an enum is intended.
    locations: List[str] = Field(
        default_factory=list,
        description="List of country codes for candidate search locations. These codes must be valid `LocationCode` enum values."
    )
    # Free-text explanation of the extracted keywords and locations (required field).
    explanation: str = Field(
        description="Explanation of the keywords and locations extracted."
    )


# Synchronous OpenAI client; the API key is read from the environment.
sync_client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Ask the model for keywords/locations and have the SDK parse the reply into
# a KeywordResponse instance. temperature=0.0 keeps the output as stable as
# the API allows.
response = sync_client.beta.chat.completions.parse(
    model="gpt-4o",
    messages=[
        {"role": "system", "content": "You are an expert keyword extractor for recruitment AI System. "},
        {"role": "user", "content": f"""
Extract the most important keywords from the vacancy description to search for candidates in Linkedin.
Focus on terms useful for searching a candidate database, limit the keywords to a maximum of 10.
The list of keywords should be diverse, cover all aspects of the vacancy, and enrich the search.

Also, provide a list of country codes for candidate search locations. These codes **must** be valid `LocationCode` enum values (e.g., UNITED_STATES, GERMANY, FRANCE are some examples of valid codes; refer to the `LocationCode` schema for all options). If the vacancy does not specify countries, return an empty list. Base your answer on the vacancy description.

When you see a country name in the vacancy you MUST convert it to the exact enum in LocationCode:
Poland -> POLAND, Germany -> GERMANY
If the whole region instead of a country is mentioned, list all countries inside of the region. For example, EU -> [FRANCE, BELGIUM, SPAIN, ENGLAND, GERMANY, ITALY, NETHERLANDS, POLAND, SWITZERLAND, SWEDEN, AUSTRIA, BULGARIA, CROATIA, CZECH_REPUBLIC, DENMARK, ESTONIA, FINLAND, GREECE, HUNGARY, ROMANIA, PORTUGAL, NORWAY, LITHUANIA, LUXEMBOURG, SLOVAKIA] and so on.
If no country is mentioned, return an empty list.
                    Vacancy Description:
                    ---
                    I need python developer in belarus, if not, in caucasi
                    ---
    """}
    ],
    response_format=KeywordResponse,
    temperature=0.0,
)

# `parsed` is None when the model refuses or the reply cannot be parsed into
# the schema; fail with a clear message instead of an AttributeError on None.
message = response.choices[0].message
parsed = message.parsed
if parsed is None:
    refusal = getattr(message, "refusal", None)
    raise ValueError(f"Model returned no structured output (refusal: {refusal!r})")

keywords = parsed.keywords
locations = parsed.locations
explanation = parsed.explanation
print(f"Extracted keywords, location: {keywords} : {locations}; explanation: {explanation}")

Extracted keywords, location: ['Python Developer', 'Python', 'Software Development', 'Programming', 'Coding', 'Software Engineer', 'Backend Development', 'Scripting', 'Object-Oriented Programming', 'Software Design'] : ['BELARUS', 'GEORGIA', 'ARMENIA', 'AZERBAIJAN']; explanation: The vacancy is for a Python Developer, so 'Python Developer' and 'Python' are primary keywords. 'Software Development', 'Programming', 'Coding', 'Software Engineer', 'Backend Development', 'Scripting', 'Object-Oriented Programming', and 'Software Design' are included to cover various aspects of the role and skills required. The location specified is Belarus, and the Caucasus region, which includes Georgia, Armenia, and Azerbaijan, is also mentioned, so these are included in the location codes.


In [52]:
# Display the stringified vacancy record currently held in the kernel.
vacancy_text

"{'id': 6432, 'title': 'Fullstack engineer (Python/Angular)', 'description': '**Company Overview:**  \\r\\nJoin an international team at a company that has pioneered the records and information management industry. With over 225,000 businesses in our client base, including 95% of the Fortune 1000, we focus on building secure and scalable solutions to manage and protect corporate information.  \\r\\n  \\r\\n**Role Overview:**  \\r\\nWe are looking for a FullStack Engineer with expertise in Python and Angular to help solve business and technology challenges through our engineering and IT consulting services.   \\r\\n  \\r\\n**Tech Stack:**   \\r\\n\\\\- Python;  \\r\\n\\\\- Angular;  \\r\\n\\\\- JavaScript;  \\r\\n\\\\- SQL.  \\r\\n  \\r\\n**Responsibilities:**  \\r\\n\\\\- Develop and maintain scalable fullstack applications using Python and Angular.  \\r\\n\\\\- Optimize SQL queries and manage database interactions.  \\r\\n\\\\- Collaborate with cross-functional teams, including DevOps