In [None]:
import glob
import json
import os
from urllib.parse import urlencode

import openai
import pandas as pd
import requests

In [None]:
# Initialisation
# HH.ru OAuth application credentials. Each value is read from an environment
# variable when one is set, so real secrets do not have to be saved inside the
# notebook; otherwise the placeholder is used and must be replaced by hand.
client_id = os.environ.get('HH_CLIENT_ID', 'input your HH.ru client id')
client_secret = os.environ.get('HH_CLIENT_SECRET', 'input your HH.ru client secret')
# Redirect URI: sent both in the authorization URL and in the token request.
redirect_uri = os.environ.get('HH_REDIRECT_URI', 'input any link to copy a code from it')
auth_url = 'https://hh.ru/oauth/authorize'

app_name = 'Candidate_filter'

In [None]:
# Build the URL the user must open in a browser to authorize the application.
# urlencode percent-encodes every value, so a redirect URI containing ':', '/',
# '?' or '&' cannot corrupt the query string.
authorization_params = urlencode({
    'response_type': 'code',
    'client_id': client_id,
    'redirect_uri': redirect_uri,
})
authorization_url = f"{auth_url}?{authorization_params}"

print(f"Go to the following URL to authorize the application: {authorization_url}")

In [None]:
# Replace 'your_authorization_code' with the actual code you received.
# .strip() removes whitespace that is easily picked up when copy-pasting, so
# the code is sent to the token endpoint without stray leading/trailing spaces.
authorization_code = 'your_authorization_code'.strip()
token_url = 'https://hh.ru/oauth/token'

# Form fields for the POST request that exchanges the authorization code for an access token
data = {
    'grant_type': 'authorization_code',
    'client_id': client_id,
    'client_secret': client_secret,
    'code': authorization_code,
    'redirect_uri': redirect_uri
}

In [None]:
# Exchange the authorization code for an access token.
# The timeout keeps the cell from hanging forever if the server does not answer.
response = requests.post(token_url, data=data, timeout=30)

if response.status_code == 200:
    access_token = response.json().get('access_token')
    # Do not echo the token itself: notebook outputs are saved with the file.
    print("Access token received." if access_token else "Response contained no access_token.")
else:
    # Reset so later cells cannot silently reuse a token from an earlier run.
    access_token = None
    print(f"Error: {response.status_code}, {response.text}")

In [None]:
# HH.ru API search function
def search_resumes(query_params, access_token, user_agent='your-app-name', timeout=30):
    """Search resumes through the HH.ru API.

    Args:
        query_params: dict of filters sent as the query string.
        access_token: OAuth token sent as a Bearer credential.
        user_agent: value of the User-Agent header; the default is the value
            this function previously hard-coded.
        timeout: seconds to wait for the server before giving up.

    Returns:
        The decoded JSON body when the server answers 200, otherwise None
        (the reason is printed).
    """
    url = "https://api.hh.ru/resumes"
    headers = {
        'Authorization': f'Bearer {access_token}',
        'User-Agent': user_agent
    }

    # Make the request with the given filters. Network failures (timeout, DNS,
    # refused connection) are reported the same way as HTTP errors: print and
    # return None, so callers only have one failure signal to check.
    try:
        response = requests.get(url, headers=headers, params=query_params, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Error searching resumes: {exc}")
        return None

    if response.status_code == 200:
        return response.json()

    print(f"Error searching resumes: {response.status_code}, {response.text}")
    return None

In [None]:
# Search filters for the resume query (adjust as needed)
query_params = dict(
    text='Data Scientist',               # Job title
    area=1,                              # Region ID (1 is Moscow, you can use others)
    experience='between3And6',           # Experience (e.g., between3And6 is 3-6 years)
    relocation='living_but_relocation',  # Willingness to relocate
    industry=7,                          # Industry ID (for example, 7 is "IT, Telecoms")
    per_page=1,                          # Number of resumes per page
    page=0,                              # Page number for pagination
)

# Run the search with the filters above
resumes_data = search_resumes(query_params=query_params, access_token=access_token)

In [None]:
# search_resumes returns None on failure, so test its result directly.
# (`response` at this point still holds the OAuth token exchange, not the
# search request, so its status code says nothing about the search.)
if resumes_data is not None:
    print(resumes_data)
else:
    print("Error searching resumes: no data returned (see the message printed by search_resumes).")

In [None]:
def _dig(mapping, *keys):
    """Follow `keys` through nested dicts; return None if any level is missing or not a dict."""
    current = mapping
    for key in keys:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


def _text(value, default="N/A"):
    """Return `value` as a string, or `default` when the value is None."""
    return default if value is None else str(value)


# Function to parse the JSON response into a flat structure
def parse_full_resume_data(json_data):
    """Flatten a resume search response into a list of one-level dicts.

    Args:
        json_data: decoded search response; resumes are read from its
            "items" list. None or a response without "items" yields [].

    Returns:
        A list with one dict per resume. Every dict has the same keys.
        Nested values that are missing or null become None; list-based
        fields (hidden fields, certificates, education details, experience)
        become "; "-joined strings, empty when there is nothing to join.
        The four score fields are created empty ('') for later filling.
    """
    items = (json_data or {}).get("items") or []

    # List to store extracted data
    data_list = []

    for item in items:
        # Nested fields go through _dig: a key that is present but null would
        # make a chained .get(key, {}).get(...) fail with AttributeError.
        resume_data = {
            "Resume ID": item.get("id"),
            "Oracle DB Dev Score": '',
            "BA Score": '',
            "DE Score": '',
            "Total Score": '',
            "Last Name": item.get("last_name"),
            "First Name": item.get("first_name"),
            "Middle Name": item.get("middle_name"),
            "Title": item.get("title"),
            "Created At": item.get("created_at"),
            "Updated At": item.get("updated_at"),
            "Location": _dig(item, "area", "name"),
            "Age": item.get("age"),
            "Gender": _dig(item, "gender", "name"),
            "Salary": item.get("salary"),
            "Photo": item.get("photo", None),
            "Total Experience (months)": _dig(item, "total_experience", "months"),
            "Can View Full Info": item.get("can_view_full_info"),
            "Negotiations History URL": _dig(item, "negotiations_history", "url"),
            "Hidden Fields": "; ".join(str(field) for field in item.get("hidden_fields") or []),
            "PDF Download Link": _dig(item, "download", "pdf", "url"),
            "RTF Download Link": _dig(item, "download", "rtf", "url"),
            "Resume URL": item.get("url"),
            "Alternate URL": item.get("alternate_url"),
            "Platform": _dig(item, "platform", "id"),
            "Favorited": item.get("favorited", False),
            "Viewed": item.get("viewed", False),
            "Marked": item.get("marked", False),
            "Last Negotiation": item.get("last_negotiation", None),
        }

        # Handling certificates
        certificates = item.get("certificate") or []
        resume_data["Certificates"] = "; ".join(_text(_dig(cert, "name")) for cert in certificates)

        # Handling owner ID
        resume_data["Owner ID"] = _dig(item, "owner", "id")

        # Handling education. The key is always written (empty string when
        # there is no primary education) so every row has the same columns.
        primary_education = _dig(item, "education", "primary") or []
        resume_data["Education Details"] = "; ".join(
            f"{_text(_dig(edu, 'name'))} - {_text(_dig(edu, 'result'))}"
            for edu in primary_education
        )
        resume_data["Education Level"] = _dig(item, "education", "level", "name")

        # Handling work experience
        resume_data["Experience"] = "; ".join(
            f"{_text(_dig(exp, 'company'))} ({_text(_dig(exp, 'position'))})"
            for exp in item.get("experience") or []
        )

        # Add the parsed resume data to the list
        data_list.append(resume_data)

    return data_list

In [None]:
# Parse the search response into a list of flat dicts (one per resume).
# `or {}` covers a failed search, where search_resumes returned None and the
# parser would otherwise fail on None.get(...).
parsed_resume_data = parse_full_resume_data(resumes_data or {})

In [None]:
# Create a Pandas DataFrame from the list of parsed resume dicts
df_resumes = pd.DataFrame(parsed_resume_data)

In [None]:
# Display the DataFrame (shown because it is the last expression in the cell)
df_resumes

In [None]:
# Function to load JSON files and create a job description array
def create_job_descriptions_array(json_directory):
    """Load every ``*.json`` file directly inside ``json_directory``.

    Args:
        json_directory: path of the directory to scan (not recursive).

    Returns:
        A list with the decoded content of each file, ordered by file path.
        Empty when the directory holds no ``*.json`` files.

    Raises:
        json.JSONDecodeError: if a file is not valid JSON.
    """
    job_descriptions = []

    # glob returns paths in arbitrary, filesystem-dependent order. Sorting
    # makes the position of each description stable between runs, which
    # matters to callers that address the entries by index.
    json_files = sorted(glob.glob(f"{json_directory}/*.json"))

    for file in json_files:
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        # 'utf-8-sig' reads plain UTF-8 and also tolerates a leading BOM.
        with open(file, 'r', encoding='utf-8-sig') as f:
            job_descriptions.append(json.load(f))

    return job_descriptions

In [None]:
# Example usage
# NOTE(review): "." makes the loader pick up every *.json file in the current
# working directory — confirm that only job-description files live there.
json_directory = "."  # directory where your job JSON files are stored
job_descriptions_array = create_job_descriptions_array(json_directory)

In [None]:
# Print the job descriptions array.
# ensure_ascii=False keeps non-ASCII text (e.g. Cyrillic) readable instead of
# printing it as \uXXXX escape sequences.
print(json.dumps(job_descriptions_array, indent=4, ensure_ascii=False))

In [None]:
# Function to submit resumes and compare them to job descriptions
def gpt_process_resumes(client, job_descriptions, resume, model="gpt-3.5-turbo"):
    """Ask a chat model to score one resume against three job descriptions.

    Args:
        client: object exposing ``chat.completions.create(model=..., messages=...)``.
        job_descriptions: sequence of JSON-serialisable job descriptions. The
            first three entries are used; the prompt asks for them to be
            scored as Oracle_DB_Dev_score, BA_score and DE_score, in that order.
        resume: the resume to evaluate; it is embedded in the prompt via its
            string representation.
        model: model name passed to the client; defaults to the model this
            function previously hard-coded.

    Returns:
        Whatever ``client.chat.completions.create`` returns, unmodified.

    Raises:
        ValueError: if fewer than three job descriptions are supplied.
    """
    # Fail early with a clear message instead of an IndexError from inside the prompt template.
    if len(job_descriptions) < 3:
        raise ValueError(
            f"Expected at least 3 job descriptions, got {len(job_descriptions)}."
        )

    # Create a prompt for the resume and the three job descriptions
    prompt = f"""
    You are an expert in evaluating candidates for specific job positions. 
    I will provide you with a candidate's resume and job descriptions for three different positions. 
    Please evaluate the resume and score how well it matches each job description. 
    For each job description, score from 0 to 10, where 0 means no match and 10 means a perfect match.

    Resume:
    {resume}

    Job Descriptions:
    1. {json.dumps(job_descriptions[0], indent=2)}
    2. {json.dumps(job_descriptions[1], indent=2)}
    3. {json.dumps(job_descriptions[2], indent=2)}

    Provide scores in the following format:
    {{
        "Oracle_DB_Dev_score": X,
        "BA_score": Y,
        "DE_score": Z,
        "total_score": (X + Y + Z) / 3
    }}
    """

    # Call the GPT model with the prepared prompt
    return client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are an expert job evaluator."},
            {"role": "user", "content": prompt}
        ]
    )


In [None]:
def _load_json_object(text):
    """Decode `text` as JSON; if that fails, retry on the outermost ``{...}`` span.

    The retry covers replies where the JSON object is wrapped in a Markdown
    code fence or surrounded by explanatory sentences.
    """
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        start, end = text.find("{"), text.rfind("}")
        if start == -1 or end <= start:
            raise
        return json.loads(text[start:end + 1])


def parse_scores(response, resume):
    """Extract the scores from a chat completion into a one-row DataFrame.

    Args:
        response: completion object; the reply text is read from
            ``response.choices[0].message.content``.
        resume: mapping that contains the key 'Resume ID'.

    Returns:
        On success, a DataFrame with columns "Resume ID",
        "Oracle DB Dev score", "BA score", "DE score" and "Total Score"
        ("N/A" for any score missing from the reply).
        On failure, the string "Error parsing GPT response." (a message is
        also printed); callers distinguish the two cases by type.
    """
    # Parse the GPT response and extract scores
    try:
        # Retrieve content from the response object
        scores = _load_json_object(response.choices[0].message.content)
        results = [{
            "Resume ID": resume['Resume ID'],
            "Oracle DB Dev score": scores.get("Oracle_DB_Dev_score", "N/A"),
            "BA score": scores.get("BA_score", "N/A"),
            "DE score": scores.get("DE_score", "N/A"),
            "Total Score": scores.get("total_score", "N/A")
        }]
        # Return the results as a DataFrame for better visibility
        df = pd.DataFrame(results)
    except (json.JSONDecodeError, KeyError, AttributeError, TypeError, IndexError):
        # TypeError: reply content is None; IndexError: no choices returned;
        # AttributeError: unexpected response shape or a non-object JSON value.
        df = "Error parsing GPT response."
        print("Error parsing GPT response. Skipping this resume.")

    return df

In [None]:
# OpenAI credentials: taken from the OPENAI_API_KEY environment variable when
# it is set, so the key does not have to be saved inside the notebook;
# otherwise the placeholder below must be replaced by hand.
openai.api_key = os.environ.get('OPENAI_API_KEY', 'your_openAI_api_key')
client = openai.Client(api_key=openai.api_key)

In [None]:
# Score every resume with GPT and write the results into the score columns
# that parse_full_resume_data created empty.
# TODO: collect the scores in the list of resumes first and build the
# DataFrame once at the end, instead of updating it cell by cell.

# Column produced by parse_scores -> column of df_resumes that receives it
score_columns = {
    "Oracle DB Dev score": "Oracle DB Dev Score",
    "BA score": "BA Score",
    "DE score": "DE Score",
    "Total Score": "Total Score",
}

for index, row in df_resumes.iterrows():
    resume_data = row.to_dict()

    # One GPT call per resume. A dedicated name is used so the leftover
    # `response` of the OAuth token request is neither tested nor reused here.
    gpt_response = gpt_process_resumes(client=client, job_descriptions=job_descriptions_array, resume=resume_data)
    scores = parse_scores(response=gpt_response, resume=resume_data)

    # parse_scores returns an error string instead of a DataFrame on failure.
    if isinstance(scores, str):
        print(f"Resume {resume_data.get('Resume ID')}: {scores}")
        continue

    # `scores` is a one-row DataFrame; copy each value as a scalar, since a
    # whole DataFrame cannot be stored in a single cell.
    for source_column, target_column in score_columns.items():
        df_resumes.at[index, target_column] = scores.iloc[0][source_column]

In [None]:
# Display the updated DataFrame with scores
#print(resumes_df)