In [None]:
# Install the third-party packages imported below (notebook shell command, not plain Python).
!pip install google-generativeai pandas python-dotenv

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import google.generativeai as genai
import os
import json
import re
from dotenv import load_dotenv

# Pull variables from a local .env file (if present) into the process environment
load_dotenv()

# Hand the Gemini API key to the client library.
# The key is expected under GEMINI_API_KEY, either in .env or the shell environment.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
genai.configure(api_key=GEMINI_API_KEY)

print("Libraries imported and Gemini API configured.")

In [None]:
# Read the assessment data from the CSV file in the working directory
df = pd.read_csv('shl_assessments.csv')

# Summarise the dataset: dimensions, column names, and the first few rows
print(f"Dataset shape: {df.shape}")
print(f"\nColumns: {df.columns.tolist()}")
print("\nSample data:")
display(df.head())

In [None]:
# Function to extract structured information from natural language queries using Gemini
def extract_query_info(query_text):
    """
    Use Gemini to extract structured information from a natural language query.

    The model is asked to answer with JSON. The reply is cleaned (markdown
    code fences and any text around the JSON object are removed), parsed,
    and then normalized so that every expected field is present with a
    usable type. If the reply cannot be read or parsed, a structure made of
    default values is returned instead.

    Errors raised by the API call itself (``generate_content``) are not
    caught here and propagate to the caller.

    Args:
        query_text (str): Natural language query or job description

    Returns:
        dict: Structured information with the keys technical_skills,
            soft_skills, job_roles, seniority_level, duration_constraint,
            must_have_skills, nice_to_have_skills, remote_testing_needed and
            expanded_keywords. List-valued fields are always lists of
            strings. Extra keys returned by the model are kept.
    """
    list_fields = (
        "technical_skills",
        "soft_skills",
        "job_roles",
        "must_have_skills",
        "nice_to_have_skills",
        "expanded_keywords",
    )

    def default_info():
        # Built fresh on every call so callers never share mutable lists.
        return {
            "technical_skills": [],
            "soft_skills": [],
            "job_roles": [],
            "seniority_level": "not specified",
            "duration_constraint": None,
            "must_have_skills": [],
            "nice_to_have_skills": [],
            "remote_testing_needed": False,
            "expanded_keywords": []
        }

    def as_str_list(value):
        # Coerce whatever the model produced into a list of strings so that
        # downstream ' '.join(...) / ', '.join(...) calls cannot fail.
        if value is None:
            return []
        if isinstance(value, str):
            return [value] if value.strip() else []
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value if item is not None]
        return [str(value)]

    prompt = f"""
    You are an expert in job role analysis and assessment selection. Extract structured information from this job description or query:
    
    "{query_text}"
    
    Provide the following information in JSON format:
    
    1. technical_skills: List of specific technical skills mentioned (languages, tools, platforms)
    2. soft_skills: List of soft/behavioral skills and traits mentioned
    3. job_roles: List of job titles or roles this is for
    4. seniority_level: Extract the seniority level (entry, mid, senior, leadership)
    5. duration_constraint: Maximum minutes allowed for assessments (if mentioned)
    6. must_have_skills: Skills that are absolutely required
    7. nice_to_have_skills: Skills that are preferred but not required
    8. remote_testing_needed: Boolean indicating if remote testing capability is needed
    9. expanded_keywords: Additional relevant skills/keywords not explicitly mentioned but relevant to this role
    
    Only return valid JSON with these fields and nothing else.
    """

    model = genai.GenerativeModel('gemini-pro')
    response = model.generate_content(prompt)

    # Read the text once, inside the try block: the accessor itself can raise
    # (e.g. for a blocked/empty response), and the error handler must not
    # touch it again or the fallback below would never be returned.
    raw_text = None
    try:
        raw_text = response.text
        json_str = raw_text.strip()

        # Strip a markdown code fence if present (with or without a language
        # tag, any letter case, closing fence optional).
        fenced = re.search(
            r"```(?:json)?\s*(.*?)(?:```|$)", json_str, re.DOTALL | re.IGNORECASE
        )
        if fenced:
            json_str = fenced.group(1).strip()

        # Drop any chatter before/after the JSON object itself.
        if not json_str.startswith("{"):
            start = json_str.find("{")
            end = json_str.rfind("}")
            if start != -1 and end > start:
                json_str = json_str[start:end + 1]

        parsed = json.loads(json_str)
        if not isinstance(parsed, dict):
            raise ValueError(
                f"expected a JSON object, got {type(parsed).__name__}"
            )
    except Exception as e:
        # Deliberately broad: this is a best-effort fallback, matching the
        # original behaviour of never failing on an unusable model reply.
        print(f"Error parsing Gemini response: {str(e)}")
        print(f"Raw response: {raw_text}")
        # Return a basic structure if parsing fails
        return default_info()

    # Fill in missing/null fields and coerce list fields, so callers can
    # index every documented key without further checks.
    result = dict(parsed)
    for key, default in default_info().items():
        if key in list_fields:
            result[key] = as_str_list(result.get(key))
        elif result.get(key) is None:
            result[key] = default
    return result

# Function to generate a weighted query text for vector search
def generate_weighted_query(query_info):
    """
    Generate a weighted query text that emphasizes important aspects.

    Terms are emitted in this order, separated by single spaces: seniority
    level, job roles, must-have skills (each repeated twice to give them
    more weight), technical skills, soft skills, nice-to-have skills and
    expanded keywords.

    Fields that are missing, ``None`` or empty are skipped, so a partially
    filled ``query_info`` neither raises nor leaves stray "None" tokens or
    doubled spaces in the result.

    Args:
        query_info (dict): Structured query information

    Returns:
        str: Weighted query for vector search (empty string if no usable
            field is present)
    """
    def terms(key):
        # Normalize one field to a list of non-empty strings.
        value = query_info.get(key)
        if not value:
            return []
        if isinstance(value, str):
            return [value]
        if isinstance(value, (list, tuple)):
            return [str(item) for item in value if item]
        return [str(value)]

    parts = []

    # Start with the seniority and job roles, which are most important
    parts.extend(terms('seniority_level'))
    parts.extend(terms('job_roles'))

    # Add must-have skills with more repetition (higher weight)
    for skill in terms('must_have_skills'):
        parts.extend((skill, skill))

    # Remaining sections carry normal weight and are appended once each
    for key in ('technical_skills', 'soft_skills',
                'nice_to_have_skills', 'expanded_keywords'):
        parts.extend(terms(key))

    return ' '.join(parts)

# Function to explain why an assessment was recommended
def generate_explanation(assessment, query_info):
    """
    Ask Gemini for a short justification of an assessment recommendation.

    The prompt combines the assessment's name, test type, duration, remote
    testing and adaptive/IRT fields with the roles, seniority, skills and
    duration constraint from the structured query.

    Args:
        assessment (dict): Assessment information; must provide the keys
            'name', 'test_type', 'duration', 'remote_testing' and
            'adaptive_irt'
        query_info (dict): Structured query information; must provide the
            keys 'job_roles', 'seniority_level', 'technical_skills',
            'soft_skills' and 'duration_constraint'

    Returns:
        str: The model's reply with surrounding whitespace removed
    """
    prompt = f"""
    You are an expert in HR assessment selection. Explain why this assessment would be recommended for this job role.
    
    Assessment:
    - Name: {assessment['name']}
    - Test Type: {assessment['test_type']}
    - Duration: {assessment['duration']}
    - Remote Testing: {assessment['remote_testing']}
    - Adaptive/IRT: {assessment['adaptive_irt']}
    
    Job Information:
    - Roles: {query_info['job_roles']}
    - Seniority: {query_info['seniority_level']}
    - Technical Skills: {query_info['technical_skills']}
    - Soft Skills: {query_info['soft_skills']}
    - Duration Constraint: {query_info['duration_constraint']}
    
    Provide a concise 1-2 sentence explanation of why this assessment is a good match.
    """

    # Single round trip to the model; only the trimmed text is of interest
    gemini = genai.GenerativeModel('gemini-pro')
    return gemini.generate_content(prompt).text.strip()

# Progress marker: printed once the helper definitions in this cell have executed.
print("Helper functions defined.")

In [None]:
def test_query_understanding():
    """
    Test function for the LLM query understanding component.

    Runs a fixed set of sample queries through extract_query_info and
    generate_weighted_query and prints the extracted fields plus the first
    100 characters of the weighted query for each one. This is a manual
    smoke test: results are printed, nothing is asserted or returned.

    An error while processing one query is reported and does not stop the
    remaining queries from running.
    """
    # Define test queries
    test_queries = [
        "I am hiring for Java developers who can also collaborate effectively with my business teams. Looking for an assessment(s) that can be completed in 40 minutes.",
        "Looking to hire mid-level professionals who are proficient in Python, SQL and Java Script. Need an assessment package that can test all skills with max duration of 60 minutes.",
        "Need to assess leadership potential and decision-making skills for a senior management role."
    ]

    print("=== TESTING LLM QUERY UNDERSTANDING ===\n")

    for i, query in enumerate(test_queries, start=1):
        print(f"Test Query {i}:")
        print(f"'{query}'")

        try:
            # Extract structured info
            query_info = extract_query_info(query)

            # Print key results. Fields are read defensively so one missing
            # or null field does not hide the rest of the report.
            print("\nExtracted Information:")
            print(f"• Technical Skills: {', '.join(query_info.get('technical_skills') or [])}")
            print(f"• Soft Skills: {', '.join(query_info.get('soft_skills') or [])}")
            print(f"• Job Roles: {', '.join(query_info.get('job_roles') or [])}")
            print(f"• Seniority: {query_info.get('seniority_level')}")

            # Avoid printing "None minutes" when the query gave no time limit
            duration = query_info.get('duration_constraint')
            if duration is None:
                duration_text = "not specified"
            else:
                duration_text = f"{duration} minutes"
            print(f"• Duration Constraint: {duration_text}")

            # Generate weighted query
            weighted_query = generate_weighted_query(query_info)
            print("\nWeighted Query:")
            print(weighted_query[:100] + "..." if len(weighted_query) > 100 else weighted_query)

        except Exception as e:
            print(f"Error processing query: {str(e)}")
        finally:
            # Always close the section so the next query's output is clearly
            # separated, even after an error.
            print("\n" + "-"*50 + "\n")

    print("=== TEST COMPLETE ===")

# Run the test (calls extract_query_info for each sample query, so it makes live Gemini API requests)
test_query_understanding()