In [1]:
import streamlit as st
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import google.generativeai as genai
import os
import json
import numpy as np
import requests
from bs4 import BeautifulSoup
import re


  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def load_data(csv_path="merged_assessments.csv"):
    """Load the assessment catalogue, derive skill tags, and embed each row.

    Steps performed:
      1. Read the CSV at ``csv_path``.
      2. Coerce ``duration`` to numeric (unparseable values become 0) and strip
         double quotes and the literal ``Remote,Testing`` from ``test_type``.
      3. Tag each row with the skills detected in its ``name`` (``skills`` column).
      4. Build a ``combined_text`` sentence per row and encode it with the
         ``all-MiniLM-L6-v2`` sentence-transformer.

    Args:
        csv_path: Path of the CSV to read. Defaults to the file the notebook
            originally hard-coded, so existing ``load_data()`` calls are unchanged.

    Returns:
        tuple: ``(df, model, embeddings)`` where ``df`` is the enriched frame,
        ``model`` is the ``SentenceTransformer`` instance and ``embeddings`` is
        whatever ``model.encode`` returns for the list of ``combined_text``
        strings (one entry per row, in row order).
    """
    df = pd.read_csv(csv_path)

    # Clean and enhance data
    df['duration'] = pd.to_numeric(df['duration'], errors='coerce').fillna(0)
    # Both patterns are literals, so disable regex interpretation explicitly.
    df['test_type'] = (
        df['test_type']
        .str.replace('"', '', regex=False)
        .str.replace('Remote,Testing', '', regex=False)
    )

    # Create technical skills dictionary from assessment names
    # This is crucial for matching skills mentioned in queries
    skill_keywords = {
        'python': ['python'],
        'sql': ['sql', 'oracle dba', 'oracle pl/sql', 'sql server', 'ssas', 'ssis', 'ssrs'],
        'javascript': ['javascript', 'js', 'front end javascript', 'automata front end javascript'],
        'java': ['java'],
        'c#': ['c#', 'csharp', '.net'],
        'c': ['c'],
        'html': ['html', 'html5'],
        'c++': ['c++', 'cplusplus'],
        'cloud computing': ['cloud', 'aws', 'azure', 'gcp'],
        'data analysis': ['data analysis', 'data analytics', 'data science'],
        'machine learning': ['machine learning', 'ml', 'AI'],
        'artificial intelligence': ['artificial intelligence', 'ai'],
        'cybersecurity': ['cybersecurity', 'cyber security'],
        'networking': ['networking', 'network'],
        'web development': ['web development', 'web dev'],
        'mobile development': ['mobile development', 'mobile dev'],
        'devops': ['devops', 'dev ops'],
        'agile': ['agile', 'scrum'],
        'nlp': ['nlp', 'natural language processing', 'text analysis'],
        'project management': ['project management', 'pm'],
        'database management': ['database management', 'dbms'],
        # 'AI' used to be listed here as well; it is unrelated to data
        # warehousing (copy-paste slip), so it has been removed.
        'data warehousing': ['data warehousing', 'data warehouse'],
        # Add more skills as needed
    }

    # Create a skill mapping for each assessment
    df['skills'] = df['name'].apply(lambda name: extract_skills(name, skill_keywords))

    # Create enhanced embeddings text with skill emphasis
    df['combined_text'] = df.apply(_build_combined_text, axis=1)

    model = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = model.encode(df['combined_text'].tolist())
    return df, model, embeddings


def _build_combined_text(row):
    """Assemble the sentence that gets embedded for one assessment row.

    The previous implementation chained adjacent f-strings around a conditional
    expression. Adjacent string literals are concatenated before the conditional
    is applied, so rows with skills lost the Tests/Duration/Remote/Adaptive
    parts and rows without skills lost the assessment name. Building the parts
    explicitly makes only the ``Skills:`` segment optional.
    """
    parts = [f"Technical Assessment: {row['name']} "]
    if row['skills']:
        parts.append(f"Skills: {', '.join(row['skills'])} ")
    parts.append(f"Tests: {row['test_type']} ")
    parts.append(f"Duration: {row['duration']}mins ")
    parts.append(f"Remote: {row['remote_support']} ")
    parts.append(f"Adaptive: {row['adaptive_support']}")
    return "".join(parts)

def extract_skills(name, skill_keywords):
    """Return the skills whose keywords occur in an assessment name.

    Matching is case-insensitive and token-based rather than substring-based:
    a keyword only counts when it is not glued to neighbouring word characters.
    Plain substring matching tagged unrelated names (the keyword ``'c'`` hit
    every name containing the letter c, ``'ml'`` hit "html", ``'pm'`` hit
    "development", ``'java'`` hit "javascript"). A consequence is that fused
    names such as "AngularJS" need their own keyword entry to be detected.

    Args:
        name: Assessment name. Non-string values (for example NaN coming from
            an empty CSV cell) produce an empty list instead of raising.
        skill_keywords: Mapping of skill label to an iterable of keyword
            strings; keywords may be given in any letter case.

    Returns:
        list: Skill labels, in the iteration order of ``skill_keywords``, for
        which at least one keyword matched.
    """
    if not isinstance(name, str):
        return []
    name_lower = name.lower()

    def keyword_present(keyword):
        keyword = keyword.lower()
        if not keyword:
            return False
        pattern = re.escape(keyword)
        # Only anchor the sides that are alphanumeric: '.net' must still match
        # inside "asp.net", and 'c++' has no word character at its end.
        if keyword[0].isalnum():
            pattern = r'(?<!\w)' + pattern
        if keyword[-1].isalnum():
            # '+' and '#' are excluded too so that 'c' does not fire on
            # "c++" or "c#".
            pattern = pattern + r'(?![\w+#])'
        return re.search(pattern, name_lower) is not None

    return [
        skill
        for skill, keywords in skill_keywords.items()
        if any(keyword_present(keyword) for keyword in keywords)
    ]

In [4]:
# Build the cleaned catalogue frame, the embedding model, and the per-row embeddings.
(df, model, embeddings) = load_data()

In [17]:
# Label each row as "<index> <name>" by joining the stringified index and the name.
df['name_idx'] = df.index.astype(str) + (' ' + df['name'])

In [25]:
# Drop the embedding helper text before inspecting/saving the frame.
# errors='ignore' keeps this cell re-runnable: without it a second execution
# raises KeyError because the column has already been removed.
df = df.drop(columns=['combined_text'], errors='ignore')

In [22]:
# Single-letter test-type codes found in `test_type` -> readable category names.
TEST_TYPE_MAP = {
    'A': 'Ability & Aptitude',
    'B': 'Biodata & Situational Judgement',
    'C': 'Competencies',
    'D': 'Development & 360',
    'E': 'Assessment Exercises',
    'K': 'Knowledge & Skills',
    'P': 'Personality & Behavior',
    'S': 'Simulations'
}


def _map_test_types(codes):
    """Translate a comma-separated code string into readable category names.

    Tokens are stripped of surrounding whitespace before lookup; empty tokens
    (such as the one produced by a trailing comma) and unknown codes are
    skipped. Non-string input, e.g. NaN from a missing cell, yields '' instead
    of raising AttributeError on ``.split``.
    """
    if not isinstance(codes, str):
        return ''
    tokens = (token.strip() for token in codes.split(','))
    return ', '.join(TEST_TYPE_MAP[token] for token in tokens if token in TEST_TYPE_MAP)


df['test_type_mapped'] = df['test_type'].apply(_map_test_types)

In [26]:
# Preview the first five rows of the frame.
df.head(n=5)

Unnamed: 0,name,url,remote_support,adaptive_support,duration,test_type,name_idx,test_type_mapped
0,Account Manager Solution,https://www.shl.com/solutions/products/product...,Yes,Yes,49.0,"C,P,A,B,",0 Account Manager Solution,"Competencies, Personality & Behavior, Ability ..."
1,Administrative Professional - Short Form,https://www.shl.com/solutions/products/product...,Yes,Yes,36.0,"A,K,P,",1 Administrative Professional - Short Form,"Ability & Aptitude, Knowledge & Skills, Person..."
2,Agency Manager Solution,https://www.shl.com/solutions/products/product...,Yes,Yes,51.0,"A,B,P,S,",2 Agency Manager Solution,"Ability & Aptitude, Biodata & Situational Judg..."
3,Apprentice + 8.0 Job Focused Assessment,https://www.shl.com/solutions/products/product...,Yes,No,30.0,"B,P,",3 Apprentice + 8.0 Job Focused Assessment,"Biodata & Situational Judgement, Personality &..."
4,Apprentice 8.0 Job Focused Assessment,https://www.shl.com/solutions/products/product...,Yes,No,20.0,"B,P,",4 Apprentice 8.0 Job Focused Assessment,"Biodata & Situational Judgement, Personality &..."


In [27]:
# Write the current frame to a new CSV file, leaving the index out.
df.to_csv(path_or_buf="merged_assessments_1.csv", index=False)