In [2]:
import pandas as pd
# Read the cleaned, parsed resume table from the working directory
# and preview its first rows.
df = pd.read_csv(filepath_or_buffer='cleaned_parsed_resumes.csv')
df.head(n=5)

Unnamed: 0,name,email,phone,links,role,skills,education,experience_years,summary
0,Zane Clark,ymyeliyi@email.com,+1 0729651222,https://linkedin.com/in/jskknimopu; https://gi...,Data Journalist,tableau; power bi; r; scikit-learn; excel; mac...,Bachelor of Science in Data Management,14.0,A dedicated and results-driven Data Journalist...
1,Penelope Young,frgfpacq@email.com,+1 8121057943,https://linkedin.com/in/wvohfjcbgm; https://gi...,Fraud Analyst,tableau; power bi; r; numpy; scikit-learn; exc...,Master of Science in Quantitative Finance,14.0,A dedicated and results-driven Fraud Analyst w...
2,Zach Lee,tuobnixp@email.com,+1 8690904865,https://linkedin.com/in/trxzrapnqj; https://gi...,Data Analyst,sql; tableau; power bi; r; machine learning; s...,Bachelor of Science in Economics and Data Science,10.0,A dedicated and results-driven Healthcare Data...
3,Liam Carter,wvtixbwk@email.com,+1 4517200462,https://linkedin.com/in/ytsvtwokny; https://gi...,Bi Developer,sql; tableau; power bi; r; pandas; machine lea...,Master of Engineering in Data Engineering,4.0,A dedicated and results-driven BI Developer wi...
4,Henry Hill,iwvoyjov@email.com,+1 4516660758,https://linkedin.com/in/tabgyhvjll; https://gi...,Data Scientist,tableau; power bi; r; excel; java; machine lea...,Bachelor of Engineering in AI and ML,6.0,A dedicated and results-driven Sports Data Sci...


In [6]:
# Show the full summary text of the row labelled 0 (the table preview truncates it).
df.loc[0, 'summary']

'A dedicated and results-driven Data Journalist with over 14 years of experience in data analysis,\nmachine learning, and predictive modeling.\nSkilled in transforming business needs into technical\nsolutions using modern data science tools and practices. Passionate about solving real-world problems\nthrough data-driven approaches and delivering measurable outcomes.'

In [3]:
# Job-description requirements that resumes are scored against.
# All skill/keyword entries are lowercase because the scorer lowercases resume text
# before comparing.
# NOTE(review): education_fields is not read by classify_resume_final as written —
# confirm whether education is meant to contribute to the score.
job_requirements = dict(
    required_skills=["sql", "python", "tableau"],
    preferred_skills=[
        "aws athena",
        "snowflake",
        "git",
        "jira",
        "confluence",
        "agile",
        "scrum",
    ],
    education_fields=[
        "statistics",
        "mathematics",
        "quantitative analysis",
        "computer science",
        "information technology",
    ],
    min_experience_years=3,
    soft_skills_keywords=[
        "communication",
        "english",
        "problem-solving",
        "cross-functional",
        "client-facing",
    ],
)


In [11]:
# Installs the `ace_tools` package into the kernel's environment (no version pinned).
# NOTE(review): no visible cell imports ace_tools — confirm it is actually needed
# before keeping this install; if it is, pin an explicit version.
%pip install ace_tools



In [13]:
# Score a parsed resume against the job requirements and bucket it into a match tier
def classify_resume_final(row, requirements=None):
    """Classify one resume row as "High Match", "Medium Match" or "Low Match".

    The score is the sum of four capped components (10 points maximum):

    * required skills  - 5 points split evenly across ``required_skills``
    * preferred skills - 0.5 per match, capped at 2 points
    * experience       - 2 points at or above ``min_experience_years``,
                         1 point for at least one year but below the minimum
    * soft skills      - 0.25 per keyword found in the summary, capped at 1 point

    Parameters
    ----------
    row : mapping-like
        Must provide ``'skills'`` (a ``;``-separated string), ``'summary'`` (free
        text) and ``'experience_years'`` (a number). A pandas row from
        ``df.apply(..., axis=1)`` or a plain dict both work. Missing values
        (None/NaN) contribute no points.
    requirements : dict, optional
        Requirements with the keys ``required_skills``, ``preferred_skills``,
        ``min_experience_years`` and ``soft_skills_keywords``. Defaults to the
        notebook-level ``job_requirements``.

    Returns
    -------
    str
        "High Match" for a score >= 7, "Medium Match" for a score >= 5,
        otherwise "Low Match".
    """
    if requirements is None:
        # Looked up at call time so edits to the notebook-level config take effect.
        requirements = job_requirements

    def _text(value):
        # A missing cell arrives as None or NaN (NaN is the only float unequal to
        # itself); treat it as empty text rather than the literal string "nan".
        if value is None or (isinstance(value, float) and value != value):
            return ""
        return str(value).lower()

    # Split on a bare ";" and strip each entry so "sql;python" and "sql ; python"
    # are parsed the same way as the canonical "sql; python".
    skills = {part.strip() for part in _text(row['skills']).split(";")}
    skills.discard("")
    summary = _text(row['summary'])

    try:
        exp = float(row['experience_years'])
    except (TypeError, ValueError):
        # Non-numeric or missing experience: NaN fails every comparison below,
        # so it earns no experience points.
        exp = float("nan")

    score = 0.0

    # Required skills: 5 points max, shared evenly between the listed skills.
    # An empty list contributes nothing instead of dividing by zero.
    required = requirements['required_skills']
    if required:
        required_matched = sum(1 for skill in required if skill.lower() in skills)
        score += min(required_matched * (5 / len(required)), 5)

    # Preferred skills: 0.5 each, max 2 points
    preferred_matched = sum(
        0.5 for skill in requirements['preferred_skills'] if skill.lower() in skills
    )
    score += min(preferred_matched, 2.0)

    # Experience: 2 points at or above the configured minimum, 1 point for at
    # least one year of experience that still falls short of it.
    min_years = requirements['min_experience_years']
    if exp >= min_years:
        score += 2
    elif exp >= 1:
        score += 1

    # Soft skills: 0.25 per keyword appearing anywhere in the summary, max 1 point
    soft_matched = sum(
        0.25 for skill in requirements['soft_skills_keywords'] if skill.lower() in summary
    )
    score += min(soft_matched, 1.0)

    # Classification thresholds
    if score >= 7:
        return "High Match"
    elif score >= 5:
        return "Medium Match"
    else:
        return "Low Match"

# Label every resume row with its match tier, one row at a time
match_levels = df.apply(classify_resume_final, axis=1)
df['match_level'] = match_levels

# Tally how many resumes landed in each tier
level_counts = df['match_level'].value_counts()
level_counts.to_dict()


{'Low Match': 532, 'Medium Match': 422, 'High Match': 46}

In [14]:
# Preview the first rows again, now including the match_level column
df.head(n=5)

Unnamed: 0,name,email,phone,links,role,skills,education,experience_years,summary,match_level
0,Zane Clark,ymyeliyi@email.com,+1 0729651222,https://linkedin.com/in/jskknimopu; https://gi...,Data Journalist,tableau; power bi; r; scikit-learn; excel; mac...,Bachelor of Science in Data Management,14.0,A dedicated and results-driven Data Journalist...,Low Match
1,Penelope Young,frgfpacq@email.com,+1 8121057943,https://linkedin.com/in/wvohfjcbgm; https://gi...,Fraud Analyst,tableau; power bi; r; numpy; scikit-learn; exc...,Master of Science in Quantitative Finance,14.0,A dedicated and results-driven Fraud Analyst w...,Low Match
2,Zach Lee,tuobnixp@email.com,+1 8690904865,https://linkedin.com/in/trxzrapnqj; https://gi...,Data Analyst,sql; tableau; power bi; r; machine learning; s...,Bachelor of Science in Economics and Data Science,10.0,A dedicated and results-driven Healthcare Data...,Medium Match
3,Liam Carter,wvtixbwk@email.com,+1 4517200462,https://linkedin.com/in/ytsvtwokny; https://gi...,Bi Developer,sql; tableau; power bi; r; pandas; machine lea...,Master of Engineering in Data Engineering,4.0,A dedicated and results-driven BI Developer wi...,Medium Match
4,Henry Hill,iwvoyjov@email.com,+1 4516660758,https://linkedin.com/in/tabgyhvjll; https://gi...,Data Scientist,tableau; power bi; r; excel; java; machine lea...,Bachelor of Engineering in AI and ML,6.0,A dedicated and results-driven Sports Data Sci...,Low Match


In [15]:
# Write the classified resumes to CSV, leaving out the DataFrame index
df.to_csv(path_or_buf="complete_classified_resumes.csv", index=False)