In [72]:
import pandas as pd
import json
import re

In [73]:
# Single place to point the notebook at the project's data folder.
DATA_DIR = '/Users/28stabwoundz/Desktop/Uni/group-project'

# Load the raw resumes and normalise the header names (trimmed, lower case).
resumes = pd.read_csv(f'{DATA_DIR}/Resume.csv')
resumes.columns = [col.strip().lower() for col in resumes.columns]

# Lower-case every text column. Text columns are recognised both as classic
# object columns and as pandas' dedicated string dtype; checking only
# `dtype == object` would silently skip the latter and leave the text in
# mixed case.
for col in resumes.columns:
    col_dtype = resumes[col].dtype
    if col_dtype == object or isinstance(col_dtype, pd.StringDtype):
        resumes[col] = resumes[col].str.lower()
resumes.to_csv(f'{DATA_DIR}/Resume_lower.csv', index=False)

# Job requirements, indexed by trimmed lower-case category so the keys line
# up with the `.strip().lower()` lookup performed in decide().
# Explicit UTF-8 so the file parses the same regardless of platform locale.
with open(f'{DATA_DIR}/job_roles.json', 'r', encoding='utf-8') as f:
    job_roles = json.load(f)
job_dict = {job['category'].strip().lower(): job for job in job_roles}

# Re-read the normalised file; later cells work from this frame.
resumes_lower = pd.read_csv(f'{DATA_DIR}/Resume_lower.csv')

In [74]:
# Preview the first rows of the re-loaded, lower-cased resume table.
resumes_lower.head()


Unnamed: 0,id,resume_str,resume_html,category
0,16852973,hr administrator/marketing associate\...,"<div class=""fontsize fontface vmargins hmargin...",hr
1,22323967,"hr specialist, us hr operations ...","<div class=""fontsize fontface vmargins hmargin...",hr
2,33176873,hr director summary over 2...,"<div class=""fontsize fontface vmargins hmargin...",hr
3,27018550,hr specialist summary dedica...,"<div class=""fontsize fontface vmargins hmargin...",hr
4,17812897,hr manager skill highlights ...,"<div class=""fontsize fontface vmargins hmargin...",hr


In [75]:
# Quick sanity check: which columns were loaded and how many resumes fall
# into each category.
column_names = list(resumes_lower.columns)
print("Columns:", column_names)
print("\nCategory Counts:")
category_counts = resumes_lower['category'].value_counts()
print(category_counts)

Columns: ['id', 'resume_str', 'resume_html', 'category']

Category Counts:
category
information-technology    120
business-development      120
finance                   118
advocate                  118
accountant                118
engineering               118
chef                      118
aviation                  117
fitness                   117
sales                     116
banking                   115
healthcare                115
consultant                115
construction              112
public-relations          111
hr                        110
designer                  107
arts                      103
teacher                   102
apparel                    97
digital-media              96
agriculture                63
automobile                 36
bpo                        22
Name: count, dtype: int64


In [76]:
# Keyword lists that parse_resume() looks for in each lower-cased resume.
degree_keywords = ['md', 'bachelor', 'associate', 'master', 'phd', 'nursing', 'education', 'psychology', 'paramedic', 'criminal justice']
# NOTE(review): 'commercial driver’s license' uses a typographic apostrophe (’);
# resumes typed with a plain ' will not match it -- confirm which form
# job_roles.json uses before adding a variant.
license_keywords = ['medical license', 'nursing license', 'security clearance', 'dog handling license', 'food hygiene certificate',
                    'security license', 'paramedic license', 'fire safety certification', 'commercial driver’s license', 'forklift license']
# Captures the year count from phrases like "5 years of experience",
# "10+ yrs experience" or "1 year experience". The optional "+" and the
# optional plural "s" cover wordings the stricter "<n> years|yrs" form missed.
# Only letters, digits, underscores and whitespace may sit between the unit
# and the word "experience".
experience_pattern = re.compile(r'(\d+)\s*\+?\s*(?:years?|yrs?)[\s\w]*experience', re.IGNORECASE)
# Role names searched for in the resume text.
job_keywords = ['hr', 'healthcare', 'consultant', 'law enforcement', 'advocate', 'teacher', 'chef', 'construction', 'automobile']

In [77]:
# Show the column names of resumes_lower as a plain list.
print(resumes_lower.columns.tolist())

['id', 'resume_str', 'resume_html', 'category']


In [78]:
def _contains_term(text, term):
    """Return True when ``term`` occurs in ``text`` as a whole word or phrase.

    A plain ``term in text`` test also fires inside longer words ('it' in
    "with", 'hr' in "three", 'md' in "cmd", 'care' in "career"), so the term
    must not be directly preceded or followed by a word character. Words of a
    multi-word term may be separated by any run of whitespace. Terms of four
    or more characters may carry a trailing plural "s" ("masters", "teachers");
    shorter terms must match exactly so 'it' does not match "its" and 'hr'
    does not match "hrs".
    """
    phrase = r'\s+'.join(re.escape(part) for part in term.split())
    plural = 's?' if len(term) >= 4 else ''
    return re.search(r'(?<!\w)' + phrase + plural + r'(?!\w)', text) is not None


def parse_resume(resume_text):
    """Extract role, degree, experience, license and skill hints from a resume.

    Parameters
    ----------
    resume_text : str
        Raw resume text. Any non-string value (for example the NaN pandas
        uses for an empty cell) is treated as an empty resume.

    Returns
    -------
    dict
        'role', 'degree', 'licenses' and 'skills' are lists of the matched
        keywords, in the order of their keyword lists; 'experience' is the
        integer captured by the first ``experience_pattern`` match, or 0 when
        there is no match.
    """
    # Missing cells arrive as float NaN, which has no .lower().
    r = resume_text.lower() if isinstance(resume_text, str) else ''

    skill_words = ['kitchen experience','social work', 'care', 'support', 'security', 'teaching', 'maintenance', 'cleaning', 'counseling', 'communication', 'handling', 'training', 'it', 'dog handling']

    info = {
        'role': [job for job in job_keywords if _contains_term(r, job)],
        'degree': [keyword for keyword in degree_keywords if _contains_term(r, keyword)],
        'experience': 0,
        'licenses': [lic for lic in license_keywords if _contains_term(r, lic)],
        'skills': [word for word in skill_words if _contains_term(r, word)]
    }

    # Only the first experience phrase in the text is used.
    exp_match = experience_pattern.search(r)
    if exp_match:
        info['experience'] = int(exp_match.group(1))

    return info

def decide(row, parsed_info):
    """Return the screening outcome for one resume.

    ``row`` is a mapping with a 'category' entry; its trimmed, lower-cased
    value selects the job definition from ``job_dict``. ``parsed_info`` is
    the dict produced by ``parse_resume``. The checks run in a fixed order
    (known role, degree, experience, licenses) and the first failing check
    determines the rejection message; passing all of them yields "Interview".
    """
    category = row['category'].strip().lower()
    if category not in job_dict:
        return "Rejected: Unknown role"
    job = job_dict[category]

    # Degree: passes when any keyword found in the resume appears inside the
    # job's requirement text. Skipped when no degree is required.
    if job['degree_required']:
        requirement_text = job['degree_required'].lower()
        for found_degree in parsed_info['degree']:
            if found_degree in requirement_text:
                break
        else:
            return "Rejected: Missing required degree"

    # Experience: parsed years must reach the job's minimum.
    if parsed_info['experience'] < job['min_experience_years']:
        return f"Rejected: Needs ≥ {job['min_experience_years']} yrs exp"

    # Licenses: every license the job lists must be among the parsed ones.
    missing_licenses = []
    for needed in job['special_licenses']:
        if needed.lower() not in parsed_info['licenses']:
            missing_licenses.append(needed)
    if missing_licenses:
        return "Rejected: Missing license(s)"

    return "Interview"


# Parse every resume, then score each row against its category's requirements.
parsed_info_list = resumes_lower['resume_str'].apply(parse_resume)
resume_records = resumes_lower.to_dict(orient='records')
decisions = []
for record, parsed in zip(resume_records, parsed_info_list):
    decisions.append(decide(record, parsed))
resumes_lower['Decision'] = decisions
resumes_lower['Parsed Info'] = parsed_info_list

# Write the table, including the two new columns, to a new CSV.
evaluation_csv = '/Users/28stabwoundz/Desktop/Uni/group-project/Resume_Evaluation_By_Category.csv'
resumes_lower.to_csv(evaluation_csv, index=False)
print("Evaluation complete. Results saved.")

Evaluation complete. Results saved.
