In [4]:
# Importing the dataset
import pandas as pd
df = pd.read_csv('UpdatedResumeDataSet.csv')
df.head()

Unnamed: 0,Category,Resume
0,Data Science,Skills * Programming Languages: Python (pandas...
1,Data Science,Education Details \r\nMay 2013 to May 2017 B.E...
2,Data Science,"Areas of Interest Deep Learning, Control Syste..."
3,Data Science,Skills â¢ R â¢ Python â¢ SAP HANA â¢ Table...
4,Data Science,"Education Details \r\n MCA YMCAUST, Faridab..."


In [14]:
# Create Resumes Object
resumes = df[['Category', 'Resume']]

In [15]:
# Save interim structure
resumes.to_json(r'C:/Users/lenovo/OneDrive/Desktop/IntelligentResumeScreenerProjectFolder/parsed_data/raw_text.json', orient='records')

In [17]:
# Resume Parsing & NLP Extraction
import re
import spacy
import os
import json

# Load spaCy English NLP model
nlp = spacy.load("en_core_web_sm")

# Predefined list of skills and education keywords
skills_list = ['python', 'sql', 'tableau', 'power bi', 'machine learning']
edu_keywords = ['bachelor', 'bsc', 'msc', 'phd', 'computer science']

# Function to parse a single resume's text
def parse_resume(text):
    doc = nlp(text)

    # Extract possible year phrases
    year_matches = re.findall(r'(\d+(?:\.\d+)?)\+?\s*(?:years?|yrs?)', text.lower())
    experience = None

    if year_matches:
        # Convert to float and take max (handles '3', '2.5', etc.)
        experience = max([float(x) for x in year_matches])
    elif re.search(r'\bfresher\b|\bno experience\b|0\s*(?:years?|yrs?)', text.lower()):
        experience = 0.0

    return {
        'name': doc.ents[0].text if doc.ents else None,
        'email': re.search(r'[\w.-]+@[\w.-]+', text).group(0) if re.search(r'[\w.-]+@[\w.-]+', text) else None,
        'phone': re.search(r'\+?\d[\d\s\-]{8,}\d', text).group(0) if re.search(r'\+?\d[\d\s\-]{8,}\d', text) else None,
        'skills': list({s for s in skills_list if s in text.lower()}),
        'education': list({e for e in edu_keywords if e in text.lower()}),
        'experience': experience  # Could be None, 0.0, or float
    }


# Parse all resumes from DataFrame
parsed = []
for _, row in resumes.iterrows():
    entry = parse_resume(row['Resume'])
    entry['category'] = row['Category']  # Add resume category (e.g., HR, DS)
    parsed.append(entry)

# Define output path
output_dir = r'C:/Users/lenovo/OneDrive/Desktop/IntelligentResumeScreenerProjectFolder/parsed_data'
output_file = os.path.join(output_dir, 'raw_text.json')

# Ensure the folder exists
os.makedirs(output_dir, exist_ok=True)

# Save parsed results as JSON
with open(output_file, "w") as f:
    json.dump(parsed, f, indent=4)

print("Resume parsing complete. Output saved to:", output_file)




Resume parsing complete. Output saved to: C:/Users/lenovo/OneDrive/Desktop/IntelligentResumeScreenerProjectFolder/parsed_data\raw_text.json


In [18]:
jd_dict = {
    "HR": """
        Job Title: Human Resources Executive

        Responsibilities:
        - Recruit and onboard new employees
        - Handle employee relations
        - Maintain HR documentation and compliance
        - Assist in performance reviews and policy enforcement

        Required Skills:
        - Communication
        - HR policies
        - Employee engagement
        - MS Office
    """,

    "Data Science": """
        Job Title: Data Scientist

        Responsibilities:
        - Build machine learning models
        - Analyze large datasets
        - Perform feature engineering and data wrangling
        - Present findings to stakeholders

        Required Skills:
        - Python
        - SQL
        - Machine Learning
        - Data Analysis
        - Tableau / Power BI
    """,

    # Add more categories if applicable
}


In [19]:
# Define the Match Score Function
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def compute_match_score(resume_skills_text, jd_text):
    if not resume_skills_text or not jd_text:
        return 0.0  # Return 0 if either is missing

    tfidf = TfidfVectorizer()
    tfidf_matrix = tfidf.fit_transform([jd_text.lower(), resume_skills_text.lower()])
    similarity = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]
    return round(similarity * 100, 2)


In [20]:
# Apply Matching to Each Resume
import json

# Load parsed resumes if not already in memory
with open("C:/Users/lenovo/OneDrive/Desktop/IntelligentResumeScreenerProjectFolder/parsed_data/raw_text.json", "r") as f:
    parsed_resumes = json.load(f)

# Compute match score
for resume in parsed_resumes:
    jd_text = jd_dict.get(resume.get('category', ''), "")
    resume_skills_text = ' '.join(resume.get('skills', []))
    resume['match_score'] = compute_match_score(resume_skills_text, jd_text)


In [21]:
# Save Ranked Results
# Sort by match_score descending
parsed_resumes = sorted(parsed_resumes, key=lambda x: x.get('match_score', 0), reverse=True)

# Save to ranked_resumes.json
output_path = r"C:/Users/lenovo/OneDrive/Desktop/IntelligentResumeScreenerProjectFolder/parsed_data/ranked_resumes.json"
with open(output_path, "w") as f:
    json.dump(parsed_resumes, f, indent=4)

print("JD matching complete. Ranked resumes saved to:", output_path)

JD matching complete. Ranked resumes saved to: C:/Users/lenovo/OneDrive/Desktop/IntelligentResumeScreenerProjectFolder/parsed_data/ranked_resumes.json


In [22]:
pip install mysql-connector-python

Collecting mysql-connector-pythonNote: you may need to restart the kernel to use updated packages.

  Downloading mysql_connector_python-9.3.0-cp312-cp312-win_amd64.whl.metadata (7.7 kB)
Downloading mysql_connector_python-9.3.0-cp312-cp312-win_amd64.whl (16.4 MB)
   ---------------------------------------- 0.0/16.4 MB ? eta -:--:--
    --------------------------------------- 0.3/16.4 MB ? eta -:--:--
   - -------------------------------------- 0.8/16.4 MB 2.2 MB/s eta 0:00:07
   --- ------------------------------------ 1.6/16.4 MB 2.8 MB/s eta 0:00:06
   ----- ---------------------------------- 2.4/16.4 MB 3.2 MB/s eta 0:00:05
   ------- -------------------------------- 3.1/16.4 MB 3.2 MB/s eta 0:00:05
   ---------- ----------------------------- 4.5/16.4 MB 3.7 MB/s eta 0:00:04
   ------------- -------------------------- 5.5/16.4 MB 3.9 MB/s eta 0:00:03
   ----------------- ---------------------- 7.1/16.4 MB 4.4 MB/s eta 0:00:03
   -------------------- ------------------- 8.4/16.4 MB 4

In [27]:
#  Connect to MySQL from Python
import mysql.connector

# Replace with your actual credentials
db_config = {
    'host': 'localhost',
    'user': 'root',
    'password': 'Dhanoosh@1',
    'database': 'resume_screener'
}

In [28]:
# Load and Insert Ranked Resume Data
import json
import mysql.connector

# Load parsed & ranked data
with open(r"C:/Users/lenovo/OneDrive/Desktop/IntelligentResumeScreenerProjectFolder/parsed_data/ranked_resumes.json", "r") as f:
    resumes = json.load(f)

# Connect to MySQL
conn = mysql.connector.connect(**db_config)
cursor = conn.cursor()

# Insert each resume into the MySQL table
for r in resumes:
    cursor.execute("""
        INSERT INTO resumes (name, email, phone, skills, education, experience, category, match_score)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
    """, (
        r.get('name'),
        r.get('email'),
        r.get('phone'),
        ', '.join(r.get('skills') or []),
        ', '.join(r.get('education') or []),
        r.get('experience'),
        r.get('category'),
        r.get('match_score')
    ))

# Commit & close
conn.commit()
cursor.close()
conn.close()

print("All resume data inserted into MySQL database.")


All resume data inserted into MySQL database.


In [29]:
# Query Examples in Python
conn = mysql.connector.connect(**db_config)
cursor = conn.cursor()

cursor.execute("SELECT name, match_score FROM resumes ORDER BY match_score DESC LIMIT 5")
for row in cursor.fetchall():
    print(row)

cursor.close()
conn.close()


('Sql', 36.34)
('Tableau', 36.34)
('Sql', 36.34)
('Tableau', 36.34)
('Sql', 36.34)


In [30]:
# Candidates with Python & Power BI
conn = mysql.connector.connect(**db_config)
cursor = conn.cursor()

cursor.execute("""
    SELECT name, skills, match_score 
    FROM resumes 
    WHERE skills LIKE '%python%' AND skills LIKE '%power bi%' 
    ORDER BY match_score DESC
""")
for row in cursor.fetchall():
    print(row)

cursor.close()
conn.close()


In [2]:
!pip install pymupdf docx2txt

Collecting pymupdf
  Downloading pymupdf-1.26.1-cp39-abi3-win_amd64.whl.metadata (3.4 kB)
Collecting docx2txt
  Downloading docx2txt-0.9-py3-none-any.whl.metadata (529 bytes)
Downloading pymupdf-1.26.1-cp39-abi3-win_amd64.whl (18.5 MB)
   ---------------------------------------- 0.0/18.5 MB ? eta -:--:--
    --------------------------------------- 0.3/18.5 MB ? eta -:--:--
   - -------------------------------------- 0.5/18.5 MB 1.9 MB/s eta 0:00:10
   -- ------------------------------------- 1.0/18.5 MB 1.7 MB/s eta 0:00:11
   --- ------------------------------------ 1.6/18.5 MB 2.2 MB/s eta 0:00:08
   ----- ---------------------------------- 2.6/18.5 MB 2.6 MB/s eta 0:00:07
   ------- -------------------------------- 3.7/18.5 MB 3.0 MB/s eta 0:00:05
   --------- ------------------------------ 4.5/18.5 MB 3.2 MB/s eta 0:00:05
   ----------- ---------------------------- 5.2/18.5 MB 3.4 MB/s eta 0:00:04
   -------------- ------------------------- 6.6/18.5 MB 3.6 MB/s eta 0:00:04
   -----