In [None]:
# Core libraries
import pandas as pd
import numpy as np
import io
import re
import matplotlib.pyplot as plt
from collections import Counter

# NLP & TF-IDF
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# PDF extraction
import pdfplumber

# Widgets
import ipywidgets as widgets
from IPython.display import display

# Download NLTK stopwords
nltk.download('stopwords')
# Set of English stop words that clean_text() filters out.
stop_words = set(stopwords.words('english'))

# Matplotlib style
# The 'seaborn' style sheet was renamed to 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name was later removed, so asking for 'seaborn' unconditionally raises
# on current releases. Use whichever spelling this installation ships; if
# neither is present the Matplotlib defaults are left untouched.
for _style_name in ('seaborn-v0_8', 'seaborn'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break


[nltk_data] Downloading package stopwords to C:\Users\RIYA
[nltk_data]     PARIRA\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


# Text extraction and cleaning functions

In [None]:
# Extracting the text from PDF
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of all pages of a PDF.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts; it is passed through
            unchanged.

    Returns:
        str: The text of each page followed by a newline, in page order.
        Pages for which ``extract_text()`` returns nothing (``None`` or an
        empty string) contribute nothing, so the result may be ``""``.
    """
    page_chunks = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            extracted = page.extract_text()
            if not extracted:
                # No extractable text on this page.
                continue
            page_chunks.append(extracted + "\n")
    return "".join(page_chunks)

# Cleaning the text for matching
def clean_text(text):
    """Normalise free text into a space-separated string of keyword tokens.

    Steps, in order: lowercase the text, replace every character that is not
    an ASCII letter, digit or whitespace with a space, collapse whitespace
    runs, and drop tokens found in the module-level ``stop_words`` set.

    Args:
        text (str): Raw text to normalise.

    Returns:
        str: Remaining tokens joined by single spaces (``""`` if none remain).
    """
    lowered = text.lower()
    # Punctuation becomes a separator, so "scikit-learn" yields two tokens.
    alnum_only = re.sub(r'[^a-zA-Z0-9\s]', ' ', lowered)
    collapsed = re.sub(r'\s+', ' ', alnum_only).strip()
    kept_tokens = filter(lambda token: token not in stop_words, collapsed.split())
    return " ".join(kept_tokens)


# Scoring Functions

In [None]:
# Compute the missing skills
def get_missing_skills(resume_text, jd_skills):
    """Return the job-description skills that the resume does not mention.

    Both the resume and each skill are normalised with ``clean_text`` and
    compared as sets of words. A skill counts as present only when *every*
    one of its words occurs in the resume. Previously a single shared word
    was enough, so "Machine Learning" was satisfied by "machine operator"
    and "Data Analysis" by "data entry", which inflated match scores.

    Args:
        resume_text (str): Raw resume text.
        jd_skills (iterable of str): Skills required by the job description.

    Returns:
        list of str: The skills from ``jd_skills`` (original spelling, input
        order) that are not fully covered by the resume.
    """
    resume_words = set(clean_text(resume_text).split())
    missing = []
    for skill in jd_skills:
        skill_words = set(clean_text(skill).split())
        # A skill that cleans down to no words (only punctuation or stop
        # words) cannot be evidenced by the resume, so it stays "missing";
        # the explicit check is needed because an empty set is a subset of
        # every set.
        if not skill_words or not skill_words <= resume_words:
            missing.append(skill)
    return missing

# Compute the skill match %
def compute_skill_match_score(resume_text, jd_skills):
    """Return the percentage of job-description skills found in the resume.

    Args:
        resume_text (str): Raw resume text.
        jd_skills (list of str): Skills required by the job description.

    Returns:
        float: ``present / total * 100`` rounded to two decimals, where
        "present" means not reported by ``get_missing_skills``. Returns
        ``0.0`` when ``jd_skills`` is empty instead of dividing by zero.
    """
    total_skills = len(jd_skills)
    if total_skills == 0:
        # Nothing to match against; avoid ZeroDivisionError.
        return 0.0
    missing = get_missing_skills(resume_text, jd_skills)
    present_skills = total_skills - len(missing)
    return round((present_skills / total_skills) * 100, 2)


# Multi-Resume Processing

In [4]:
def process_multiple_resumes(jd_skills, uploaded_files):
    """Score every uploaded resume against the required skills and rank them.

    Args:
        jd_skills (list of str): Skills required by the job description.
        uploaded_files (iterable of dict): One entry per uploaded file, as
            produced by ``ipywidgets.FileUpload``. Both layouts are accepted:
            ipywidgets 7 (``{'metadata': {'name': ...}, 'content': bytes}``)
            and ipywidgets 8 (``{'name': ..., 'content': memoryview}``).

    Returns:
        pandas.DataFrame: One row per resume, sorted by descending
        "Skill Match (%)", with columns "Candidate", "Skill Match (%)",
        "Matched Skills" and "Missing Skills". The columns are present even
        when no files were supplied, so callers can index them safely.
    """
    columns = ["Candidate", "Skill Match (%)", "Matched Skills", "Missing Skills"]
    results = []
    for file_info in uploaded_files:
        # ipywidgets 7 nests the file name under 'metadata'; ipywidgets 8
        # stores it at the top level of the entry.
        if 'metadata' in file_info:
            filename = file_info['metadata']['name']
        else:
            filename = file_info['name']
        # ipywidgets 8 delivers a memoryview, which has no .decode();
        # normalise to bytes (a no-op copy for ipywidgets 7 bytes).
        content = bytes(file_info['content'])

        # Extract text
        if filename.lower().endswith('.pdf'):
            resume_text = extract_text_from_pdf(io.BytesIO(content))
        else:
            # A stray non-UTF-8 byte should not abort the whole batch.
            resume_text = content.decode('utf-8', errors='replace')

        # Skill match
        skill_score = compute_skill_match_score(resume_text, jd_skills)
        missing = get_missing_skills(resume_text, jd_skills)
        missing_lookup = set(missing)
        matched = [skill for skill in jd_skills if skill not in missing_lookup]

        results.append({
            "Candidate": filename,
            "Skill Match (%)": skill_score,
            "Matched Skills": ", ".join(matched),
            "Missing Skills": ", ".join(missing)
        })

    # Rank by Skill Match (stable sort: ties keep upload order)
    results.sort(key=lambda row: row["Skill Match (%)"], reverse=True)
    return pd.DataFrame(results, columns=columns)


# Visualization Functions

In [5]:
def plot_two_bar_charts_side_by_side(df_results):
    """
    Show two bar charts of 'Skill Match (%)' per candidate in one figure.

    The left panel ("Skill Coverage per Resume") follows the row order of
    df_results; the right panel ("Resume Ranking") uses the same data sorted
    by descending score. Both y-axes are fixed to the 0-100 range.

    df_results must provide the columns 'Candidate' and 'Skill Match (%)'.
    The figure is displayed with plt.show(); nothing is returned.
    """
    score_column = 'Skill Match (%)'
    ranked = df_results.sort_values(by=score_column, ascending=False)

    # (title, x tick labels, bar heights, bar colour) for each panel, left to right
    panels = [
        ("Skill Coverage per Resume", df_results['Candidate'], df_results[score_column], '#4CAF50'),
        ("Resume Ranking", ranked['Candidate'], ranked[score_column], '#2196F3'),
    ]

    fig, axes = plt.subplots(1, 2, figsize=(16,6))

    for ax, (title, names, scores, bar_color) in zip(axes, panels):
        positions = np.arange(len(names))
        ax.bar(positions, scores, color=bar_color)
        ax.set_title(title)
        ax.set_ylabel("Skill Match (%)")
        ax.set_xticks(positions)
        ax.set_xticklabels(names, rotation=45, ha='right')
        ax.set_ylim(0, 100)

    plt.tight_layout()
    plt.show()

# Widgets & Interactive Dashboard

In [6]:
# Free-text box for the required skills, pre-filled with an example list.
# The click handler splits this value on commas.
jd_input = widgets.Textarea(
    value='Python, Machine Learning, SQL, Django, REST APIs, Data Analysis, AWS, Azure, TensorFlow, NLP, Scikit-learn',
    placeholder='Enter JD skills here, separated by commas',
    description='Job Description:',
    layout=widgets.Layout(width='80%', height='100px')
)
# Resume picker: configured for .pdf/.txt files, several files per upload.
upload = widgets.FileUpload(accept='.pdf,.txt', multiple=True)
process_button = widgets.Button(description="Process Resumes")
# Area the click handler writes its messages, table and charts into.
output = widgets.Output()
display(jd_input, upload, process_button, output)

def combined_process_two_graphs(b):
    """Click handler: score the uploaded resumes and show a table plus charts.

    Reads the comma-separated skills from ``jd_input`` and the files from
    ``upload``, renders the ranked results table and the two bar charts
    inside ``output``, then empties the upload widget for the next run.
    Prints a prompt and returns early when skills or files are missing.

    Args:
        b: The button instance passed by ``Button.on_click`` (unused).
    """
    with output:
        output.clear_output()
        jd_skills_list = [skill.strip() for skill in jd_input.value.split(",") if skill.strip()]
        if not jd_skills_list:
            print("Please enter Job Description skills!")
            return
        if not upload.value:
            print("Please upload at least one resume!")
            return

        # ipywidgets 7 exposes uploads as a dict {filename: file_info};
        # ipywidgets 8 exposes a tuple of file_info entries, which has
        # neither .values() nor .clear().
        legacy_upload_api = isinstance(upload.value, dict)
        if legacy_upload_api:
            uploaded_files = list(upload.value.values())
        else:
            uploaded_files = list(upload.value)

        df_results = process_multiple_resumes(jd_skills_list, uploaded_files)
        display(df_results)

        # Two bar charts side by side
        plot_two_bar_charts_side_by_side(df_results)

        # Clear upload for next run
        if legacy_upload_api:
            # In-place clear plus counter reset is the ipywidgets 7 workaround.
            upload.value.clear()
            upload._counter = 0
        else:
            upload.value = ()

# Run the handler whenever the button is clicked.
process_button.on_click(combined_process_two_graphs)

Textarea(value='Python, Machine Learning, SQL, Django, REST APIs, Data Analysis, AWS, Azure, TensorFlow, NLP, …

FileUpload(value={}, accept='.pdf,.txt', description='Upload', multiple=True)

Button(description='Process Resumes', style=ButtonStyle())

Output()