In [None]:
# Install required libraries

In [None]:
!pip install pdfplumber scikit-learn  # Install pdfplumber for PDF text extraction and scikit-learn for machine learning




In [None]:
# Import necessary libraries

In [None]:
import pdfplumber  # Library for extracting text from PDF files

In [None]:
import io  # Library for handling in-memory byte streams

In [None]:
import numpy as np  # Library for numerical operations

In [None]:
import pandas as pd  # Library for data manipulation and analysis

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer  # Tool for converting text data to TF-IDF features


In [None]:
from sklearn.metrics.pairwise import cosine_similarity  # Tool for computing similarity between text data


In [None]:
from google.colab import files  # Google Colab utility for file upload and download

In [None]:
from IPython.display import display, HTML  # Tools for displaying HTML content in Colab

In [None]:
import os  # Library for interacting with the operating system

In [None]:
import shutil  # Library for high-level file operations, like creating zip files

In [None]:
# Function to upload PDF files

In [None]:
def upload_files():
    """Open Google Colab's file upload widget and hand back its result.

    Returns:
        Whatever ``files.upload()`` returns. The rest of this notebook uses
        it as a mapping of uploaded file name to file content.
    """
    return files.upload()

In [None]:
# Function to extract text from a PDF file

In [None]:
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of an in-memory PDF.

    Args:
        pdf_file: Raw PDF content as a bytes-like object. It is wrapped in
            ``io.BytesIO`` before being passed to ``pdfplumber.open``.

    Returns:
        The text of all pages that produced any text, joined with newlines.
        An empty string is returned when no page has text or when extraction
        fails for any reason (the error is printed, not raised).
    """
    try:
        with pdfplumber.open(io.BytesIO(pdf_file)) as pdf:
            # extract_text() can yield None/"" for a page without text.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
        # Join pages with a newline: plain concatenation would fuse the last
        # word of one page with the first word of the next, creating bogus
        # tokens for the downstream TF-IDF step.
        return "\n".join(text for text in page_texts if text)
    except Exception as e:
        # Deliberately best-effort: one unreadable PDF must not abort the
        # whole analysis, so report the problem and fall back to no text.
        print(f"An error occurred while extracting text: {e}")
        return ""

In [None]:
# Function to analyze resumes based on job description

In [None]:
def analyze_resumes(job_description, pdf_files, pasted_resumes):
    """Rank resumes by TF-IDF cosine similarity to a job description.

    Args:
        job_description: Text of the job description to compare against.
        pdf_files: Mapping of file name to raw PDF content; each value is
            passed to ``extract_text_from_pdf``.
        pasted_resumes: Sequence of resume texts. They are named
            "Pasted Resume 1", "Pasted Resume 2", ... in the given order.

    Returns:
        A list of ``(name, score)`` tuples sorted by score, highest first.
        An empty list is returned when there are no resumes at all.
    """
    pasted_resumes = list(pasted_resumes)

    # With nothing to compare against the job description there is no
    # similarity matrix to build, so return early instead of letting the
    # vectorizer / similarity call fail on an empty set of resumes.
    if not pdf_files and not pasted_resumes:
        return []

    # Names and texts are built in the same order: uploaded PDFs first,
    # then pasted resumes.
    resume_names = list(pdf_files) + [
        'Pasted Resume ' + str(i + 1) for i in range(len(pasted_resumes))
    ]
    resumes = [extract_text_from_pdf(content) for content in pdf_files.values()]
    resumes += pasted_resumes

    # Row 0 of the TF-IDF matrix is the job description; rows 1.. are resumes.
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([job_description] + resumes)

    # One similarity score per resume, in the same order as resume_names.
    similarity_scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()

    results = list(zip(resume_names, similarity_scores))
    results.sort(key=lambda x: x[1], reverse=True)
    return results

In [None]:
# Function to save the analysis results and resumes

In [None]:
def save_results_and_resumes(results, pdf_files, pasted_resumes, output_dir="/content"):
    """Write the ranking to CSV and store the resumes next to it.

    Args:
        results: Iterable of ``(name, score)`` pairs. Names of pasted resumes
            follow the "Pasted Resume N" convention (N is the 1-based position
            in ``pasted_resumes``), as produced by ``analyze_resumes``.
        pdf_files: Mapping of file name to raw PDF bytes.
        pasted_resumes: Sequence of pasted resume texts.
        output_dir: Directory that receives the CSV file and the ``resumes``
            sub-directory. Defaults to ``/content``.

    Returns:
        Tuple ``(results_csv_path, resumes_dir)``.
    """
    # Rank once, highest score first; every output below follows this order.
    ranked = sorted(results, key=lambda item: item[1], reverse=True)

    # Creating the resumes directory also creates output_dir if it is missing.
    resumes_dir = os.path.join(output_dir, "resumes")
    os.makedirs(resumes_dir, exist_ok=True)

    results_csv_path = os.path.join(output_dir, "resume_analysis_results.csv")
    results_df = pd.DataFrame(ranked, columns=['Resume', 'Score'])
    results_df.to_csv(results_csv_path, index=False)

    # Save every ranked PDF. Only the base name of the uploaded file name is
    # used so a name containing path separators cannot escape resumes_dir.
    for file_name, _ in ranked:
        if file_name in pdf_files:
            file_path = os.path.join(resumes_dir, os.path.basename(file_name))
            with open(file_path, "wb") as f:
                f.write(pdf_files[file_name])

    # Map each pasted resume's generated name to its text so it can be looked
    # up by the name in the ranking. Zipping the ranking with the pasted list
    # positionally would pair pasted texts with unrelated (often PDF) entries.
    pasted_by_name = {
        f"Pasted Resume {i + 1}": text for i, text in enumerate(pasted_resumes)
    }
    # Ranked names first; pasted resumes missing from the ranking are appended
    # in their original order so no content is dropped.
    ranked_pasted_names = [name for name, _ in ranked if name in pasted_by_name]
    ordered_names = list(dict.fromkeys(ranked_pasted_names + list(pasted_by_name)))

    pasted_resumes_path = os.path.join(resumes_dir, "pasted_resumes.txt")
    # Explicit UTF-8 so non-ASCII resume text is written the same everywhere.
    with open(pasted_resumes_path, "w", encoding="utf-8") as f:
        for name in ordered_names:
            f.write(f"--- {name} ---\n")  # Header matches the name in the CSV
            f.write(pasted_by_name[name])
            f.write("\n\n")

    return results_csv_path, resumes_dir

In [None]:
# Upload files and paste resume content

In [None]:
# Show a prompt, then open the upload widget through upload_files(); the value
# it returns is kept in uploaded_files for the cells that follow.
print("Upload your PDF resumes:")  # Prompt user to upload PDF files
uploaded_files = upload_files()  # Call function to upload files

Upload your PDF resumes:


Saving amazon-data-science-resume-example.pdf to amazon-data-science-resume-example (3).pdf
Saving associate-data-scientist-resume-example.pdf to associate-data-scientist-resume-example (3).pdf
Saving data-science-manager-resume-example.pdf to data-science-manager-resume-example (3).pdf
Saving data-science-student-resume-example.pdf to data-science-student-resume-example (3).pdf
Saving data-scientist-analytics-resume-example.pdf to data-scientist-analytics-resume-example (3).pdf
Saving data-scientist-intern-resume-example.pdf to data-scientist-intern-resume-example (3).pdf
Saving data-scientist-machine-learning-resume-example.pdf to data-scientist-machine-learning-resume-example (3).pdf
Saving data-scientist-resume-example.pdf to data-scientist-resume-example (3).pdf
Saving data-visualization-resume-example.pdf to data-visualization-resume-example (3).pdf
Saving educational-data-scientist-resume-example.pdf to educational-data-scientist-resume-example (3).pdf
Saving entry-level-data-sc

In [None]:
# Alias only: pdf_files refers to the same object as uploaded_files (no copy is made).
pdf_files = uploaded_files  # Store uploaded files

In [None]:
# Paste resume content for analysis

In [None]:
# Resume texts supplied directly in the notebook, one list element per resume.
# NOTE: the placeholder entry below is passed to the analysis like any other
# resume, so replace it (or empty the list) before running the later cells.
print("Paste resume content below (one per line):")
pasted_resumes = ["Paste resume content here, separated by line breaks."]

Paste resume content below (one per line):


# Define job description for analysis

In [None]:
# Example job description that every resume is compared against.
job_description = (
    "Software engineer with experience in Python, "
    "machine learning, and data analysis."
)


In [None]:
# Analyze resumes and save results

In [None]:
# Score every uploaded and pasted resume against the job description.
results = analyze_resumes(
    job_description=job_description,
    pdf_files=pdf_files,
    pasted_resumes=pasted_resumes,
)


In [None]:
# Persist the ranking and the resumes; keep both returned paths for later cells.
results_csv_path, resumes_dir = save_results_and_resumes(
    results=results,
    pdf_files=pdf_files,
    pasted_resumes=pasted_resumes,
)


In [None]:
# Display results

In [None]:
# Load the ranking back from the CSV file that was just written.
results_df = pd.read_csv(filepath_or_buffer=results_csv_path)

In [None]:
# Render the ranking as an HTML table without the index column.
display(
    HTML(data=results_df.to_html(index=False))
)

Resume,Score
data-scientist-machine-learning-resume-example (3).pdf,0.26881
amazon-data-science-resume-example (3).pdf,0.22381
associate-data-scientist-resume-example (3).pdf,0.211646
educational-data-scientist-resume-example (3).pdf,0.173355
data-scientist-analytics-resume-example (3).pdf,0.170449
senior-data-scientist-resume-example (3).pdf,0.170337
experienced-data-scientist-resume-example (3).pdf,0.167907
data-science-manager-resume-example (3).pdf,0.165832
python-data-scientist-resume-example (3).pdf,0.162386
metadata-scientist-resume-example (3).pdf,0.160388


In [None]:
# Provide download links for results and resumes

In [None]:
# Report where the CSV file and the resumes directory were written (one per line).
print(
    f"Download the results CSV file from: {results_csv_path}",
    f"Download the resumes directory from: {resumes_dir}",
    sep="\n",
)

Download the results CSV file from: /content/resume_analysis_results.csv
Download the resumes directory from: /content/resumes


In [None]:
# Allow downloading the results CSV file

In [None]:
# Trigger a browser download of the results CSV through Colab.
files.download(filename=results_csv_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# Create a zip archive of the resumes directory and provide download link

In [None]:
# Zip the contents of the resumes directory; the cell displays the archive path.
shutil.make_archive(
    base_name='/content/resumes_archive',
    format='zip',
    root_dir=resumes_dir,
)


'/content/resumes_archive.zip'

In [None]:
# Trigger a browser download of the zip archive created in the previous cell.
files.download(filename='/content/resumes_archive.zip')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>