In [3]:
!pip install PyPDF2

Collecting PyPDF2
  Downloading pypdf2-3.0.1-py3-none-any.whl.metadata (6.8 kB)
Downloading pypdf2-3.0.1-py3-none-any.whl (232 kB)
   ---------------------------------------- 0.0/232.6 kB ? eta -:--:--
   ---------------------------------------- 0.0/232.6 kB ? eta -:--:--
   - -------------------------------------- 10.2/232.6 kB ? eta -:--:--
   ----- --------------------------------- 30.7/232.6 kB 445.2 kB/s eta 0:00:01
   ---------- ---------------------------- 61.4/232.6 kB 469.7 kB/s eta 0:00:01
   ----------------------- -------------- 143.4/232.6 kB 853.3 kB/s eta 0:00:01
   ---------------------------------------- 232.6/232.6 kB 1.1 MB/s eta 0:00:00
Installing collected packages: PyPDF2
Successfully installed PyPDF2-3.0.1


In [29]:
import numpy as np
import pandas as pd
import streamlit as st
from PyPDF2 import PdfReader
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def extract_text_from_pdf(file):
    """Return the text of every page of a PDF, joined by single spaces.

    Args:
        file: Whatever ``PdfReader`` accepts as its source; the app passes
            the objects returned by the Streamlit file uploader.

    Returns:
        str: The page texts in page order, separated by one space, with
        leading and trailing whitespace stripped. Pages whose
        ``extract_text()`` result is empty or ``None`` are skipped, so a PDF
        with no extractable text yields ``""``.
    """
    reader = PdfReader(file)
    # Extraction is the costly step, so run it exactly once per page and
    # reuse the result for both the emptiness check and the output.
    page_texts = (page.extract_text() for page in reader.pages)
    return " ".join(chunk for chunk in page_texts if chunk).strip()

def rank_resumes(job_description, resumes):
    """Score each resume by TF-IDF cosine similarity to the job description.

    Args:
        job_description: The job description text.
        resumes: List of resume texts.

    Returns:
        A 1-D NumPy array holding one similarity score per resume, in the
        same order as ``resumes``. The array is empty when ``resumes`` is
        empty.

    Note:
        Errors raised by ``TfidfVectorizer`` while fitting (presumably when
        the combined texts contain no usable terms) are not caught here.
    """
    if len(resumes) == 0:
        # Nothing to compare against; the pairwise call would fail on a
        # zero-row input, so answer with an empty score vector instead.
        return np.empty(0, dtype=float)

    # Fit on the job description and the resumes together so all rows share
    # one vocabulary. Row 0 is the job description, the rest are resumes.
    tfidf_matrix = TfidfVectorizer().fit_transform([job_description] + resumes)

    # Slicing with [:1] keeps the job description as a 2-D single-row matrix.
    # The matrix stays sparse: cosine_similarity takes sparse input directly,
    # so no dense copy of the whole vocabulary is needed.
    similarities = cosine_similarity(tfidf_matrix[:1], tfidf_matrix[1:])
    return similarities.flatten()

# Streamlit UI
st.title("📄 AI Resume Screening & Ranking System")

# Job description input
st.header("📝 Job Description")
job_description = st.text_area("Enter the job description", height=150)

# File uploader
st.header("📂 Upload Resumes (PDF)")
uploaded_files = st.file_uploader("Upload PDF files", type=["pdf"], accept_multiple_files=True)

# Rank only when there is something to compare: at least one uploaded file
# and a job description that is more than whitespace.
if uploaded_files and job_description.strip():
    st.header("📊 Ranked Resumes")

    file_names = [file.name for file in uploaded_files]
    resumes = [extract_text_from_pdf(file) for file in uploaded_files]

    # A PDF with no extractable text (for example an image-only scan) still
    # appears in the table, but tell the user why it has no meaningful score.
    unreadable = [name for name, text in zip(file_names, resumes) if not text]
    if unreadable:
        st.warning("No extractable text found in: " + ", ".join(unreadable))

    # Rank resumes
    scores = rank_resumes(job_description, resumes)

    # Best match first. The stable sort keeps upload order among ties, and
    # the 1-based "Rank" index replaces the original upload positions.
    results = (
        pd.DataFrame({"Resume": file_names, "Score": scores})
        .sort_values(by="Score", ascending=False, kind="stable")
        .set_axis(pd.RangeIndex(1, len(file_names) + 1, name="Rank"))
    )

    # Display results
    st.dataframe(results)
