# In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import re
import nltk
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.neighbors import NearestNeighbors

# Load the English stop-word list, downloading the NLTK corpus only when it
# is not already installed. Streamlit re-executes this script on every
# interaction, so an unconditional nltk.download() would contact the NLTK
# server on each rerun.
try:
    stop_words = set(stopwords.words('english'))
except LookupError:
    # Corpus missing locally: fetch it once, then load it.
    nltk.download('stopwords')
    stop_words = set(stopwords.words('english'))

# Sample corpus: five short resume summaries that serve as the candidates
# to be ranked against a job description.
data = dict(
    Resume=[
        "Software Engineer with experience in Python, Django, and Machine Learning.",
        "Data Scientist skilled in Python, TensorFlow, and Big Data analytics.",
        "Frontend Developer proficient in React, JavaScript, and UI/UX Design.",
        "Backend Developer with expertise in Node.js, Express, and SQL.",
        "Cybersecurity Analyst with skills in penetration testing and threat analysis.",
    ],
)

# Tabular form of the corpus: one row per resume, single "Resume" column.
df = pd.DataFrame(data=data)

def preprocess_text(text):
    """Normalize a piece of text for vectorization.

    The text is lowercased, every character that is neither an ASCII
    letter/digit nor whitespace is deleted (not replaced by a space), and
    tokens present in the module-level ``stop_words`` set are dropped.

    Returns the surviving tokens joined by single spaces.
    """
    lowered = text.lower()
    alnum_only = re.sub(r'[^a-zA-Z0-9\s]', '', lowered)
    kept_tokens = (token for token in alnum_only.split() if token not in stop_words)
    return ' '.join(kept_tokens)

# Pre-compute the normalized text of every resume once, at load time.
df["Cleaned_Resume"] = [preprocess_text(resume) for resume in df["Resume"]]

# Streamlit UI: page title and the heading for the job-description section.
# NOTE(review): the heading says "Upload", but the widget below is a plain
# text area rather than a file uploader -- confirm the intended wording.
st.title("Automated Resume Screening System")
st.subheader("Upload a Job Description")

# Free-text input for the job description. The entered text is kept in
# job_desc and handed to rank_resumes when the button further down is clicked.
job_desc = st.text_area("Enter Job Description:")

def rank_resumes(job_desc, df, top_n=3):
    """Rank the resumes in ``df`` by TF-IDF cosine similarity to a job description.

    Args:
        job_desc: Raw job-description text. It is cleaned with
            ``preprocess_text`` before being vectorized.
        df: DataFrame holding a ``Cleaned_Resume`` column of preprocessed
            resume text. The frame itself is not modified.
        top_n: Maximum number of resumes to return (must be a positive
            integer). It is capped at the number of rows in ``df`` so that
            small corpora do not make the neighbor search fail.

    Returns:
        A new DataFrame containing the best-matching rows of ``df`` plus a
        ``Similarity_Score`` column (cosine similarity multiplied by 100),
        sorted from the highest score to the lowest.
    """
    if len(df) == 0:
        # Nothing to rank: a neighbor search cannot be fitted on zero samples.
        no_matches = df.copy()
        no_matches["Similarity_Score"] = np.empty(0, dtype=float)
        return no_matches

    job_desc = preprocess_text(job_desc)

    # Fit a single vocabulary over the resumes and the query so that both
    # live in the same feature space; the job description is the last row.
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(df["Cleaned_Resume"].tolist() + [job_desc])
    resume_vectors = tfidf_matrix[:-1]
    query_vector = tfidf_matrix[-1]

    # Nearest neighbors under cosine distance; n_neighbors may not exceed
    # the number of fitted samples.
    knn = NearestNeighbors(n_neighbors=min(top_n, len(df)), metric='cosine')
    knn.fit(resume_vectors)
    distances, indices = knn.kneighbors(query_vector)

    # Copy the selected rows so the score column is written to an independent
    # frame rather than to a selection taken from the caller's DataFrame.
    ranked_resumes = df.iloc[indices[0]].copy()
    # Cosine distance is 1 - cosine similarity; express similarity as a percentage.
    ranked_resumes["Similarity_Score"] = (1 - distances[0]) * 100

    return ranked_resumes.sort_values(by="Similarity_Score", ascending=False)

# Run the ranking only on a script run in which the button was clicked.
if st.button("Find Best Resumes"):
    # strip() so that input consisting solely of whitespace is treated as
    # missing instead of being ranked against an empty query.
    if job_desc.strip():
        ranked_resumes = rank_resumes(job_desc, df)
        st.subheader("Top Matching Resumes")
        st.dataframe(ranked_resumes[["Resume", "Similarity_Score"]])
    else:
        st.warning("Please enter a job description.")
