<a href="https://colab.research.google.com/github/Preethikuppuri/Preethikuppuri/blob/main/AI%20res%20matcher%20with%20description%20.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gradio --quiet


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.5/46.5 MB[0m [31m15.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.2/322.2 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m95.2/95.2 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m53.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.0/72.0 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.3/62.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
import gradio as gr
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Matching logic: score every resume against every job description
def match_resumes_to_jobs(resume_texts, job_texts):
    """Rank resume/job pairs by TF-IDF cosine similarity.

    A separate TF-IDF model is fitted for each job description, on that job
    text together with all resumes, so term weights are computed per job.

    Args:
        resume_texts: Sequence of resume contents (strings).
        job_texts: Sequence of job-description contents (strings).

    Returns:
        A DataFrame with columns "Job", "Resume" and "Score" (cosine
        similarity rounded to 4 decimals), sorted by "Score" descending.
        Inputs are labelled "Job 1", "Resume 1", ... by position. When either
        input is empty, an empty frame with those columns is returned.
    """
    columns = ["Job", "Resume", "Score"]

    # Nothing to compare. Without this guard, sorting a column-less frame by
    # "Score" raises KeyError.
    if len(resume_texts) == 0 or len(job_texts) == 0:
        return pd.DataFrame(columns=columns)

    resume_texts = list(resume_texts)
    resume_names = [f"Resume {number}" for number in range(1, len(resume_texts) + 1)]

    results = []
    for job_number, job_text in enumerate(job_texts, start=1):
        # Row 0 of the matrix is the job; rows 1.. are the resumes in order.
        tfidf_matrix = TfidfVectorizer().fit_transform([job_text] + resume_texts)
        scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])[0]
        for resume_name, score in zip(resume_names, scores):
            results.append({
                "Job": f"Job {job_number}",
                "Resume": resume_name,
                "Score": round(score, 4)
            })
    return pd.DataFrame(results, columns=columns).sort_values(by="Score", ascending=False)

# Gradio UI function
def _read_upload(upload):
    """Return the UTF-8 text of one uploaded file.

    Accepts raw bytes, a filesystem path string, or a file-like wrapper that
    exposes the path as `.name`. Which one Gradio delivers depends on the
    File component's `type` setting and the Gradio version.
    """
    if isinstance(upload, (bytes, bytearray)):
        return upload.decode("utf-8")
    path = upload if isinstance(upload, str) else upload.name
    with open(path, "r", encoding="utf-8") as handle:
        return handle.read()


def match_interface(resumes, jobs):
    """Gradio callback: read the uploaded files and score them.

    Args:
        resumes: Uploaded resume files (list), or None when nothing was uploaded.
        jobs: Uploaded job-description files (list), or None.

    Returns:
        The DataFrame produced by `match_resumes_to_jobs`, or an empty frame
        with the "Job", "Resume" and "Score" columns when either side has no
        uploads.
    """
    # An empty File component yields None; return an empty table instead of
    # failing while iterating over it.
    if not resumes or not jobs:
        return pd.DataFrame(columns=["Job", "Resume", "Score"])

    resume_texts = [_read_upload(item) for item in resumes]
    job_texts = [_read_upload(item) for item in jobs]
    return match_resumes_to_jobs(resume_texts, job_texts)

# Upload widgets: each one accepts any number of .txt files.
resume_input = gr.File(
    label="Upload Resumes (.txt)",
    file_count="multiple",
    file_types=[".txt"],
)
job_input = gr.File(
    label="Upload Job Descriptions (.txt)",
    file_count="multiple",
    file_types=[".txt"],
)

# Wire the widgets to the matcher and start the app.
gr.Interface(
    title="🧠 AI Resume Matcher",
    description="Upload multiple resumes and job descriptions to see how well they match!",
    fn=match_interface,
    inputs=[resume_input, job_input],
    outputs=gr.Dataframe(headers=["Job", "Resume", "Score"]),
).launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0c55cb5d53f6fbe5a3.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import string
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Simple stopword list
stop_words = set("""
a about above after again against all am an and any are as at be because been before being below between
both but by can cannot could did do does doing down during each few for from further had has have having he her here
hers herself him himself his how i if in into is it its itself me more most my myself no nor not of off on once
only or other our ours ourselves out over own same she should so some such than that the their theirs them themselves
then there these they this those through to too under until up very was we were what when where which while who whom why
with you your yours yourself yourselves
""".split())

# Maps every ASCII punctuation character to a space; built once and reused.
_PUNCT_TO_SPACE = str.maketrans(string.punctuation, " " * len(string.punctuation))

# Text cleaning
def preprocess(text):
    """Normalize text for TF-IDF matching.

    Lower-cases the text, turns punctuation into spaces, and drops any
    whitespace-separated token found in `stop_words`.

    Punctuation becomes a space rather than being deleted so that words joined
    by a separator stay separate tokens: "Python/SQL" yields "python sql", not
    "pythonsql".

    Args:
        text: Raw text (str).

    Returns:
        The remaining tokens joined by single spaces; "" if none remain.
    """
    spaced = text.lower().translate(_PUNCT_TO_SPACE)
    return " ".join(word for word in spaced.split() if word not in stop_words)

# Load your files
resume_path = "resume.txt"  # <- make sure resume.txt is uploaded
job_path = "job.txt"        # <- make sure job.txt is uploaded

# Both files must already exist in the working directory; open() raises
# FileNotFoundError otherwise. Decode as UTF-8 explicitly so the text does not
# depend on the platform's default encoding.
with open(resume_path, "r", encoding="utf-8") as f:
    resume_text = f.read()

with open(job_path, "r", encoding="utf-8") as f:
    job_text = f.read()

# Preprocess: each cleaned text is keyed by the file it came from
resumes = {resume_path: preprocess(resume_text)}
jobs = {job_path: preprocess(job_text)}

# Matching logic
def match_resumes_to_jobs(resumes, jobs):
    """Score every resume against every job description.

    All documents share one TF-IDF model, fitted on the resume texts followed
    by the job texts.

    Args:
        resumes: Mapping of resume name -> resume text.
        jobs: Mapping of job name -> job-description text.

    Returns:
        A DataFrame with one row per (resume, job) pair and the columns
        "Resume", "Job Description" and "Match Score" (cosine similarity as a
        percentage, rounded to 2 decimals). When either mapping is empty, an
        empty frame with those columns is returned.
    """
    columns = ["Resume", "Job Description", "Match Score"]

    # No pairs to score; skip the vectorizer, which cannot fit on no documents.
    if not resumes or not jobs:
        return pd.DataFrame(columns=columns)

    all_texts = list(resumes.values()) + list(jobs.values())
    tfidf_matrix = TfidfVectorizer().fit_transform(all_texts)

    # Rows follow the order of all_texts: resumes first, then jobs.
    resume_vectors = tfidf_matrix[:len(resumes)]
    job_vectors = tfidf_matrix[len(resumes):]

    # A single call gives the whole (n_resumes x n_jobs) similarity grid.
    similarity = cosine_similarity(resume_vectors, job_vectors)

    results = [
        {
            "Resume": r_name,
            "Job Description": j_name,
            "Match Score": round(similarity[r_idx][j_idx] * 100, 2)
        }
        for r_idx, r_name in enumerate(resumes)
        for j_idx, j_name in enumerate(jobs)
    ]

    return pd.DataFrame(results, columns=columns)

# Run it: score the loaded resume against the loaded job description and
# print the resulting table as plain text.
df = match_resumes_to_jobs(resumes, jobs)
print(df)


FileNotFoundError: [Errno 2] No such file or directory: 'resume.txt'

In [None]:
from google.colab import files
# Upload job.txt into the working directory. Because this call is the cell's
# last expression, its return value (filename -> file bytes) is echoed in the
# cell output.
files.upload()  # <- select and upload job.txt


Saving job.txt to job.txt


{'job.txt': b'\nPosition: Data Analyst\n\nWe are looking for a detail-oriented Data Analyst to support business decision-making.\n\nResponsibilities:\n- Analyze large datasets to identify trends and insights\n- Create reports and visualizations using tools like Excel, Tableau\n- Collaborate with cross-functional teams to understand data needs\n\nRequirements:\n- Proficiency in Python and SQL\n- Experience with data visualization (Tableau or Power BI)\n- Strong understanding of statistics and data modeling\n- Excellent problem-solving and communication skills\n\nPreferred:\n- Prior internship or project experience in data analysis\n- Familiarity with cloud platforms like AWS or GCP\n'}

In [None]:
# Upload required files
# NOTE(review): `uploaded` is not read again in the code visible here; the
# files are re-opened from disk by fixed name further down in this cell.
from google.colab import files
uploaded = files.upload()

import string
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Simple stopword list
stop_words = set("""
a about above after again against all am an and any are as at be because been before being below between
both but by can cannot could did do does doing down during each few for from further had has have having he her here
hers herself him himself his how i if in into is it its itself me more most my myself no nor not of off on once
only or other our ours ourselves out over own same she should so some such than that the their theirs them themselves
then there these they this those through to too under until up very was we were what when where which while who whom why
with you your yours yourself yourselves
""".split())

# Maps every ASCII punctuation character to a space; built once and reused.
_PUNCT_TO_SPACE = str.maketrans(string.punctuation, " " * len(string.punctuation))

# Text cleaning
def preprocess(text):
    """Normalize text for TF-IDF matching.

    Lower-cases the text, turns punctuation into spaces, and drops any
    whitespace-separated token found in `stop_words`.

    Punctuation becomes a space rather than being deleted so that words joined
    by a separator stay separate tokens: "Python/SQL" yields "python sql", not
    "pythonsql".

    Args:
        text: Raw text (str).

    Returns:
        The remaining tokens joined by single spaces; "" if none remain.
    """
    spaced = text.lower().translate(_PUNCT_TO_SPACE)
    return " ".join(word for word in spaced.split() if word not in stop_words)

# Load uploaded files
# NOTE(review): this notebook's output shows an upload of "resume.txt" being
# saved as "resume (1).txt" when a file of that name already existed, so these
# fixed names may read an older copy rather than the file just uploaded —
# confirm which file is intended.
# Decode as UTF-8 explicitly so the text does not depend on the platform's
# default encoding.
with open("resume.txt", "r", encoding="utf-8") as f:
    resume_text = f.read()

with open("job.txt", "r", encoding="utf-8") as f:
    job_text = f.read()

# Preprocess: each cleaned text is keyed by the file it came from
resumes = {"resume.txt": preprocess(resume_text)}
jobs = {"job.txt": preprocess(job_text)}

# Matching logic
def match_resumes_to_jobs(resumes, jobs):
    """Score every resume against every job description.

    All documents share one TF-IDF model, fitted on the resume texts followed
    by the job texts.

    Args:
        resumes: Mapping of resume name -> resume text.
        jobs: Mapping of job name -> job-description text.

    Returns:
        A DataFrame with one row per (resume, job) pair and the columns
        "Resume", "Job Description" and "Match Score" (cosine similarity as a
        percentage, rounded to 2 decimals). When either mapping is empty, an
        empty frame with those columns is returned.
    """
    columns = ["Resume", "Job Description", "Match Score"]

    # No pairs to score; skip the vectorizer, which cannot fit on no documents.
    if not resumes or not jobs:
        return pd.DataFrame(columns=columns)

    all_texts = list(resumes.values()) + list(jobs.values())
    tfidf_matrix = TfidfVectorizer().fit_transform(all_texts)

    # Rows follow the order of all_texts: resumes first, then jobs.
    resume_vectors = tfidf_matrix[:len(resumes)]
    job_vectors = tfidf_matrix[len(resumes):]

    # A single call gives the whole (n_resumes x n_jobs) similarity grid.
    similarity = cosine_similarity(resume_vectors, job_vectors)

    results = [
        {
            "Resume": r_name,
            "Job Description": j_name,
            "Match Score": round(similarity[r_idx][j_idx] * 100, 2)
        }
        for r_idx, r_name in enumerate(resumes)
        for j_idx, j_name in enumerate(jobs)
    ]

    return pd.DataFrame(results, columns=columns)

# Run it: score the loaded resume against the loaded job description and
# print the resulting table as plain text.
df = match_resumes_to_jobs(resumes, jobs)
print(df)


Saving resume.txt to resume (1).txt
       Resume Job Description  Match Score
0  resume.txt         job.txt        35.32


In [None]:
# Run this cell to launch the paste-in matcher. It scores text pasted into the two boxes, so no uploaded files are needed.

import string
import pandas as pd
import gradio as gr
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Stopwords
stop_words = set("""
a about above after again against all am an and any are as at be because been before being below between
both but by can cannot could did do does doing down during each few for from further had has have having he her here
hers herself him himself his how i if in into is it its itself me more most my myself no nor not of off on once
only or other our ours ourselves out over own same she should so some such than that the their theirs them themselves
then there these they this those through to too under until up very was we were what when where which while who whom why
with you your yours yourself yourselves
""".split())

# Maps every ASCII punctuation character to a space; built once and reused.
_PUNCT_TO_SPACE = str.maketrans(string.punctuation, " " * len(string.punctuation))

# Preprocessing
def preprocess(text):
    """Normalize text for TF-IDF matching.

    Lower-cases the text, turns punctuation into spaces, and drops any
    whitespace-separated token found in `stop_words`.

    Punctuation becomes a space rather than being deleted so that words joined
    by a separator stay separate tokens: "Python/SQL" yields "python sql", not
    "pythonsql".

    Args:
        text: Raw text (str).

    Returns:
        The remaining tokens joined by single spaces; "" if none remain.
    """
    spaced = text.lower().translate(_PUNCT_TO_SPACE)
    return " ".join(word for word in spaced.split() if word not in stop_words)

# Matching function
def match_resume(resume_text, job_text):
    """Score one resume against one job description.

    Both texts are cleaned with `preprocess`, vectorized together with TF-IDF,
    and compared by cosine similarity.

    Args:
        resume_text: Resume contents as pasted by the user (str; None is
            treated as empty).
        job_text: Job-description contents (str; None is treated as empty).

    Returns:
        A display string of the form "🔍 Match Score: <percent>%", with the
        percentage rounded to 2 decimals. If either text is empty after
        cleaning, the score is reported as 0.0.
    """
    resume_clean = preprocess(resume_text or "")
    job_clean = preprocess(job_text or "")

    if not resume_clean or not job_clean:
        # Nothing to compare on at least one side. When both are empty the
        # vectorizer has no vocabulary to fit and raises, so skip it.
        score = 0.0
    else:
        # Row 0 is the resume, row 1 is the job description.
        tfidf_matrix = TfidfVectorizer().fit_transform([resume_clean, job_clean])
        score = cosine_similarity(tfidf_matrix[0], tfidf_matrix[1])[0][0]

    return f"🔍 Match Score: {round(score * 100, 2)}%"

# Build the two paste-in text areas, then launch the Gradio app.
resume_box = gr.Textbox(label="📄 Paste Your Resume Text", lines=15)
job_box = gr.Textbox(label="📝 Paste Job Description", lines=10)

gr.Interface(
    title="AI Resume Matcher 💼🤖",
    description="Paste your resume and a job description to see how well they match!",
    fn=match_resume,
    inputs=[resume_box, job_box],
    outputs="text",
).launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://9ac9917eec46d4ec36.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


