**Creating Project Directory**

In [1]:
import os

os.makedirs("resume_matcher/data/sample_resumes", exist_ok=True)
os.makedirs("resume_matcher/data/sample_jds", exist_ok=True)
os.makedirs("resume_matcher/assets", exist_ok=True)

print("Project folders created!")


Project folders created!


**Creating a Skills List File**

In [2]:
skills = [
    "Python", "Java", "C++", "SQL", "JavaScript", "React", "Node.js", "Machine Learning",
    "Deep Learning", "NLP", "Flask", "Django", "Git", "HTML", "CSS", "AWS",
    "Docker", "Kubernetes", "Linux", "REST API", "Data Analysis", "Power BI", "Tableau",
    "TensorFlow", "PyTorch", "NumPy", "Pandas", "Spring Boot", "MongoDB", "MySQL"
]

with open("resume_matcher/skills_list.txt", "w") as f:
    for skill in skills:
        f.write(f"{skill}\n")

print("skills_list.txt created!")


skills_list.txt created!


**Extracting Text from Resume PDF**

In [3]:
!pip install pymupdf


Collecting pymupdf
  Downloading pymupdf-1.26.1-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (3.4 kB)
Downloading pymupdf-1.26.1-cp39-abi3-manylinux_2_28_x86_64.whl (24.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.1/24.1 MB[0m [31m80.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pymupdf
Successfully installed pymupdf-1.26.1


In [4]:
import fitz  # PyMuPDF

def extract_resume_text(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text()
    return text


In [5]:
from google.colab import files

uploaded = files.upload()

# Get the first uploaded file name
resume_pdf = list(uploaded.keys())[0]

# Extract text
resume_text = extract_resume_text(resume_pdf)

# Display part of the resume text
print(resume_text[:1000])  # Just printing first 1000 chars


Saving R Prawin.pdf to R Prawin.pdf
PRAWIN R
Artificial Intelligence and Data Science Student
+917010855405
prawinramesh123@gmail.com
LinkedIn
Chennai, India
OBJECTIVE
Aspiring Data Scientist with a strong foundation in machine learning
and data-driven decision making. Proficient in Java, Python, SQL, and
data visualization techniques, with a keen interest in leveraging data
to solve complex business problems. Seeking an opportunity to apply
analytical skills and innovative problem-solving approaches in a
dynamic environment.
PROJECTS
Sentiment Analysis on IMDB Reviews With LSTM:
• Built a deep learning model to analyze sentiment in IMDB movie
reviews using LSTM networks.
• Preprocessed text data with tokenization, padding, and word
embeddings for improved accuracy.
• Evaluated model performance using precision, recall, and F1-score
metrics.
• Tech Stack: Python, TensorFlow, Keras, NLP, LSTM
AI-Driven PlantDisease Prediction and Management
System:
• Engineered a machine learning pipeli

**Loading Skills List**

In [6]:
# Load skill keywords from file
def load_skills(file_path="resume_matcher/skills_list.txt"):
    with open(file_path, "r") as f:
        skills = [line.strip() for line in f.readlines()]
    return skills

skills_list = load_skills()
print(f"Loaded {len(skills_list)} skills.")


Loaded 30 skills.


In [7]:
def extract_skills_from_text(text, skills_list):
    found_skills = []
    text_lower = text.lower()

    for skill in skills_list:
        if skill.lower() in text_lower:
            found_skills.append(skill)

    return list(set(found_skills))  # Remove duplicates


In [8]:
extracted_skills = extract_skills_from_text(resume_text, skills_list)
print("✅ Extracted Skills from Resume:\n", extracted_skills)


✅ Extracted Skills from Resume:
 ['SQL', 'C++', 'CSS', 'Pandas', 'HTML', 'Deep Learning', 'Machine Learning', 'Java', 'TensorFlow', 'NLP', 'JavaScript', 'Python']


**Getting Job Description**

In [9]:
jd_text = """
We are looking for a Python developer with experience in Django, REST APIs, and SQL databases.
Familiarity with Docker, Git, and AWS is a plus.
"""


**Match Score Calculation**

In [10]:
!pip install scikit-learn




In [11]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def compute_match_score(resume_text, jd_text):
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform([resume_text, jd_text])
    score = cosine_similarity(vectors[0], vectors[1])[0][0]
    return round(score * 100, 2)  # Return as percentage


In [12]:
match_score = compute_match_score(resume_text, jd_text)
print(f"Resume-JD Match Score: {match_score}%")


Resume-JD Match Score: 22.88%


**Extracting Skills from JD**

In [13]:
jd_skills = extract_skills_from_text(jd_text, skills_list)
print("Skills Required in JD:\n", jd_skills)


Skills Required in JD:
 ['AWS', 'SQL', 'Git', 'REST API', 'Docker', 'Python', 'Django']


In [14]:
# Extracted earlier
resume_skills = extract_skills_from_text(resume_text, skills_list)

# Find missing skills
missing_skills = list(set(jd_skills) - set(resume_skills))

print("✅ Skills in Resume:\n", resume_skills)
print("❌ Missing Skills from Resume:\n", missing_skills)


✅ Skills in Resume:
 ['SQL', 'C++', 'CSS', 'Pandas', 'HTML', 'Deep Learning', 'Machine Learning', 'Java', 'TensorFlow', 'NLP', 'JavaScript', 'Python']
❌ Missing Skills from Resume:
 ['AWS', 'Git', 'REST API', 'Docker', 'Django']


**Deploying in Streamlit**

In [15]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.45.1-py3-none-any.whl.metadata (8.9 kB)
Collecting watchdog<7,>=2.1.5 (from streamlit)
  Downloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.9 MB/s[0m eta [36m0:00:00[0m
Collecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl.metadata (4.1 kB)
Downloading streamlit-1.45.1-py3-none-any.whl (9.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.9/9.9 MB[0m [31m39.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m47.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl (79 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hInst

In [16]:
!pip install -q google-generativeai


In [17]:
%%writefile app.py
import streamlit as st
import fitz  # PyMuPDF
import os
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import google.generativeai as gen_ai

# ================= Gemini Setup =================
gen_ai.configure(api_key="AIzaSyBnNd1zTHjkWjnuAxMiy22e61ZsuHQskVY")  # 🔐 Replace with your key
gemini_model = gen_ai.GenerativeModel('gemini-1.5-flash')

# ================= UI Styling =================
st.set_page_config(page_title="Resume Matcher AI", layout="centered")

st.markdown("""
    <style>
        .stButton>button {
            background-color: #FF4B4B;
            color: white;
            font-weight: bold;
        }
        .centered-title {
            text-align: center;
            color: #FF4B4B;
        }
    </style>
""", unsafe_allow_html=True)

st.markdown("""
    <h1 class='centered-title'>📄 Resume Matcher AI</h1>
    <p style='text-align: center; font-size:18px;'>Compare your resume with any job description and find missing skills</p>
""", unsafe_allow_html=True)

with st.sidebar:
    st.markdown("## 🧠 Tips")
    st.write("- Upload a PDF resume")
    st.write("- Paste a job description")
    st.write("- Click **Analyze** to view match results and Gemini suggestions")
    st.markdown("---")
    st.write("🚀 Built with Streamlit + Gemini AI")

# ================= Core Functions =================
def load_skills(file_path="resume_matcher/skills_list.txt"):
    if not os.path.exists(file_path):
        st.error(f"Error: Skills list file not found at {file_path}")
        raise FileNotFoundError(f"Skills list file not found at {file_path}")
    with open(file_path, "r") as f:
        return [line.strip() for line in f.readlines()]

def extract_text_from_pdf(uploaded_file):
    if uploaded_file is None:
        return ""
    doc = fitz.open(stream=uploaded_file.getvalue(), filetype="pdf")
    text = ""
    for page in doc:
        text += page.get_text()
    return text

def extract_skills(text, skills_list):
    found = []
    if not skills_list:
        return []
    for skill in skills_list:
        if skill.lower() in text.lower():
            found.append(skill)
    return list(set(found))

def compute_match_score(resume_text, jd_text):
    if not resume_text.strip() or not jd_text.strip():
        return 0.0
    vectorizer = TfidfVectorizer()
    vectors = vectorizer.fit_transform([resume_text, jd_text])
    score = cosine_similarity(vectors[0], vectors[1])[0][0]
    return round(score * 100, 2)

def get_gemini_suggestions(resume_text, jd_text):
    prompt = f"""
You're an AI Resume Advisor. Based on the resume and job description below, give exactly 3 short, bullet-point suggestions to improve the resume. Be concise (1 line each).

Resume:
{resume_text}

Job Description:
{jd_text}
"""
    try:
        response = gemini_model.generate_content(prompt)
        return response.text.strip()
    except Exception as e:
        return f"Error getting suggestions: {e}"

# ================= Main UI =================
try:
    skills_list = load_skills()
except FileNotFoundError as e:
    st.error(str(e))
    st.stop()

resume_file = st.file_uploader("📄 Upload Resume (PDF)", type=["pdf"])
jd_text = st.text_area("📝 Paste Job Description Here")

if st.button("🔍 Analyze") and resume_file and jd_text.strip():
    with st.spinner("🔄 Processing..."):
        resume_text = extract_text_from_pdf(resume_file)
        resume_skills = extract_skills(resume_text, skills_list)
        jd_skills = extract_skills(jd_text, skills_list)
        match_score = compute_match_score(resume_text, jd_text)
        missing_skills = list(set(jd_skills) - set(resume_skills))
        gemini_tips = get_gemini_suggestions(resume_text, jd_text)

    st.metric(label="🎯 Match Score", value=f"{match_score}%")

    st.subheader("✅ Skills Found in Resume")
    st.markdown(", ".join([f"`{skill}`" for skill in resume_skills]) or "None")

    st.subheader("❌ Missing Skills from Resume")
    st.markdown(", ".join([f"`{skill}`" for skill in missing_skills]) or "None 🎉")

    st.subheader("💡 Gemini AI Suggestions")
    st.markdown(gemini_tips or "No suggestions returned.")

else:
    st.info("📂 Please upload a resume and paste a job description to begin.")


Writing app.py


In [18]:
!pip install streamlit pyngrok PyMuPDF scikit-learn


Collecting pyngrok
  Downloading pyngrok-7.2.11-py3-none-any.whl.metadata (9.4 kB)
Downloading pyngrok-7.2.11-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.11


In [19]:
from pyngrok import ngrok

# Replace with your token
ngrok.set_auth_token("2yayQZlZOMV5jsZtu91wvCIh7E4_2iedRFh7pDRp7JgVpDpp8")



In [20]:
# Run your app
# Changed 'port' to 'addr' to correctly specify the local address/port
public_url = ngrok.connect(addr=8501)
print("🔗 Streamlit app is live at:", public_url)

!streamlit run app.py &>/content/logs.txt &

🔗 Streamlit app is live at: NgrokTunnel: "https://6670-34-136-52-44.ngrok-free.app" -> "http://localhost:8501"
