# 🧠 Experiment 10: Mini Project Based on NLP Applications

## ✨ Project Title: Resume Matcher – AI-Powered Resume Analysis Tool

This mini project showcases how **Natural Language Processing (NLP)** can be applied in a real-world scenario: **matching a resume with a job description**.

### 🔍 Objectives:
- Extract text from a resume (PDF format)
- Preprocess text data using NLP techniques (tokenization, lemmatization, stopword removal)
- Compute **cosine similarity** between resume and job description to find match percentage
- Identify top keywords from both texts
- Visualize keyword comparison using **Plotly**
- Provide suggestions for resume improvement based on missing keywords

This tool assists job applicants in evaluating how well their resume aligns with a specific job description.

In [ ]:
# Install dependencies (if required)
# !pip install streamlit PyPDF2 plotly nltk

In [ ]:
import streamlit as st
import PyPDF2
import re
import io
import plotly.graph_objects as go
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from collections import Counter
import math

### 📦 Download Required NLTK Resources

### 📚 Why NLTK?
**Natural Language Toolkit (NLTK)** is a leading platform for building Python programs to work with human language data. We use it here for:
- **Tokenization**: Splitting text into individual words
- **Stopword Removal**: Filtering out common words like "and", "the", etc.
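- **Lemmatization**: Reducing words to their dictionary base form (e.g., 'skills' → 'skill'). Note that the default WordNet lemmatizer treats every word as a noun, so verb forms such as 'running' are left unchanged.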

In [ ]:
def download_nltk_data():
    """Make sure the NLTK resources used by the text pipeline are installed.

    Every resource is looked up individually and only the ones that are
    missing are downloaded, so a single absent corpus does not trigger a
    re-download of everything else.
    """
    # (lookup path, download identifier) for each required resource.
    required_resources = (
        ('tokenizers/punkt', 'punkt'),
        # Newer NLTK releases load the word_tokenize models from
        # "punkt_tab" rather than the pickled "punkt" package.
        ('tokenizers/punkt_tab', 'punkt_tab'),
        ('corpora/stopwords', 'stopwords'),
        ('corpora/wordnet', 'wordnet'),
    )
    for resource_path, package_id in required_resources:
        try:
            nltk.data.find(resource_path)
        except LookupError:
            nltk.download(package_id)


download_nltk_data()

def process_text(text):
    """Convert raw text into a list of normalized keyword tokens.

    The text is lower-cased and tokenized; only alphanumeric tokens are
    kept, English stop words are removed, and the remaining tokens are
    lemmatized.

    Args:
        text: Raw text to process.

    Returns:
        A list of lemmatized tokens, in their original order, with stop
        words removed.
    """
    # Load standard English stop words
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = []
    # Tokenize text and convert to lowercase
    for token in word_tokenize(text.lower()):
        # Drop stop words BEFORE lemmatizing: the default (noun) lemmatizer
        # rewrites some of them (e.g. "was" -> "wa", "has" -> "ha") into
        # forms the stop list does not contain, which would otherwise leak
        # into the keyword counts.
        if not token.isalnum() or token in stop_words:
            continue
        lemma = lemmatizer.lemmatize(token)
        # Also drop tokens whose lemma turns out to be a stop word.
        if lemma not in stop_words:
            tokens.append(lemma)
    return tokens

In [ ]:
def process_text(text):
    """Convert raw text into a list of normalized keyword tokens.

    The text is lower-cased and tokenized; only alphanumeric tokens are
    kept, English stop words are removed, and the remaining tokens are
    lemmatized.

    Args:
        text: Raw text to process.

    Returns:
        A list of lemmatized tokens, in their original order, with stop
        words removed.
    """
    stop_words = set(stopwords.words('english'))
    lemmatizer = WordNetLemmatizer()
    tokens = []
    for token in word_tokenize(text.lower()):
        # Drop stop words BEFORE lemmatizing: the default (noun) lemmatizer
        # rewrites some of them (e.g. "was" -> "wa", "has" -> "ha") into
        # forms the stop list does not contain, which would otherwise leak
        # into the keyword counts.
        if not token.isalnum() or token in stop_words:
            continue
        lemma = lemmatizer.lemmatize(token)
        # Also drop tokens whose lemma turns out to be a stop word.
        if lemma not in stop_words:
            tokens.append(lemma)
    return tokens

### 📥 Extract Text from Resume PDF

In [ ]:
def extract_text_from_pdf(pdf_file):
    """Extract cleaned, lower-cased text from a PDF file object.

    Args:
        pdf_file: Binary file-like object exposing ``read()`` that yields
            the PDF bytes.

    Returns:
        The extracted text with disallowed characters removed and
        whitespace collapsed (an empty string if no page yields text), or
        ``None`` if the file cannot be read or parsed.
    """
    try:
        with io.BytesIO(pdf_file.read()) as pdf_stream:
            pdf_reader = PyPDF2.PdfReader(pdf_stream)
            # Extraction must happen while the stream is still open.
            page_texts = [page.extract_text() for page in pdf_reader.pages]
    except Exception:
        # Best-effort boundary: callers treat None as "extraction failed",
        # but record the cause instead of discarding it silently.
        import logging
        logging.getLogger(__name__).exception("Could not extract text from PDF")
        return None

    # Pages without extractable text are skipped.
    text = " ".join(page_text for page_text in page_texts if page_text)
    # Strip unwanted characters first, then collapse whitespace, so that
    # removing a character between two spaces does not leave a double space.
    text = re.sub(r'[^\w\s@.,-]', '', text)
    text = re.sub(r'\s+', ' ', text)
    return text.lower().strip()

### 🔍 Compare Resume with Job Description

In [ ]:
def compare_resume_with_job(resume_text, job_description):
    """Score how closely a resume matches a job description.

    Both texts are tokenized with ``process_text`` and reduced to term
    counts; the score is the cosine similarity of the two count vectors.

    Args:
        resume_text: Text of the resume.
        job_description: Text of the job description.

    Returns:
        A dict with:
            'match_percentage': cosine similarity times 100 (0 when either
                text produces no tokens),
            'resume_keywords': up to 10 most common (term, count) pairs
                from the resume,
            'job_keywords': up to 10 most common (term, count) pairs from
                the job description.
    """
    resume_freq = Counter(process_text(resume_text))
    job_freq = Counter(process_text(job_description))

    def _vector_length(counts):
        # Euclidean length of a term-count vector.
        return math.sqrt(sum(count * count for count in counts.values()))

    resume_norm = _vector_length(resume_freq)
    job_norm = _vector_length(job_freq)

    if resume_norm and job_norm:
        # Terms found in only one text contribute zero to the dot product,
        # so summing over the shared terms is sufficient.
        shared_terms = resume_freq.keys() & job_freq.keys()
        overlap = sum(resume_freq[term] * job_freq[term] for term in shared_terms)
        similarity = overlap / (resume_norm * job_norm)
    else:
        similarity = 0

    report = {}
    report['match_percentage'] = similarity * 100
    report['resume_keywords'] = resume_freq.most_common(10)
    report['job_keywords'] = job_freq.most_common(10)
    return report

### 📊 Create Keyword Visualization Using Plotly

In [ ]:
def create_keyword_visualization(results):
    """Build a grouped horizontal bar chart comparing top keywords.

    Args:
        results: Dict with 'resume_keywords' and 'job_keywords', each a
            list of (word, count) pairs.

    Returns:
        A Plotly figure with one bar trace for the resume and one for the
        job description. Counts are scaled to a percentage of the largest
        count across both lists.
    """
    resume_dict = dict(results['resume_keywords'])
    job_dict = dict(results['job_keywords'])

    # De-duplicate while preserving order (job keywords first, then the
    # resume-only ones). A plain set would order the bars arbitrarily and
    # differently from run to run.
    all_words = list(dict.fromkeys(list(job_dict) + list(resume_dict)))

    # The trailing 1 keeps the divisor non-zero when both lists are empty.
    max_score = max([*resume_dict.values(), *job_dict.values(), 1])
    resume_scores = [resume_dict.get(word, 0) / max_score * 100 for word in all_words]
    job_scores = [job_dict.get(word, 0) / max_score * 100 for word in all_words]

    fig = go.Figure()
    fig.add_trace(go.Bar(x=resume_scores, y=all_words, name='Resume', orientation='h'))
    fig.add_trace(go.Bar(x=job_scores, y=all_words, name='Job Description', orientation='h'))
    fig.update_layout(
        title='Keyword Relevance Comparison',
        xaxis_title='Relevance Score (%)',
        yaxis_title='Keywords',
        barmode='group',
        # Horizontal bars are drawn bottom-up by default; reverse the axis
        # so the first keyword in the list appears at the top.
        yaxis={'autorange': 'reversed'},
    )
    return fig

### 🖥️ Main Application Logic

In [ ]:
def main():
    """Render the Streamlit page and run the resume/job comparison.

    Collects a job description and a PDF resume from the user. Once both
    are provided it shows the match score, the keyword comparison chart,
    and the top job-description keywords that do not occur in the resume.
    """
    st.title("📄 Resume Matcher – NLP Based Analysis")
    job_description = st.text_area("Paste Job Description here 👇", height=200)
    uploaded_file = st.file_uploader("Upload Resume (PDF only)", type=["pdf"])

    # Nothing to analyse until both inputs are present.
    if not (uploaded_file and job_description):
        return

    resume_text = extract_text_from_pdf(uploaded_file)
    if not resume_text:
        st.error("Failed to extract text from PDF.")
        return

    results = compare_resume_with_job(resume_text, job_description)
    st.metric("Match Score (%)", f"{results['match_percentage']:.2f}")
    st.subheader("🔑 Top Keywords Comparison")
    st.plotly_chart(create_keyword_visualization(results), use_container_width=True)

    # Compare against the resume's full vocabulary rather than only its top
    # keywords; otherwise a term that is present in the resume but not
    # among its most frequent ones would be reported as missing.
    resume_vocabulary = set(process_text(resume_text))
    # A list keeps the job description's frequency order in the output.
    missing_keywords = [
        word for word, _ in results['job_keywords']
        if word not in resume_vocabulary
    ]
    if missing_keywords:
        st.warning("Consider including these keywords in your resume for better match:")
        st.markdown(", ".join(missing_keywords))
    else:
        st.success("Your resume aligns well with the job description!")

In [ ]:
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

### ✅ Conclusion

This mini project integrates several concepts of **Natural Language Processing** and **Data Visualization** to build an interactive, useful application. It demonstrates how text data from resumes and job descriptions can be compared to:
- Identify alignment between a candidate's skills and job requirements
- Provide actionable feedback for improving resumes
- Use vector space modeling (term-frequency vectors & cosine similarity) to assess text similarity

**Future Enhancements:**
- Support for DOCX format in addition to PDFs
- Keyword highlighting directly on resume text
- Integration with job portals or resume builders

This project is a practical demonstration of the power of NLP in job matching scenarios.