In [2]:
!pip install pymupdf python-docx nltk

import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt_tab')

from google.colab import files
# Upload resume file
uploaded = files.upload()
# Get file path
file_path = list(uploaded.keys())[0]
print(f" File uploaded: {file_path}")

import os
import re
import fitz  # PyMuPDF
from docx import Document
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# 1. Extract Text
def extract_text_from_file(file_path):
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        return extract_text_from_pdf(file_path)
    elif ext == ".docx":
        return extract_text_from_docx(file_path)
    else:
        raise ValueError(" Unsupported file format. Please upload PDF or DOCX.")
def extract_text_from_pdf(file_path):
    doc = fitz.open(file_path)
    text = ""
    for page in doc:
        text += page.get_text()
    return text
def extract_text_from_docx(file_path):
    doc = Document(file_path)
    return "\n".join([para.text for para in doc.paragraphs])
# 2. Preprocess Text
def preprocess_text(text):
    cleaned_text = re.sub(r'[^a-zA-Z\s]', '', text.lower())
    tokens = word_tokenize(cleaned_text)
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if word not in stop_words]
    lemmatizer = WordNetLemmatizer()
    lemmatized_tokens = [lemmatizer.lemmatize(word) for word in filtered_tokens]
    return lemmatized_tokens
# 3. Analyze Resume
def analyze_resume(text):
    required_skills = {
        "python", "sql", "excel", "machine", "learning", "data", "analysis",
        "communication", "tableau", "statistics", "presentation",
        "deep learning", "natural language processing", "computer vision",
        "big data", "cloud computing", "model deployment", "a/b testing",
        "experimental design", "statistical modeling", "bayesian statistics",
        "time series analysis", "reinforcement learning", "neural networks",
        "gradient boosting", " dimensionality reduction", "feature engineering"
    }
    tokens = preprocess_text(text)
    matched_skills = required_skills.intersection(tokens)
    missing_skills = required_skills.difference(tokens)
    print("\n=============================")
    print(" RESUME ANALYSIS REPORT")
    print("=============================")
    print(f" Skills Found ({len(matched_skills)}):")
    print(", ".join(sorted(matched_skills)) if matched_skills else "None")
    print(f"\n Skills Missing ({len(missing_skills)}):")
    print(", ".join(sorted(missing_skills)) if missing_skills else "None")
    if missing_skills:
        print("\n Suggestions to Improve Resume:")
        for skill in sorted(missing_skills):
            print(f" Add or highlight: {skill}")
    else:
        print("\n Your resume covers all required skills. Excellent!")
# 4. Run Analyzer
resume_text = extract_text_from_file(file_path)
analyze_resume(resume_text)



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


Saving Akaramsetty Swapna Resume (1).pdf to Akaramsetty Swapna Resume (1) (1).pdf
 File uploaded: Akaramsetty Swapna Resume (1) (1).pdf

 RESUME ANALYSIS REPORT
 Skills Found (7):
analysis, data, excel, learning, machine, python, sql

 Skills Missing (20):
 dimensionality reduction, a/b testing, bayesian statistics, big data, cloud computing, communication, computer vision, deep learning, experimental design, feature engineering, gradient boosting, model deployment, natural language processing, neural networks, presentation, reinforcement learning, statistical modeling, statistics, tableau, time series analysis

 Suggestions to Improve Resume:
 Add or highlight:  dimensionality reduction
 Add or highlight: a/b testing
 Add or highlight: bayesian statistics
 Add or highlight: big data
 Add or highlight: cloud computing
 Add or highlight: communication
 Add or highlight: computer vision
 Add or highlight: deep learning
 Add or highlight: experimental design
 Add or highlight: feature eng