In [None]:
# -----------------------------
# Step 1: Install dependencies
# -----------------------------
# !pip install nltk spacy pyresparser scikit-learn pandas
!pip install pyresparser
!apt-get install -y poppler-utils
!python -m nltk.downloader all
!python -m spacy download en_core_web_sm


# Download NLTK and spaCy resources
# !python -m spacy download en_core_web_sm
import nltk
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

In [None]:
# -----------------------------
# Step 2: Resume Parsing
# -----------------------------
from pyresparser import ResumeParser

# Resume file to analyse (relative path, so it is looked up from the current
# working directory).
resume_path = 'My Resume.pdf'

# Build the parser for that file, then pull out the extracted fields.
# Later cells read `data` with dict-style access (`data.get(...)`).
resume_parser = ResumeParser(resume_path)
data = resume_parser.get_extracted_data()

# Heading on one line, raw extraction result on the next.
print("Extracted Resume Data:", data, sep="\n")

In [None]:
# -----------------------------
# Step 3: Sample Job Dataset
# -----------------------------
import pandas as pd

# Hand-written example postings (a real pipeline would load these from a DB or CSV).
# Every row below follows the field order given in _JOB_COLUMNS.
_JOB_COLUMNS = ("job_title", "description", "skills_required")
_JOB_ROWS = [
    (
        "Data Scientist",
        "Looking for a data scientist skilled in Python, machine learning, and statistics.",
        "Python, Machine Learning, Statistics",
    ),
    (
        "Software Engineer",
        "Developer needed with strong Java and system design experience.",
        "Java, System Design, OOP",
    ),
    (
        "ML Engineer",
        "We require an ML Engineer with experience in TensorFlow and deployment.",
        "TensorFlow, Deployment, Docker",
    ),
    (
        "Data Analyst",
        "Role involves SQL, data visualization, and business intelligence tools.",
        "SQL, Tableau, Excel",
    ),
]

# One dict per posting, keyed by column name.
job_data = [dict(zip(_JOB_COLUMNS, row)) for row in _JOB_ROWS]

# Tabular form: one row per posting, one column per field.
df_jobs = pd.DataFrame(job_data)

In [None]:
# -----------------------------
# Step 4: Text Preprocessing
# -----------------------------
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Combine job description and skills into one field
df_jobs['combined_text'] = df_jobs['description'] + ' ' + df_jobs['skills_required']

# Combine user's skills and other fields into a single whitespace-separated string.
# Pieces are collected first and joined once, so a missing field never leaves a
# stray leading or trailing separator.
user_text_parts = []

skills = data.get('skills')
if skills:
    user_text_parts.append(' '.join(map(str, skills)))

experience = data.get('experience')
if experience:
    if isinstance(experience, (list, tuple)):
        # Join the items themselves rather than calling str() on the container:
        # the container's repr adds brackets/quotes and turns newlines/tabs
        # inside items into literal "\n"/"\t", which then stick to adjacent
        # words when the text is tokenized.
        user_text_parts.append(' '.join(str(item) for item in experience))
    else:
        # Scalar values are rendered with str().
        user_text_parts.append(str(experience))

user_text = ' '.join(user_text_parts)

In [None]:
# -----------------------------
# Step 5: Compute Similarities
# -----------------------------
# Corpus layout: position 0 is the user's text, positions 1.. are the job texts
# in the same order as the rows of df_jobs.
all_texts = [user_text, *df_jobs['combined_text'].values]

# Fit one TF-IDF vocabulary over user and job texts together (English stop
# words removed) so both sides share the same feature space.
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(all_texts)

# Row 0 (kept two-dimensional by slicing) against every remaining row; the
# result has a single row, which is unpacked into a flat array of scores.
user_vector = tfidf_matrix[0:1]
job_vectors = tfidf_matrix[1:]
cosine_sim = cosine_similarity(user_vector, job_vectors)[0]

# Attach each job's score as a new column on the job table.
df_jobs['match_score'] = cosine_sim

# Best-matching jobs first.
recommended_jobs = df_jobs.sort_values(by='match_score', ascending=False)

In [None]:
# -----------------------------
# Step 6: Show Recommendations
# -----------------------------
# Only the title and its similarity score are shown; rows keep the order
# already established in recommended_jobs.
summary_columns = ['job_title', 'match_score']

print("\nTop Job Recommendations:")
print(recommended_jobs.loc[:, summary_columns])