In [1]:
from src.data_loader import load_resumes,load_job_descriptions
from src.preprocessing import preprocess_text
from src.feature_engineering import ResumeMatcher
from src.llm_utils import get_resume_feedback
import pandas as pd

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\itzku\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\itzku\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
# Load the raw resumes and job descriptions from the data folders.
# Later cells read both results through len(), .keys() and .values(), so they
# are presumably dict-like (file name -> text) — confirm in src.data_loader.
# NOTE: the paths are relative, so this cell only works when the kernel's
# working directory is the one that contains data/.
resumes_raw = load_resumes('data/resumes')
job_descriptions_raw = load_job_descriptions('data/job_descriptions')
print(f"Loaded {len(resumes_raw)} resumes and {len(job_descriptions_raw)} job descriptions.")

Loaded 5 resumes and 1 job descriptions.


In [3]:
import nltk

# Keep the NLTK resources in a folder next to the notebook so their location
# is known. The path is relative to the kernel's working directory.
nltk_data_dir = './nltk_data'

# Fetch the tokenizer models and the stop-word lists; nltk.download reports
# "already up-to-date" when a package is present, so re-running is cheap.
for package in ('punkt', 'stopwords'):
    nltk.download(package, download_dir=nltk_data_dir)

# Register the folder on NLTK's search path exactly once. An unconditional
# append would add a duplicate entry every time this cell is re-run.
if nltk_data_dir not in nltk.data.path:
    nltk.data.path.append(nltk_data_dir)


[nltk_data] Downloading package punkt to ./nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to ./nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [4]:
import spacy
# Load spaCy's small English pipeline once; preprocess_text below calls this
# object for every document.
# The model must be installed first: python -m spacy download en_core_web_sm
nlp = spacy.load("en_core_web_sm")

def preprocess_text(text):
    """Return a cleaned, space-separated version of ``text``.

    The text is lower-cased and run through the module-level spaCy pipeline
    ``nlp``. Only tokens that are purely alphabetic and are not stop words
    are kept, and the survivors are joined with single spaces.

    NOTE: this definition replaces the ``preprocess_text`` imported from
    ``src.preprocessing`` at the top of the notebook; every call made after
    this cell runs uses this spaCy-based version.

    Args:
        text: The raw document text (must support ``.lower()``).

    Returns:
        A single string containing the kept tokens in their original order.
    """
    kept_words = []
    for tok in nlp(text.lower()):
        # Drop stop words and anything that is not purely alphabetic
        # (numbers, punctuation, whitespace, mixed tokens).
        if tok.is_stop or not tok.is_alpha:
            continue
        kept_words.append(tok.text)
    return " ".join(kept_words)


In [5]:
# Clean every raw document with preprocess_text. Order follows the iteration
# order of the source collections, so position i in each cleaned list lines
# up with the i-th key of resumes_raw / job_descriptions_raw.
resumes_cleaned = list(map(preprocess_text, resumes_raw.values()))
jds_cleaned = list(map(preprocess_text, job_descriptions_raw.values()))

# Sanity check: preview the first 500 characters of the first cleaned resume.
print(resumes_cleaned[0][:500])

abhilash b r portfolio website data enthusiast seeking opportunities field data analytics drive strategic business decisions impactful insights data driven decision making project experience business insights data analyst challenge sql data exploration power bi dashboard analyzed sales data hardware manufacturing company generated insights related finance sales marketing supply chain analytics bulk loading data establishing relationships snowflake schema format tables erd mysql data ready analys


In [6]:
# Fit the matcher on the cleaned job descriptions, then score every cleaned
# resume against them.
matcher = ResumeMatcher()
matcher.fit(jds_cleaned)
similarity_scores = matcher.match(resumes_cleaned)

# Present the scores as a table: one row per resume, one column per job
# description, labelled with the keys of the raw collections.
df = pd.DataFrame(
    data=similarity_scores,
    index=list(resumes_raw.keys()),
    columns=list(job_descriptions_raw.keys()),
)

# Last expression of the cell: render the table with a green gradient
# (darker shade = higher score).
df.style.background_gradient(cmap='Greens')

Unnamed: 0,jd_data_analyst.txt
data_analyst_resume.pdf,0.5904
intern_resume.txt,0.501745
marketing_resume.docx,0.490454
project_manager_resume.docx,0.391293
software_engineer_resume.pdf,0.404415


In [7]:
# Take the first raw resume and the first raw job description as the sample
# pair. The raw text is used here, not the cleaned text.
resume_sample_text = [*resumes_raw.values()][0]
job_sample_text = [*job_descriptions_raw.values()][0]

# Ask the LLM helper for feedback on how the resume fits the job description.
feedback = get_resume_feedback(resume_sample_text, job_sample_text)

# Header, a blank line, then the feedback text.
print("💬 LLM Feedback:\n", feedback, sep="\n")

💬 LLM Feedback:

The resume presents a well-structured and comprehensive showcase of the candidate's skills and experiences relevant to the Data Analyst position. Here is a detailed analysis:

Strengths:
1. The candidate has demonstrated hands-on experience with querying large datasets using SQL and building dashboards with Power BI, which aligns well with the job responsibilities.
2. The projects mentioned in the resume showcase the candidate's ability to work with data from various industries, including eCommerce (Maven Analytics), hospitality (Codebasics.io), and a hardware manufacturing company, presenting valuable insights for stakeholders.
3. Strong technical skills, such as using Python, pandas, NumPy, SQL, Matplotlib, Seaborn, MySQL, and Power Query are highlighted in the resume, making the candidate a potential fit for the job description requirements.
4. The candidate's proficiency in working collaboratively with cross-functional teams, communicating insights effectively to b