In [None]:
!pip install python-docx

In [None]:
import os
import pandas as pd
import docx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Download the NLTK 'stopwords' and 'wordnet' resources.
nltk.download('stopwords')
nltk.download('wordnet')


In [None]:
import pandas as pd
import os
from docx import Document


# Load the job postings table. Later cells read its 'Key Skills', 'Job Title'
# and 'Job Experience Required' columns.
job_data = pd.read_csv("jobs.csv")


def extract_text_from_docx(file_path):
    """Return the paragraph text of a .docx file as one space-joined string.

    Args:
        file_path: Path of the document handed to ``Document``.

    Returns:
        str: The ``text`` of every entry in ``doc.paragraphs``, in order,
        separated by single spaces.

    NOTE(review): only ``doc.paragraphs`` is read; text stored elsewhere in the
    document (e.g. inside tables) is presumably not included -- confirm.
    """
    paragraph_texts = []
    for paragraph in Document(file_path).paragraphs:
        paragraph_texts.append(paragraph.text)
    return " ".join(paragraph_texts)

# NOTE(review): this path assumes Google Drive is already mounted under
# /content/drive; no mount call is visible in this notebook -- confirm.
resume_folder = "/content/drive/MyDrive/Resumes"

# Maps resume file name -> raw text extracted from that file.
resumes = {}
# Sort the listing: os.listdir returns entries in arbitrary order, and the
# iteration order of this dict later decides which similarity-matrix row
# belongs to which resume.
for resume_file in sorted(os.listdir(resume_folder)):
    # Skip Word's "~$name.docx" owner/lock files (not real documents) and
    # accept the extension in any letter case.
    if resume_file.startswith("~$") or not resume_file.lower().endswith(".docx"):
        continue
    resume_text = extract_text_from_docx(os.path.join(resume_folder, resume_file))
    resumes[resume_file] = resume_text

print("Job Data Sample:")
print(job_data.head())
print("\nSample Resumes:")
for name, text in resumes.items():
    # Show only the first 100 characters of each resume.
    print(f"{name}: {text[:100]}...")

In [None]:
import re
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Download the NLTK 'punkt', 'stopwords' and 'punkt_tab' resources before
# any tokenizing is done.
for resource_name in ('punkt', 'stopwords', 'punkt_tab'):
    nltk.download(resource_name)


def _english_stop_words():
    """Return the NLTK English stop-word set, loading it only on first use."""
    cached = getattr(_english_stop_words, "_cache", None)
    if cached is None:
        cached = frozenset(stopwords.words('english'))
        _english_stop_words._cache = cached
    return cached


def preprocess_text(text):
    """Turn free text into a list of lowercase tokens without stop words.

    Steps: lowercase the text, replace every character that is not a letter,
    digit or whitespace with a space, tokenize with ``word_tokenize``, then
    drop English stop words.

    Args:
        text: The string to clean. ``None`` and float NaN (how pandas
            represents a missing cell) are treated as empty text.

    Returns:
        list[str]: The remaining tokens, in their original order. Empty for
        missing or whitespace-only input.

    Raises:
        AttributeError: If ``text`` is some other non-string object (for
            example an already-tokenized list).
    """
    import math

    # Missing values from a DataFrame column arrive as None or float NaN.
    if text is None or (isinstance(text, float) and math.isnan(text)):
        return []
    if not text.strip():
        return []

    text = text.lower()

    # Replace punctuation with a space rather than deleting it, so that
    # "python,java" becomes two tokens instead of the single "pythonjava".
    text = re.sub(r'[^a-z0-9\s]', ' ', text)

    words = word_tokenize(text)

    # The stop-word set is cached: reloading it per call is wasteful when this
    # function is applied to every row of a DataFrame.
    stop_words = _english_stop_words()
    return [word for word in words if word not in stop_words]


# Tokenize each posting's skills. Missing values are blanked out first so a NaN
# in 'Key Skills' is not passed to preprocess_text as a float.
job_data['cleaned_skills'] = job_data['Key Skills'].fillna('').apply(preprocess_text)


# Replace each resume's raw text with its token list. Only entries that are
# still strings are processed, so re-running this cell leaves already-tokenized
# resumes untouched instead of failing on them.
for resume_file, resume_content in list(resumes.items()):
    if isinstance(resume_content, str):
        resumes[resume_file] = preprocess_text(resume_content)


print("Preprocessed Job Skills:")
print(job_data[['Job Title', 'cleaned_skills']].head())

print("\nPreprocessed Resume Samples:")
for name, text in resumes.items():
    # Show only the first ten tokens of each resume.
    print(f"{name}: {text[:10]}...")

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Re-join each token list into one space-separated document per job / resume.
job_skills = [' '.join(tokens) for tokens in job_data['cleaned_skills']]
resume_skills = [' '.join(tokens) for tokens in resumes.values()]


# The TF-IDF vocabulary is fitted on the job skills only; resumes are then
# projected onto that same vocabulary.
vectorizer = TfidfVectorizer()
job_vectors = vectorizer.fit_transform(job_skills)
resume_vectors = vectorizer.transform(resume_skills)


# similarity_scores[i, j] pairs the i-th resume with the j-th job.
similarity_scores = cosine_similarity(resume_vectors, job_vectors)


for resume_name, resume_row in zip(resumes.keys(), similarity_scores):
    print(f"\nSimilarity Scores for {resume_name}:")
    for job_title, score in zip(job_data['Job Title'], resume_row):
        print(f"  Job: {job_title} - Score: {score:.2f}")


In [None]:

def extract_experience(text):
    """Extract a number of years of experience from free text.

    Searches case-insensitively for the first integer that is directly followed
    (optionally via whitespace and/or a ``+``) by ``year``, ``years``, ``yr``
    or ``yrs``. For a range such as ``"5 - 10 yrs"`` this is the number next to
    the unit, i.e. ``10``.

    Args:
        text: The text to search. Any non-string value (``None``, the float NaN
            pandas uses for a missing cell, ...) is treated as "no experience
            stated".

    Returns:
        int: The matched number, or 0 when nothing matches or ``text`` is not a
        string.
    """
    # Missing DataFrame cells arrive as float NaN, which has no .lower().
    if not isinstance(text, str):
        return 0

    # Also accept the singular units and a "+" suffix ("1 year", "5+ years").
    match = re.search(r'(\d+)\s*\+?\s*(?:years?|yrs?)', text.lower())
    return int(match.group(1)) if match else 0


# Parse the required years for every posting. Missing or non-string cells are
# converted to text first so they yield 0 instead of raising inside
# extract_experience.
job_data['experience_required'] = (
    job_data['Job Experience Required'].fillna('').astype(str).apply(extract_experience)
)


# Parse the years of experience stated in each resume.
# NOTE(review): `resumes` holds token lists at this point, so the text searched
# here has already had punctuation stripped by preprocess_text; values such as
# decimals or ranges may therefore be read differently from the raw resume --
# confirm this is acceptable.
resume_experience = {
    resume_file: extract_experience(' '.join(tokens))
    for resume_file, tokens in resumes.items()
}


print("\nExtracted Job Experience:")
print(job_data[['Job Title', 'experience_required']].head())

print("\nExtracted Resume Experience:")
for name, exp in resume_experience.items():
    print(f"{name}: {exp} years")


In [None]:

# Relative weight of skill similarity vs. experience fit in the combined score.
SKILL_WEIGHT = 0.7
EXPERIENCE_WEIGHT = 0.3

# Maps job title -> list of (resume name, total score), best match first.
recommendations = {}

# Row i of similarity_scores belongs to the i-th resume in dict order.
resume_names = list(resumes.keys())

# Enumerate the jobs positionally: column j of similarity_scores is the j-th
# row of job_data, which only equals the DataFrame index label while the index
# is the default 0..n-1 range.
job_rows = zip(job_data['Job Title'], job_data['experience_required'])
for job_position, (job_title, job_experience) in enumerate(job_rows):
    scores = []

    for resume_index, resume_name in enumerate(resume_names):
        skill_score = similarity_scores[resume_index, job_position]
        resume_experience_years = resume_experience[resume_name]

        if job_experience > 0:
            # Fraction of the requirement that is met, capped at 1.
            experience_score = min(resume_experience_years, job_experience) / job_experience
        else:
            # No requirement stated: every candidate satisfies it fully.
            experience_score = 1.0
        total_score = (SKILL_WEIGHT * skill_score) + (EXPERIENCE_WEIGHT * experience_score)
        scores.append((resume_name, total_score))

    # Several postings can share a title; give repeats a distinct key so a
    # later posting does not silently replace an earlier one. Titles are
    # stored as strings so later string handling works for missing titles too.
    job_key = str(job_title)
    if job_key in recommendations:
        job_key = f"{job_key} [row {job_position}]"

    recommendations[job_key] = sorted(scores, key=lambda x: x[1], reverse=True)


for job_title, ranked_resumes in recommendations.items():
    print(f"\nJob: {job_title}")
    for resume_name, score in ranked_resumes:
        print(f"  Resume: {resume_name} - Score: {score:.2f}")


In [None]:
import pandas as pd

# Flatten the per-job rankings into one record per (job, resume) pair.
recommendation_data = [
    {'Job Title': job_title, 'Resume Name': resume_name, 'Match Score': score}
    for job_title, ranked_resumes in recommendations.items()
    for resume_name, score in ranked_resumes
]

# Build the table once from the records and write it to disk without the index.
recommendation_df = pd.DataFrame(recommendation_data)
recommendation_df.to_csv('job_recommendations.csv', index=False)

print("\nSaved Recommendations Preview:")
print(recommendation_df.head())


In [None]:
import matplotlib.pyplot as plt

def plot_top_recommendations(recommendations, top_n=5):
    """Draw one horizontal bar chart of the best-scoring resumes per job.

    Args:
        recommendations: Mapping of job title -> list of
            ``(resume_name, score)`` tuples, already ordered best first.
        top_n: Maximum number of resumes shown per job.

    Returns:
        None. One figure is shown for each job that has at least one resume;
        jobs with an empty ranking are skipped.
    """
    for job_title, ranked_resumes in recommendations.items():
        top_resumes = ranked_resumes[:top_n]
        if not top_resumes:
            # Nothing to plot for this job.
            continue

        resume_names = [name for name, _ in top_resumes]
        scores = [score for _, score in top_resumes]

        # Coerce to str so a non-string title cannot break .replace(), and
        # flatten line breaks so the title stays on one line.
        title_text = str(job_title).replace('\n', ' ').replace('\r', ' ')

        fig, ax = plt.subplots(figsize=(8, 5))
        ax.barh(resume_names, scores, color='skyblue')
        # barh places the first entry at the bottom; flip the axis so the
        # best match is the top bar.
        ax.invert_yaxis()
        ax.set_xlabel('Match Score')
        # Report the number of bars actually drawn, which can be below top_n.
        ax.set_title(f'Top {len(top_resumes)} Recommendations for {title_text}')
        plt.show()
        # Release the figure so one chart per job does not accumulate in memory.
        plt.close(fig)


# Draw one chart per job in `recommendations`, showing at most five resumes each.
plot_top_recommendations(recommendations, top_n=5)