In [1]:
import pandas as pd  
import numpy as np
import pickle

In [2]:
std = pd.read_csv("Data\\students.csv")
std

Unnamed: 0,Student_ID,Name,Skills,Education,Experience
0,1,Amit Verma,"Python, Machine Learning, Cybersecurity, Web D...",M.Tech,5+ years
1,2,Rahul Sharma,"Python, Cloud Computing, Java, Web Development...",M.Tech,3 years
2,3,Ananya Chopra,"Java, DevOps, C++, Cloud Computing",B.Tech,2 years
3,4,Ananya Verma,"Machine Learning, Data Science, SQL, DevOps, C...",M.Sc,2 years
4,5,Rohan Kulkarni,"Cloud Computing, SQL, Cybersecurity, DevOps",B.Sc,3 years
...,...,...,...,...,...
95,96,Kavya Nair,"Python, Data Science, Web Development, Cyberse...",M.Tech,2 years
96,97,Kavya Mehta,"Machine Learning, Web Development, Cloud Compu...",B.Sc,2 years
97,98,Rohan Bose,"Cybersecurity, DevOps, Web Development, Cloud ...",B.Tech,3 years
98,99,Priya Verma,"Cybersecurity, C++, Java, DevOps, SQL",M.Sc,3 years


In [3]:
jobs = pd.read_csv("Data\\jobs.csv")
jobs

Unnamed: 0,Job_ID,Company,Job_Role,Required_Skills,Experience_Level
0,1,Tesla,Cloud Engineer,"C++, Machine Learning, Cloud Computing",3 years
1,2,Microsoft,Software Engineer,"Cloud Computing, Cybersecurity",Fresher
2,3,Infosys,ML Engineer,"Web Development, C++",2 years
3,4,Oracle,Data Analyst,"Data Science, DevOps, Java",Fresher
4,5,Microsoft,Cloud Engineer,"Cybersecurity, Cloud Computing, SQL",5+ years
5,6,Oracle,Software Engineer,"Cloud Computing, Web Development",5+ years
6,7,Amazon,Cybersecurity Specialist,"Java, DevOps",5+ years
7,8,Infosys,Data Analyst,"Data Science, Python, SQL, Java",2 years
8,9,TCS,ML Engineer,"DevOps, Java, Python",5+ years
9,10,Facebook,Software Engineer,"Machine Learning, Cloud Computing, Java",3 years


In [4]:
applications = pd.read_csv("Data\\applications.csv")
applications

Unnamed: 0,Student_ID,Job_ID,Applied_Company,Job_Role,Application_Status
0,69,7,Amazon,Cybersecurity Specialist,Rejected
1,37,9,TCS,ML Engineer,Rejected
2,72,47,Facebook,Cybersecurity Specialist,Pending
3,60,8,Infosys,Data Analyst,Pending
4,45,21,Amazon,Cloud Engineer,Rejected
...,...,...,...,...,...
195,12,1,Tesla,Cloud Engineer,Pending
196,9,33,Oracle,Cybersecurity Specialist,Accepted
197,70,7,Amazon,Cybersecurity Specialist,Pending
198,34,45,Microsoft,Software Engineer,Accepted


In [5]:
experience_maping = {
                        "Fresher" :  0,
                        "1 Year" :   1,
                        "2 Years" :  2,
                        "3 Years" :  3,
                        "4 Years" :  4,
                        "5+ Years":  5
}

In [6]:
#apply job maping
std["Experience"] = std["Experience"].map(experience_maping)
jobs['Experience_Level'] = jobs["Experience_Level"].map(experience_maping)

#Makes it easier to compare student experience with job requirements

In [7]:
#convert text based skill into numerical formation 
#we use tf-idf vectorization of the skils
from sklearn.feature_extraction.text import TfidfVectorizer
vectorizer = TfidfVectorizer()
std_skills_matrix = vectorizer.fit_transform(std['Skills'])
job_skills_matrix = vectorizer.fit_transform(jobs['Required_Skills'])

In [8]:
#now create common matrix for vectorizer to use help of cosine function
from sklearn.metrics.pairwise import cosine_similarity
similarity_matrix = cosine_similarity(std_skills_matrix,job_skills_matrix)

In [9]:
def recommend_students(job_id,top_n = 5):
    #top_n = 5 best matched students for the job
    job_idx = jobs[jobs['Job_ID']==job_id].index[0]
    similarity_scores = similarity_matrix[:, job_idx]
    """The similarity_matrix contains cosine similarity scores for every student-job combination.
Extracts the column for the specific job to get similarity scores for all students"""

    # Add Similarity Scores to the Students DataFrame
    std["Match_Score"] = similarity_scores
    #Creates a new column "Match_Score" in students_df.
    #Each student gets their similarity score with the selected job.


    #now it is time to filteration eligable  students 
    eligable_students = std[std['Experience'] >= jobs.loc[job_id,'Experience_Level']]

    #Sort Students by Best Match Score & Return Top N
    return eligable_students.sort_values(by="Match_Score", ascending=False).head(top_n)
    #Sorts eligible students by "Match_Score" in descending order (highest similarity first).
    #Returns top_n best-matched students (default 5).

    



In [10]:
job_id = 3  # Example job ID
recommended_students = recommend_students(job_id)
print(recommended_students[["Name", "Skills", "Experience", "Match_Score"]])


            Name                                             Skills  \
60   Ananya Iyer             Machine Learning, C++, Web Development   
46  Aarav Sharma  Web Development, Cloud Computing, SQL, Machine...   
57     Amit Iyer  Cloud Computing, Machine Learning, Web Develop...   
94    Neha Reddy  Web Development, DevOps, Data Science, Machine...   
20    Neha Patel                   C++, Cybersecurity, DevOps, Java   

    Experience  Match_Score  
60         0.0     0.730698  
46         0.0     0.572807  
57         0.0     0.564606  
94         0.0     0.544497  
20         0.0     0.000000  


In [11]:
with open("model.pkl", "wb") as f:
    pickle.dump((vectorizer, similarity_matrix, std, jobs), f)