In [35]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [36]:
file_path = 'Downloads/CLAT_Mentor_Recommendation_Dataset.xlsx'
aspirants_df = pd.read_excel(file_path, sheet_name='Aspirants')
mentors_df = pd.read_excel(file_path, sheet_name='Mentors')

In [37]:
aspirants_df.head()

Unnamed: 0,AspirantID,Name,Age,City,PreferredSubjects,StudyTimePerDay (hrs),EnglishProficiency (1-10),CurrentPrepStage,Preffered College
0,A0000,Aspirant_0,16,Delhi,"English, Logical Reasoning, Legal Reasoning",3,2,Completed Mock Tests,NLU Delhi
1,A0001,Aspirant_1,20,Delhi,"GK, Legal Reasoning, Maths",2,4,Just Started,NUJS
2,A0002,Aspirant_2,20,Chennai,Maths,3,9,Midway,NALSAR
3,A0003,Aspirant_3,17,Hyderabad,"English, Legal Reasoning, GK",7,7,Midway,NLU Delhi
4,A0004,Aspirant_4,18,Mumbai,English,2,2,Midway,NLSIU


In [38]:
mentors_df.head()

Unnamed: 0,MentorID,Name,Rank,City,StrongSubjects,AvgMentoringHours/week,EnglishProficiency (1-10),CurrentCollege
0,M0000,Mentor_0,249,Chennai,"Legal Reasoning, GK, English",7,10,NLU Jodhpur
1,M0001,Mentor_1,189,Pune,"Legal Reasoning, Logical Reasoning, GK",4,7,NUJS
2,M0002,Mentor_2,460,Mumbai,"Legal Reasoning, Maths",5,9,NLSIU
3,M0003,Mentor_3,357,Mumbai,"Logical Reasoning, Maths",3,9,NUJS
4,M0004,Mentor_4,497,Kolkata,"Logical Reasoning, GK, Maths",9,10,NLSIU


In [39]:
aspirants_df['Profile'] = (
    aspirants_df['PreferredSubjects'] + " " +
    aspirants_df['City'] + " " +
    aspirants_df['EnglishProficiency (1-10)'].astype(str) + " " +
    aspirants_df['Preffered College'].astype(str)
)

mentors_df['Profile'] = (
    mentors_df['StrongSubjects'] + " " +
    mentors_df['City'] + " " +
    mentors_df['EnglishProficiency (1-10)'].astype(str)+ " " +
    mentors_df['CurrentCollege']
)

In [40]:
aspirants_df['Profile']

0      English, Logical Reasoning, Legal Reasoning De...
1                GK, Legal Reasoning, Maths Delhi 4 NUJS
2                                 Maths Chennai 9 NALSAR
3      English, Legal Reasoning, GK Hyderabad 7 NLU D...
4                                 English Mumbai 2 NLSIU
                             ...                        
995                                    GK Delhi 8 NALSAR
996    Logical Reasoning, Legal Reasoning Bhopal 4 NA...
997     Legal Reasoning, GK, English Mumbai 10 NLU Delhi
998      Logical Reasoning, English, Maths Pune 9 NALSAR
999                       Legal Reasoning Bhopal 6 NLSIU
Name: Profile, Length: 1000, dtype: object

In [41]:
vectorizer = CountVectorizer().fit(aspirants_df['Profile'].tolist() + mentors_df['Profile'].tolist())
aspirant_vectors = vectorizer.transform(aspirants_df['Profile'])
mentor_vectors = vectorizer.transform(mentors_df['Profile'])

In [42]:
similarity_matrix = cosine_similarity(aspirant_vectors, mentor_vectors)

In [43]:
recommendations = []

for i, similarities in enumerate(similarity_matrix):
    top_mentors_list = similarities.argsort()[-3:][::-1]  
    top_mentors = mentors_df.iloc[top_mentors_list][['MentorID', 'Name', 'CurrentCollege']]
    aspirant_id = aspirants_df.iloc[i]['AspirantID']
    
    for _, row in top_mentors.iterrows():
        recommendations.append([aspirant_id, row['MentorID'], row['Name'], row['CurrentCollege']])

recommendations_df = pd.DataFrame(recommendations, columns=[
    'AspirantID', 'RecommendedMentorID', 'MentorName', 'MentorCollege'
])

In [44]:
recommendations

[['A0000', 'M0126', 'Mentor_126', 'NLU Delhi'],
 ['A0000', 'M0036', 'Mentor_36', 'NLU Delhi'],
 ['A0000', 'M0129', 'Mentor_129', 'NLU Delhi'],
 ['A0001', 'M0024', 'Mentor_24', 'NUJS'],
 ['A0001', 'M0157', 'Mentor_157', 'NUJS'],
 ['A0001', 'M0018', 'Mentor_18', 'NUJS'],
 ['A0002', 'M0159', 'Mentor_159', 'NALSAR'],
 ['A0002', 'M0181', 'Mentor_181', 'NALSAR'],
 ['A0002', 'M0112', 'Mentor_112', 'NALSAR'],
 ['A0003', 'M0095', 'Mentor_95', 'NLU Delhi'],
 ['A0003', 'M0027', 'Mentor_27', 'NLU Delhi'],
 ['A0003', 'M0109', 'Mentor_109', 'NALSAR'],
 ['A0004', 'M0081', 'Mentor_81', 'NLSIU'],
 ['A0004', 'M0100', 'Mentor_100', 'NLSIU'],
 ['A0004', 'M0051', 'Mentor_51', 'NLSIU'],
 ['A0005', 'M0119', 'Mentor_119', 'NUJS'],
 ['A0005', 'M0122', 'Mentor_122', 'NUJS'],
 ['A0005', 'M0058', 'Mentor_58', 'NUJS'],
 ['A0006', 'M0033', 'Mentor_33', 'NLU Delhi'],
 ['A0006', 'M0066', 'Mentor_66', 'NLU Delhi'],
 ['A0006', 'M0048', 'Mentor_48', 'NLU Delhi'],
 ['A0007', 'M0085', 'Mentor_85', 'NLU Delhi'],
 ['A0007',