In [1]:
import pandas as pd
import numpy as np

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


In [2]:
import pandas as pd
import random

# Synthetic-data configuration.
# A fixed seed makes the generated subject lists identical on every
# Restart & Run All; a private generator is used so the global `random`
# state that other cells may rely on is left untouched.
SEED = 42
N_MENTORS = 20
N_ASPIRANTS = 100
subject_rng = random.Random(SEED)

# Define subjects
all_subjects = ['Law', 'Politics', 'English', 'Constitution', 'Current Affairs',
                'History', 'Economics', 'Reasoning', 'Quantitative Aptitude', 
                'Legal Aptitude', 'Sociology', 'International Law']

# Generate mentors: ids start at 1001, each mentor gets 2-4 distinct subjects
mentors = pd.DataFrame({
    'mentor_id': range(1001, 1001 + N_MENTORS),
    'name': [f'Mentor_{i}' for i in range(1, N_MENTORS + 1)],
    'subjects': [
        subject_rng.sample(all_subjects, k=subject_rng.randint(2, 4))
        for _ in range(N_MENTORS)
    ]
})

# Generate aspirants: ids start at 2001, each aspirant prefers 2-4 distinct subjects
aspirants = pd.DataFrame({
    'aspirant_id': range(2001, 2001 + N_ASPIRANTS),
    'name': [f'Aspirant_{i}' for i in range(1, N_ASPIRANTS + 1)],
    'preferred_subjects': [
        subject_rng.sample(all_subjects, k=subject_rng.randint(2, 4))
        for _ in range(N_ASPIRANTS)
    ]
})


In [3]:
def split_subject_list(df, entity_id, subject_column):
    """Flatten list-valued subjects into one row per (entity, subject) pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding at least the two named columns.
    entity_id : str
        Name of the identifier column to carry along.
    subject_column : str
        Name of the column whose list-like cells are expanded.

    Returns
    -------
    pandas.DataFrame
        Only ``entity_id`` and ``subject_column``; each list element gets its
        own row and the original row label is repeated for every element.
    """
    wanted_columns = [entity_id, subject_column]
    return df.loc[:, wanted_columns].explode(subject_column)

def generate_subject_matrix(df, unique_id, subject_col):
    """Build a per-entity subject indicator matrix.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with an identifier column and a list-valued subject column.
    unique_id : str
        Name of the identifier column; becomes the index of the result.
    subject_col : str
        Name of the list-valued subject column.

    Returns
    -------
    pandas.DataFrame
        One row per ``unique_id`` value and one column per subject that occurs
        in ``df``. Each cell counts how often that subject appears for the
        entity (0 or 1 when an entity's list has no repeated subject).
    """
    long_form = split_subject_list(df, unique_id, subject_col)
    indicators = pd.get_dummies(long_form[subject_col])

    # Put the id back next to its indicator rows, then collapse the
    # one-row-per-subject layout to one row per entity by summing.
    return (
        pd.concat([long_form[unique_id], indicators], axis=1)
        .groupby(unique_id)
        .sum()
    )


In [4]:
# Create subject vectors using the given function, then pin both matrices to
# the same column set. get_dummies only emits a column for subjects that
# actually occur, so a subject missing from one group would otherwise leave the
# two matrices with different features and break (or silently skew) the
# similarity and KNN steps. Sorting matches the column order get_dummies
# produces, so nothing changes when every subject is present.
subject_columns = sorted(all_subjects)

mentor_vectors = generate_subject_matrix(
    mentors, 'mentor_id', 'subjects'
).reindex(columns=subject_columns, fill_value=0)

aspirant_vectors = generate_subject_matrix(
    aspirants, 'aspirant_id', 'preferred_subjects'
).reindex(columns=subject_columns, fill_value=0)


In [5]:
# Inspect the mentor matrix: one row per mentor_id, one column per subject
mentor_vectors

Unnamed: 0_level_0,Constitution,Current Affairs,Economics,English,History,International Law,Law,Legal Aptitude,Politics,Quantitative Aptitude,Reasoning,Sociology
mentor_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1001,0,0,0,0,0,1,0,1,1,0,1,0
1002,0,0,1,0,0,0,1,1,0,0,0,1
1003,0,0,0,0,0,0,0,0,1,0,0,1
1004,1,1,0,0,0,1,0,0,0,1,0,0
1005,0,0,0,0,0,0,1,0,1,0,1,1
1006,0,0,0,0,0,0,1,0,0,1,0,0
1007,0,0,1,1,0,0,0,0,0,1,0,1
1008,0,0,0,0,1,0,0,1,0,0,0,0
1009,0,0,1,0,0,0,0,1,0,1,1,0
1010,0,0,1,0,1,0,0,1,0,0,0,0


In [6]:
# Inspect the aspirant matrix: one row per aspirant_id, one column per subject
aspirant_vectors

Unnamed: 0_level_0,Constitution,Current Affairs,Economics,English,History,International Law,Law,Legal Aptitude,Politics,Quantitative Aptitude,Reasoning,Sociology
aspirant_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2001,1,0,0,0,0,0,1,0,0,1,0,1
2002,0,0,1,0,0,0,0,0,0,0,0,1
2003,0,1,0,0,0,1,0,0,1,0,0,1
2004,0,0,1,1,0,1,0,0,0,0,0,0
2005,0,0,0,0,0,0,0,0,0,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...
2096,0,0,0,0,0,0,1,0,0,0,0,1
2097,1,0,0,0,1,0,1,1,0,0,0,0
2098,0,0,0,1,1,0,0,0,0,0,0,0
2099,0,0,0,0,0,0,1,1,0,0,0,0


# Cosine Similarity

In [7]:
from sklearn.metrics.pairwise import cosine_similarity


In [8]:
def recommend_top_mentor(aspirant_matrix, mentor_matrix):
    """Pick the single most similar mentor for every aspirant.

    Parameters
    ----------
    aspirant_matrix : pandas.DataFrame
        Subject matrix indexed by aspirant id, one column per subject.
    mentor_matrix : pandas.DataFrame
        Subject matrix indexed by mentor id, one column per subject.

    Returns
    -------
    pandas.DataFrame
        Columns ``aspirant_id``, ``recommended_mentor`` and
        ``similarity_score`` (cosine similarity), one row per aspirant in
        ``aspirant_matrix`` order. When several mentors tie for the best
        score, the first one in ``mentor_matrix`` order is returned
        (``np.argmax`` keeps the first maximum).
    """
    # Score on a shared, identically ordered feature set. cosine_similarity
    # pairs features by position, so frames whose subject columns differ must
    # be aligned by label first; a subject missing on one side counts as 0.
    shared_subjects = aspirant_matrix.columns.union(mentor_matrix.columns)
    aspirant_aligned = aspirant_matrix.reindex(columns=shared_subjects, fill_value=0)
    mentor_aligned = mentor_matrix.reindex(columns=shared_subjects, fill_value=0)

    similarity_scores = cosine_similarity(aspirant_aligned, mentor_aligned)
    best_mentor_idx = np.argmax(similarity_scores, axis=1)
    row_positions = np.arange(len(best_mentor_idx))

    recommendations = pd.DataFrame({
        'aspirant_id': aspirant_matrix.index.to_numpy(),
        'recommended_mentor': mentor_matrix.index.to_numpy()[best_mentor_idx],
        # Fancy indexing reads each row's score at its winning column.
        'similarity_score': similarity_scores[row_positions, best_mentor_idx],
    })

    return recommendations


In [9]:
# Best-matching mentor for each aspirant, by cosine similarity
recommendations = recommend_top_mentor(aspirant_vectors, mentor_vectors)

recommendations


Unnamed: 0,aspirant_id,recommended_mentor,similarity_score
0,2001,1014,0.866025
1,2002,1002,0.707107
2,2003,1018,0.750000
3,2004,1007,0.577350
4,2005,1007,0.707107
...,...,...,...
95,2096,1014,0.816497
96,2097,1008,0.707107
97,2098,1008,0.500000
98,2099,1012,0.816497


# K-Nearest Neighbors (KNN)


In [10]:
from sklearn.neighbors import NearestNeighbors

# Cosine-distance neighbour model fitted on the mentor subject vectors;
# fit() returns the estimator itself, so construction and fitting can be chained.
knn_model = NearestNeighbors(metric='cosine', n_neighbors=3).fit(mentor_vectors)

def knn_recommendation(asp_matrix, men_matrix, k=3):
    """Return the k most similar mentors for every aspirant.

    Parameters
    ----------
    asp_matrix : pandas.DataFrame
        Subject matrix indexed by aspirant id.
    men_matrix : pandas.DataFrame
        Subject matrix indexed by mentor id; the neighbour search runs over
        exactly these rows.
    k : int, default 3
        Number of mentors to return per aspirant. Capped at the number of
        mentors available.

    Returns
    -------
    pandas.DataFrame
        Columns ``aspirant_id``, ``mentor_id`` and ``similarity``
        (1 - cosine distance). One row per (aspirant, mentor) pair, aspirants
        in ``asp_matrix`` order and each aspirant's mentors in the order
        ``kneighbors`` returns them. Empty (columns only) when either matrix
        has no rows.
    """
    result_columns = ['aspirant_id', 'mentor_id', 'similarity']
    if len(asp_matrix) == 0 or len(men_matrix) == 0:
        return pd.DataFrame(columns=result_columns)

    # Never ask for more neighbours than there are mentors to choose from.
    k = min(k, len(men_matrix))

    # Fit on the mentors that were actually passed in. Using a model fitted
    # elsewhere would pair its neighbour positions with men_matrix's index and
    # return wrong mentor ids whenever the two differ.
    model = NearestNeighbors(n_neighbors=k, metric='cosine').fit(men_matrix)
    distances, indices = model.kneighbors(asp_matrix)

    # Both arrays are (n_aspirants, k); flattening row by row keeps each
    # aspirant's k matches contiguous, matching the repeated aspirant ids.
    return pd.DataFrame({
        'aspirant_id': asp_matrix.index.repeat(k).to_numpy(),
        'mentor_id': men_matrix.index.to_numpy()[indices.ravel()],
        'similarity': 1 - distances.ravel(),  # cosine distance -> similarity
    })


In [11]:
# Three closest mentors per aspirant, one row per (aspirant, mentor) pair
knn_results = knn_recommendation(
    asp_matrix=aspirant_vectors,
    men_matrix=mentor_vectors,
    k=3,
)
knn_results



Unnamed: 0,aspirant_id,mentor_id,similarity
0,2001,1014,0.866025
1,2001,1006,0.707107
2,2001,1012,0.577350
3,2002,1002,0.707107
4,2002,1007,0.707107
...,...,...,...
295,2099,1002,0.707107
296,2099,1006,0.500000
297,2100,1006,0.816497
298,2100,1012,0.666667
