In [3]:
# Step 1: Import Required Libraries
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Step 2: Create Mock Mentor Data
# Mock data is written one mentor per row so each profile can be read
# (and edited) as a unit; the column order is fixed by mentor_columns.
mentor_columns = [
    'mentor_id', 'name', 'subjects',
    'target_college', 'prep_level', 'learning_style',
]
mentor_rows = [
    ('M001', 'Samir Shaikh', 'Legal Aptitude', 'NLSIU', 'Advanced', 'Visual'),
    ('M002', 'Hossainur Rahaman', 'English', 'NALSAR', 'Intermediate', 'Reading'),
    ('M003', 'Agnibha Chowdhury', 'GK Reasoning', 'NUJS', 'Advanced', 'Auditory'),
    ('M004', 'Sumit Thakur', 'Logical Reasoning', 'NLU Delhi', 'Beginner', 'Kinesthetic'),
]
mentors_df = pd.DataFrame(mentor_rows, columns=mentor_columns)

# Show the header and a preview of the table on consecutive lines.
print("Mentors DataFrame:", mentors_df.head(), sep="\n")

Mentors DataFrame:
  mentor_id               name           subjects target_college  \
0      M001       Samir Shaikh     Legal Aptitude          NLSIU   
1      M002  Hossainur Rahaman            English         NALSAR   
2      M003  Agnibha Chowdhury       GK Reasoning           NUJS   
3      M004       Sumit Thakur  Logical Reasoning      NLU Delhi   

     prep_level learning_style  
0      Advanced         Visual  
1  Intermediate        Reading  
2      Advanced       Auditory  
3      Beginner    Kinesthetic  


In [4]:
# Step 3: Create a Sample Aspirant Profile
# The aspirant is described with the same four attributes used for mentors;
# only the subject key differs ('preferred_subjects' instead of 'subjects').
aspirant_profile = dict(
    preferred_subjects='GK Reasoning',
    target_college='NUJS',
    prep_level='Advanced',
    learning_style='Auditory',
)

# Wrap the single profile in a list so it becomes a one-row DataFrame.
aspirant_df = pd.DataFrame([aspirant_profile])

print("Aspirant Profile DataFrame:", aspirant_df.head(), sep="\n")

Aspirant Profile DataFrame:
  preferred_subjects target_college prep_level learning_style
0       GK Reasoning           NUJS   Advanced       Auditory


In [5]:
# Step 4: Combine Relevant Features for Matching
# Each profile is flattened into one space-separated string
# (subject, college, prep level, learning style) so that mentors and the
# aspirant can later be compared as plain text.
shared_fields = ('target_college', 'prep_level', 'learning_style')

mentors_df['combined'] = mentors_df['subjects'].str.cat(
    [mentors_df[field] for field in shared_fields], sep=' '
)
aspirant_df['combined'] = aspirant_df['preferred_subjects'].str.cat(
    [aspirant_df[field] for field in shared_fields], sep=' '
)

print("Mentors Combined Features:", mentors_df[['name', 'combined']].head(), sep="\n")

print("\nAspirant Combined Features:", aspirant_df[['combined']].head(), sep="\n")

Mentors Combined Features:
                name                                          combined
0       Samir Shaikh              Legal Aptitude NLSIU Advanced Visual
1  Hossainur Rahaman               English NALSAR Intermediate Reading
2  Agnibha Chowdhury               GK Reasoning NUJS Advanced Auditory
3       Sumit Thakur  Logical Reasoning NLU Delhi Beginner Kinesthetic

Aspirant Combined Features:
                              combined
0  GK Reasoning NUJS Advanced Auditory


In [6]:
# Step 5: Vectorize Combined Texts
# Mentor texts come first and the aspirant text is appended last, so the
# final row of the resulting matrix is the aspirant.
all_profiles = [*mentors_df['combined'], *aspirant_df['combined']]

# Fit on mentors and aspirant together so both share one vocabulary.
vectorizer = CountVectorizer()
vector_matrix = vectorizer.fit_transform(all_profiles)

print("Vector Matrix Shape:", vector_matrix.shape, sep="\n")

Vector Matrix Shape:
(5, 18)


In [7]:
# Step 6: Calculate Cosine Similarity Between Aspirant and Mentors
# The last row of vector_matrix is the aspirant; every row before it is a
# mentor, in the same order as mentors_df.
aspirant_vector = vector_matrix[-1]
mentor_vectors = vector_matrix[:-1]

# cosine_similarity returns a 1 x n_mentors array; collapse it to 1-D so it
# can be stored as a column.
similarity_scores = cosine_similarity(aspirant_vector, mentor_vectors).ravel()

mentors_df['similarity_score'] = similarity_scores

print("Mentors with Similarity Scores:", mentors_df[['name', 'similarity_score']].head(), sep="\n")

Mentors with Similarity Scores:
                name  similarity_score
0       Samir Shaikh          0.200000
1  Hossainur Rahaman          0.000000
2  Agnibha Chowdhury          1.000000
3       Sumit Thakur          0.182574


In [8]:
# Step 7: Recommend Top 3 Mentors
# Number of mentors to recommend; the header below is derived from it so the
# label and the slice cannot drift apart.
TOP_N = 3

# nlargest returns the TOP_N highest-scoring rows in descending order and,
# with its default keep='first', breaks ties by original row order.
# sort_values(...).head(n) uses an unstable sort by default, so mentors with
# equal scores could otherwise be ranked differently from run to run.
top_mentors = mentors_df.nlargest(TOP_N, 'similarity_score')

print(f"🎯 Top {TOP_N} Mentor Recommendations:")
print(top_mentors[['name', 'subjects', 'target_college', 'similarity_score']])

🎯 Top 3 Mentor Recommendations:
                name           subjects target_college  similarity_score
2  Agnibha Chowdhury       GK Reasoning           NUJS          1.000000
0       Samir Shaikh     Legal Aptitude          NLSIU          0.200000
3       Sumit Thakur  Logical Reasoning      NLU Delhi          0.182574


How the System Can Improve Over Time

- Add a rating column such as `user_rating` to record feedback on each session
- Recalculate final recommendation score as a weighted average of:
    - Content similarity
    - User feedback
- Collect more data over time (e.g., success rates, chat frequency)
- Gradually incorporate NLP-based or GPT-based profiling