In [None]:
import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Hard-coded aspirant profiles. Each tuple below is
# (preferred subjects, target college, prep level, learning style);
# the aspirant's id is the tuple's 0-based position in the list.
aspirants = pd.DataFrame([
    dict(
        id=aspirant_id,
        preferred_subjects=subjects,
        target_college=college,
        prep_level=level,
        learning_style=style_pref,
    )
    for aspirant_id, (subjects, college, level, style_pref) in enumerate([
        (["Constitutional Law", "Torts"], "NLSIU", "Intermediate", "Visual"),
        (["Criminal Law", "Family Law"], "NLU Delhi", "Beginner", "Auditory"),
        (["Contract Law", "IP Law"], "GNLU", "Advanced", "Reading/Writing"),
        (["Torts", "Criminal Law"], "NLU Jodhpur", "Intermediate", "Kinesthetic"),
        (["Family Law", "Constitutional Law"], "NALSAR", "Beginner", "Visual"),
        (["Environmental Law"], "NLIU Bhopal", "Advanced", "Auditory"),
        (["Criminal Law", "IP Law"], "NLU Odisha", "Intermediate", "Reading/Writing"),
        (["Cyber Law", "Torts"], "RMLNLU", "Beginner", "Visual"),
        (["International Law"], "NUJS", "Advanced", "Kinesthetic"),
        (["Company Law"], "CNLU", "Intermediate", "Auditory"),
        (["Contract Law", "IP Law"], "NLU Assam", "Beginner", "Visual"),
        (["Environmental Law", "Cyber Law"], "NLU Patna", "Advanced", "Reading/Writing"),
        (["Constitutional Law"], "NLU Kochi", "Beginner", "Kinesthetic"),
        (["Criminal Law", "Cyber Law"], "NLU Ranchi", "Intermediate", "Visual"),
        (["Family Law"], "NLU Shimla", "Advanced", "Auditory"),
        (["Contract Law", "International Law"], "NLU Tripura", "Intermediate", "Kinesthetic"),
        (["IP Law"], "NLU Nagpur", "Beginner", "Reading/Writing"),
        (["Torts", "Cyber Law"], "NLU Punjab", "Intermediate", "Auditory"),
        (["Constitutional Law", "International Law"], "NLU Sikkim", "Advanced", "Visual"),
        (["Company Law", "Family Law"], "NLU Meghalaya", "Beginner", "Kinesthetic"),
        (["Environmental Law", "Criminal Law"], "NLU Tamil Nadu", "Intermediate", "Reading/Writing"),
    ])
])

In [None]:
# Hard-coded mentor profiles. Each tuple below is
# (expertise subjects, college, prep level mastered, teaching style).
# Ids start at 100 and names run "Mentor A", "Mentor B", ... in list order.
mentors = pd.DataFrame([
    dict(
        id=100 + offset,
        name="Mentor " + chr(ord("A") + offset),
        expertise_subjects=subjects,
        college=college,
        prep_level_mastery=level,
        teaching_style=style_pref,
    )
    for offset, (subjects, college, level, style_pref) in enumerate([
        (["Constitutional Law", "Criminal Law"], "NLSIU", "Advanced", "Visual"),
        (["Torts", "Contract Law"], "NLU Delhi", "Intermediate", "Auditory"),
        (["IP Law", "Contract Law"], "GNLU", "Advanced", "Reading/Writing"),
        (["Criminal Law", "Cyber Law"], "NLU Jodhpur", "Beginner", "Kinesthetic"),
        (["Environmental Law"], "NALSAR", "Intermediate", "Auditory"),
        (["Family Law", "Torts"], "NLIU Bhopal", "Beginner", "Visual"),
        (["IP Law", "Cyber Law"], "NLU Odisha", "Advanced", "Reading/Writing"),
        (["International Law", "Constitutional Law"], "RMLNLU", "Intermediate", "Kinesthetic"),
        (["Company Law"], "NUJS", "Beginner", "Auditory"),
        (["Contract Law"], "CNLU", "Advanced", "Visual"),
        (["Cyber Law"], "NLU Assam", "Intermediate", "Reading/Writing"),
        (["Criminal Law"], "NLU Patna", "Beginner", "Kinesthetic"),
        (["Family Law"], "NLU Kochi", "Advanced", "Visual"),
        (["Constitutional Law", "Torts"], "NLU Ranchi", "Intermediate", "Auditory"),
        (["IP Law", "International Law"], "NLU Shimla", "Beginner", "Kinesthetic"),
        (["Environmental Law", "Company Law"], "NLU Tripura", "Advanced", "Visual"),
        (["Cyber Law", "Criminal Law"], "NLU Nagpur", "Intermediate", "Auditory"),
        (["Contract Law", "IP Law"], "NLU Punjab", "Beginner", "Visual"),
        (["Family Law", "International Law"], "NLU Sikkim", "Advanced", "Reading/Writing"),
        (["Torts", "Company Law"], "NLU Meghalaya", "Intermediate", "Kinesthetic"),
        (["Environmental Law", "Cyber Law"], "NLU Tamil Nadu", "Beginner", "Visual"),
    ])
])

In [None]:
def preprocess(df, subject_col, college_col, level_col, style_col,
               subject_classes=None, college_categories=None,
               level_categories=None, style_categories=None):
    """Encode a profile table into a single numeric feature frame.

    The subject column (a list of labels per row) is multi-hot encoded and the
    college, level and style columns are one-hot encoded; the four encoded
    pieces are concatenated column-wise in that order.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the profile columns named below.
    subject_col, college_col, level_col, style_col : str
        Names of the columns to encode.
    subject_classes, college_categories, level_categories, style_categories :
        sequence of str, optional
        Fixed vocabulary (and column order) for the corresponding column.
        When omitted, the vocabulary is learned from ``df`` alone, which is
        the original behaviour. Pass the SAME vocabularies when encoding two
        tables that will be compared positionally, otherwise their columns
        are not guaranteed to line up.

    Returns
    -------
    pandas.DataFrame
        One row per row of ``df`` (with a fresh 0..n-1 index) and one column
        per subject class / one-hot category.

    Raises
    ------
    ValueError
        Raised by ``OneHotEncoder`` if an explicit category list is given and
        ``df`` contains a value outside it.
    """
    # `classes=None` lets the binarizer learn the (sorted) classes from df.
    mlb = MultiLabelBinarizer(classes=subject_classes)
    subject_encoded = pd.DataFrame(mlb.fit_transform(df[subject_col]), columns=mlb.classes_)

    def _one_hot(col, categories):
        # One encoder per column; "auto" learns the categories from df.
        ohe = OneHotEncoder(
            categories="auto" if categories is None else [list(categories)],
            sparse_output=False,
        )
        encoded = ohe.fit_transform(df[[col]])
        return pd.DataFrame(encoded, columns=ohe.get_feature_names_out())

    college_encoded = _one_hot(college_col, college_categories)
    level_encoded = _one_hot(level_col, level_categories)
    style_encoded = _one_hot(style_col, style_categories)

    return pd.concat([subject_encoded, college_encoded, level_encoded, style_encoded], axis=1)


# Aspirant and mentor features are compared column-by-column, so both tables
# must be encoded against the same vocabularies: the union of the labels seen
# on either side, in sorted order.
shared_vocab = dict(
    subject_classes=sorted(
        {s for subs in aspirants["preferred_subjects"] for s in subs}
        | {s for subs in mentors["expertise_subjects"] for s in subs}
    ),
    college_categories=sorted(set(aspirants["target_college"]) | set(mentors["college"])),
    level_categories=sorted(set(aspirants["prep_level"]) | set(mentors["prep_level_mastery"])),
    style_categories=sorted(set(aspirants["learning_style"]) | set(mentors["teaching_style"])),
)

aspirant_features = preprocess(
    aspirants, "preferred_subjects", "target_college", "prep_level", "learning_style",
    **shared_vocab,
)
mentor_features = preprocess(
    mentors, "expertise_subjects", "college", "prep_level_mastery", "teaching_style",
    **shared_vocab,
)

# Guard against silently comparing differently-shaped feature spaces.
assert aspirant_features.shape[1] == mentor_features.shape[1], "feature spaces are misaligned"


In [None]:

# Cosine similarity between every aspirant (rows) and every mentor (columns).
similarities = cosine_similarity(aspirant_features.to_numpy(), mentor_features.to_numpy())

# Positions of the three mentors most similar to the first aspirant (row 0),
# best match first. Equal scores are ordered by NumPy's default argsort.
top_indices = similarities[0].argsort()[:-4:-1]

recommended_mentors = mentors[['id', 'name']].iloc[top_indices]
print("🔗 Recommended Mentors:", recommended_mentors, sep="\n")

🔗 Recommended Mentors:
     id      name
13  113  Mentor N
0   100  Mentor A
7   107  Mentor H


In [None]:
import pickle

# Persist the raw profile tables together with their encoded feature frames
# as one dictionary in a single pickle file.
with open("recommender_model.pkl", "wb") as f:
    pickle.dump(
        dict(
            aspirants=aspirants,
            mentors=mentors,
            aspirant_features=aspirant_features,
            mentor_features=mentor_features,
        ),
        f,
    )
