### If you have any queries or feedback, please don't hesitate to reach me at jai-ansh.bindra@polytechnique.edu ###

In [13]:
# Imports
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [14]:
# Read the survey workbook into a single DataFrame.
# NOTE: the path below is machine-specific; point it at your own copy of the data.
file_path = '/Users/jaianshsinghbindra/Downloads/Mentor Mentee Algorithm./Sample-Data.xlsx'
df = pd.read_excel(io=file_path)

In [15]:
# Blank out missing cells so that later string operations (e-mail matching,
# interest concatenation) see '' instead of NaN.
df = df.fillna('')

In [16]:
# Separate mentors and mentees
# A row is a mentor when its school e-mail cell contains the literal text
# '@polytechnique.edu'; every other row is treated as a mentee.
#   regex=False -> match the domain literally (as a regex, '.' matches any character)
#   na=False    -> non-text cells count as "no school e-mail" instead of yielding
#                  NaN, which cannot be used as a boolean mask
is_mentor = df['Email @polytechnique.edu'].str.contains(
    '@polytechnique.edu', regex=False, na=False
)
mentors = df[is_mentor].copy()
mentees = df[~is_mentor].copy()

In [17]:
# The allocation step applies per-gender quotas, so both tables must carry a
# 'Gender' column. (Remove this check if you do not want gender to be a factor.)
if not ('Gender' in mentors.columns and 'Gender' in mentees.columns):
    raise ValueError("Gender information is missing in the dataset")

# Helper that flattens one person's survey answers into a single text blob.
def combine_interests(row):
    """Return the four interest-related answers of *row* joined by single spaces.

    *row* is anything indexable by column name (e.g. a DataFrame row). Each
    answer is rendered with its default string formatting, so blank answers
    simply contribute an empty segment.
    """
    interest_columns = (
        'Hobbies (3 choice max)',
        'What genre of music do you like?',
        'What would you like to do in Paris',
        'Regarding the previous question, any plans/ideas in particular?',
    )
    return " ".join(f"{row[column]}" for column in interest_columns)

In [19]:
# Main constructs happen here.
# 1) Give every person one free-text "interest profile" built from their answers.
mentors['combined_interests'] = mentors.apply(combine_interests, axis=1)
mentees['combined_interests'] = mentees.apply(combine_interests, axis=1)

# 2) Count words over mentors and mentees together so both groups share one
#    vocabulary. NOTE: despite its name, `vectorizer` holds the transformed
#    count matrix returned by fit_transform, not the CountVectorizer object.
vectorizer = CountVectorizer().fit_transform(
    [*mentors['combined_interests'], *mentees['combined_interests']]
)
vectors = vectorizer.toarray()

# 3) Mentor rows were fed in first, so cutting at len(mentors) separates the groups.
mentor_vectors, mentee_vectors = np.split(vectors, [len(mentors)])

# 4) similarity_matrix[m, e] is the cosine similarity between mentor m and mentee e.
similarity_matrix = cosine_similarity(mentor_vectors, mentee_vectors)

In [20]:
# Set up the bookkeeping for the allocation, all keyed by mentor full name:
#   mentor_mentee_allocation  -> names of the mentees assigned so far
#   mentor_capacity           -> maximum number of mentees per mentor (5)
#   mentor_gender_constraints -> how many mentees of each gender are assigned so far
mentor_mentee_allocation = {name: list() for name in mentors['Full name']}
mentor_capacity = dict.fromkeys(mentors['Full name'], 5)
mentor_gender_constraints = {name: dict(Male=0, Female=0) for name in mentors['Full name']}

In [21]:
#Actual allocation occurs here.
# Greedy pass over the mentees in table order. Each mentee goes to the most
# similar mentor that still has (a) free capacity and (b) room in the quota for
# the mentee's gender: at most 3 'Male' mentees per mentor, at most 2 for any
# other gender value. If no mentor satisfies both, the gender quota is waived
# and the first mentor with free capacity is used.
unallocated_mentees = []  # mentees that could not be placed because every mentor is full

for i in range(len(mentees)):
    mentee = mentees.iloc[i]
    mentee_name = mentee['Full name']
    mentee_gender = mentee['Gender']
    gender_limit = 3 if mentee_gender == 'Male' else 2
    # Mentor row positions ordered from most to least similar to this mentee.
    sorted_indices = np.argsort(similarity_matrix[:, i])[::-1]
    allocated = False

    for idx in sorted_indices:
        mentor_name = mentors.iloc[idx]['Full name']
        if len(mentor_mentee_allocation[mentor_name]) >= mentor_capacity[mentor_name]:
            continue
        gender_counts = mentor_gender_constraints[mentor_name]
        # .get(..., 0): a gender value other than 'Male'/'Female' (including the ''
        # left by fillna for a missing answer) must not raise KeyError.
        if gender_counts.get(mentee_gender, 0) >= gender_limit:
            continue
        mentor_mentee_allocation[mentor_name].append(mentee_name)
        gender_counts[mentee_gender] = gender_counts.get(mentee_gender, 0) + 1
        allocated = True
        break

    if not allocated:
        # If not allocated based on similarity, allocate to any mentor with available capacity
        for mentor_name in mentor_mentee_allocation:
            if len(mentor_mentee_allocation[mentor_name]) < mentor_capacity[mentor_name]:
                gender_counts = mentor_gender_constraints[mentor_name]
                mentor_mentee_allocation[mentor_name].append(mentee_name)
                gender_counts[mentee_gender] = gender_counts.get(mentee_gender, 0) + 1
                allocated = True
                break

    if not allocated:
        # Every mentor is at capacity: record the mentee instead of dropping them silently.
        unallocated_mentees.append(mentee_name)

if unallocated_mentees:
    print(f"Warning: no mentor capacity left for: {unallocated_mentees}")

In [22]:
#You display/print the results here.
# The loop variable is deliberately NOT called `mentees`: that name is the mentee
# DataFrame, and rebinding it to a list here would break any re-run of the
# allocation cell.
for mentor, assigned_mentees in mentor_mentee_allocation.items():
    print(f"Mentor: {mentor}, Mentees: {assigned_mentees}")

Mentor: Alice Smith, Mentees: ['Eve Davis']
Mentor: Bob Johnson, Mentees: ['Frank Moore', 'Jack Taylor']
Mentor: Carol White, Mentees: ['Grace Lee', 'Ivy Martinez']
Mentor: David Brown, Mentees: ['Henry Wilson']


In [24]:
# This cell repeats the whole pipeline above in a single cell, with a few very minor adjustments to fit the specifics of my data.
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the Excel file
# NOTE: the path below is machine-specific; point it at your own copy of the data.
file_path = '/Users/jaianshsinghbindra/Downloads/Mentor Mentee Algorithm./Sample-Data.xlsx'
df = pd.read_excel(file_path)

# Handle missing values by replacing with empty strings
df.fillna('', inplace=True)

# Separate mentors and mentees
# A row is a mentor when its school e-mail cell contains the literal text
# '@polytechnique.edu'; every other row is treated as a mentee.
#   regex=False -> match the domain literally (as a regex, '.' matches any character)
#   na=False    -> non-text cells count as "no school e-mail" instead of yielding
#                  NaN, which cannot be used as a boolean mask
is_mentor = df['Email @polytechnique.edu'].str.contains(
    '@polytechnique.edu', regex=False, na=False
)
mentors = df[is_mentor].copy()
mentees = df[~is_mentor].copy()

# Ensure gender information is available (the allocation below applies per-gender quotas)
if 'Gender' not in mentors.columns or 'Gender' not in mentees.columns:
    raise ValueError("Gender information is missing in the dataset")

# Helper function: merge a person's interest answers into one searchable string.
def combine_interests(row):
    """Concatenate the four interest answers in *row*, separated by one space each.

    *row* must support lookup by the survey's column titles; values are
    formatted with their default string representation.
    """
    questions = [
        'Hobbies (3 choice max)',
        'What genre of music do you like?',
        'What would you like to do in Paris',
        'Regarding the previous question, any plans/ideas in particular?',
    ]
    answers = [row[question] for question in questions]
    return "{} {} {} {}".format(*answers)

# Give every person one free-text "interest profile" built from their answers.
mentors['combined_interests'] = mentors.apply(combine_interests, axis=1)
mentees['combined_interests'] = mentees.apply(combine_interests, axis=1)

# Count words over mentors and mentees together so both groups share one vocabulary.
# NOTE: despite its name, `vectorizer` holds the transformed count matrix
# returned by fit_transform, not the CountVectorizer object.
vectorizer = CountVectorizer().fit_transform(mentors['combined_interests'].tolist() + mentees['combined_interests'].tolist())
vectors = vectorizer.toarray()

# Mentor rows were fed in first, so slicing at len(mentors) separates the groups.
mentor_vectors = vectors[:len(mentors)]
mentee_vectors = vectors[len(mentors):]

# similarity_matrix[m, e] is the cosine similarity between mentor m and mentee e.
similarity_matrix = cosine_similarity(mentor_vectors, mentee_vectors)

# Initialize mentor-mentee allocation (all three tables are keyed by mentor full name)
mentor_mentee_allocation = {mentor: [] for mentor in mentors['Full name']}
mentor_capacity = {mentor: 5 for mentor in mentors['Full name']}
mentor_gender_constraints = {mentor: {'Male': 0, 'Female': 0} for mentor in mentors['Full name']}

# Allocate mentees to mentors
# Greedy pass over the mentees in table order. Each mentee goes to the most
# similar mentor that still has free capacity and room in the quota for the
# mentee's gender (at most 3 'Male' mentees per mentor, at most 2 for any other
# gender value). If no mentor satisfies both, the gender quota is waived and the
# first mentor with free capacity is used.
unallocated_mentees = []  # mentees that could not be placed because every mentor is full

for i in range(len(mentees)):
    mentee = mentees.iloc[i]
    mentee_name = mentee['Full name']
    mentee_gender = mentee['Gender']
    gender_limit = 3 if mentee_gender == 'Male' else 2
    # Mentor row positions ordered from most to least similar to this mentee.
    sorted_indices = np.argsort(similarity_matrix[:, i])[::-1]
    allocated = False

    for idx in sorted_indices:
        mentor_name = mentors.iloc[idx]['Full name']
        if len(mentor_mentee_allocation[mentor_name]) >= mentor_capacity[mentor_name]:
            continue
        gender_counts = mentor_gender_constraints[mentor_name]
        # .get(..., 0): a gender value other than 'Male'/'Female' (including the ''
        # left by fillna for a missing answer) must not raise KeyError.
        if gender_counts.get(mentee_gender, 0) >= gender_limit:
            continue
        mentor_mentee_allocation[mentor_name].append(mentee_name)
        gender_counts[mentee_gender] = gender_counts.get(mentee_gender, 0) + 1
        allocated = True
        break

    if not allocated:
        # If not allocated based on similarity, allocate to any mentor with available capacity
        for mentor_name in mentor_mentee_allocation:
            if len(mentor_mentee_allocation[mentor_name]) < mentor_capacity[mentor_name]:
                gender_counts = mentor_gender_constraints[mentor_name]
                mentor_mentee_allocation[mentor_name].append(mentee_name)
                gender_counts[mentee_gender] = gender_counts.get(mentee_gender, 0) + 1
                allocated = True
                break

    if not allocated:
        # Every mentor is at capacity: record the mentee instead of dropping them silently.
        unallocated_mentees.append(mentee_name)

if unallocated_mentees:
    print(f"Warning: no mentor capacity left for: {unallocated_mentees}")

# Display the allocation
# The loop variable is deliberately NOT called `mentees`: that name is the mentee
# DataFrame, and rebinding it to a list would break any re-run of this cell's logic.
for mentor, assigned_mentees in mentor_mentee_allocation.items():
    print(f"Mentor: {mentor}, Mentees: {assigned_mentees}")


Mentor: Alice Smith, Mentees: ['Eve Davis']
Mentor: Bob Johnson, Mentees: ['Frank Moore', 'Jack Taylor']
Mentor: Carol White, Mentees: ['Grace Lee', 'Ivy Martinez']
Mentor: David Brown, Mentees: ['Henry Wilson']


In [None]:
#Finally convert the allocation you generated to an excel file.
# Build the table to export: one row per mentor, with that mentor's mentees
# joined into a single comma-separated cell (a mentor without mentees gets an
# empty cell). `columns=` keeps the headers even when there are no mentors.
allocation_df = pd.DataFrame(
    [
        {'Mentor': mentor_name, 'Mentees': ', '.join(str(name) for name in assigned)}
        for mentor_name, assigned in mentor_mentee_allocation.items()
    ],
    columns=['Mentor', 'Mentees'],
)

# Replace the placeholder below with the location where the workbook should be written.
output_file_path = 'Your_file_path_for_output_file_here.xlsx'
allocation_df.to_excel(output_file_path, index=False)