In [18]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the Excel file
file_path = '/Users/jaianshsinghbindra/Downloads/Mentor Mentee Algorithm./Sample-Data.xlsx'
df = pd.read_excel(file_path)

# Handle missing values by replacing with empty strings
df.fillna('', inplace=True)

# Separate mentors and mentees
mentors = df[df['Email @polytechnique.edu'].str.contains('@polytechnique.edu')].copy()
mentees = df[~df['Email @polytechnique.edu'].str.contains('@polytechnique.edu')].copy()

# Ensure gender information is available
if 'Gender' not in mentors.columns or 'Gender' not in mentees.columns:
    raise ValueError("Gender information is missing in the dataset")

# Helper function to combine interests
def combine_interests(row):
    return f"{row['Hobbies (3 choice max)']} {row['What genre of music do you like?']} {row['What would you like to do in Paris']} {row['Regarding the previous question, any plans/ideas in particular?']}"

mentors['combined_interests'] = mentors.apply(combine_interests, axis=1)
mentees['combined_interests'] = mentees.apply(combine_interests, axis=1)

vectorizer = CountVectorizer().fit_transform(mentors['combined_interests'].tolist() + mentees['combined_interests'].tolist())
vectors = vectorizer.toarray()

mentor_vectors = vectors[:len(mentors)]
mentee_vectors = vectors[len(mentees):]

similarity_matrix = cosine_similarity(mentor_vectors, mentee_vectors)

# Initialize mentor-mentee allocation
mentor_mentee_allocation = {mentor: [] for mentor in mentors['Full name']}
mentor_capacity = {mentor: 5 for mentor in mentors['Full name']}
mentor_gender_constraints = {mentor: {'Male': 0, 'Female': 0} for mentor in mentors['Full name']}

# Calculate the number of mentees each mentor should have
mentees_per_mentor = len(mentees) // len(mentors)
extra_mentees = len(mentees) % len(mentors)

# Sort mentees by similarity scores to mentors
sorted_mentees = np.argsort(-similarity_matrix, axis=1)

# Flatten the sorted mentees indices and remove duplicates
sorted_mentees = np.unique(sorted_mentees.flatten(), return_index=True)[1]

# Allocate mentees to mentors
allocated_mentees = set()
mentor_indices = list(range(len(mentors)))

for i in range(len(mentees)):
    mentee_idx = sorted_mentees[i % len(sorted_mentees)]
    if mentee_idx in allocated_mentees:
        continue
    
    mentee_gender = mentees.iloc[mentee_idx]['Gender']
    mentor_index = i % len(mentor_indices)
    
    allocated = False
    while not allocated and mentor_index < len(mentor_indices):
        mentor_name = mentors.iloc[mentor_indices[mentor_index]]['Full name']
        
        if len(mentor_mentee_allocation[mentor_name]) < mentor_capacity[mentor_name]:
            if mentor_gender_constraints[mentor_name][mentee_gender] < (3 if mentee_gender == 'Male' else 2):
                mentor_mentee_allocation[mentor_name].append(mentees.iloc[mentee_idx]['Full name'])
                mentor_gender_constraints[mentor_name][mentee_gender] += 1
                allocated_mentees.add(mentee_idx)
                allocated = True
        
        mentor_index += 1

    if mentor_index >= len(mentor_indices):
        mentor_index = 0

# Ensure balanced distribution
for mentor_name in mentor_mentee_allocation.keys():
    while len(mentor_mentee_allocation[mentor_name]) < mentees_per_mentor:
        for mentee_idx in range(len(mentees)):
            if mentee_idx not in allocated_mentees:
                mentor_mentee_allocation[mentor_name].append(mentees.iloc[mentee_idx]['Full name'])
                allocated_mentees.add(mentee_idx)
                break

for mentor_name in mentor_mentee_allocation.keys():
    if extra_mentees > 0 and len(mentor_mentee_allocation[mentor_name]) < mentees_per_mentor + 1:
        for mentee_idx in range(len(mentees)):
            if mentee_idx not in allocated_mentees:
                mentor_mentee_allocation[mentor_name].append(mentees.iloc[mentee_idx]['Full name'])
                allocated_mentees.add(mentee_idx)
                extra_mentees -= 1
                break

# Display the allocation
for mentor, mentees_list in mentor_mentee_allocation.items():
    print(f"Mentor: {mentor}, Mentees: {mentees_list}")


Mentor: Alice Smith, Mentees: ['Eve Davis', 'Ivy Martinez']
Mentor: Bob Johnson, Mentees: ['Henry Wilson', 'Jack Taylor']
Mentor: Carol White, Mentees: ['Grace Lee']
Mentor: David Brown, Mentees: ['Frank Moore']


In [19]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the Excel file
file_path = '/Users/jaianshsinghbindra/Downloads/Mentor Mentee Algorithm./Sample-Data.xlsx'
df = pd.read_excel(file_path)

# Handle missing values by replacing with empty strings
df.fillna('', inplace=True)

# Separate mentors and mentees
mentors = df[df['Email @polytechnique.edu'].str.contains('@polytechnique.edu')].copy()
mentees = df[~df['Email @polytechnique.edu'].str.contains('@polytechnique.edu')].copy()

# Ensure gender information is available
if 'Gender' not in mentors.columns or 'Gender' not in mentees.columns:
    raise ValueError("Gender information is missing in the dataset")

# Helper function to combine interests
def combine_interests(row):
    return f"{row['Hobbies (3 choice max)']} {row['What genre of music do you like?']} {row['What would you like to do in Paris']} {row['Regarding the previous question, any plans/ideas in particular?']}"

mentors['combined_interests'] = mentors.apply(combine_interests, axis=1)
mentees['combined_interests'] = mentees.apply(combine_interests, axis=1)

vectorizer = CountVectorizer().fit_transform(mentors['combined_interests'].tolist() + mentees['combined_interests'].tolist())
vectors = vectorizer.toarray()

mentor_vectors = vectors[:len(mentors)]
mentee_vectors = vectors[len(mentees):]

similarity_matrix = cosine_similarity(mentor_vectors, mentee_vectors)

# Initialize mentor-mentee allocation
mentor_mentee_allocation = {mentor: [] for mentor in mentors['Full name']}
mentor_capacity = {mentor: 5 for mentor in mentors['Full name']}
mentor_gender_constraints = {mentor: {'Male': 0, 'Female': 0} for mentor in mentors['Full name']}

# Calculate the number of mentees each mentor should have
mentees_per_mentor = len(mentees) // len(mentors)
extra_mentees = len(mentees) % len(mentors)

# Sort mentees by similarity scores to mentors
sorted_mentees = np.argsort(-similarity_matrix, axis=1)

# Flatten the sorted mentees indices and remove duplicates
sorted_mentees = np.unique(sorted_mentees.flatten(), return_index=True)[1]

# Allocate mentees to mentors
allocated_mentees = set()
mentor_indices = list(range(len(mentors)))

for i in range(len(mentees)):
    mentee_idx = sorted_mentees[i % len(sorted_mentees)]
    if mentee_idx in allocated_mentees:
        continue
    
    mentee_gender = mentees.iloc[mentee_idx]['Gender']
    mentor_index = i % len(mentor_indices)
    
    allocated = False
    while not allocated and mentor_index < len(mentor_indices):
        mentor_name = mentors.iloc[mentor_indices[mentor_index]]['Full name']
        
        if len(mentor_mentee_allocation[mentor_name]) < mentor_capacity[mentor_name]:
            if mentor_gender_constraints[mentor_name][mentee_gender] < (3 if mentee_gender == 'Male' else 2):
                mentor_mentee_allocation[mentor_name].append(mentees.iloc[mentee_idx]['Full name'])
                mentor_gender_constraints[mentor_name][mentee_gender] += 1
                allocated_mentees.add(mentee_idx)
                allocated = True
        
        mentor_index += 1

    if mentor_index >= len(mentor_indices):
        mentor_index = 0

# Ensure balanced distribution
for mentor_name in mentor_mentee_allocation.keys():
    while len(mentor_mentee_allocation[mentor_name]) < mentees_per_mentor:
        for mentee_idx in range(len(mentees)):
            if mentee_idx not in allocated_mentees:
                mentor_mentee_allocation[mentor_name].append(mentees.iloc[mentee_idx]['Full name'])
                allocated_mentees.add(mentee_idx)
                break

for mentor_name in mentor_mentee_allocation.keys():
    if extra_mentees > 0 and len(mentor_mentee_allocation[mentor_name]) < mentees_per_mentor + 1:
        for mentee_idx in range(len(mentees)):
            if mentee_idx not in allocated_mentees:
                mentor_mentee_allocation[mentor_name].append(mentees.iloc[mentee_idx]['Full name'])
                allocated_mentees.add(mentee_idx)
                extra_mentees -= 1
                break

# Display the allocation
for mentor, mentees_list in mentor_mentee_allocation.items():
    print(f"Mentor: {mentor}, Mentees: {mentees_list}")


Mentor: Alice Smith, Mentees: ['Eve Davis', 'Ivy Martinez']
Mentor: Bob Johnson, Mentees: ['Henry Wilson', 'Jack Taylor']
Mentor: Carol White, Mentees: ['Grace Lee']
Mentor: David Brown, Mentees: ['Frank Moore']


In [20]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the Excel file
file_path = '/Users/jaianshsinghbindra/Downloads/Mentor Mentee Algorithm./Sample-Data.xlsx'
df = pd.read_excel(file_path)

# Handle missing values by replacing with empty strings
df.fillna('', inplace=True)

# Separate mentors and mentees
mentors = df[df['Email @polytechnique.edu'].str.contains('@polytechnique.edu')].copy()
mentees = df[~df['Email @polytechnique.edu'].str.contains('@polytechnique.edu')].copy()

# Ensure gender information is available
if 'Gender' not in mentors.columns or 'Gender' not in mentees.columns:
    raise ValueError("Gender information is missing in the dataset")

# Helper function to combine interests
def combine_interests(row):
    return f"{row['Hobbies (3 choice max)']} {row['What genre of music do you like?']} {row['What would you like to do in Paris']} {row['Regarding the previous question, any plans/ideas in particular?']}"

mentors['combined_interests'] = mentors.apply(combine_interests, axis=1)
mentees['combined_interests'] = mentees.apply(combine_interests, axis=1)

vectorizer = CountVectorizer().fit_transform(mentors['combined_interests'].tolist() + mentees['combined_interests'].tolist())
vectors = vectorizer.toarray()

mentor_vectors = vectors[:len(mentors)]
mentee_vectors = vectors[len(mentees):]

similarity_matrix = cosine_similarity(mentor_vectors, mentee_vectors)

# Initialize mentor-mentee allocation
mentor_mentee_allocation = {mentor: [] for mentor in mentors['Full name']}
mentor_capacity = {mentor: 5 for mentor in mentors['Full name']}
mentor_gender_constraints = {mentor: {'Male': 0, 'Female': 0} for mentor in mentors['Full name']}

# Calculate the number of mentees each mentor should have
mentees_per_mentor = len(mentees) // len(mentors)
extra_mentees = len(mentees) % len(mentors)

# Sort mentees by similarity scores to mentors
sorted_mentees = np.argsort(-similarity_matrix, axis=1)

# Flatten the sorted mentees indices and remove duplicates
sorted_mentees = np.unique(sorted_mentees.flatten(), return_index=True)[1]

# Allocate mentees to mentors
allocated_mentees = set()
mentor_indices = list(range(len(mentors)))

for i in range(len(mentees)):
    mentee_idx = sorted_mentees[i % len(sorted_mentees)]
    if mentee_idx in allocated_mentees:
        continue
    
    mentee_gender = mentees.iloc[mentee_idx]['Gender']
    mentor_index = i % len(mentor_indices)
    
    allocated = False
    while not allocated and mentor_index < len(mentor_indices):
        mentor_name = mentors.iloc[mentor_indices[mentor_index]]['Full name']
        
        if len(mentor_mentee_allocation[mentor_name]) < mentor_capacity[mentor_name]:
            if mentor_gender_constraints[mentor_name][mentee_gender] < (3 if mentee_gender == 'Male' else 2):
                mentor_mentee_allocation[mentor_name].append(mentees.iloc[mentee_idx]['Full name'])
                mentor_gender_constraints[mentor_name][mentee_gender] += 1
                allocated_mentees.add(mentee_idx)
                allocated = True
        
        mentor_index += 1

    if mentor_index >= len(mentor_indices):
        mentor_index = 0

# Ensure balanced distribution
for mentor_name in mentor_mentee_allocation.keys():
    while len(mentor_mentee_allocation[mentor_name]) < mentees_per_mentor:
        for mentee_idx in range(len(mentees)):
            if mentee_idx not in allocated_mentees:
                mentor_mentee_allocation[mentor_name].append(mentees.iloc[mentee_idx]['Full name'])
                allocated_mentees.add(mentee_idx)
                break

for mentor_name in mentor_mentee_allocation.keys():
    if extra_mentees > 0 and len(mentor_mentee_allocation[mentor_name]) < mentees_per_mentor + 1:
        for mentee_idx in range(len(mentees)):
            if mentee_idx not in allocated_mentees:
                mentor_mentee_allocation[mentor_name].append(mentees.iloc[mentee_idx]['Full name'])
                allocated_mentees.add(mentee_idx)
                extra_mentees -= 1
                break

# Create a DataFrame for the allocation results
allocation_list = []

for mentor, mentees_list in mentor_mentee_allocation.items():
    for mentee in mentees_list:
        allocation_list.append({'Mentor': mentor, 'Mentee': mentee})

allocation_df = pd.DataFrame(allocation_list)

# Save the allocation to an Excel file
output_file_path = '/Users/jaianshsinghbindra/Downloads/Mentor Mentee Algorithm./Mentor_Mentee_Allocation.xlsx'
allocation_df.to_excel(output_file_path, index=False)

# Display the allocation
allocation_df.head()


Unnamed: 0,Mentor,Mentee
0,Alice Smith,Eve Davis
1,Alice Smith,Ivy Martinez
2,Bob Johnson,Henry Wilson
3,Bob Johnson,Jack Taylor
4,Carol White,Grace Lee
