In [1]:
import pandas as pd
import model2
import openpyxl

In [2]:
ls

Choice vectors.ipynb  data-pairings.csv     model.py
[34m__pycache__[m[m/          data-students.csv     model2.py
data-mentors.csv      main.ipynb            model2.py.ipynb


In [3]:
# Read data

data_mentor = pd.read_csv('data-mentors.csv', encoding='utf-8')
data_student = pd.read_csv('data-students.csv', encoding='utf-8')
data_pairing = pd.read_csv('data-pairings.csv', encoding = 'utf-8')

In [4]:
# Remove the questions in the column names

for i in range(5,35):
    data_student.rename(columns = {data_student.columns[i]: data_student.columns[i][80:-1]},inplace=True)
    data_mentor.rename(columns = {data_mentor.columns[i]: data_mentor.columns[i][66:-1]},inplace=True)

Required sets for the model

1. List of all universities
2. List of subjects
3. A dictionary for universites - key: university name, value: list of students from that university
4. Two dictionaries for interest areas (one for students, one for mentors) - key: student name, value: a list of his/her ordered choices
5. A dictionary for the score of assigning mentee i to mentor j - key: pairs (student, mentee), value: an integer score of matching a mentor with a mentee


Note: If a mentee (or mentor) has only one interest, say only choice 1, then leave his/her choices 2 and 3 empty, and assign a score to him/her by using only choice 1. Since some mentors have a fourth choice, all mentees will get zero for their fourth choices.

In [5]:
# The required set (1)

student_uni = data_student['University'].unique()    

# The required set (2)

subjects = [col for col in data_student.columns[5:35]]  

# The required set (3)

# data_student['University'].value_counts()

uni_students = data_student.groupby('University')['Name'].apply(list).to_dict()

In [6]:
# The required set (4)

choice_mentee = {}  
choice_mentor = {}
wrong_mentee=[]
wrong_mentor = []

for index,row in data_student.iterrows():
    choice_mentee[row['Name']] = {'First Choice':0, 'Second Choice':0, 'Third Choice':0}
    for subject in subjects:
        if pd.isnull(row[subject]) == False:
            interest_list = str(row[subject]).split(',')
          #  print(interest_list)

            for word in interest_list:
                if word in ['First Choice', 'Second Choice', 'Third Choice']:
                    choice_mentee[row['Name']][word] = subject
                else:   
                    wrong_mentee.append(word)
                   # print(row)


for index,row in data_mentor.iterrows():
    choice_mentor[row['Name']] = {'First Choice':0, 'Second Choice':0, 'Third Choice':0, 'Fourth Choice':0}
    for subject in subjects:
        if pd.isnull(row[subject]) == False:
            interest_list = str(row[subject]).split(',')

            for word in interest_list:
                if word in ['First Choice', 'Second Choice', 'Third Choice', 'Fourth Choice']:
                    choice_mentor[row['Name']][word] = subject
                else:   
                    wrong_mentor.append(word)
                   # print(row)
                    

In [7]:
# The required set (5)

weights = {'First Choice':8, 'Second Choice':6, 'Third Choice':4, 'Fourth Choice':2}

# Define score function

def score(student, mentor):
    score = 0
    for choice1 in choice_mentee[student].keys():
            for choice2 in choice_mentor[mentor].keys():
                if choice_mentee[student][choice1] == choice_mentor[mentor][choice2]:
                    score += weights[choice1]*weights[choice2]
    return score

# Define the scores dictionary

scores = {(mentee, mentor):0 for mentor in choice_mentor.keys() for mentee in choice_mentee.keys()}

for student in choice_mentee.keys():
    for mentor in choice_mentor.keys():
        scores[(student,mentor)] = score(student,mentor)

In [8]:
max_student = 1
uni_capacity = 20
time = 300

obj_value, solution, df = model2.matching(choice_mentee, choice_mentor, uni_students, scores, max_student, uni_capacity, time)
print(obj_value)
df

4004.0


Unnamed: 0,Mentor,Student,Score
0,Mentor1,Student18,88
1,Mentor2,Student81,64
2,Mentor3,Student5,48
3,Mentor4,Student84,68
4,Mentor5,Student39,112
5,Mentor6,Student9,96
6,Mentor7,Student36,88
7,Mentor8,Student90,64
8,Mentor9,Student50,104
9,Mentor10,Student56,100


In [9]:
# Score of the 2022 pairing

score_2022 = 0

for index, row in data_pairing.iterrows():
    score_2022 += score(row['Students'],row['Mentors'])
    
score_2022

2564