In [306]:
import time
import pandas as pd
import numpy as np

# wall-clock reference point; the last cell reports the time elapsed since here
start = time.time()

# read the tutor and student survey exports (tutors first, then students)
tutors, students = (pd.read_csv(csv_name) for csv_name in ("tutors.csv", "students.csv"))

In [307]:
# keep only the tutor name, the instrument answer and one availability column per weekday;
# the long survey question is shortened to 'Instrument'
days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
tutors = (
    tutors
    .loc[:, ['Name', 'Which instrument(s) would you like to teach?', *days]]
    .rename(columns={'Which instrument(s) would you like to teach?': 'Instrument'})
)

In [308]:
# remove/replace invalid data entries
# Rows without an instrument cannot be matched, so they are dropped.
# .copy() detaches the filtered rows from the original frame so that the column
# assignment below writes to a real frame instead of a slice
# (this is what triggers pandas' SettingWithCopyWarning otherwise).
tutors = tutors[tutors['Instrument'].notna()].copy()
# lower-case so instrument names compare case-insensitively later on
tutors['Instrument'] = tutors['Instrument'].str.lower()
# empty availability answers become '' so the .str accessor can be used on them
tutors = tutors.replace(np.nan, '')

In [309]:
# extract day/time availabilities into separate columns
times = ['Before 11am', '11am- 1pm', '1pm- 3pm', '3pm- 5pm', "After 5pm"]

# column labels of the form '<day> <time slot>', ordered day-major
daystimes = [day + ' ' + slot for day in days for slot in times]

# one boolean column per (day, slot): True when the tutor's answer for that day
# mentions the slot label
for day in days:
    for slot in times:
        # regex=False: slot labels are literal text, not regular expressions
        tutors[day + ' ' + slot] = tutors[day].str.contains(slot, regex=False)

In [310]:
# keep only day, time combination columns in dataframe
# `columns=` is used because passing the axis positionally (`drop(days, 1)`)
# is rejected by pandas >= 2.0
tutors = tutors.drop(columns=days)

In [311]:
# add column for number of availabilities
# Sum only the day/time slot columns (booleans count as 0/1). Selecting them by
# name keeps the result correct even if this cell is re-run after 'Count' exists.
tutors['Count'] = tutors[daystimes].sum(axis=1)
# least-available tutors first, so they are considered first when matching
tutors = tutors.sort_values(by='Count')

In [312]:
# processed dataframe for tutors: Name, Instrument, one boolean column per
# day/time slot and 'Count' (number of available slots), sorted ascending by 'Count'
tutors

Unnamed: 0,Name,Instrument,Monday Before 11am,Monday 11am- 1pm,Monday 1pm- 3pm,Monday 3pm- 5pm,Monday After 5pm,Tuesday Before 11am,Tuesday 11am- 1pm,Tuesday 1pm- 3pm,...,Saturday 11am- 1pm,Saturday 1pm- 3pm,Saturday 3pm- 5pm,Saturday After 5pm,Sunday Before 11am,Sunday 11am- 1pm,Sunday 1pm- 3pm,Sunday 3pm- 5pm,Sunday After 5pm,Count
39,Person 33,piano,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,1
15,Person 9,piano,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,1
13,Person 7,piano,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,1
40,Person 34,piano,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,1
67,Person 61,"piano, composition",False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,2
58,Person 52,guitar,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,True,False,False,2
26,Person 20,"saxophone, piano",False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,2
21,Person 15,piano,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,3
64,Person 58,flute,False,False,False,True,False,False,False,False,...,False,False,False,False,False,False,False,False,False,3
54,Person 48,piano,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,3


In [313]:
# keep only the student name, instrument and one availability column per weekday;
# the name column is renamed so it lines up with the tutors dataframe
students = (
    students
    .loc[:, ['Student Name - Last, First', 'Instrument', *days]]
    .rename(columns={'Student Name - Last, First': 'Name'})
)

In [314]:
# remove/replace invalid data entries
# Rows without an instrument cannot be matched, so they are dropped.
# .copy() detaches the filtered rows from the original frame so that the column
# assignment below writes to a real frame instead of a slice
# (this is what triggers pandas' SettingWithCopyWarning otherwise).
students = students[students['Instrument'].notna()].copy()
# lower-case so instrument names compare case-insensitively later on
students['Instrument'] = students['Instrument'].str.lower()
# empty availability answers become '' so the .str accessor can be used on them
students = students.replace(np.nan, '')

In [315]:
# extract day/time availabilities into separate columns
# one boolean column per (day, slot), labelled '<day> <time slot>' exactly like
# the entries of `daystimes`
for day in days:
    for slot in times:
        # regex=False: slot labels are literal text, not regular expressions
        students[day + ' ' + slot] = students[day].str.contains(slot, regex=False)

In [316]:
# add column for number of availabilities
# the raw weekday answers are no longer needed once the slot columns exist;
# `columns=` is used because a positional axis (`drop(days, 1)`) is rejected by pandas >= 2.0
students = students.drop(columns=days)
# sum only the day/time slot columns (booleans count as 0/1)
students['Count'] = students[daystimes].sum(axis=1)
# least-available students first, so they are offered to tutors first
students = students.sort_values(by='Count')

In [317]:
# processed dataframe for students; matches tutors dataframe:
# Name, Instrument, one boolean column per day/time slot and 'Count',
# sorted ascending by 'Count'
students

Unnamed: 0,Name,Instrument,Monday Before 11am,Monday 11am- 1pm,Monday 1pm- 3pm,Monday 3pm- 5pm,Monday After 5pm,Tuesday Before 11am,Tuesday 11am- 1pm,Tuesday 1pm- 3pm,...,Saturday 11am- 1pm,Saturday 1pm- 3pm,Saturday 3pm- 5pm,Saturday After 5pm,Sunday Before 11am,Sunday 11am- 1pm,Sunday 1pm- 3pm,Sunday 3pm- 5pm,Sunday After 5pm,Count
140,Raymond Austin,guitar,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,1
137,Wade Berger,piano,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,1
40,Minerva Bass,piano,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,1
101,Vladimir Dominguez,piano,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,1
102,Graiden Burris,guitar,False,False,False,False,True,False,False,False,...,False,False,False,False,False,False,False,False,False,1
139,Medge Lee,guitar,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,False,False,1
39,Lila Rivas,piano,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,1
15,Talon Gallagher,piano,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,1
13,Garrett Mooney,piano,False,False,False,False,False,False,False,False,...,False,False,False,False,True,False,False,False,False,1
76,Silas Bean,piano,False,False,False,False,False,False,False,False,...,False,False,False,False,False,False,False,True,False,1


In [318]:
# working copy of the student pool: match() reads it, and the matching loop
# removes a student from it once assigned, so `students` itself stays intact
students2 = students.copy()

In [319]:
def _split_instruments(text):
    """Return the set of instrument names in a comma-separated string (whitespace-tolerant)."""
    return {part.strip() for part in text.split(',') if part.strip()}


def match(tutor, students=None):
    """
    Find every student that can be paired with the given tutor.

    A pairing is valid when tutor and student share at least one instrument and
    are both available in at least one day/time slot.

    Parameters
    ----------
    tutor : tuple
        A (label, Series) pair as produced by df.iterrows(). The Series must
        contain 'Name', 'Instrument' (comma-separated names) and one boolean
        entry per day/time slot.
    students : DataFrame, optional
        Candidate students with the same 'Name' / 'Instrument' / slot columns;
        a 'Count' column, if present, is ignored. Defaults to the notebook-level
        `students2` pool.

    Returns
    -------
    DataFrame
        Column 0 is named after the tutor; every following column is a matching
        student, in the order they appear in `students`. Only the rows (slots)
        where the tutor is available are kept.
    """
    if students is None:
        students = students2

    label, tutor_row = tutor
    tutor_name = tutor_row['Name']

    # Loop-invariant tutor data, computed once instead of once per student.
    tutor_instruments = _split_instruments(tutor_row['Instrument'])
    # `== True` (not truthiness) so that name/instrument strings never count as a slot.
    open_slots = [
        column for column, value in tutor_row.items()
        if column not in ('Name', 'Instrument', 'Count') and value == True  # noqa: E712
    ]

    ts = tutor_row.to_frame()
    candidates = students.drop(columns='Count', errors='ignore')
    for _, student_row in candidates.iterrows():
        if tutor_instruments.isdisjoint(_split_instruments(student_row['Instrument'])):
            continue
        # at least one slot where both tutor and student are available
        if any(student_row.get(slot) == True for slot in open_slots):  # noqa: E712
            ts[student_row['Name']] = student_row

    # to_frame() names the column after the row label; expose the tutor's name instead
    ts = ts.rename(columns={label: tutor_name})
    return ts[ts[tutor_name] == True]  # noqa: E712

In [320]:
# create the iterator from rows of tutors
# 'Count' is dropped so each row carries only Name, Instrument and the slot columns
t_without_count = tutors.drop(columns='Count')
# NOTE: iterrows() returns a one-shot generator of (label, Series) pairs;
# once the matching loop has consumed it, this cell must be re-run to iterate again
tutors_iter = t_without_count.iterrows()

In [321]:
# create the dataframe to store matches once made:
# one row per tutor, with the tutor's name and the assigned student's name
matches = pd.DataFrame(columns=['Tutor', 'Student'])

In [322]:
# iterate through tutors; use function defined above to get all valid matches
# Start from a fresh student pool and a fresh row iterator so that re-running this
# cell reproduces the same result instead of reusing an exhausted iterator or a
# pool that earlier runs already removed students from.
students2 = students.copy()
pairs = []
for t in t_without_count.iterrows():
    m = match(t)
    names = m.columns
    if len(names) < 2:
        # not able to match tutor to any students
        pairs.append((names[0], 'None'))
    else:
        # match created: record (tutor, student) pair and remove student from students pool
        # currently matches tutor to student with least availabilities; can edit later to add priority consideration
        pairs.append((names[0], names[1]))
        # NOTE: removal is by name, so students sharing this exact name are all removed
        students2 = students2.drop(students2[students2['Name'] == names[1]].index)

# build the result once instead of growing a dataframe row by row
matches = pd.DataFrame(pairs, columns=['Tutor', 'Student'])

In [323]:
# final tutor -> student assignments; the string 'None' in 'Student' marks a tutor
# for whom no valid student was found
matches

Unnamed: 0,Tutor,Student
0,Person 33,Minerva Bass
1,Person 9,Lila Rivas
2,Person 7,Talon Gallagher
3,Person 34,Garrett Mooney
4,Person 61,Alma Mcpherson
5,Person 52,Melyssa Hogan
6,Person 20,Nolan Brady
7,Person 15,Wade Berger
8,Person 58,Akeem Guerra
9,Person 48,Silas Bean


In [324]:
# time for computation (in seconds):
# wall-clock time elapsed since `start` was recorded in the first cell
time.time() - start

12.555426836013794