In [1]:
import pandas as pd
import random

In [2]:
#variable to store which student dataset we want to run
student_url = 'data/raw/datasetA_with_roommate_pref.csv'

In [3]:
#variable to store which room dataset we want to run
room_url = 'data/raw/final_room_data.csv'

In [4]:
student_df = pd.read_csv(student_url, index_col= False)
room_df = pd.read_csv(room_url, index_col= False)

In [5]:
#drop unneccessary columns (maybe drop: name, school, is_single_preferred)
student_df = student_df.drop(columns=['date_of_birth', 'is_private_bathroom_preferred', 'laundry_availibility'])

# Sorting
We sorted the dataset using the following steps:
1. assigning every student a lottery number & sorting them by it
2. prioritizing freshman students (bringing them to the top of the list regardless of lottery number)
3. prioritizing disability students (bringing them to the top of the list regardless of lottery number)
4. prioritizing low-income students (bringing them to the top of the list regardless of lottery number)

## What this means
All of these categories are sorted by lottery
1. Freshman & Disabled & Low-income
2. Disabled & Low-income
3. Disabled
4. Freshman & Low-income
5. Low-income
6. Freshman
7. Everyone else

In [6]:
# defining variables
total_students = student_df.shape[0]

In [7]:
# definining weights
disability_pts = 20
income_pts = 10
freshman_pts = 5

# assigning weights to each student
weight_df = student_df
weight_df['weights'] = 0
for row in weight_df.index:
    if weight_df.loc[row, 'accessibility_need'] == 1:
        weight_df.at[row, 'weights'] += disability_pts
    if weight_df.loc[row, 'low_income_status'] == 1:
        weight_df.at[row, 'weights'] += income_pts
    if weight_df.loc[row, 'student_year'] == "Freshman":
        weight_df.at[row, 'weights'] += freshman_pts

In [8]:
# will assign a different random number to each student
lottery_df = weight_df
numbers_range = range(1, total_students*4)
lottery_numbers = random.choices(numbers_range, k=total_students)
lottery_df["lottery_number"] = lottery_numbers

In [9]:
# sort student dataset by weight and then lottery
sorted_df = lottery_df.sort_values(by=['weights', 'lottery_number'], ascending=[False, True])
sorted_df['assigned'] = 0
sorted_df.to_csv('data/processed/sorted_students.csv', index=False)

# Assigning Rooms

Using priorities
If none of their preferences are available, random assignment


In [10]:
# setting variables
total_rooms = room_df.shape[0]
graduate_rooms = 0
accessible_rooms = 0

for row in range(0,total_rooms):
    if room_df.loc[row, 'has_accessibility_ramps'] == 1:
        accessible_rooms += 1
    if room_df.loc[row, 'room_contract_type'].find('Graduate') != -1:
        accessible_rooms += 1


In [11]:
#create a room preference list for each RUID
preference_list = dict()
for row in sorted_df.index:
    series_ids = sorted_df.loc[row, ['preferred_hall_ids']]
    string_ids = series_ids.to_string()
    temp = string_ids[22:]
    temp_str = temp.replace(' ', ",").replace('[','').replace(']','').replace("'",'').replace("'",'')
    preference_list[sorted_df.loc[row, 'RUID']] = temp_str.split(',') #string split

In [12]:
#create a roommate preference list for each RUID
roommate_list = dict()
for row in sorted_df.index:
    series_ids = sorted_df.loc[row, ['roommate_preferences']]
    string_ids = series_ids.to_string()
    temp = string_ids[24:]
    temp_str = temp.replace('[', '').replace(']', '')
    roommate_list[sorted_df.loc[row, 'RUID']] = temp_str.split(',') #string split

In [13]:
ruid_df = pd.DataFrame({'preferred_hall':pd.Series(preference_list), 'roommate_preferences':pd.Series(roommate_list)})
print(ruid_df)

           preferred_hall     roommate_preferences
906155943    [WS, BS, SA]  [956668962,  743988610]
152782265    [RA, MH, BS]  [842369533,  626751036]
312170287  [MZH, JS, NCA]                       []
755331365   [NCA, MS, RA]  [694212164,  869964826]
199302575   [NCA, TS, MH]                       []
...                   ...                      ...
390346299        [JA, BA]                       []
643395246   [NCA, TS, BS]  [122364754,  415738137]
914367253   [SA, TS, MRS]  [987291052,  979417663]
849602917   [BS, NCA, BH]  [230972311,  674662268]
234441184    [AH, TS, RA]              [785123321]

[5330 rows x 2 columns]


In [23]:
result = dict()

room_df['available'] = 1

for ruid in ruid_df.index:
    
    student_info = pd.DataFrame()
    student_info = sorted_df.loc[(sorted_df['RUID'] == ruid)]
    student_info = student_info.reset_index().drop(columns=['index'])
    
    #check if ruid is disability
    disability_check = False
    if student_info.loc[0]['accessibility_need'] == 1:
        disability_check = True
    
    #check if ruid is low-income : in 2 parts
    income_check = False
    if student_info.loc[0]['low_income_status'] == 1:
        income_check = True
    
    max_price = student_info.loc[0]['max_price']
    
    is_pref_found = False
    room_details = []
    available_rooms_df = pd.DataFrame()
    
    preferences = ruid_df.loc[ruid, 'preferred_hall']
    roommate = ruid_df.loc[ruid, 'roommate_preferences']

    for pref in preferences:
        
        if disability_check:
            available_rooms_df = room_df.loc[(room_df['hall_id'] == pref) & (room_df['available'] == 1) & (room_df['has_accessibility_ramps'] == 1)]
        else:
            available_rooms_df = room_df.loc[(room_df['hall_id'] == pref) & room_df['available'] == 1]
        
        if not available_rooms_df.empty:
            #try allocating first room in the df
            is_pref_found = True
            break
    
    if not is_pref_found:
        #allocate random available room
        available_rooms_df = room_df.loc[room_df['available'] == 1]
    
    if not available_rooms_df.empty:
        #try allocating first room in the df
        
        #roommate assignment here
        allocated_room = available_rooms_df.iloc[0]
            
        room_details.append(allocated_room['hall_id'])
        room_details.append(allocated_room['building_id'])
        room_details.append(allocated_room['room_id'])
            
        room_df.loc[allocated_room.name,'available'] = 0
        result[ruid] = room_details 

In [24]:
df = pd.DataFrame(list(result.items()), columns=['RUID', 'Assigned Rooms'])
df.to_csv('allocated.csv', index=False)

# Matching Algorithm

In [16]:
print(student_df.head)
for row in student_df.index:
    print(student_df.loc[row, 'roommate_preferences'])
    temp_list = student_df.loc[row, 'roommate_preferences']
    if temp_list != '[]':
        
        new_lst = []
        new_lst = temp_list.replace('[', '').replace(']', '').split(',')
        print(new_lst)
        
        for student in new_lst:
            print(student)
            match_list = []
            for row2 in student_df:
                if student_df.loc[row2, 'RUID'] == student:
                    match_list = student_df.loc[row2, 'roommate_preferences']
            if student not in match_list:
                print("False")
                break

<bound method NDFrame.head of                   name sex       RUID          student_year  \
0     Jennifer Simmons   F  977522906             Sophomore   
1      Courtney Murphy   F  567583671             Sophomore   
2       Alexander King   M  450859644              Freshman   
3      Amanda Atkinson   F  826829351             Sophomore   
4          Mark Rogers   M  852305641                Junior   
...                ...  ..        ...                   ...   
5325  Mr. Joseph White   M  900451724             Doctorate   
5326   Alyssa Franklin   F  933744275             Doctorate   
5327       David James   M  417696262               Masters   
5328     Stephen Heath   M  940035368  Postdoctoral studies   
5329    Daniel Nichols   M  755301395             Doctorate   

                                         student_school  accessibility_need  \
0                                 School of Engineering                   1   
1                           School of Arts and Sciences

KeyError: 'name'