In [1]:
import pandas as pd
import random

In [2]:
# Path of the student dataset to run; change this value to switch datasets.
student_url = 'data/raw/datasetB_with_roommate_pref.csv'

In [3]:
# Path of the room dataset to run; change this value to switch datasets.
room_url = 'data/raw/final_room_data.csv'

In [4]:
# Read both raw CSV files (students first, then rooms). index_col=False tells
# pandas not to use the first column of the file as the index.
student_df, room_df = (
    pd.read_csv(csv_path, index_col=False)
    for csv_path in (student_url, room_url)
)

In [5]:
# Drop unnecessary columns (TODO: maybe also drop name and school).
unused_student_cols = [
    'is_single_preferred',
    'is_private_bathroom_preferred',
    'laundry_availibility',
]
unused_room_cols = [
    'number_of_floors',
    'is_aircooled',
    'has_vending_machine',
    'has_laundry',
    'has_main_lounge',
    'has_floor_lounge',
    'has_tv_in_building',
    'has_recreational_activities',
    'has_wardrobe',
    'has_private_bathroom',
]

student_df = student_df.drop(columns=unused_student_cols)
room_df = room_df.drop(columns=unused_room_cols)

# Sorting
We sorted the dataset using the following steps:
1. assigning every student a lottery number & sorting them by it
2. assigning a priority weight to students who need accessibility (20)
3. assigning a priority weight to low-income students (10)
4. assigning a priority weight to freshman students (5)

## What this means
Students are grouped by total weight into the categories below (highest priority first); within each category they are ordered by lottery number:
1. Freshman & Disabled & Low-income (35)
2. Disabled & Low-income (30)
3. Freshman & Disabled (25)
4. Disabled (20)
5. Freshman & Low-income (15)
6. Low-income (10)
7. Freshman (5)
8. Everyone else (0)

In [6]:
# Number of students in the loaded dataset (one row per student).
total_students = len(student_df)

In [7]:
# Priority points added to a student's weight for each category they belong to.
disability_pts = 20
income_pts = 10
freshman_pts = 5

# Work on a copy so the loaded student_df is not modified by this cell.
weight_df = student_df.copy()

# Each condition yields a boolean column; casting it to an integer and scaling
# by the category's points gives that category's contribution. The three
# contributions are summed per student in one vectorized pass instead of a
# row-by-row loop.
weight_df['weights'] = (
    weight_df['accessibility_need'].eq(1).astype('int64') * disability_pts
    + weight_df['low_income_status'].eq(1).astype('int64') * income_pts
    + weight_df['student_year'].eq("Freshman").astype('int64') * freshman_pts
)

In [8]:
# Assign a different random lottery number to each student.
# random.sample draws WITHOUT replacement, so no two students can share a
# number; random.choices draws with replacement and could hand out duplicates,
# which would create ties in the lottery ordering.
numbers_range = range(1, total_students*4)
lottery_numbers = random.sample(numbers_range, k=total_students)

# assign() returns a new frame, so weight_df itself is left unchanged.
lottery_df = weight_df.assign(lottery_number=lottery_numbers)

In [9]:
# Order students by priority: highest weight first, and within equal weights
# the lowest lottery number first. Every student starts out unassigned (0).
sorted_df = (
    lottery_df
    .sort_values(by=['weights', 'lottery_number'], ascending=[False, True])
    .assign(assigned=0)
)

# Save the ordered student list.
sorted_df.to_csv('data/processed/sorted_students.csv', index=False)

# Assigning Rooms

In [10]:
# Total number of rows in the room table.
total_rooms = len(room_df)

# Number of rooms whose 'has_accessibility_ramps' flag equals 1.
accessible_rooms = int((room_df['has_accessibility_ramps'] == 1).sum())

In [11]:
#create a room preference list for each RUID
def _parse_hall_ids(raw):
    """Turn one stored 'preferred_hall_ids' value into a list of hall id strings.

    Square brackets and single quotes are removed, spaces are treated as
    separators, and the text is split into its parts. The stored values
    presumably look like "['TS' 'MZH' 'RA']" (verify against the CSV).
    A missing value yields [''], the same result as an empty list "[]".
    """
    if isinstance(raw, float) and pd.isna(raw):
        return ['']
    cleaned = str(raw).replace('[', '').replace(']', '').replace("'", '')
    return cleaned.replace(' ', ',').split(',')


# Parse the cell value directly rather than slicing the text produced by
# Series.to_string(), whose layout depends on the column name's length.
preference_list = {
    ruid: _parse_hall_ids(raw)
    for ruid, raw in zip(sorted_df['RUID'], sorted_df['preferred_hall_ids'])
}

In [12]:
#create a roommate preference list for each RUID
def _parse_roommate_ids(raw):
    """Turn one stored 'roommate_preferences' value into a list of RUID strings.

    Square brackets are removed, the text is split on commas, and surrounding
    whitespace is stripped from each part. An empty list "[]" and a missing
    value both yield [''], which marks "no roommate preferences".
    """
    if isinstance(raw, float) and pd.isna(raw):
        return ['']
    cleaned = str(raw).replace('[', '').replace(']', '')
    return [part.strip() for part in cleaned.split(',')]


# Parse the cell value directly rather than slicing the text produced by
# Series.to_string(), whose layout depends on the column name's length.
roommate_list = {
    ruid: _parse_roommate_ids(raw)
    for ruid, raw in zip(sorted_df['RUID'], sorted_df['roommate_preferences'])
}

In [13]:
# Combine the two per-student lookups into one frame indexed by RUID:
# one column of preferred halls and one column of preferred roommates.
hall_series = pd.Series(preference_list)
roommate_series = pd.Series(roommate_list)
ruid_df = pd.DataFrame({
    'preferred_hall': hall_series,
    'roommate_preferences': roommate_series,
})
print(ruid_df)

          preferred_hall     roommate_preferences
110322866  [TS, MZH, RA]  [114942167,  594262393]
421462888   [BS, SA, RA]  [122082784,  316541638]
632997885  [RA, SA, MZH]                       []
692225359  [MRS, SA, BS]  [494246013,  344112668]
158185009   [BS, RA, WS]  [536413979,  843673543]
...                  ...                      ...
547689833   [MH, AH, SA]  [525888768,  580322562]
926637268       [BA, JA]  [736233529,  672055804]
429882627   [MH, BS, RA]                       []
592870719       [BA, JA]  [439357663,  549235572]
420296176   [BS, MS, MH]              [813558427]

[10660 rows x 2 columns]


In [14]:
# Contract type a room must have to be offered to an undergraduate.
UNDERGRAD_CONTRACT = 'Undergraduate Academic Year (Two Semesters)'


def _open_rooms(rooms, hall=None, max_price=None, accessible=False, undergrad_only=False):
    """Return the rows of `rooms` that are still free and pass the given filters.

    rooms          -- room table with an 'available' column (1 means free)
    hall           -- if given, keep only rooms whose 'hall_id' equals it
    max_price      -- if given, keep only rooms with 'price' <= max_price
    accessible     -- if True, keep only rooms with 'has_accessibility_ramps' == 1
    undergrad_only -- if True, keep only rooms with the undergraduate contract type
    """
    mask = rooms['available'] == 1
    if hall is not None:
        mask = mask & (rooms['hall_id'] == hall)
    if max_price is not None:
        mask = mask & (rooms['price'] <= max_price)
    if accessible:
        mask = mask & (rooms['has_accessibility_ramps'] == 1)
    if undergrad_only:
        mask = mask & (rooms['room_contract_type'] == UNDERGRAD_CONTRACT)
    return rooms.loc[mask]


# RUID -> [hall_id, building_id, room_id], or [] when no room could be given.
result = dict()

room_df['available'] = 1
ruid_df['assigned'] = 0

# ruid_df is in priority order, so higher-priority students choose first.
for ruid in ruid_df.index:

    # Already placed earlier as somebody's roommate.
    if ruid_df.loc[ruid, 'assigned'] == 1:
        continue

    student = sorted_df.loc[sorted_df['RUID'] == ruid].iloc[0]

    disability_check = bool(student['accessibility_need'] == 1)
    graduate_check = student['student_school'].find('Graduate') != -1
    max_price = student['max_price']

    # Students needing accessibility are filtered on ramps only; otherwise
    # non-graduates are restricted to undergraduate-contract rooms.
    undergrad_only = (not disability_check) and (not graduate_check)

    preferences = ruid_df.loc[ruid, 'preferred_hall']
    roommate = ruid_df.loc[ruid, 'roommate_preferences']

    # Step 1: first preferred hall that has a suitable, affordable free room.
    available_rooms_df = pd.DataFrame()
    for pref in preferences:
        available_rooms_df = _open_rooms(room_df, hall=pref, max_price=max_price,
                                         accessible=disability_check,
                                         undergrad_only=undergrad_only)
        if not available_rooms_df.empty:
            break

    # Step 2: no preferred hall worked -> any suitable, affordable free room.
    # The accessibility filter is kept here: previously a student who needed
    # an accessible room had this result overwritten by a non-accessible
    # filter exactly when accessible affordable rooms DID exist.
    if available_rooms_df.empty:
        available_rooms_df = _open_rooms(room_df, max_price=max_price,
                                         accessible=disability_check,
                                         undergrad_only=undergrad_only)

    # Step 3: nothing affordable -> any free room (still accessible if needed).
    if available_rooms_df.empty:
        available_rooms_df = _open_rooms(room_df, accessible=disability_check)

    if available_rooms_df.empty:
        result[ruid] = []
        continue

    # Pick one of the candidate rooms at random and mark it taken.
    allocated_room = available_rooms_df.sample().iloc[0]
    room_df.loc[allocated_room.name, 'available'] = 0
    ruid_df.loc[ruid, 'assigned'] = 1
    result[ruid] = [allocated_room['hall_id'],
                    allocated_room['building_id'],
                    allocated_room['room_id']]

    # Roommates: other candidate rows with the same hall, building and room
    # number, excluding the row just given to this student.
    # NOTE: the roommate's own price/accessibility constraints are not checked.
    roommate_rooms_df = available_rooms_df.loc[
        (available_rooms_df['hall_id'] == allocated_room['hall_id'])
        & (available_rooms_df['room_number'] == allocated_room['room_number'])
        & (available_rooms_df['building_id'] == allocated_room['building_id'])
        & (available_rooms_df['room_id'] != allocated_room['room_id'])
    ]

    next_bed = 0  # position of the next unused row in roommate_rooms_df
    for raw_id in roommate:
        if next_bed >= len(roommate_rooms_df):
            break  # no beds left in this room

        raw_id = str(raw_id).strip()
        if not raw_id:
            continue  # [''] means the student listed no roommates

        x = int(raw_id)
        # Skip roommates who are not in the student list or already have a room.
        if x not in ruid_df.index or ruid_df.loc[x, 'assigned'] != 0:
            continue

        roommate_room = roommate_rooms_df.iloc[next_bed]
        next_bed += 1

        room_df.loc[roommate_room.name, 'available'] = 0
        ruid_df.loc[x, 'assigned'] = 1
        result[x] = [roommate_room['hall_id'],
                     roommate_room['building_id'],
                     roommate_room['room_id']]


In [15]:
# Turn the RUID -> assigned-room mapping into a two-column table and save it.
allocation_rows = [(student_id, rooms) for student_id, rooms in result.items()]
df = pd.DataFrame(allocation_rows, columns=['RUID', 'Assigned Rooms'])
df.to_csv('data/processed/allocated.csv', index=False)