In [1]:
import pandas as pd
import numpy as np
import time
import re

#CHANGE TO YOUR PATH FOR CSV
student_df = pd.read_csv('datasetC_with_roommate_pref.csv', index_col= False)
student_df = student_df.set_index('RUID', drop=False)
student_df['Score'] = 0.0
student_df['Placement'] = 0

#CHANGE TO YOUR PATH FOR CSV
rooms_df = pd.read_csv('final_room_data.csv', index_col= False)
rooms_df['Assigned'] = [set() for _ in range(rooms_df.shape[0])]

not_placed = set()

In [2]:
def find_room(roomsdf, student):

    #seek rooms in hall preference first. rooms_df truncated to search within halls
    #Round 1: find rooms, in hall preference, with chance for roommates
    #Round 2: look at full rooms in preferred halls, bump lower score student
    if student['Placement'] == 1:
        return True
    
    hall_pref = (re.sub(r'[^a-zA-Z ]', '', student['preferred_hall_ids'])).split()
    for hall in hall_pref:
        temp_df = roomsdf[rooms_df["hall_id"] == hall]
        for index, room in temp_df.iterrows():
            if (len(room['Assigned']) < room['max_room_occupancy']):
                if eval_constraint(room, student):
                    if chk_mutal(room, student):
                        return True
                    else:
                        room['Assigned'].add(int(student.name))
                        student_df.loc[student.RUID, 'Placement'] = 1 
                        eval_pref(room, student)
                        return True
            #If room full, priority given to student.score that is higher
            #Unlikely to move roommate matches, less likely they'll have roommates
            elif (len(room['Assigned']) == room['max_room_occupancy']):
                if eval_constraint(room, student):
                    if test_occupants(room, student):
                        return True
 

    #round 3: Look for rooms outside hall preferences. Include roomates if possible
    remaining_halls = rooms_df[~rooms_df['hall_id'].isin(hall_pref)]
    for index, room in remaining_halls.iterrows():
        if (len(room['Assigned']) < room['max_room_occupancy']):
            if eval_constraint(room, student):
                if chk_mutal(room, student):
                    return True
                else:
                    room['Assigned'].add(int(student.name))
                    eval_pref(room, student)
                    student_df.loc[student.RUID, 'Placement'] = 1
                    return True
        #If room full, priority given to student.score that is higher
        #Unlikely to move roommate matches, less likely they'll have roommates
        elif (len(room['Assigned']) == room['max_room_occupancy']):
            if eval_constraint(room, student):
                if test_occupants(room, student):
                    return True
    
    not_placed.add(student.RUID)
    return False

In [3]:
def eval_constraint(rooms, student):
    chk_1 = ''
    
    year = student['student_year']
    if year == 'Sophomore' or year == 'Freshman' or year == 'Junior'or year == 'Senior':
        chk_1 = 'Undergraduate Academic Year (Two Semesters)'
    else:
        chk_1 = 'Graduate Full Calendar Year – 12 Month Contracts (Two semesters and all breaks)'
        #print('grad')
    
    if chk_1 != rooms['room_contract_type']:
        #print('year')
        return False
       
    if rooms['price'] > student['max_price']:
        #print('price')
        return False
    
    if student['accessibility_need'] == 1 and rooms['has_accessibility_ramps'] != 1:
        return False
    
    return True

In [4]:
def eval_pref(room, student):
    score =0
    student_df.loc[student.RUID, 'Score'] = 0
    
    if room['residence_type'] == student['preferred_residence_type']:
        #print('residence type + 2.0')
        score += 1.5
        
    if room['has_laundry'] == student['laundry_availibility']:
        #print('laundry + .25')
        score += .25
    
    if room['has_private_bathroom'] == student['is_private_bathroom_preferred']:
        #print('bathroom + .25')
        score += .25

    if room['hall_id'] in (student['preferred_hall_ids']):
        #print('hall id + 1')
        score += 2.0
    
    if student['accessibility_need'] == 1:
        #print('access + 9')
        score +=9.0
    
    student_df.loc[student['RUID'], 'Score'] += score

In [5]:
def chk_mutal(room, student):
    
    if student['Placement'] == 1:
        return False
    #if Student has no roommate preferences
    if not student['roommate_preferences'] or len(student['roommate_preferences'].strip()) == 0:
        return False
    
    #find if any mutally listed roommates
    students_list = (re.sub(r'[^0-9 ]', '', student['roommate_preferences'])).split()
    are_mutal = []
    
    for mate in students_list:
        mate_list = (re.sub(r'[^0-9 ]', '', student_df.loc[int(mate), 'roommate_preferences'])).split()
        if str(student.RUID) in mate_list:
            are_mutal.append(int(mate))
    
    #is there room space for all mutal roommates
    available_space = int(room['max_room_occupancy']) - len(room['Assigned'])
    if len(students_list) > available_space:
        return False
           
    #assign mutals to room with initially given student
    success = True
    for mate in are_mutal:
        mate_obj = student_df.loc[mate]
        if student_df.loc[mate, 'Placement'] == 0 and eval_constraint(room, mate_obj):
            room['Assigned'].add(mate)
            student_df.loc[mate, 'Placement'] = 1
            eval_pref(room, mate_obj)
            student_df.loc[mate, 'Score'] += 2.0
        else:
            success = False

    if success:
        room['Assigned'].add(student.RUID)
        student_df.loc[student.RUID, 'Placement'] = 1
        eval_pref(room, student)
        student_df.loc[student.RUID, 'Score'] += 2.0
        return True

    return False

In [6]:
def test_occupants(room, student):
    #If room full in first preferred hall:
    #temp turn student.Assigned to prevent roommate method since room is already full
    #Deferred acceptance style: compare student,score to occupants.score in rooms
    #If student,score evaluates higher, remove occupant, add student, set statistics, find occupant a room
    
    student_df.loc[student.RUID, 'Placement'] = 1
    eval_pref(room, student)
    
    for occupants in list(room['Assigned']):
        if student_df.loc[student.RUID, 'Score'] > student_df.loc[occupants, 'Score']:
            room['Assigned'].remove(occupants)
            student_df.loc[int(occupants), 'Placement'] = 0
            find_room(rooms_df, student_df.loc[occupants])
            room['Assigned'].add(student.RUID)
            return True
        
    return False    
    

In [7]:
def make_output_file():
    results = {}
    for ruid, rms in rooms_df.iterrows():
        assigned_Rooms =[]
        assigned_Rooms.append(rms.hall_id) 
        assigned_Rooms.append(rms.building_id)
        assigned_Rooms.append(rms.room_id)
        if len(rms.Assigned) > 0:
            for ruid in rms.Assigned:
                #assigned_Rooms.append(student_df.loc[int(ruid)].Score)
                results[ruid] = assigned_Rooms

    len(results)

    df = pd.DataFrame(list(results.items()), columns=['RUID', 'Assigned Rooms'])
    df.to_csv('GroupHP1_DataC2.csv')
    print('Done')

In [8]:
def start_me(rm_df, stu_df):
    
    loop_time = time.time()
    
    for index, student in stu_df.iterrows():
        find_room(rm_df, student)
        
    print('FPS {}'.format(1 / (time.time() - loop_time)))
    #make_output_file()
    

In [9]:
# student =student_df.iloc[500]
# hall_pref = (re.sub(r'[^a-zA-Z ]', '', student['preferred_hall_ids'])).split()
# print(student)
# remaining_halls = rooms_df[~rooms_df['hall_id'].isin(hall_pref)]
# print(remaining_halls['hall_id'].unique())

In [10]:
stu_accessdf = student_df[student_df['accessibility_need'] ==1]
start_me(rooms_df, stu_accessdf)

remaining_stud = student_df[student_df['accessibility_need'] ==0]

remain_stu1 = remaining_stud[0:1000]
start_me(rooms_df, remain_stu1)

remain_stu2 = remaining_stud[1001:2000]
start_me(rooms_df, remain_stu2)

remain_stu3 = remaining_stud[2001:3000]
start_me(rooms_df, remain_stu3)

remain_stu4 = remaining_stud[3001:]
start_me(rooms_df, remain_stu4)

print(student_df.Score.value_counts())

FPS 0.034234155260801855
FPS 0.02823111815663756
FPS 0.02094399895705047
FPS 0.021295461448646056
FPS 0.010965896629971904
Score
4.00     962
2.50     947
2.25     507
0.00     444
0.50     369
0.25     357
2.00     336
6.00     334
4.50     326
3.75     257
4.25     209
1.75     101
5.75      52
11.50     40
3.50      23
9.50      19
11.00     11
13.00      8
9.25       5
15.00      5
13.50      4
12.75      4
1.50       3
5.50       3
11.25      2
9.00       1
12.50      1
Name: count, dtype: int64


In [11]:
make_output_file()

Done


In [12]:
print(len(not_placed))

731
