<h1>Our Algorithm</h1>

In [1]:
#import necessary packages
import pandas as pd
import geopandas as gpd

In [2]:
subdistrict_resale = gpd.read_file('Generated Files/all_resale_subzoned_o.shp')
#subdistrict_resale.head()

In [3]:
subdistrict_database = gpd.read_file('Generated Files/subzones_featurecount.shp')
#subdistrict_database.head()

In [4]:
x = subdistrict_resale[['real_price', 'SUBZONE_N']].groupby(['SUBZONE_N']).mean()
x = pd.DataFrame(x)
x

Unnamed: 0_level_0,real_price
SUBZONE_N,Unnamed: 1_level_1
ADMIRALTY,393255.777228
ALEXANDRA HILL,400363.665595
ALJUNIED,381270.731677
ANAK BUKIT,599948.774799
BALESTIER,421755.347798
...,...
YISHUN EAST,349140.296902
YISHUN SOUTH,362786.970805
YISHUN WEST,256567.088794
YUHUA,322573.907888


In [8]:
x

Unnamed: 0,SUBZONE_N,real_price
0,ADMIRALTY,393255.777228
1,ALEXANDRA HILL,400363.665595
2,ALJUNIED,381270.731677
3,ANAK BUKIT,599948.774799
4,BALESTIER,421755.347798
...,...,...
143,YISHUN EAST,349140.296902
144,YISHUN SOUTH,362786.970805
145,YISHUN WEST,256567.088794
146,YUHUA,322573.907888


In [5]:
travel_time = gpd.read_file('Generated Files/subzones_all_stats_with_travel.shp')
travel_order = travel_time[['SUBZONE_N','travel__cn']]
resale_and_travel = x.merge(travel_order, how= 'left', on= 'SUBZONE_N')
resale_and_travel['travel__cn'] = resale_and_travel['travel__cn']/60
resale_and_travel.head()

Unnamed: 0,SUBZONE_N,real_price,travel__cn
0,ADMIRALTY,393255.777228,50.916667
1,ALEXANDRA HILL,400363.665595,24.6
2,ALJUNIED,381270.731677,18.516667
3,ANAK BUKIT,599948.774799,40.883333
4,BALESTIER,421755.347798,31.883333


In [None]:
def get_tuples(sliders_dict):
    amenities = ['busstop_no','hawkercent','malls_no','mrtsg_no','schools_no', 'supermarke']
    result = []
    for i in range(6):
        result.append((amenities[i], sliders_dict[i]))
    return result

In [6]:
# INPUTS: (amenity * score) tuples, (resale_low_range * resale_high_range) tuple, max_travel_time int
# name of amenity needs to match the column name for easy processing

def our_algorithm(resale_range, list_of_tuples_from_website, max_travel_time):
    
    #filter out based on resale range
    filtered_resale = resale_and_travel[(resale_range[0] <= resale_and_travel['real_price']) & (resale_and_travel['real_price'] <= resale_range[1])]
    #filter out long travel times
    filtered_travel_time = filtered_resale[filtered_resale['travel__cn']<= max_travel_time]
    #get names of the filtered subdistricts
    filtered_subdistrict_names = filtered_travel_time['SUBZONE_N']

    #initialise result_list
    result_list = []
    
    # using names
    subdistrict_database_prices = subdistrict_database[subdistrict_database['SUBZONE_N'].isin(filtered_subdistrict_names)]
    #reset index
    subdistrict_database_prices = subdistrict_database_prices.reset_index()

    #calculate score for each subdistrict row
    for index, row in subdistrict_database_prices.iterrows():
        #initialise score for row
        result = 0
        #compute based on inputs
        for amenity, score in list_of_tuples_from_website:
            result += list(subdistrict_database_prices.iterrows())[index][1][amenity] * score
        result_list.append((index, result))
    
    #sort the list
    result_list.sort(key = lambda result: result[1], reverse = True)

    #keep the top 10 results
    result_list = result_list[:10]

    #return the database with those results by taking the indices in result_list
    result_database = pd.DataFrame(subdistrict_database_prices.iloc[list(map(lambda result: result[0], result_list))])

    #append their score to the database
    scores_list = []
    for index, score in result_list:
        scores_list.append(score)
    result_database['SCORE'] = scores_list

    #return database
    return result_database

def clean_algorithm_results(result_database):
    #given the database of our top 10 results
    #return the desired outputs in a dataframe
    result_clean = pd.DataFrame(result_database.iloc[:,[3,6,8,16,17,18,19,20,21,23]])
    result_clean.columns = ['Subzone', 'Planning Area', 'Region', '# Bus Stops', '# MRT', '# Schools', '# Malls', '# Supermarkets', '# Hawker Centres', 'Score']
    return result_clean

In [7]:
#run the actual code
#create dummy inputs
resale_range = (300000,400000)
list_of_tuples = [('busstop_no', 5),('mrtsg_no',5),('schools_no',5)] #the rest are 0
max_travel_time = 40 #in minutes

#obtain results
results_database = our_algorithm(resale_range, list_of_tuples, max_travel_time)
clean_results_database = clean_algorithm_results(results_database)

In [10]:
clean_results_database.columns = ['Subzone', 'Planning Area', 'Region', '# Bus Stops', '# MRT', '# Schools', '# Malls', '# Supermarkets', '# Hawker Centres', 'Score']

In [11]:
clean_results_database

Unnamed: 0,Subzone,Planning Area,Region,# Bus Stops,# MRT,# Schools,# Malls,# Supermarkets,# Hawker Centres,Score
19,SERANGOON CENTRAL,SERANGOON,NORTH-EAST REGION,56.0,3.0,1.0,1.0,15.0,0.0,300.0
41,BEDOK NORTH,BEDOK,EAST REGION,56.0,1.0,3.0,5.0,31.0,5.0,300.0
17,HOUGANG CENTRAL,HOUGANG,NORTH-EAST REGION,51.0,1.0,3.0,2.0,17.0,0.0,275.0
1,ALJUNIED,GEYLANG,CENTRAL REGION,45.0,3.0,2.0,0.0,26.0,4.0,250.0
39,YIO CHU KANG,ANG MO KIO,NORTH-EAST REGION,43.0,1.0,1.0,0.0,5.0,1.0,225.0
42,BEDOK SOUTH,BEDOK,EAST REGION,37.0,1.0,3.0,0.0,8.0,3.0,205.0
0,GEYLANG EAST,GEYLANG,CENTRAL REGION,35.0,3.0,2.0,4.0,29.0,3.0,200.0
12,KAMPONG UBI,GEYLANG,CENTRAL REGION,37.0,1.0,2.0,0.0,5.0,0.0,200.0
43,KAKI BUKIT,BEDOK,EAST REGION,37.0,2.0,1.0,0.0,11.0,1.0,200.0
3,LORONG AH SOO,HOUGANG,NORTH-EAST REGION,26.0,0.0,1.0,2.0,16.0,2.0,135.0
