<h1>Our Algorithm</h1>

In [1]:
#import necessary packages
import pandas as pd
import geopandas as gpd

In [2]:
# Load the resale records shapefile; later cells read its `real_price`
# and `SUBZONE_N` columns.
subdistrict_resale = gpd.read_file(
    'Generated Files/all_resale_subzoned_o.shp'
)

In [3]:
# Load the per-subzone feature-count shapefile; `our_algorithm` looks up
# `SUBZONE_N` and the amenity columns in this table.
subdistrict_database = gpd.read_file(
    'Generated Files/subzones_featurecount.shp'
)

In [4]:
# Mean `real_price` per subzone, indexed by `SUBZONE_N`.
# (`groupby(...).mean()` already yields a DataFrame, so no re-wrapping is needed.)
x = (
    subdistrict_resale[['SUBZONE_N', 'real_price']]
    .groupby('SUBZONE_N')
    .mean()
)
x

Unnamed: 0_level_0,real_price
SUBZONE_N,Unnamed: 1_level_1
ADMIRALTY,393255.777228
ALEXANDRA HILL,400363.665595
ALJUNIED,381270.731677
ANAK BUKIT,599948.774799
BALESTIER,421755.347798
...,...
YISHUN EAST,349140.296902
YISHUN SOUTH,362786.970805
YISHUN WEST,256567.088794
YUHUA,322573.907888


In [5]:
# Attach each subzone's travel figure to its mean resale price.
travel_time = gpd.read_file('Generated Files/subzones_all_stats_with_travel.shp')
travel_order = travel_time[['SUBZONE_N', 'travel__cn']]

# Left merge keeps every subzone that has a mean price, even if it has no
# travel record. The division by 60 presumably converts seconds to minutes
# (the demo cell passes `max_travel_time` in minutes) -- TODO confirm the
# unit stored in the shapefile.
resale_and_travel = (
    x.merge(travel_order, how='left', on='SUBZONE_N')
     .assign(travel__cn=lambda merged: merged['travel__cn'] / 60)
)
resale_and_travel.head()

Unnamed: 0,SUBZONE_N,real_price,travel__cn
0,ADMIRALTY,393255.777228,50.916667
1,ALEXANDRA HILL,400363.665595,24.6
2,ALJUNIED,381270.731677,18.516667
3,ANAK BUKIT,599948.774799,40.883333
4,BALESTIER,421755.347798,31.883333


In [6]:
# Scoring algorithm (first draft)
# INPUTS: a (resale_low, resale_high) price-range tuple, a list of (amenity, score) tuples, and max_travel_time (int, in minutes)
# Each amenity name must match a column name in subdistrict_database so it can be looked up directly

def our_algorithm(resale_range, list_of_tuples_from_website, max_travel_time,
                  top_n=10, resale_travel_table=None, amenity_table=None):
    """Rank subzones by weighted amenity counts after price and travel filtering.

    Parameters
    ----------
    resale_range : sequence of two numbers
        ``(low, high)`` bounds on ``real_price``; both ends are inclusive.
    list_of_tuples_from_website : iterable of (str, number)
        ``(amenity, score)`` pairs. Each amenity must be a column of the
        amenity table; the column value is multiplied by ``score`` and the
        products are summed into the row's total.
    max_travel_time : number
        Inclusive upper bound on ``travel__cn`` (same unit as that column).
    top_n : int, optional
        How many of the best-scoring rows to return (default 10).
    resale_travel_table : pandas.DataFrame, optional
        Table with ``SUBZONE_N``, ``real_price`` and ``travel__cn`` columns.
        Defaults to the notebook-level ``resale_and_travel``.
    amenity_table : pandas.DataFrame, optional
        Table with ``SUBZONE_N`` plus one column per amenity.
        Defaults to the notebook-level ``subdistrict_database``.

    Returns
    -------
    pandas.DataFrame
        At most ``top_n`` rows of the amenity table, ordered by descending
        score, with the table's original row labels in an ``index`` column
        and the total in a ``SCORE`` column. Rows with equal scores keep the
        order they have in the amenity table.

    Raises
    ------
    KeyError
        If an amenity name is not a column of the amenity table.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # Fall back to the notebook globals only when no table is passed in, so
    # existing three-argument calls behave exactly as before.
    if resale_travel_table is None:
        resale_travel_table = resale_and_travel
    if amenity_table is None:
        amenity_table = subdistrict_database

    # Materialise the pairs: they are walked twice (validation, then scoring).
    weights = list(list_of_tuples_from_website)
    missing = [amenity for amenity, _ in weights
               if amenity not in amenity_table.columns]
    if missing:
        raise KeyError(f"amenity columns not found: {missing}")

    # Keep subzones inside the price range and within the travel limit.
    price = resale_travel_table['real_price']
    within_budget = (resale_range[0] <= price) & (price <= resale_range[1])
    within_reach = resale_travel_table['travel__cn'] <= max_travel_time
    candidate_names = resale_travel_table.loc[within_budget & within_reach, 'SUBZONE_N']

    # reset_index() gives positional labels 0..n-1 and keeps the original
    # labels in an 'index' column, as callers of this function already see.
    candidates = amenity_table[amenity_table['SUBZONE_N'].isin(candidate_names)].reset_index()

    # Weighted sum per row, computed column-wise instead of re-materialising
    # every row of the table for each (row, amenity) pair.
    scores = pd.Series(0, index=candidates.index)
    for amenity, weight in weights:
        scores = scores + candidates[amenity] * weight

    # An ascending stable sort of the negated scores is a descending sort in
    # which tied rows keep their original relative order.
    best = (-scores).sort_values(kind='mergesort').index[:top_n]

    # Wrap in a fresh DataFrame before adding the score column.
    result_database = pd.DataFrame(candidates.loc[best])
    result_database['SCORE'] = scores.loc[best].to_numpy()
    return result_database

def clean_algorithm_results(result_database):
    """Placeholder for converting the top-results table into a dictionary.

    Intended to take the table of top results and return the desired outputs
    in dictionary form for easy access. Not implemented yet, so it currently
    ignores its argument and returns ``None``.
    """
    # TODO: build and return the output dictionary.
    return None

In [7]:
# Example run. Relies on `subdistrict_database` and `resale_and_travel`
# having been loaded by the earlier cells.

# Dummy inputs standing in for what the website would send.
resale_range = (300000, 400000)
list_of_tuples = [
    ('busstop_no', 5),
    ('mrtsg_no', 5),
    ('schools_no', 5),
]
max_travel_time = 40  # in minutes

# Rank the subzones, then pass the ranking to the result cleaner.
results_database = our_algorithm(
    resale_range=resale_range,
    list_of_tuples_from_website=list_of_tuples,
    max_travel_time=max_travel_time,
)
results_dictionary = clean_algorithm_results(result_database=results_database)

In [8]:
# Show the ranked subzones with their scores (at most ten rows).
results_database.head(n=10)

Unnamed: 0,index,OBJECTID,SUBZONE_NO,SUBZONE_N,SUBZONE_C,CA_IND,PLN_AREA_N,PLN_AREA_C,REGION_N,REGION_C,...,SHAPE_Leng,SHAPE_Area,busstop_no,mrtsg_no,schools_no,malls_no,supermarke,hawkercent,geometry,SCORE
19,151,152,4,SERANGOON CENTRAL,SGSZ04,N,SERANGOON,SG,NORTH-EAST REGION,NER,...,9137.391554,3140777.0,56.0,3.0,1.0,1.0,15.0,0.0,"POLYGON ((32986.926 37324.426, 32968.297 37310...",300.0
41,277,278,4,BEDOK NORTH,BDSZ04,N,BEDOK,BD,EAST REGION,ER,...,8413.277055,3202920.0,56.0,1.0,3.0,5.0,31.0,5.0,"POLYGON ((41191.002 34698.575, 41164.711 34676...",300.0
17,136,137,1,HOUGANG CENTRAL,HGSZ01,N,HOUGANG,HG,NORTH-EAST REGION,NER,...,6868.798253,2291220.0,51.0,1.0,3.0,2.0,17.0,0.0,"POLYGON ((35948.803 40264.817, 35951.927 40239...",275.0
1,7,8,4,ALJUNIED,GLSZ04,N,GEYLANG,GL,CENTRAL REGION,CR,...,7049.124464,2945506.0,45.0,3.0,2.0,0.0,26.0,4.0,"POLYGON ((34277.234 32322.636, 34282.432 32306...",250.0
39,270,271,8,YIO CHU KANG,AMSZ08,N,ANG MO KIO,AM,NORTH-EAST REGION,NER,...,7665.910209,3617085.0,43.0,1.0,1.0,0.0,5.0,1.0,"POLYGON ((30759.279 41446.512, 30763.459 41376...",225.0
42,279,280,6,BEDOK SOUTH,BDSZ06,N,BEDOK,BD,EAST REGION,ER,...,7812.691833,2997458.0,37.0,1.0,3.0,0.0,8.0,3.0,"POLYGON ((41191.669 34697.586, 41202.645 34681...",205.0
0,6,7,5,GEYLANG EAST,GLSZ05,N,GEYLANG,GL,CENTRAL REGION,CR,...,7027.740482,2580431.0,35.0,3.0,2.0,4.0,29.0,3.0,"POLYGON ((35987.194 33319.391, 36001.551 33245...",200.0
12,103,104,3,KAMPONG UBI,GLSZ03,N,GEYLANG,GL,CENTRAL REGION,CR,...,5573.533058,1878092.0,37.0,1.0,2.0,0.0,5.0,0.0,"POLYGON ((36064.714 34258.022, 35898.566 34217...",200.0
43,281,282,1,KAKI BUKIT,BDSZ01,N,BEDOK,BD,EAST REGION,ER,...,8153.838704,2819363.0,37.0,2.0,1.0,0.0,11.0,1.0,"POLYGON ((37595.840 34959.383, 37493.949 34909...",200.0
3,11,12,4,LORONG AH SOO,HGSZ04,N,HOUGANG,HG,NORTH-EAST REGION,NER,...,5432.407082,1507934.0,26.0,0.0,1.0,2.0,16.0,2.0,"POLYGON ((34643.156 38173.492, 34654.156 38093...",135.0
