### This notebook follows ``ors_iteration_add_rentalDistance.ipynb``, which computed the distance and travel time from each property to every place and to the CBD. Here we keep only the closest distance for each place type.

In [1]:
import pandas as pd
import sys
import os
# caution: path[0] is reserved for script path (or '' in REPL)
sys.path.insert(1, '../../scripts/')
from add_distance import get_min_distance_time

In [2]:
# Ensure the output folder for the per-year minimum-distance files exists.
# exist_ok=True makes this safe to re-run and avoids the check-then-create
# race; it also fails loudly if the path exists but is not a directory.
os.makedirs('../../data/distance', exist_ok=True)

In [3]:
# One featured distance file is processed per year, 2013 through 2022.
YEARS = list(range(2013, 2023))

for year in YEARS:
    distance_df = pd.read_csv(f"../../data/featured/{year}_distance_rental_place.csv")

    # Rows where both distance and time are exactly zero are treated as
    # failed API requests and removed before taking minimums.
    zero_distance = distance_df['dist_to_place_M'] == 0.0
    zero_time = distance_df['time_to_place_S'] == 0.0
    failed_rows = distance_df.index[zero_distance & zero_time]
    clean_distance_df = distance_df.drop(index=failed_rows)

    # Number of unique property-month pairs left after cleaning, reported so
    # it can be compared with the count after the minimum-distance step.
    repeated_pair = clean_distance_df.duplicated(subset=['address', 'month'])
    addr_month_pair = len(clean_distance_df[~repeated_pair])
    print(f"Originally {addr_month_pair} address-month pairs, shape = {clean_distance_df.shape}")

    # NOTE(review): presumably this helper also writes
    # ../../data/distance/{year}_min_distance.csv and prints the
    # "Completed Year ..." line -- confirm in scripts/add_distance.py.
    min_distance_df = get_min_distance_time(clean_distance_df, year)

Originally 10939 address-month pairs, shape = (168747, 24)
Completed Year 2013, updated 10939 address-month pairs, shape = (44940, 21)
Originally 12088 address-month pairs, shape = (211804, 24)
Completed Year 2014, updated 12088 address-month pairs, shape = (51511, 21)
Originally 12490 address-month pairs, shape = (220248, 24)
Completed Year 2015, updated 12490 address-month pairs, shape = (53404, 21)
Originally 14751 address-month pairs, shape = (312983, 24)
Completed Year 2016, updated 14751 address-month pairs, shape = (63627, 21)
Originally 16706 address-month pairs, shape = (352858, 24)
Completed Year 2017, updated 16706 address-month pairs, shape = (71858, 21)
Originally 19015 address-month pairs, shape = (397671, 24)
Completed Year 2018, updated 19015 address-month pairs, shape = (81522, 21)
Originally 20851 address-month pairs, shape = (431750, 24)
Completed Year 2019, updated 20851 address-month pairs, shape = (89413, 21)
Originally 20398 address-month pairs, shape = (438142, 

### Transform the dataset: pivot the row-wise distances (one row per place type) into one column per place type

In [4]:
# Ensure the output folder for the reshaped (wide) per-year files exists.
# exist_ok=True makes this safe to re-run and avoids the check-then-create
# race; it also fails loudly if the path exists but is not a directory.
os.makedirs('../../data/curated/min_distance', exist_ok=True)

In [5]:
# Reshape each year's long-format minimum-distance table (one row per
# property, month and place type) into a wide table with one row per
# property-month and one distance column per place type.
YEARS = [2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021, 2022]

# Property-level columns carried over from the long-format table.
HOUSE_COLUMNS = [
    'address', 'latitude_ori', 'longitude_ori', 'nbed', 'nbath', 'ncar',
    'weekly_rent', 'postcode', 'year', 'month', 'residence_type',
    'SA2_CODE', 'dist_to_cbd_KM',
]

# Columns that together identify one output row.
GROUP_KEYS = [
    'address', 'latitude_ori', 'longitude_ori', 'year', 'month', 'nbed',
    'nbath', 'ncar', 'postcode', 'SA2_CODE', 'residence_type',
    'dist_to_cbd_KM',
]

# place_type value in the input -> name of the wide output column.
# Dict order fixes the column order of the written CSV.
PLACE_COLUMNS = {
    'park': 'min_distance_to_park',
    'primary': 'min_distance_to_prim',
    'secondary': 'min_distance_to_second',
    'station': 'min_distance_to_train',
    'hospital': 'min_distance_to_hosp',
    'police': 'min_distance_to_poli',
    'shopping': 'min_distance_to_shop',
}

for year in YEARS:
    min_distance_df = pd.read_csv(f"../../data/distance/{year}_min_distance.csv")
    print(f"original min distance shape {min_distance_df.shape}")
    house_df = min_distance_df[HOUSE_COLUMNS]

    place_type = min_distance_df['place_type']
    distance_km = min_distance_df['min_distance_to_place_KM']

    # Each row carries the distance for exactly one place type; spread it
    # into that type's column and leave the other type columns as NaN.
    added_df = house_df.assign(**{
        column: distance_km.where(place_type == kind)
        for kind, column in PLACE_COLUMNS.items()
    })

    # 'first' skips NaN, so each group collapses to the single non-missing
    # distance per place type; rent is averaged over the group's rows.
    aggregations = {column: 'first' for column in PLACE_COLUMNS.values()}
    aggregations['weekly_rent'] = 'mean'
    transformed_df = added_df.groupby(GROUP_KEYS, as_index=False).agg(aggregations)

    print(f"transformed df has shape {transformed_df.shape}")
    transformed_df.to_csv(f"../../data/curated/min_distance/{year}_min_distance.csv", index=False)

original min distance shape (44940, 21)
transformed df has shape (10939, 20)
original min distance shape (51511, 21)
transformed df has shape (12088, 20)
original min distance shape (53404, 21)
transformed df has shape (12490, 20)
original min distance shape (63627, 21)
transformed df has shape (14751, 20)
original min distance shape (71858, 21)
transformed df has shape (16706, 20)
original min distance shape (81522, 21)
transformed df has shape (19015, 20)
original min distance shape (89413, 21)
transformed df has shape (20851, 20)
original min distance shape (87081, 21)
transformed df has shape (20398, 20)
original min distance shape (105648, 21)
transformed df has shape (24945, 20)
original min distance shape (288794, 21)
transformed df has shape (68456, 20)


In [6]:
# Exploratory run on 2020 only: same long-to-wide reshape, but grouped
# WITHOUT year/month, so each property collapses to a single row and
# weekly_rent is averaged over all of its rows.
min_distance_df = pd.read_csv(f"../../data/distance/2020_min_distance.csv")
print(f"original min distance shape {min_distance_df.shape}")

house_columns = [
    'address', 'latitude_ori', 'longitude_ori', 'nbed', 'nbath', 'ncar',
    'weekly_rent', 'postcode', 'year', 'month', 'residence_type',
    'SA2_CODE', 'dist_to_cbd_KM',
]
house_df = min_distance_df[house_columns]
mask = min_distance_df[['min_distance_to_place_KM', 'place_type']]

# (place_type value, output column) in the order the columns are appended.
wide_columns = [
    ('park', 'min_distance_to_park'),
    ('primary', 'min_distance_to_prim'),
    ('secondary', 'min_distance_to_second'),
    ('station', 'min_distance_to_train'),
    ('hospital', 'min_distance_to_hosp'),
    ('police', 'min_distance_to_poli'),
    ('shopping', 'min_distance_to_shop'),
]

# Index-aligned left joins: a row gets a value in a type's column only when
# that row itself belongs to the type; every other row stays NaN there.
added_df = house_df
for place, column in wide_columns:
    of_this_type = mask['place_type'] == place
    nearest = mask.loc[of_this_type, 'min_distance_to_place_KM']
    added_df = added_df.join(nearest.rename(column))

group_keys = [
    'address', 'latitude_ori', 'longitude_ori', 'nbed', 'nbath', 'ncar',
    'postcode', 'SA2_CODE', 'residence_type', 'dist_to_cbd_KM',
]
how = dict.fromkeys([column for _, column in wide_columns], 'first')
how['weekly_rent'] = 'mean'

grouped = added_df.groupby(group_keys, as_index=False).agg(how)
# Coordinates are only needed as grouping keys here, not in the preview.
transformed_df = grouped.drop(columns=['latitude_ori', 'longitude_ori'])

print(f"transformed df has shape {transformed_df.shape}")
transformed_df.head(20)

original min distance shape (87081, 21)
transformed df has shape (15832, 16)


Unnamed: 0,address,nbed,nbath,ncar,postcode,SA2_CODE,residence_type,dist_to_cbd_KM,min_distance_to_park,min_distance_to_prim,min_distance_to_second,min_distance_to_train,min_distance_to_hosp,min_distance_to_poli,min_distance_to_shop,weekly_rent
0,". 'WILLOW COTTAGE' TARCOMBE RD, AVENEL",3.0,1.0,1,3664,204011058,House,126.1111,18.377,9.08981,,10.60099,,28.86936,,250.0
1,". 'WILLOW COTTAGE' TARCOMBE ROAD, AVENEL",3.0,1.0,1,3664,204011058,House,127.42434,19.69024,10.40304,,11.91423,,30.18259,,250.0
2,". UNDER APPLICATION ., DOLLAR",3.0,2.0,0,3871,205031087,House,158.61377,6.05915,6.98571,19.96922,15.76803,20.72416,15.85114,,370.0
3,". UNDER APPLICATION ., FAIRBANK",3.0,2.0,2,3951,205031090,House,135.618,10.3244,10.17627,9.78421,6.72449,10.80624,9.5112,,420.0
4,".275 SOLDIERS SETTLERS ROAD, TALLANGATTA VALLEY",3.0,1.0,2,3701,204031072,House,367.34544,12.32206,5.23339,15.68916,5.419,14.93642,14.76891,,280.0
5,".4 BURNETT STREET, YARRAGON",3.0,1.0,1,3823,205011078,House,117.01551,0.91479,0.60114,9.59293,0.6766,,0.57072,,300.0
6,"0 MANAGERS RESIDENCE, BOLINDA VALE, CLARKEFIELD",3.0,1.0,2,3430,210021235,House,50.62358,15.87879,0.88293,14.93019,0.85544,,15.97593,,330.0
7,"003/903 DANDENONG ROAD, MALVERN EAST",1.0,1.0,0,3145,208041195,Apartment,13.86135,0.9325,0.9188,2.494,2.208,,,3.96501,305.0
8,"005/903 DANDENONG ROAD, MALVERN EAST",1.0,1.0,0,3145,208041195,Apartment,13.86135,0.9325,0.9188,2.494,2.208,,,3.96501,270.0
9,"01/17 MACQUARIE STREET, PRAHRAN",1.0,1.0,0,3181,206061136,Apartment,6.76544,,0.87658,0.16912,0.65365,,1.34207,,260.0


In [7]:
# Peek at rows 40-79 of the long-format table (all columns) to see how the
# per-place-type rows of each property are laid out.
min_distance_df.iloc[40:80]

Unnamed: 0,address,latitude_ori,longitude_ori,nbed,nbath,ncar,weekly_rent,postcode,year,month,...,SA2_CODE,place_type,dist_to_cbd_M,dist_to_cbd_KM,time_to_cbd_S,time_to_cbd_MIN,min_distance_to_place_M,min_distance_to_place_KM,min_time_to_place_S,min_time_to_place_MIN
40,"34 DEAKIN AVENUE, BAIRNSDALE",-37.814134,147.629506,4.0,2.0,2,400,3875,2020,1,...,205021081,police,282904.91,282.90491,13116.57,218.6095,4073.23,4.07323,441.69,7.3615
41,"34 DEAKIN AVENUE, BAIRNSDALE",-37.814134,147.629506,4.0,2.0,2,400,3875,2020,1,...,205021081,hospital,282904.91,282.90491,13116.57,218.6095,4138.97,4.13897,419.21,6.986833
42,"34 DEAKIN AVENUE, BAIRNSDALE",-37.814134,147.629506,4.0,2.0,2,400,3875,2020,1,...,205021081,secondary,282904.91,282.90491,13116.57,218.6095,3425.75,3.42575,364.73,6.078833
43,"34 DEAKIN AVENUE, BAIRNSDALE",-37.814134,147.629506,4.0,2.0,2,400,3875,2020,1,...,205021081,primary,282904.91,282.90491,13116.57,218.6095,252.73,0.25273,67.34,1.122333
44,"34 DEAKIN AVENUE, BAIRNSDALE",-37.814134,147.629506,4.0,2.0,2,400,3875,2020,1,...,205021081,station,282904.91,282.90491,13116.57,218.6095,3770.96,3.77096,407.01,6.7835
45,"34 DEAKIN AVENUE, BAIRNSDALE",-37.814134,147.629506,4.0,2.0,2,400,3875,2020,1,...,205021081,park,282904.91,282.90491,13116.57,218.6095,360.74,0.36074,86.57,1.442833
46,"15 STRABANE WAY, HAMPTON PARK",-38.041171,145.261854,3.0,2.0,2,1651,3976,2020,1,...,212031562,park,43543.86,43.54386,2507.85,41.7975,332.06,0.33206,74.51,1.241833
47,"4 JASON PLACE, HAMPTON PARK",-38.038203,145.253295,4.0,3.0,2,400,3976,2020,1,...,212031562,shopping,42799.21,42.79921,2446.86,40.781,1773.63,1.77363,326.93,5.448833
48,"15 STRABANE WAY, HAMPTON PARK",-38.041171,145.261854,3.0,2.0,2,1651,3976,2020,1,...,212031562,primary,43543.86,43.54386,2507.85,41.7975,967.13,0.96713,171.66,2.861
49,"15 STRABANE WAY, HAMPTON PARK",-38.041171,145.261854,3.0,2.0,2,1651,3976,2020,1,...,212031562,shopping,43543.86,43.54386,2507.85,41.7975,1483.89,1.48389,216.32,3.605333
