#### Code reference: https://www.xbyte.io/how-to-use-python-to-scrape-real-estate-website-data-using-web-scraping-and-making-data-wrangling.php

In [1]:
import pandas as pd


In [10]:
# Read the curated listings CSV and discard its first column before use.
# NOTE(review): the name `property` shadows the Python builtin; it is kept
# because the following cell refers to it.
property = (
    pd.read_csv('../../data/curated/API_clean_price_with_SA2_using_geolocation.csv')
    .iloc[:, 1:]
)
# Report (rows, columns), then preview the first ten rows.
print(property.shape)
property.head(10)

(14694, 18)


Unnamed: 0,id,time,listing_type,price,property_type,area,building_area,land_area,bedrooms,bathrooms,carspaces,street_address,suburb,postcode,latitude,longitude,weekly_rent,SA2_CODE
0,16066003,2022-08-31T13:00:15,Rent,$320,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,0,"1007/238 Flinders St, Melbourne",MELBOURNE,3000,-37.817516,144.966492,320,206041503.0
1,16065974,2022-08-31T12:50:55,Rent,$700,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,2.0,0,"1211/260 Spencer Street, Melbourne",MELBOURNE,3000,-37.814564,144.952286,700,206041505.0
2,15302782,2022-08-31T10:12:30,Rent,$395_per_week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,1.0,0,"504/350 La Trobe Street, Melbourne",MELBOURNE,3000,-37.81068,144.959274,395,206041504.0
3,16064379,2022-08-30T15:20:28,Rent,$370_per_week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,0,"1106/555 Flinders Street, Melbourne",MELBOURNE,3000,-37.820972,144.956482,370,206041505.0
4,16064243,2022-08-30T14:50:32,Rent,$500_per_week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,1.0,1,"2901/200 Spencer Street, Melbourne",MELBOURNE,3000,-37.816227,144.953247,500,206041505.0
5,16063884,2022-08-30T13:28:24,Rent,$2500_Per_Week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,3.0,3.0,2,"5401/464 Collins Street, Melbourne",MELBOURNE,3000,-37.817562,144.958588,2500,206041505.0
6,16063772,2022-08-30T13:01:47,Rent,$450.00_per_week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,0,"405/39 Queen Street, Melbourne",MELBOURNE,3000,-37.817886,144.961731,450,206041505.0
7,16063139,2022-08-30T10:15:45,Rent,$530,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,3.0,2.0,0,"625/118 Franklin Street, Melbourne",MELBOURNE,3000,-37.808205,144.958908,530,206041504.0
8,16062720,2022-08-29T17:38:39,Rent,$550_per_week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,1.0,0,"1206/118 Russell Street, Melbourne",MELBOURNE,3000,-37.813587,144.968719,550,206041503.0
9,16062428,2022-08-29T16:32:47,Rent,$680,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,2.0,0,"1411/120 A Beckett St, Melbourne",MELBOURNE,3000,-37.809307,144.959732,680,206041504.0


In [11]:
import math

import numpy as np

# Reference point that distances are measured to, in decimal degrees.
# NOTE(review): presumably the Melbourne CBD, given the listings shown - confirm.
CITY_LAT, CITY_LON = -37.818078, 144.96681

# Earth radius in kilometres. 6378 km is roughly the equatorial radius; the
# mean radius more commonly used with the haversine formula is about 6371 km.
# Kept at 6378 so the distances match the values already produced by this notebook.
r = 6378


def haversine_km(lat, lon, origin_lat=CITY_LAT, origin_lon=CITY_LON, radius_km=r):
    """Great-circle distance from each (lat, lon) to a fixed origin, in km.

    Parameters
    ----------
    lat, lon : array-like of float (or a scalar)
        Coordinates in decimal degrees. Values must be convertible to float;
        NaN inputs yield NaN distances.
    origin_lat, origin_lon : float
        Origin coordinates in decimal degrees.
    radius_km : float
        Sphere radius in kilometres.

    Returns
    -------
    numpy.ndarray
        Unrounded distances in kilometres, same shape as the inputs.
    """
    # The origin never changes, so convert it to radians once.
    lat1 = math.radians(origin_lat)
    lon1 = math.radians(origin_lon)
    lat2 = np.radians(np.asarray(lat, dtype=float))
    lon2 = np.radians(np.asarray(lon, dtype=float))

    a = (
        np.sin((lat2 - lat1) / 2) ** 2
        + math.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2
    )
    # Guard against floating-point overshoot outside [0, 1] before the square roots.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return radius_km * c


# Keep only listings with a weekly rent. Chaining reset_index (rather than using
# inplace=True) yields an independent frame, so adding a column below does not
# write into a slice of `property`.
cleaned_house_df = property.dropna(subset=['weekly_rent']).reset_index(drop=True)

# Vectorised over all rows; rounded to 4 decimal places as before. Kept as a
# plain list under the original name in case later cells use it.
dis_to_city = np.round(
    haversine_km(cleaned_house_df['latitude'], cleaned_house_df['longitude']), 4
).tolist()

cleaned_house_df['distance_to_city'] = dis_to_city

In [13]:
# Inspect rows at positions 1000 through 1049 (all columns) as a spot check.
cleaned_house_df.iloc[1000:1050]

Unnamed: 0,id,time,listing_type,price,property_type,area,building_area,land_area,bedrooms,bathrooms,carspaces,street_address,suburb,postcode,latitude,longitude,weekly_rent,SA2_CODE,distance_to_city
1000,16047502,2022-08-19T10:22:24,Rent,$450_per_week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,1,"S2609/231 Harbour Esplanade, Docklands",DOCKLANDS,3008,-37.813576,144.943,450,206041118.0,2.153
1001,16046673,2022-08-18T16:18:11,Rent,$500_weekly,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,0,"1501/628 Flinders Street, Docklands",DOCKLANDS,3008,-37.82097,144.953247,500,206041118.0,1.2353
1002,16045684,2022-08-18T12:07:57,Rent,$620_weekly,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,2.0,1,"1610S/883 Collins Street, Docklands",DOCKLANDS,3008,-37.821213,144.9425,620,206041118.0,2.166
1003,16003855,2022-08-18T11:36:27,Rent,$700_per_week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,2.0,1,"2703/100 Harbour Esplanade, Docklands",DOCKLANDS,3008,-37.818577,144.947372,700,206041118.0,1.7102
1004,16044098,2022-08-17T16:06:14,Rent,$650PW_$2824PCM,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,1.0,1,"1212N/889 Collins Street, Docklands",DOCKLANDS,3008,-37.821163,144.94194,650,206041118.0,2.2137
1005,16043764,2022-08-17T15:02:48,Rent,$400_per_week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,0,"402E/888 Collins Street, Docklands",DOCKLANDS,3008,-37.82073,144.942261,400,206041118.0,2.1788
1006,16042012,2022-08-16T16:45:04,Rent,$550,House,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,0,"2009N/889 Collins Street, Docklands",DOCKLANDS,3008,-37.821163,144.94194,550,206041118.0,2.2137
1007,16039501,2022-08-15T15:53:03,Rent,$550_per_week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,1.0,1,"S1501/231 Harbour Esplanade, Docklands",DOCKLANDS,3008,-37.813576,144.943,550,206041118.0,2.153
1008,16039206,2022-08-15T14:58:31,Rent,$400_Per_Week,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,1.0,1.0,0,"709/838 Bourke Street, Docklands",DOCKLANDS,3008,-37.819813,144.943268,400,206041118.0,2.0792
1009,16038423,2022-08-15T12:00:33,Rent,$550.00,ApartmentUnitFlat,Melbourne City Council - Greater Area,0.0,0.0,2.0,2.0,1,"910/673 La Trobe Street, Docklands",DOCKLANDS,3008,-37.813873,144.949051,550,206041118.0,1.6303
