In [72]:
# imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob # for file handling

import googlemaps
import time
import dotenv as dotenv
import csv
from math import radians, sin, cos, sqrt, atan2

### Section 1: Data concatenation, cleaning and preprocessing

In [32]:
# Preprocess initial .csv files from data.gov.sg
csv_files = glob.glob("./data/*.csv")
print(csv_files)
dataframes = [pd.read_csv(file) for file in csv_files]

for df in dataframes:
    df['month'] = pd.to_datetime(df['month'])

# check columns of each dataframe
for i, df in enumerate(dataframes):
    print(f"DataFrame {csv_files[i]} \nColumns: {df.columns}")

['./data\\Resale Flat Prices (Based on Approval Date), 1990 - 1999.csv', './data\\Resale Flat Prices (Based on Approval Date), 2000 - Feb 2012.csv', './data\\Resale Flat Prices (Based on Registration Date), From Jan 2015 to Dec 2016.csv', './data\\Resale Flat Prices (Based on Registration Date), From Mar 2012 to Dec 2014.csv', './data\\Resale flat prices based on registration date from Jan-2017 onwards.csv']
DataFrame ./data\Resale Flat Prices (Based on Approval Date), 1990 - 1999.csv 
Columns: Index(['month', 'town', 'flat_type', 'block', 'street_name', 'storey_range',
       'floor_area_sqm', 'flat_model', 'lease_commence_date', 'resale_price'],
      dtype='object')
DataFrame ./data\Resale Flat Prices (Based on Approval Date), 2000 - Feb 2012.csv 
Columns: Index(['month', 'town', 'flat_type', 'block', 'street_name', 'storey_range',
       'floor_area_sqm', 'flat_model', 'lease_commence_date', 'resale_price'],
      dtype='object')
DataFrame ./data\Resale Flat Prices (Based on Regist

### Decision to remove column 'remaining_lease' - that is onyl present for datasets post 2015
In order to maintain consistency between all datasets, we have decided to remove the remaining_lease column

In [None]:
# Remove 'remaining_lease' column from all dataframes
for df in dataframes:
    if 'remaining_lease' in df.columns:
        df.drop(columns=['remaining_lease'], inplace=True)

# Create new data.csv file with all data combined
combined_df = pd.concat(dataframes, ignore_index=True)
combined_df.sort_values(by='month', inplace=True)

# count nan values in each column
print(combined_df.isna().sum())
# combined_df.dropna(inplace=True) # Not required as no nan values in the dataset

# due to nature of data, namely: storey range, dosent make sense to drop duplicates
# duplicates = combined_df[combined_df.duplicated()]
# print(f"Number of duplicate rows: {duplicates.shape[0]}")

# # remove duplicates
# combined_df.drop_duplicates(inplace=True)

# # check for duplicates again
# duplicates = combined_df[combined_df.duplicated()]
# print(f"Number of duplicate rows after removal: {duplicates.shape[0]}")

# reset index
combined_df.reset_index(drop=True, inplace=True)

# final csv file
# combined_df.to_csv("data.csv", index=False)

month                  0
town                   0
flat_type              0
block                  0
street_name            0
storey_range           0
floor_area_sqm         0
flat_model             0
lease_commence_date    0
resale_price           0
dtype: int64


### Section 2: Feature Engineering: Calculating distance of HDB from MRT

In [52]:
# Using google maps api, get lat long coordinates of each location, add to dataframe

''' 
Rate Limit: 
3,000 QPM (queries per minute), calculated as the sum of client-side and server-side queries.

We have about 950k rows in our dataset, one way to solve would be to cache similar results
'''

# calculating number of similar blocks + street names to cache
count_pairs = combined_df.groupby(['block', 'street_name']).size().reset_index(name='counts')
print(count_pairs)

# drop 'counts' column
count_pairs.drop(columns=['counts'], inplace=True)

     block        street_name  counts
0        1           BEACH RD     147
1        1    BEDOK STH AVE 1     228
2        1       CHAI CHEE RD     129
3        1  CHANGI VILLAGE RD      35
4        1          DELTA AVE      89
...    ...                ...     ...
9885   99A    LOR 2 TOA PAYOH      50
9886   99B    LOR 2 TOA PAYOH      43
9887   99C    LOR 2 TOA PAYOH      52
9888    9A      BOON TIONG RD      32
9889    9B      BOON TIONG RD      26

[9890 rows x 3 columns]


##### Since we have only 10k unique combinations, we can just add a sleep (hopefully i dont go broke)

In [56]:
gmaps = googlemaps.Client(key=dotenv.get_key('.env', 'GOOGLE_MAPS_API_KEY'))

geocode_cache = {}

# test api call so i dont go broke
response = gmaps.geocode("10 Yishun Ave 9, Singapore")
print(response)
print(response[0]['geometry']['location'])


[{'address_components': [{'long_name': '10', 'short_name': '10', 'types': ['street_number']}, {'long_name': 'Yishun Avenue 9', 'short_name': 'Yishun Ave 9', 'types': ['route']}, {'long_name': 'Yishun', 'short_name': 'Yishun', 'types': ['neighborhood', 'political']}, {'long_name': 'Singapore', 'short_name': 'Singapore', 'types': ['locality', 'political']}, {'long_name': 'Singapore', 'short_name': 'SG', 'types': ['country', 'political']}, {'long_name': '768888', 'short_name': '768888', 'types': ['postal_code']}], 'formatted_address': '10 Yishun Ave 9, Singapore 768888', 'geometry': {'bounds': {'northeast': {'lat': 1.4318163, 'lng': 103.8400468}, 'southwest': {'lat': 1.43143, 'lng': 103.8396539}}, 'location': {'lat': 1.4316347, 'lng': 103.8398656}, 'location_type': 'ROOFTOP', 'viewport': {'northeast': {'lat': 1.432972130291502, 'lng': 103.8411993302915}, 'southwest': {'lat': 1.430274169708498, 'lng': 103.8385013697085}}}, 'navigation_points': [{'location': {'latitude': 1.4317223, 'longitu

In [57]:
def geocode_address(block, street):
    address = f"Block {block}, {street}, Singapore"
    try:
        geocode_result = gmaps.geocode(address)
        if geocode_result:
            location = geocode_result[0]['geometry']['location']
            return location['lat'], location['lng']
    except Exception as e:
        print(f"Error geocoding {address}: {e}")
    return None, None

# Loop through the DataFrame and geocode each unique address
count = 0
for idx, row in count_pairs.iterrows():
    block = row['block']
    street = row['street_name']
    
    # Skip if already cached
    if (block, street) in geocode_cache:
        continue
    
    # Call Geocoding API
    lat, lng = geocode_address(block, street)
    geocode_cache[(block, street)] = (lat, lng)
    print(f"Geocoded {block}, {street}: {lat}, {lng}")
    
    # Sleep every 2500 requests for 90s
    count += 1
    if count % 2500 == 0:
        print(f"Processed {count} addresses. Sleeping 90s for API limit")
        time.sleep(90)

# Convert cache dictionary to a DataFrame
cache_df = pd.DataFrame(
    [(k[0], k[1], v[0], v[1]) for k, v in geocode_cache.items()],
    columns=['block', 'street_name', 'latitude', 'longitude']
)

df_merged = combined_df.merge(cache_df, on=['block', 'street_name'], how='left')

# Save the result
df_merged.to_csv("data_with_latlong.csv", index=False)

Geocoded 1, BEACH RD: 1.3035543, 103.8646977
Geocoded 1, BEDOK STH AVE 1: 1.3206048, 103.9331592
Geocoded 1, CHAI CHEE RD: 1.3280646, 103.9225418
Geocoded 1, CHANGI VILLAGE RD: 1.3879674, 103.9876107
Geocoded 1, DELTA AVE: 1.2920784, 103.8285835
Geocoded 1, DOVER RD: 1.3025459, 103.7832746
Geocoded 1, EAST COAST RD: 1.3117001, 103.9229924
Geocoded 1, EVERTON PK: 1.2771984, 103.8402622
Geocoded 1, GHIM MOH RD: 1.312946, 103.7868692
Geocoded 1, HAIG RD: 1.3112025, 103.8976789
Geocoded 1, HOUGANG AVE 3: 1.3641336, 103.8929246
Geocoded 1, JLN BT MERAH: 1.2864913, 103.8083538
Geocoded 1, JLN PASAR BARU: 1.352083, 103.819836
Geocoded 1, KG KAYU RD: 1.3036022, 103.8838141
Geocoded 1, LOR 7 TOA PAYOH: 1.3395332, 103.8533995
Geocoded 1, LOR LEW LIAN: 1.3508705, 103.8753518
Geocoded 1, MARINE TER: 1.3048365, 103.918528
Geocoded 1, MARSILING DR: 1.4406299, 103.7757377
Geocoded 1, PINE CL: 1.3080453, 103.8833062
Geocoded 1, QUEEN'S RD: 1.3173498, 103.8068191
Geocoded 1, REDHILL CL: 1.2843244, 103.

In [62]:
# Create MRT station .csv from: https://mrtmapsingapore.com/mrt-stations-singapore/

data = """NS1 EW24	Jurong East	Red	North South
NS2	Bukit Batok	Red	North South
NS3	Bukit Gombak	Red	North South
NS4 BP1	Choa Chu Kang	Red	North South
NS5	Yew Tee	Red	North South
NS7	Kranji	Red	North South
NS	Marsiling	Red	North South
NS9 TE2	Woodlands	Red	North South
NS10	Admiralty	Red	North South
NS11	Sembawang	Red	North South
NS12	Canberra	Red	North South
NS13	Yishun	Red	North South
NS14	Khatib	Red	North South
NS15	Yio Chu Kang	Red	North South
NS16	Ang Mo Kio	Red	North South
NS17 CC15	Bishan	Red	North South
NS18	Braddell	Red	North South
NS19	Toa Payoh	Red	North South
NS20	Novena	Red	North South
NS21 DT11	Newton	Red	North South
NS22	Orchard	Red	North South
NS23	Somerset	Red	North South
NS24 NE6 CC1	Dhoby Ghaut	Red	North South
NS25 EW13	City Hall	Red	North South
NS26 EW14	Raffles Place	Red	North South
NS27 CE2	Marina Bay	Red	North South
NS28	Marina South Pier	Red	North South
EW1	Pasir Ris	Green	East West
EW2 DT32	Tampines	Green	East West
EW3	Simei	Green	East West
EW4 CG	Tanah Merah	Green	East West
EW5	Bedok	Green	East West
EW6	Kembangan	Green	East West
EW7	Eunos	Green	East West
EW8 CC9	Paya Lebar	Green	East West
EW9	Aljunied	Green	East West
EW10	Kallang	Green	East West
EW11	Lavender	Green	East West
EW12 DT14	Bugis	Green	East West
EW13 NS25	City Hall	Green	East West
EW14 NS26	Raffles Place	Green	East West
EW15	Tanjong Pagar	Green	East West
EW16 NE3	Outram Park	Green	East West
EW17	Tiong Bahru	Green	East West
EW18	Redhill	Green	East West
EW19	Queenstown	Green	East West
EW20	Commonwealth	Green	East West
EW21 CC22	Buona Vista	Green	East West
EW22	Dover	Green	East West
EW23	Clementi	Green	East West
EW24 NS1	Jurong East	Green	East West
EW25	Chinese Garden	Green	East West
EW26	Lakeside	Green	East West
EW27	Boon Lay	Green	East West
EW28	Pioneer	Green	East West
EW29	Joo Koon	Green	East West
EW30	Gul Circle	Green	East West
EW31	Tuas Crescent	Green	East West
EW32	Tuas West Road	Green	East West
EW33	Tuas Link	Green	East West
CG1 DT35	Expo	Green	Changi Airport Branch
CG2	Changi Airport	Green	Changi Airport Branch
NE1 CC29	HarbourFront	Purple	North East
NE3 EW16	Outram Park	Purple	North East
NE4 DT19	Chinatown	Purple	North East
NE5	Clarke Quay	Purple	North East
NE6 NS24 CC1	Dhoby Ghaut	Purple	North East
NE7 DT12	Little India	Purple	North East
NE8	Farrer Park	Purple	North East
NE9	Boon Keng	Purple	North East
NE10	Potong Pasir	Purple	North East
NE11	Woodleigh	Purple	North East
NE12 CC13	Serangoon	Purple	North East
NE13	Kovan	Purple	North East
NE14	Hougang	Purple	North East
NE15	Buangkok	Purple	North East
NE16 STC	Sengkang	Purple	North East
NE17 PTC	Punggol	Purple	North East
CC1 NS24 NE6	Dhoby Ghaut	Orange	Circle
CC2	Bras Basah	Orange	Circle
CC3	Esplanade	Orange	Circle
CC4 DT15	Promenade	Orange	Circle
CC5	Nicoll Highway	Orange	Circle
CC6	Stadium	Orange	Circle
CC7	Mountbatten	Orange	Circle
CC8	Dakota	Orange	Circle
CC9 EW8	Paya Lebar	Orange	Circle
CC10 DT26	MacPherson	Orange	Circle
CC11	Tai Seng	Orange	Circle
CC12	Bartley	Orange	Circle
CC13 NE12	Serangoon	Orange	Circle
CC14	Lorong Chuan	Orange	Circle
CC15 NS17	Bishan	Orange	Circle
CC16	Marymount	Orange	Circle
CC17	Caldecott	Orange	Circle
CC19 DT9	Botanic Gardens	Orange	Circle
CC20	Farrer Road	Orange	Circle
CC21	Holland Village	Orange	Circle
CC22 EW21	Buona Vista	Orange	Circle
CC23	one-north	Orange	Circle
CC24	Kent Ridge	Orange	Circle
CC25	Haw Par Villa	Orange	Circle
CC26	Pasir Panjang	Orange	Circle
CC27	Labrador Park	Orange	Circle
CC28	Telok Blangah	Orange	Circle
CC29 NE1	HarbourFront	Orange	Circle
CE1 DT16	Bayfront	Orange	Circle Extension
CE2 NS27	Marina Bay	Orange	Circle Extension
DT1 BP6	Bukit Panjang	Blue	Downtown
DT2	Cashew	Blue	Downtown
DT3	Hillview	Blue	Downtown
DT5	Beauty World	Blue	Downtown
DT6	King Albert Park	Blue	Downtown
DT7	Sixth Avenue	Blue	Downtown
DT8	Tan Kah Kee	Blue	Downtown
DT9 CC19	Botanic Gardens	Blue	Downtown
DT10	Stevens	Blue	Downtown
DT11 NS21	Newton	Blue	Downtown
DT12 NE7	Little India	Blue	Downtown
DT13	Rochor	Blue	Downtown
DT14 EW12	Bugis	Blue	Downtown
DT15 CC4	Promenade	Blue	Downtown
DT16 CE1	Bayfront	Blue	Downtown
DT17	Downtown	Blue	Downtown
DT18	Telok Ayer	Blue	Downtown
DT19 NE4	Chinatown	Blue	Downtown
DT20	Fort Canning	Blue	Downtown
DT21	Bencoolen	Blue	Downtown
DT22	Jalan Besar	Blue	Downtown
DT23	Bendemeer	Blue	Downtown
DT24	Geylang Bahru	Blue	Downtown
DT25	Mattar	Blue	Downtown
DT26 CC10	MacPherson	Blue	Downtown
DT27	Ubi	Blue	Downtown
DT28	Kaki Bukit	Blue	Downtown
DT29	Bedok North	Blue	Downtown
DT30	Bedok Reservoir	Blue	Downtown
DT31	Tampines West	Blue	Downtown
DT32 EW2	Tampines	Blue	Downtown
DT33	Tampines East	Blue	Downtown
DT34	Upper Changi	Blue	Downtown
DT35 CG1	Expo	Blue	Downtown
TE1 RTS	Woodlands North	Brown	Thomson–East Coast
TE2 NS9	Woodlands	Brown	Thomson–East Coast
TE3	Woodlands South	Brown	Thomson–East Coast
TE4	Springleaf	Brown	Thomson–East Coast
TE5	Lentor	Brown	Thomson–East Coast
TE6	Mayflower	Brown	Thomson–East Coast
TE7	Bright Hill	Brown	Thomson–East Coast
TE8	Upper Thomson	Brown	Thomson–East Coast
TE9 CC17	Caldecott	Brown	Thomson–East Coast
TE11 DT10	Stevens	Brown	Thomson–East Coast
TE12	Napier	Brown	Thomson–East Coast
TE13	Orchard Boulevard	Brown	Thomson–East Coast
TE14 NS22	Orchard	Brown	Thomson–East Coast
TE15	Great World	Brown	Thomson–East Coast
TE16	Havelock	Brown	Thomson–East Coast
TE17 EW16 NE3	Outram Park	Brown	Thomson–East Coast
TE18	Maxwell	Brown	Thomson–East Coast
TE19	Shenton Way	Brown	Thomson–East Coast
TE20 NS27 CE2	Marina Bay	Brown	Thomson–East Coast
TE22	Gardens by the Bay	Brown	Thomson–East Coast
TE23	Tanjong Rhu	Brown	Thomson–East Coast
TE24	Katong Park	Brown	Thomson–East Coast
TE25	Tanjong Katong	Brown	Thomson–East Coast
TE26	Marine Parade	Brown	Thomson–East Coast
TE27	Marine Terrace	Brown	Thomson–East Coast
TE28	Siglap	Brown	Thomson–East Coast
TE29	Bayshore	Brown	Thomson–East Coast"""

lines = data.splitlines()

with open("stations.csv", "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["station", "town", "color", "line"])
    for line in lines:
        row = line.split("\t")
        writer.writerow(row)

print("CSV file 'stations.csv' created successfully.")

CSV file 'stations.csv' created successfully.


In [None]:
df_mrt = pd.read_csv("stations.csv", encoding="cp1252") # fix for UTF-8 encoding issue cause of en-dash (–)

# Add latitude and longitude to the MRT stations
def geocode_mrt(station, town):
    address = f"{town} MRT Station, {station}, Singapore"
    try:
        geocode_result = gmaps.geocode(address)
        if geocode_result:
            location = geocode_result[0]['geometry']['location']
            return location['lat'], location['lng']
    except Exception as e:
        print(f"Error geocoding {address}: {e}")
    return None, None

# Loop through the DataFrame and geocode each unique MRT station, dont need cache as we have only 100 stations
count = 0
for idx, row in df_mrt.iterrows():
    station = row['station']
    town = row['town']
    
    # Skip if already cached
    if (station, town) in geocode_cache:
        continue
    
    # Call Geocoding API
    lat, lng = geocode_mrt(station, town)
    df_mrt.at[idx, 'latitude'] = lat
    df_mrt.at[idx, 'longitude'] = lng
    print(f"Geocoded {station}, {town}: {lat}, {lng}")

# to csv
df_mrt.to_csv("stations_with_latlong.csv", index=False)
print("CSV file 'stations_with_latlong.csv' created successfully.")


Geocoded NS1 EW24, Jurong East: 1.3335109, 103.742022
Geocoded NS2, Bukit Batok: 1.3490579, 103.7496251
Geocoded NS3, Bukit Gombak: 1.3592405, 103.7518398
Geocoded NS4 BP1, Choa Chu Kang: 1.3852487, 103.7443487
Geocoded NS5, Yew Tee: 1.3973666, 103.7474715
Geocoded NS7, Kranji: 1.425249, 103.762
Geocoded NS, Marsiling: 1.43251, 103.77407
Geocoded NS9 TE2, Woodlands: 1.4369162, 103.7864807
Geocoded NS10, Admiralty: 1.4405962, 103.8009702
Geocoded NS11, Sembawang: 1.4490229, 103.8198723
Geocoded NS12, Canberra: 1.443283, 103.829575
Geocoded NS13, Yishun: 1.429387, 103.8350907
Geocoded NS14, Khatib: 1.4172783, 103.8329928
Geocoded NS15, Yio Chu Kang: 1.3818261, 103.8449286
Geocoded NS16, Ang Mo Kio: 1.3695397, 103.8498062
Geocoded NS17 CC15, Bishan: 1.3507781, 103.8487215
Geocoded NS18, Braddell: 1.340703, 103.84668
Geocoded NS19, Toa Payoh: 1.3326718, 103.8473498
Geocoded NS20, Novena: 1.3202681, 103.8435124
Geocoded NS21 DT11, Newton: 1.3125093, 103.8379519
Geocoded NS22, Orchard: 1.303

#### Method to calculate average distance of HDB to nearest MRT station

In [67]:
# hdb lat long
df_merged.head(10)

Unnamed: 0,month,town,flat_type,block,street_name,storey_range,floor_area_sqm,flat_model,lease_commence_date,resale_price,latitude,longitude
0,1990-01-01,ANG MO KIO,1 ROOM,309,ANG MO KIO AVE 1,10 TO 12,31.0,IMPROVED,1977,9000.0,1.364329,103.84411
1,1990-01-01,KALLANG/WHAMPOA,3 ROOM,44,BENDEMEER RD,04 TO 06,63.0,STANDARD,1981,31400.0,1.322221,103.866576
2,1990-01-01,KALLANG/WHAMPOA,3 ROOM,20,ST. GEORGE'S RD,04 TO 06,67.0,NEW GENERATION,1984,66500.0,1.324647,103.863076
3,1990-01-01,KALLANG/WHAMPOA,3 ROOM,14,KG ARANG RD,04 TO 06,103.0,NEW GENERATION,1984,77000.0,1.300282,103.884178
4,1990-01-01,KALLANG/WHAMPOA,3 ROOM,46,OWEN RD,01 TO 03,68.0,NEW GENERATION,1982,58000.0,1.315145,103.850806
5,1990-01-01,KALLANG/WHAMPOA,3 ROOM,49,DORSET RD,04 TO 06,68.0,NEW GENERATION,1979,52000.0,1.312795,103.850025
6,1990-01-01,KALLANG/WHAMPOA,3 ROOM,95,GEYLANG BAHRU,13 TO 15,68.0,NEW GENERATION,1979,61000.0,1.322874,103.870867
7,1990-01-01,KALLANG/WHAMPOA,3 ROOM,94,GEYLANG BAHRU,04 TO 06,68.0,NEW GENERATION,1979,61000.0,1.322688,103.871334
8,1990-01-01,KALLANG/WHAMPOA,3 ROOM,16,UPP BOON KENG RD,04 TO 06,65.0,IMPROVED,1975,42000.0,1.315592,103.871855
9,1990-01-01,KALLANG/WHAMPOA,3 ROOM,68,GEYLANG BAHRU,16 TO 18,65.0,IMPROVED,1974,45700.0,1.322333,103.870043


In [68]:
# mrt station lat long
df_mrt.head(10)

Unnamed: 0,station,town,color,line,latitude,longitude
0,NS1 EW24,Jurong East,Red,North South,1.333511,103.742022
1,NS2,Bukit Batok,Red,North South,1.349058,103.749625
2,NS3,Bukit Gombak,Red,North South,1.359241,103.75184
3,NS4 BP1,Choa Chu Kang,Red,North South,1.385249,103.744349
4,NS5,Yew Tee,Red,North South,1.397367,103.747472
5,NS7,Kranji,Red,North South,1.425249,103.762
6,NS,Marsiling,Red,North South,1.43251,103.77407
7,NS9 TE2,Woodlands,Red,North South,1.436916,103.786481
8,NS10,Admiralty,Red,North South,1.440596,103.80097
9,NS11,Sembawang,Red,North South,1.449023,103.819872


In [74]:
# Haversine formula to compute distance between two (lat, lon) points in km
def haversine(lat1, lon1, lat2, lon2):
    R = 6371.0
    dlat = radians(lat2 - lat1)
    dlon = radians(lon2 - lon1)
    a = sin(dlat/2)**2 + cos(radians(lat1)) * cos(radians(lat2)) * sin(dlon/2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    return R * c

def find_nearest_station(hdb_row):
    hdb_lat = hdb_row['latitude']
    hdb_lon = hdb_row['longitude']
    
    distances = df_mrt.apply(
        lambda row: haversine(hdb_lat, hdb_lon, row['latitude'], row['longitude']),
        axis=1
    )

    # Find the index of the MRT station with the minimum distance
    min_index = distances.idxmin()
    min_distance = distances[min_index]
    
    # Get the station ID and station name and format as 'NS2 Bukit Batok MRT'
    station_id = df_mrt.loc[min_index, 'station']
    station_name = df_mrt.loc[min_index, 'town']
    nearest_str = f"{station_id} {station_name} MRT"
    
    return pd.Series([nearest_str, min_distance])

# Calculate nearest MRT station + distance for each row in df_merged (HDB dataset)
df_merged[['nearest_mrt', 'distance_km']] = df_merged.apply(find_nearest_station, axis=1)
df_merged['distance_km'] = df_merged['distance_km'].round(4)

# Output final csv file
df_merged.to_csv("data_with_nearest_mrt.csv", index=False)
print("CSV file 'data_with_nearest_mrt.csv' created successfully.")

CSV file 'data_with_nearest_mrt.csv' created successfully.
