In [1]:
import numpy as np
import pandas as pd
import networkx as nx

import matplotlib.pyplot as plt

from utils import preprocess

In [2]:
# Enable IPython's autoreload extension so that edits to imported project
# modules (e.g. `preprocess` from `utils`) are picked up without restarting
# the kernel;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
# Mode 2: reload all modules before executing each cell.
%autoreload 2

In [3]:
# --- Root directories (both end with a trailing slash) ----------------------
DATA_DIR = './data/'
PROCESSED_DATA_DIR = './processed_data/'

# --- Processed property counts ----------------------------------------------
PROCESSED_PROPERTY_NUMBER = f'{PROCESSED_DATA_DIR}processed_property_number.csv'

# --- Raw auxiliary tables ---------------------------------------------------
# The spelling 'commerical' below is part of the on-disk file name; keep it.
AUX_DATA_DIR = f'{DATA_DIR}auxiliary-data/'
SUBZONE_FILE = f'{AUX_DATA_DIR}sg-subzones.csv'
COMMERCIAL_CENTER_FILE = f'{AUX_DATA_DIR}sg-commerical-centres.csv'
MRT_FILE = f'{AUX_DATA_DIR}sg-mrt-stations.csv'
SHOPPING_MALL_FILE = f'{AUX_DATA_DIR}sg-shopping-malls.csv'

# --- Precomputed distance matrix (NumPy binary) -----------------------------
MRT_DIS_FILE = f'{PROCESSED_DATA_DIR}sg-mrt-stations.npy'

# --- Processed MRT network tables -------------------------------------------
MRT_STATIONS_FILE = f'{PROCESSED_DATA_DIR}mrt-stations.csv'
MRT_CONNECTIONS_FILE = f'{PROCESSED_DATA_DIR}mrt-connections.csv'

In [4]:
# Read the four input tables in one pass. The order of the paths below
# matches the order of the target names on the left-hand side.
(
    df_property_number,
    df_commercial_center,
    df_mrt,
    df_shopping_mall,
) = [
    pd.read_csv(csv_path)
    for csv_path in (
        PROCESSED_PROPERTY_NUMBER,
        COMMERCIAL_CENTER_FILE,
        MRT_FILE,
        SHOPPING_MALL_FILE,
    )
]

In [5]:
# Precomputed distance matrix loaded from disk.
distance_mrt = np.load(MRT_DIS_FILE)

# Property counts taken from the column labelled '0' of the processed table.
property_number_arary = df_property_number.loc[:, '0'].to_numpy()

# Transposed view of the distance matrix. Its per-row sums are later stored
# as a df_mrt column, so rows presumably correspond to df_mrt rows - confirm.
distance_mrt_array = distance_mrt.T

In [6]:
# Stack the 1-D property counts into one identical row per row of the
# distance matrix so both arrays have the same shape.
property_number_arary_repeated = np.repeat(
    property_number_arary.reshape(1, -1),
    distance_mrt_array.shape[0],
    axis=0,
)

# Keep a property count only where the distance is below 1 and use zero
# everywhere else (unit presumably km, as in calculate_distance_km - confirm).
property_number_matrix = np.where(
    distance_mrt_array < 1,
    property_number_arary_repeated,
    np.zeros_like(property_number_arary_repeated),
)

# Row-wise total: summed property count within the threshold for each row.
mrt_num_property = property_number_matrix.sum(axis=1)

Calculate the distances between the MRT stations and the commercial centers and shopping malls

In [7]:
# Distances (km, going by the helper's name) between the entries of df_mrt and
# the commercial centers. calculate_distance_km is a project helper imported
# from `utils` and is not shown in this notebook.
# NOTE(review): the result is later reduced with .sum(axis=1) and stored as a
# df_mrt column, so rows presumably correspond to df_mrt rows - confirm.
distance_commercial_center = preprocess.calculate_distance_km(df_mrt, df_commercial_center)

100%|██████████| 151/151 [00:01<00:00, 92.70it/s]


In [8]:
# Distances (km, going by the helper's name) between the entries of df_mrt and
# the shopping malls. calculate_distance_km is a project helper imported from
# `utils` and is not shown in this notebook.
# NOTE(review): the result is later reduced with .sum(axis=1) and stored as a
# df_mrt column, so rows presumably correspond to df_mrt rows - confirm.
distance_shopping_mall = preprocess.calculate_distance_km(df_mrt, df_shopping_mall)

100%|██████████| 151/151 [00:06<00:00, 22.52it/s]


Count the number of commercial centers and shopping malls within 1 km of each MRT station

In [9]:
# Boolean masks marking every pair whose distance is below 1
# (km, going by the name of the helper that produced the distances).
commercial_center_nearby = distance_commercial_center < 1
shopping_mall_nearby = distance_shopping_mall < 1

# Summing a mask along axis 1 counts the True entries in each row.
mrt_num_commercial_center = commercial_center_nearby.sum(axis=1)
mrt_num_shopping_mall = shopping_mall_nearby.sum(axis=1)

In [10]:
# Attach the three per-row counts to the station table as new columns.
df_mrt['num_property'] = mrt_num_property
df_mrt['num_commercial_center'] = mrt_num_commercial_center
df_mrt['num_shopping_mall'] = mrt_num_shopping_mall

# Keep only the station name and its counts, dropping fully identical rows.
# NOTE(review): duplicates are detected on the whole (name, counts) row, so a
# station name that occurs with differing counts keeps several rows - confirm
# that this is intended.
df_mrt_simplified = df_mrt[
    ['name', 'num_property', 'num_commercial_center', 'num_shopping_mall']
].drop_duplicates()

In [11]:
# "Living" stations: names of the 30 rows with the highest property count.
LIVING_MRT_STATIONS = (
    df_mrt_simplified
    .sort_values('num_property', ascending=False)
    .head(30)['name']
    .to_list()
)

# "Working" stations: every row with at least one commercial center counted.
WORKING_MRT_STATIONS = df_mrt_simplified.loc[
    df_mrt_simplified['num_commercial_center'] > 0, 'name'
].to_list()

# "Shopping" stations: names of the 30 rows with the highest mall count.
SHOPPING_MRT_STATIONS = (
    df_mrt_simplified
    .sort_values('num_shopping_mall', ascending=False)
    .head(30)['name']
    .to_list()
)

In [12]:
# Load the processed station table and the station-to-station connection table.
# NOTE(review): `df_mrt` previously held the raw station table read from
# MRT_FILE; it is rebound here to the connections table (its 'to' / 'from'
# columns are read when the graph is built). Re-running an earlier cell that
# uses `df_mrt` after this one will therefore operate on the wrong frame.
df_mrt_stations = pd.read_csv(MRT_STATIONS_FILE)
df_mrt = pd.read_csv(MRT_CONNECTIONS_FILE)

In [13]:
# Build the undirected MRT network: one edge per row of the connections table,
# joining the station in the 'to' column with the station in the 'from' column.
G_undirected = nx.Graph()

# Add all edges in one call from the two columns instead of looping over
# DataFrame.iterrows(): iterrows builds a Series per row (slow, and it may
# coerce values to a common dtype) and its index value was never used.
# The (to, from) order of the original loop is kept so that nodes are inserted
# into the graph in the same order.
G_undirected.add_edges_from(zip(df_mrt['to'], df_mrt['from']))

In [14]:
# Weekday scenario: score stations using the "living" and "working" station lists.
# get_betweenness is a project helper imported from `utils` and is not shown in
# this notebook; its result is used further down like a dict keyed by station name.
# NOTE(review): presumably a betweenness-style score restricted to paths between
# the two station lists - confirm against the helper.
WEEKDAY_BUSY_MRT_DICT = preprocess.get_betweenness(G_undirected, LIVING_MRT_STATIONS, WORKING_MRT_STATIONS)

100%|██████████| 30/30 [00:00<00:00, 1671.26it/s]


In [15]:
# Weekend scenario: score stations using the "living" and "shopping" station lists.
# get_betweenness is a project helper imported from `utils` and is not shown in
# this notebook; its result is used further down like a dict keyed by station name.
# NOTE(review): presumably a betweenness-style score restricted to paths between
# the two station lists - confirm against the helper.
WEEKEND_BUSY_MRT_DICT = preprocess.get_betweenness(G_undirected, LIVING_MRT_STATIONS, SHOPPING_MRT_STATIONS)

100%|██████████| 30/30 [00:00<00:00, 2313.97it/s]


In [16]:
# Ten highest-scoring weekday stations: sort the keys ascending by score, then
# reverse the list (this keeps the original tie order) and take the first ten.
sorted(WEEKDAY_BUSY_MRT_DICT, key=WEEKDAY_BUSY_MRT_DICT.get)[::-1][:10]

['little india',
 'dhoby ghaut',
 'newton',
 'chinatown',
 'botanic gardens',
 'bugis',
 'farrer road',
 'clarke quay',
 'stevens',
 'buona vista']

In [17]:
# Ten highest-scoring weekend stations: sort the keys ascending by score, then
# reverse the list (this keeps the original tie order) and take the first ten.
sorted(WEEKEND_BUSY_MRT_DICT, key=WEEKEND_BUSY_MRT_DICT.get)[::-1][:10]

['dhoby ghaut',
 'little india',
 'newton',
 'chinatown',
 'clarke quay',
 'bugis',
 'botanic gardens',
 'stevens',
 'promenade',
 'fort canning']