In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os
import json
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.manifold import MDS
from sklearn.manifold import TSNE
from scipy.spatial import procrustes
from scipy.spatial.distance import jensenshannon

In [2]:
# Apply the 'ggplot' matplotlib style to the figures drawn after this point.
plt.style.use('ggplot')

# Read the combined dataset; the path is relative to the notebook's working directory.
data=pd.read_csv('combined_data_with_loc.csv')
# Load the feature category mapping. The context manager closes the file handle
# as soon as parsing finishes instead of leaving that to garbage collection.
with open('feature_category_dict.json') as mapping_file:
    feature_category_dict=json.load(mapping_file)
# Preview the first rows as the cell's rich output.
data.head()

Unnamed: 0,Community Name,Region,Map reference,Grid reference,Location,Population Density,Travel time to GPO (minutes),Distance to GPO (km),LGA,Primary Care Partnership,...,Travel time to nearest public hospital with emergency department,Distance to nearest public hospital with emergency department,Presentations to emergency departments due to injury,"Presentations to emergency departments due to injury, %",Category 4 & 5 emergency department presentations,"Category 4 & 5 emergency department presentations, %",geo_lat,geo_lon,cart_x,cart_y
0,Ascot Vale,Northern and Western Metropolitan,4,B3,6km NW of Melbourne,3758.623596,9.360142,6.958742,Moonee Valley (C),Inner North West Primary Care Partnership,...,6.630953,4.993841,679.257076,20.502456,1864.918123,56.290032,-37.775606,144.914871,-4.242641,4.242641
1,Braybrook,Northern and Western Metropolitan,4,A3,10km WNW of Melbourne,2025.468296,15.131666,11.595888,Maribyrnong (C),"HealthWest (b): Hobson's Bay, Maribyrnong, Wyn...",...,12.824977,10.161988,543.631989,20.647263,1683.966712,63.957428,-37.779315,144.858155,-9.238795,3.826834
2,Craigieburn,Northern and Western Metropolitan,2,A3,27km N of Melbourne,1034.97087,31.994666,43.100287,Hume (C),Hume-Whittlesea Primary Care Partnership,...,11.570855,15.213189,2044.424399,20.618005,5102.134434,51.454988,-37.570576,144.963028,0.0,27.0
3,Croydon,Eastern Metropolitan,2,B4,28km E of Melbourne,1730.06483,28.992647,34.071323,Maroondah (C),Outer East Primary Care Partnership,...,5.093285,3.601752,1754.954941,28.537825,3062.182462,49.795026,-37.813411,145.281013,28.0,1.714506e-15
4,Fawkner,Northern and Western Metropolitan,4,C1,12km N of Melbourne,2619.120089,17.405267,13.047142,Moreland (C),Inner North West Primary Care Partnership,...,11.510757,12.004044,680.401318,17.909865,1942.874353,51.141315,-37.705724,144.963028,0.0,12.0


In [10]:
# Keep only the columns listed under the 'Hospital' key of the category mapping.
hospital_features=feature_category_dict['Hospital']
hospital_data=data[hospital_features]

# Null count for every selected column.
missing_values=hospital_data.isnull().sum()

print('Number of missing values in each column')
# One report line per column: its name, its null count and its dtype.
for column_name, n_missing, column_dtype in zip(hospital_data.columns, missing_values, hospital_data.dtypes):
    print(f'{column_name}: {n_missing} missing values ::: dtype: {column_dtype}')

# Preview the hospital subset as the cell's rich output.
hospital_data.head()

Number of missing values in each column
Public hospital separations, 2012-13: 0 missing values ::: dtype: float64
Nearest Public Hospital: 0 missing values ::: dtype: object
Travel time to nearest public hospital: 0 missing values ::: dtype: float64
Distance to nearest public hospital: 0 missing values ::: dtype: float64
Obstetric type separations, 2012-13: 0 missing values ::: dtype: object
Nearest public hospital with maternity services: 0 missing values ::: dtype: object
Time to nearest public hospital with maternity services: 0 missing values ::: dtype: float64
Distance to nearest public hospital with maternity services: 0 missing values ::: dtype: float64
Presentations to emergency departments, 2012-13: 0 missing values ::: dtype: float64
Nearest public hospital with emergency department: 0 missing values ::: dtype: object
Travel time to nearest public hospital with emergency department: 0 missing values ::: dtype: float64
Distance to nearest public hospital with emergency departm

Unnamed: 0,"Public hospital separations, 2012-13",Nearest Public Hospital,Travel time to nearest public hospital,Distance to nearest public hospital,"Obstetric type separations, 2012-13",Nearest public hospital with maternity services,Time to nearest public hospital with maternity services,Distance to nearest public hospital with maternity services,"Presentations to emergency departments, 2012-13",Nearest public hospital with emergency department,Travel time to nearest public hospital with emergency department,Distance to nearest public hospital with emergency department,Presentations to emergency departments due to injury,"Presentations to emergency departments due to injury, %",Category 4 & 5 emergency department presentations,"Category 4 & 5 emergency department presentations, %"
0,2701.557647,Western Hospital (Footscray),5.254528,3.876415,215.5550042481527,Royal Women's Hospital,6.490453,4.91257,3313.05218,Royal Melbourne Hospital,6.630953,4.993841,679.257076,20.502456,1864.918123,56.290032
1,2964.691696,Western Hospital (Footscray),5.028871,3.875731,189.98005347998108,Sunshine Hospital,8.071881,6.216803,2632.949379,Royal Melbourne Hospital,12.824977,10.161988,543.631989,20.647263,1683.966712,63.957428
2,9644.630863,Craigieburn Health Service,7.280902,7.277191,918.9149121263106,The Northern Hospital,11.570855,15.213189,9915.723721,The Northern Hospital,11.570855,15.213189,2044.424399,20.618005,5102.134434,51.454988
3,5781.347185,Maroondah Hospital,5.093285,3.601752,317.5160509300805,Yarra Ranges Health,10.683462,9.413847,6149.574954,Maroondah Hospital,5.093285,3.601752,1754.954941,28.537825,3062.182462,49.795026
4,4694.587633,Broadmeadows Health Service,8.910941,6.645261,287.0439496672183,The Northern Hospital,11.510757,12.004044,3799.03089,The Northern Hospital,11.510757,12.004044,680.401318,17.909865,1942.874353,51.141315


In [15]:
# Split the hospital columns into two groups by dtype: numeric columns versus
# object-dtype columns.
# NOTE(review): the dtype report printed earlier in this notebook shows
# 'Obstetric type separations, 2012-13' as object even though its previewed
# values look numeric, so it ends up in the categorical group - confirm
# whether it should be coerced to a number before this split.
numeric_block=hospital_data.select_dtypes(include='number')
object_block=hospital_data.select_dtypes(include='object')
numerical_features=numeric_block.columns
categorical_features=object_block.columns

# The trailing '' in the first call emits the blank separator line between the
# two listings.
print('Numerical Features\n-----------------', numerical_features, '', sep='\n')
print('Categorical Features\n-----------------', categorical_features, sep='\n')

Numerical Features
-----------------
Index(['Public hospital separations, 2012-13',
       'Travel time to nearest public hospital',
       'Distance to nearest public hospital',
       'Time to nearest public hospital with maternity services',
       'Distance to nearest public hospital with maternity services',
       'Presentations to emergency departments, 2012-13',
       'Travel time to nearest public hospital with emergency department',
       'Distance to nearest public hospital with emergency department',
       'Presentations to emergency departments due to injury',
       'Presentations to emergency departments due to injury, %',
       'Category 4 & 5 emergency department presentations',
       'Category 4 & 5 emergency department presentations, %'],
      dtype='object')

Categorical Features
-----------------
Index(['Nearest Public Hospital', 'Obstetric type separations, 2012-13',
       'Nearest public hospital with maternity services',
       'Nearest public hospital wi