In [16]:
import pandas as pd
import re
import numpy as np

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)


In [17]:
# import proximity data
list = [i for i in range(13,41)]
df = pd.read_csv('PMD-en.csv', usecols=([0,1,4,5,6,7]+list))
# df.info()

## Clean proximity data

In [18]:
# drop in_db columns
list_indb = ['in_db_emp', 'in_db_pharma', 'in_db_childcare', 'in_db_health', 'in_db_grocery', 'in_db_educpri', 'in_db_educsec', 'in_db_lib', 'in_db_parks', 'in_db_transit']
df.drop(list_indb , axis = 1, inplace = True)

# filter for data from Ontario
mask = df['PRNAME'] == 'Ontario'
df_on = df[mask]

# remove rows with F
df_on2 = df_on[df_on['prox_idx_emp'] != 'F']

# remove commas from population columns
list_pop = ['DBPOP', 'CSDPOP', 'PRPOP']
for each in list_pop:
    df_on2[each] = df_on2[each].str.replace(',', '')

# replace .. entries from proximity data columns with nan values
list_prox = ['prox_idx_emp', 'prox_idx_pharma', 'prox_idx_childcare',
             'prox_idx_health', 'prox_idx_grocery', 'prox_idx_educpri',
             'prox_idx_educsec', 'prox_idx_lib', 'prox_idx_parks', 
             'prox_idx_transit']
for each in list_prox:
    df_on2[each] = df_on2[each].replace({"..":np.nan})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [19]:
# float(np.nan)
# new = df_on2["prox_idx_emp"].replace({"..":np.nan})
# new.astype(float)
# new.value_counts()
# new.str.contains('F', regex=False, na=False)

In [20]:
df_on2.info()

# convert UID columns to strings
df_on2[['DBUID', 'CSDUID']] = df_on2[['DBUID', 'CSDUID']].astype(str)
print(df_on2.dtypes) # check datatypes

# export the proximity data for use in QGIS
df_on2.to_csv(r'prox_ontario_nan.csv', index = False)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 132711 entries, 148270 to 281483
Data columns (total 24 columns):
DBUID                 132711 non-null int64
DBPOP                 132602 non-null object
CSDUID                132711 non-null int64
CSDNAME               132711 non-null object
CSDTYPE               132711 non-null object
CSDPOP                132602 non-null object
PRUID                 132711 non-null int64
PRNAME                132711 non-null object
PRPOP                 132711 non-null object
lon                   132711 non-null float64
lat                   132711 non-null float64
prox_idx_emp          129867 non-null object
prox_idx_pharma       65600 non-null object
prox_idx_childcare    77800 non-null object
prox_idx_health       101047 non-null object
prox_idx_grocery      47048 non-null object
prox_idx_educpri      75944 non-null object
prox_idx_educsec      36585 non-null object
prox_idx_lib          38064 non-null object
prox_idx_parks        80439 non-null

## Aggregate the proximity and amenity data to PHU level

In [21]:
# Import PHU assignments for each db
db_phu = pd.read_csv('Joined_DB_to_PHU.csv', usecols=([1,28,29]))
# db_phu.head()
# db_phu.info()

df_on2['DBUID'] = df_on2['DBUID'].astype(int)

# Join to proximity data
merged = pd.merge(left=df_on2, right=db_phu, how='left', left_on='DBUID', right_on='DBUID')
# merged.head(5)
merged.info()

# merged[['HR_UID', 'ENG_LABEL']].nunique()

# convert population columns to integer types
merged[['DBPOP', 'CSDPOP', 'PRPOP']] = merged[['DBPOP', 'CSDPOP', 'PRPOP']].apply(pd.to_numeric) 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


<class 'pandas.core.frame.DataFrame'>
Int64Index: 132711 entries, 0 to 132710
Data columns (total 26 columns):
DBUID                 132711 non-null int64
DBPOP                 132602 non-null object
CSDUID                132711 non-null object
CSDNAME               132711 non-null object
CSDTYPE               132711 non-null object
CSDPOP                132602 non-null object
PRUID                 132711 non-null int64
PRNAME                132711 non-null object
PRPOP                 132711 non-null object
lon                   132711 non-null float64
lat                   132711 non-null float64
prox_idx_emp          129867 non-null object
prox_idx_pharma       65600 non-null object
prox_idx_childcare    77800 non-null object
prox_idx_health       101047 non-null object
prox_idx_grocery      47048 non-null object
prox_idx_educpri      75944 non-null object
prox_idx_educsec      36585 non-null object
prox_idx_lib          38064 non-null object
prox_idx_parks        80439 non-null obj

In [22]:
# Create a new column which contains the PHU population number for that db
merged['HRPOP'] = merged.groupby('HR_UID')['DBPOP'].transform('sum')

# Create a new column which is the ratio of the DBPOP/HRPOP
merged['DBHRPOP'] = merged['DBPOP']/merged['HRPOP']

# # Inspect the results
# merged.info()
# merged['HRPOP'].value_counts()
# merged['HRPOP'].describe()
# # Merged['DBPOP'].value_counts() # 19599 zeros in DBPOP
# merged['DBPOP'].describe()
# # Merged['DBHRPOP'].value_counts() # # 19599 zeros in DBHRPOP
# merged['DBHRPOP'].describe()


In [23]:
# Create a list containing prox measures column names
list_prox = ['prox_idx_emp', 'prox_idx_pharma', 'prox_idx_childcare',
             'prox_idx_health', 'prox_idx_grocery', 'prox_idx_educpri',
             'prox_idx_educsec', 'prox_idx_lib', 'prox_idx_parks', 
             'prox_idx_transit']

# Convert proximity data columns to float type
merged[list_prox] = merged[list_prox].astype(float)

# Create new wieghted proximity columns for each observation (ie. DBHRPOP * prox_)
for each in list_prox:
    merged[each + '_wt'] = merged[each] * merged['DBHRPOP']
    
# Create a list with weighted prox measures column names
list_prox_wt = [item + '_wt' for item in list_prox]
print(list_prox_wt)
    
# merged.info()
# list_prox_wt

# Create a new PHU level df where weighted proximity values are summed
phu_prox_wt = merged.groupby(['HR_UID', 'ENG_LABEL'])[list_prox_wt].sum()
# phu_prox_wt.info()
# phu_prox_wt.head()

['prox_idx_emp_wt', 'prox_idx_pharma_wt', 'prox_idx_childcare_wt', 'prox_idx_health_wt', 'prox_idx_grocery_wt', 'prox_idx_educpri_wt', 'prox_idx_educsec_wt', 'prox_idx_lib_wt', 'prox_idx_parks_wt', 'prox_idx_transit_wt']


In [24]:
# Create df with median proximity values for each PHU, also the population for each PHU
list_prox2 = ['prox_idx_emp', 'prox_idx_pharma', 'prox_idx_childcare',
             'prox_idx_health', 'prox_idx_grocery', 'prox_idx_educpri',
             'prox_idx_educsec', 'prox_idx_lib', 'prox_idx_parks', 
             'prox_idx_transit', 'HRPOP']
phu_prox_med = merged.groupby(['HR_UID', 'ENG_LABEL'])[list_prox2].median()
phu_prox_med.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,prox_idx_emp,prox_idx_pharma,prox_idx_childcare,prox_idx_health,prox_idx_grocery,prox_idx_educpri,prox_idx_educsec,prox_idx_lib,prox_idx_parks,prox_idx_transit,HRPOP
HR_UID,ENG_LABEL,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3526,The District of Algoma Health Unit,0.0025,0.0208,0.0144,0.0033,0.0346,0.0794,0.0966,0.0834,0.0278,0.0099,112847.0
3527,Brant County Health Unit,0.0163,0.02285,0.0206,0.0046,0.0417,0.1004,0.049,0.0714,0.0445,0.0004,139668.0
3530,Durham Regional Health Unit,0.0257,0.0193,0.0316,0.0075,0.0359,0.0882,0.05,0.0693,0.04005,0.005,655838.0
3531,Elgin-St. Thomas Health Unit,0.0023,0.02685,0.0122,0.0039,0.0337,0.0569,0.05175,0.0824,0.026,,91348.0
3533,Grey Bruce Health Unit,0.0009,0.01625,0.02275,0.0036,0.0373,0.0582,0.0463,0.091,0.0315,,165266.0


In [25]:
# Create a df with average amenity dense measure by PHU
merged['amenity_dense'] = merged['amenity_dense'].astype(int)
phu_amenity = merged.groupby(['HR_UID', 'ENG_LABEL'])['amenity_dense'].mean()
phu_amenity = phu_amenity.to_frame()

In [26]:
# Merge dfs with PHU level proximity data
phu_prox_full = phu_amenity.merge(phu_prox_wt, how='left', 
                                  left_on=['HR_UID', 'ENG_LABEL'], 
                                  right_on=['HR_UID', 'ENG_LABEL']).merge(phu_prox_med, 
                                                                          how='left', 
                                                                          left_on=['HR_UID', 'ENG_LABEL'], 
                                                                          right_on=['HR_UID', 'ENG_LABEL'])
phu_prox_full = phu_prox_full.reset_index()
phu_prox_full.info()
phu_prox_full.head()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 24 columns):
HR_UID                   36 non-null int64
ENG_LABEL                36 non-null object
amenity_dense            36 non-null float64
prox_idx_emp_wt          36 non-null float64
prox_idx_pharma_wt       36 non-null float64
prox_idx_childcare_wt    36 non-null float64
prox_idx_health_wt       36 non-null float64
prox_idx_grocery_wt      36 non-null float64
prox_idx_educpri_wt      36 non-null float64
prox_idx_educsec_wt      36 non-null float64
prox_idx_lib_wt          36 non-null float64
prox_idx_parks_wt        36 non-null float64
prox_idx_transit_wt      36 non-null float64
prox_idx_emp             36 non-null float64
prox_idx_pharma          36 non-null float64
prox_idx_childcare       36 non-null float64
prox_idx_health          36 non-null float64
prox_idx_grocery         36 non-null float64
prox_idx_educpri         36 non-null float64
prox_idx_educsec         36 non-null float64


Unnamed: 0,HR_UID,ENG_LABEL,amenity_dense,prox_idx_emp_wt,prox_idx_pharma_wt,prox_idx_childcare_wt,prox_idx_health_wt,prox_idx_grocery_wt,prox_idx_educpri_wt,prox_idx_educsec_wt,prox_idx_lib_wt,prox_idx_parks_wt,prox_idx_transit_wt,prox_idx_emp,prox_idx_pharma,prox_idx_childcare,prox_idx_health,prox_idx_grocery,prox_idx_educpri,prox_idx_educsec,prox_idx_lib,prox_idx_parks,prox_idx_transit,HRPOP
0,3526,The District of Algoma Health Unit,0.012766,0.00903,0.010033,0.01192,0.004486,0.01453,0.053543,0.027935,0.01675,0.017129,0.005378,0.0025,0.0208,0.0144,0.0033,0.0346,0.0794,0.0966,0.0834,0.0278,0.0099,112847.0
1,3527,Brant County Health Unit,0.084391,0.016292,0.013825,0.017373,0.012814,0.017258,0.078329,0.019088,0.018174,0.037887,4.5e-05,0.0163,0.02285,0.0206,0.0046,0.0417,0.1004,0.049,0.0714,0.0445,0.0004,139668.0
2,3530,Durham Regional Health Unit,0.125284,0.027632,0.01696,0.034447,0.011442,0.019223,0.083528,0.016127,0.018962,0.04049,0.005668,0.0257,0.0193,0.0316,0.0075,0.0359,0.0882,0.05,0.0693,0.04005,0.005,655838.0
3,3531,Elgin-St. Thomas Health Unit,0.0,0.007603,0.009055,0.009582,0.007456,0.006616,0.027106,0.016676,0.015765,0.012064,0.0,0.0023,0.02685,0.0122,0.0039,0.0337,0.0569,0.05175,0.0824,0.026,,91348.0
4,3533,Grey Bruce Health Unit,0.0,0.003378,0.008618,0.014731,0.004521,0.011299,0.027341,0.010255,0.032769,0.016883,0.0,0.0009,0.01625,0.02275,0.0036,0.0373,0.0582,0.0463,0.091,0.0315,,165266.0


In [27]:
# Export the PHU level proximity data
phu_prox_full.to_csv(r'PHU_level_proximity_data_ngan.csv', index = False)

## Merge COVID and comorbidities PHU level data

In [35]:
# Import the COVID PHU level numbers
num = pd.read_csv('PHU_FINAL_num.csv')
# num.info()
# num.head()

keep = ['FEMALE', 'MALE', 'TRANSGENDER', 'UNKNOWN', 'OTHER', 'CONTACT', 'NEITHER', 
        'TRAVEL-RELATED', 'NOT-RESOLVED', 'RESOLVED', 'FATAL', 'TOTAL', 
        'Location', 'HR_UID']
num = num[keep]
# num.head()

# Import the percent comorbids
prop = pd.read_csv('PHU_FINAL_prop.csv')
# prop.info()
# prop.head()

keep2 = ['copd-percent', 'asthma-percent', 'hbp-percent', 'smokers-percent',
         'Location', 'HR_UID']
prop = prop[keep2]

# Import the number comorbids 
num2 = pd.read_csv('PHU_FINAL_num.csv')
keep3 = ['copd', 'asthma', 'hbp', 'smokers', 'Location', 'HR_UID']
num2 = num2[keep3]

# remove commas from number columns
list_comorbids = ['copd', 'asthma', 'hbp', 'smokers']
for each in list_comorbids:
    num2[each] = num2[each].str.replace(',', '')

num2[['copd', 'asthma', 'hbp', 'smokers']] = num2[['copd', 'asthma', 'hbp', 'smokers']].apply(pd.to_numeric)

covid = num.merge(prop, how='outer', 
                       left_on=['Location', 'HR_UID'], 
                       right_on=['Location', 'HR_UID'])

covid = covid.merge(num2, how='outer', 
                       left_on=['Location', 'HR_UID'], 
                       right_on=['Location', 'HR_UID'])
covid.info()
covid.head()




<class 'pandas.core.frame.DataFrame'>
Int64Index: 36 entries, 0 to 35
Data columns (total 22 columns):
FEMALE             36 non-null int64
MALE               36 non-null int64
TRANSGENDER        36 non-null int64
UNKNOWN            36 non-null int64
OTHER              36 non-null int64
CONTACT            36 non-null int64
NEITHER            36 non-null int64
TRAVEL-RELATED     36 non-null int64
NOT-RESOLVED       36 non-null int64
RESOLVED           36 non-null int64
FATAL              36 non-null int64
TOTAL              36 non-null int64
Location           36 non-null object
HR_UID             36 non-null float64
copd-percent       36 non-null object
asthma-percent     36 non-null object
hbp-percent        36 non-null object
smokers-percent    36 non-null object
copd               36 non-null int64
asthma             36 non-null int64
hbp                36 non-null int64
smokers            36 non-null int64
dtypes: float64(1), int64(16), object(5)
memory usage: 6.5+ KB


Unnamed: 0,FEMALE,MALE,TRANSGENDER,UNKNOWN,OTHER,CONTACT,NEITHER,TRAVEL-RELATED,NOT-RESOLVED,RESOLVED,FATAL,TOTAL,Location,HR_UID,copd-percent,asthma-percent,hbp-percent,smokers-percent,copd,asthma,hbp,smokers
0,862,658,0,3,0,329,719,145,346,1076,101,1523,York Regional Health Unit,3570.0,2.0 %,5.1 %,17.6 %,10.5 %,13200,50800,13200,104400
1,2774,2152,0,113,2,732,1737,210,1442,3256,343,5041,City of Toronto Health Unit,3595.0,2.2 %,7.8 %,15.8 %,13.3 %,35600,200400,35600,340100
2,270,188,0,0,0,132,212,79,65,371,22,458,Halton Regional Health Unit,3536.0,2.7 %,6.9 %,17.8 %,11.7 %,8700,34100,8700,57300
3,788,580,0,4,0,336,298,0,557,739,76,1372,City of Ottawa Health Unit,3551.0,2.9 %,10.2 %,13.5 %,12.8 %,15900,86900,15900,108300
4,1279,1046,0,22,0,83,558,69,846,1387,114,2347,Peel Regional Health Unit,3553.0,3.2 %,6.0 %,18.5 %,12.5 %,25500,76400,25500,159100


In [36]:
# Merge COVID/comorbids data with the prox data
phu_final = covid.merge(phu_prox_full, how='outer', 
                       left_on=['Location', 'HR_UID'], 
                       right_on=['ENG_LABEL', 'HR_UID'])

# Reorder columns
phu_final = phu_final[['Location', 'ENG_LABEL', 'HR_UID', 'HRPOP', 'FEMALE', 
           'MALE', 'TRANSGENDER', 'UNKNOWN', 'OTHER', 'CONTACT', 
           'NEITHER', 'TRAVEL-RELATED', 'NOT-RESOLVED', 'RESOLVED', 'FATAL', 
           'TOTAL', 'copd', 'asthma', 'hbp', 'smokers', 'copd-percent', 'asthma-percent', 'hbp-percent', 
           'smokers-percent', 'amenity_dense', 
           'prox_idx_emp_wt', 'prox_idx_emp', 
           'prox_idx_pharma_wt', 'prox_idx_pharma',
           'prox_idx_childcare_wt', 'prox_idx_childcare',
           'prox_idx_health_wt', 'prox_idx_health', 
           'prox_idx_grocery_wt', 'prox_idx_grocery', 
           'prox_idx_educpri_wt', 'prox_idx_educpri',
           'prox_idx_educsec_wt', 'prox_idx_educsec', 
           'prox_idx_lib_wt', 'prox_idx_lib', 
           'prox_idx_parks_wt', 'prox_idx_parks', 
           'prox_idx_transit_wt', 'prox_idx_transit']]

phu_final.info()
phu_final.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 36 entries, 0 to 35
Data columns (total 45 columns):
Location                 36 non-null object
ENG_LABEL                36 non-null object
HR_UID                   36 non-null float64
HRPOP                    36 non-null float64
FEMALE                   36 non-null int64
MALE                     36 non-null int64
TRANSGENDER              36 non-null int64
UNKNOWN                  36 non-null int64
OTHER                    36 non-null int64
CONTACT                  36 non-null int64
NEITHER                  36 non-null int64
TRAVEL-RELATED           36 non-null int64
NOT-RESOLVED             36 non-null int64
RESOLVED                 36 non-null int64
FATAL                    36 non-null int64
TOTAL                    36 non-null int64
copd                     36 non-null int64
asthma                   36 non-null int64
hbp                      36 non-null int64
smokers                  36 non-null int64
copd-percent             36 non

Unnamed: 0,Location,ENG_LABEL,HR_UID,HRPOP,FEMALE,MALE,TRANSGENDER,UNKNOWN,OTHER,CONTACT,NEITHER,TRAVEL-RELATED,NOT-RESOLVED,RESOLVED,FATAL,TOTAL,copd,asthma,hbp,smokers,copd-percent,asthma-percent,hbp-percent,smokers-percent,amenity_dense,prox_idx_emp_wt,prox_idx_emp,prox_idx_pharma_wt,prox_idx_pharma,prox_idx_childcare_wt,prox_idx_childcare,prox_idx_health_wt,prox_idx_health,prox_idx_grocery_wt,prox_idx_grocery,prox_idx_educpri_wt,prox_idx_educpri,prox_idx_educsec_wt,prox_idx_educsec,prox_idx_lib_wt,prox_idx_lib,prox_idx_parks_wt,prox_idx_parks,prox_idx_transit_wt,prox_idx_transit
0,York Regional Health Unit,York Regional Health Unit,3570.0,1131052.0,862,658,0,3,0,329,719,145,346,1076,101,1523,13200,50800,13200,104400,2.0 %,5.1 %,17.6 %,10.5 %,0.076746,0.045526,0.0337,0.03647,0.0347,0.031416,0.028,0.012139,0.0082,0.024277,0.0322,0.09033,0.0945,0.022554,0.04975,0.014949,0.0666,0.085908,0.0608,0.007746,0.0056
1,City of Toronto Health Unit,City of Toronto Health Unit,3595.0,2694831.0,2774,2152,0,113,2,732,1737,210,1442,3256,343,5041,35600,200400,35600,340100,2.2 %,7.8 %,15.8 %,13.3 %,0.661503,0.147789,0.0939,0.089295,0.0494,0.088155,0.0657,0.054205,0.0222,0.091534,0.0663,0.198788,0.16445,0.072676,0.0729,0.082401,0.0934,0.101393,0.0732,0.046492,0.0348
2,Halton Regional Health Unit,Halton Regional Health Unit,3536.0,561949.0,270,188,0,0,0,132,212,79,65,371,22,458,8700,34100,8700,57300,2.7 %,6.9 %,17.8 %,11.7 %,0.106388,0.029093,0.0307,0.029825,0.0306,0.0475,0.03665,0.011782,0.0099,0.019114,0.0341,0.087809,0.088,0.026448,0.0495,0.02037,0.0641,0.068481,0.0678,0.005713,0.0054
3,City of Ottawa Health Unit,City of Ottawa Health Unit,3551.0,936763.0,788,580,0,4,0,336,298,0,557,739,76,1372,15900,86900,15900,108300,2.9 %,10.2 %,13.5 %,12.8 %,0.206235,0.046102,0.0243,0.029149,0.0264,0.096714,0.0892,0.018627,0.0072,0.036913,0.0447,0.115637,0.1168,0.035038,0.0586,0.027587,0.0714,0.074955,0.0673,0.019415,0.0146
4,Peel Regional Health Unit,Peel Regional Health Unit,3553.0,1388271.0,1279,1046,0,22,0,83,558,69,846,1387,114,2347,25500,76400,25500,159100,3.2 %,6.0 %,18.5 %,12.5 %,0.184902,0.055642,0.0496,0.044622,0.0309,0.04043,0.0353,0.011655,0.0076,0.035537,0.0365,0.116991,0.10875,0.0291,0.0515,0.025556,0.0659,0.067542,0.0613,0.008093,0.0032


In [38]:
list_colnames_prop = ['FEMALE', 'MALE', 'TRANSGENDER', 
                      'UNKNOWN', 'OTHER', 'CONTACT', 'NEITHER', 
                      'TRAVEL-RELATED', 'NOT-RESOLVED', 'RESOLVED', 
                      'FATAL', 'TOTAL', 'copd', 'asthma', 
                      'hbp', 'smokers']
for each in list_colnames_prop:
    phu_final[each + '_prop'] = phu_final[each]/phu_final['HRPOP']
    
phu_final.head()

Unnamed: 0,Location,ENG_LABEL,HR_UID,HRPOP,FEMALE,MALE,TRANSGENDER,UNKNOWN,OTHER,CONTACT,NEITHER,TRAVEL-RELATED,NOT-RESOLVED,RESOLVED,FATAL,TOTAL,copd,asthma,hbp,smokers,copd-percent,asthma-percent,hbp-percent,smokers-percent,amenity_dense,prox_idx_emp_wt,prox_idx_emp,prox_idx_pharma_wt,prox_idx_pharma,prox_idx_childcare_wt,prox_idx_childcare,prox_idx_health_wt,prox_idx_health,prox_idx_grocery_wt,prox_idx_grocery,prox_idx_educpri_wt,prox_idx_educpri,prox_idx_educsec_wt,prox_idx_educsec,prox_idx_lib_wt,prox_idx_lib,prox_idx_parks_wt,prox_idx_parks,prox_idx_transit_wt,prox_idx_transit,FEMALE_prop,MALE_prop,TRANSGENDER_prop,UNKNOWN_prop,OTHER_prop,CONTACT_prop,NEITHER_prop,TRAVEL-RELATED_prop,NOT-RESOLVED_prop,RESOLVED_prop,FATAL_prop,TOTAL_prop,copd_prop,asthma_prop,hbp_prop,smokers_prop
0,York Regional Health Unit,York Regional Health Unit,3570.0,1131052.0,862,658,0,3,0,329,719,145,346,1076,101,1523,13200,50800,13200,104400,2.0 %,5.1 %,17.6 %,10.5 %,0.076746,0.045526,0.0337,0.03647,0.0347,0.031416,0.028,0.012139,0.0082,0.024277,0.0322,0.09033,0.0945,0.022554,0.04975,0.014949,0.0666,0.085908,0.0608,0.007746,0.0056,0.000762,0.000582,0.0,3e-06,0.0,0.000291,0.000636,0.000128,0.000306,0.000951,8.9e-05,0.001347,0.011671,0.044914,0.011671,0.092303
1,City of Toronto Health Unit,City of Toronto Health Unit,3595.0,2694831.0,2774,2152,0,113,2,732,1737,210,1442,3256,343,5041,35600,200400,35600,340100,2.2 %,7.8 %,15.8 %,13.3 %,0.661503,0.147789,0.0939,0.089295,0.0494,0.088155,0.0657,0.054205,0.0222,0.091534,0.0663,0.198788,0.16445,0.072676,0.0729,0.082401,0.0934,0.101393,0.0732,0.046492,0.0348,0.001029,0.000799,0.0,4.2e-05,7.421616e-07,0.000272,0.000645,7.8e-05,0.000535,0.001208,0.000127,0.001871,0.01321,0.074365,0.01321,0.126205
2,Halton Regional Health Unit,Halton Regional Health Unit,3536.0,561949.0,270,188,0,0,0,132,212,79,65,371,22,458,8700,34100,8700,57300,2.7 %,6.9 %,17.8 %,11.7 %,0.106388,0.029093,0.0307,0.029825,0.0306,0.0475,0.03665,0.011782,0.0099,0.019114,0.0341,0.087809,0.088,0.026448,0.0495,0.02037,0.0641,0.068481,0.0678,0.005713,0.0054,0.00048,0.000335,0.0,0.0,0.0,0.000235,0.000377,0.000141,0.000116,0.00066,3.9e-05,0.000815,0.015482,0.060682,0.015482,0.101967
3,City of Ottawa Health Unit,City of Ottawa Health Unit,3551.0,936763.0,788,580,0,4,0,336,298,0,557,739,76,1372,15900,86900,15900,108300,2.9 %,10.2 %,13.5 %,12.8 %,0.206235,0.046102,0.0243,0.029149,0.0264,0.096714,0.0892,0.018627,0.0072,0.036913,0.0447,0.115637,0.1168,0.035038,0.0586,0.027587,0.0714,0.074955,0.0673,0.019415,0.0146,0.000841,0.000619,0.0,4e-06,0.0,0.000359,0.000318,0.0,0.000595,0.000789,8.1e-05,0.001465,0.016973,0.092766,0.016973,0.115611
4,Peel Regional Health Unit,Peel Regional Health Unit,3553.0,1388271.0,1279,1046,0,22,0,83,558,69,846,1387,114,2347,25500,76400,25500,159100,3.2 %,6.0 %,18.5 %,12.5 %,0.184902,0.055642,0.0496,0.044622,0.0309,0.04043,0.0353,0.011655,0.0076,0.035537,0.0365,0.116991,0.10875,0.0291,0.0515,0.025556,0.0659,0.067542,0.0613,0.008093,0.0032,0.000921,0.000753,0.0,1.6e-05,0.0,6e-05,0.000402,5e-05,0.000609,0.000999,8.2e-05,0.001691,0.018368,0.055032,0.018368,0.114603


In [39]:
phu_final.to_csv(r'PHU_final_ngan.csv', index = False)