**Extract Recreation Centre Location Data**


In [52]:
import pandas as pd

In [53]:
# read file containing information about recreation centre locations and types
# facility location dataframe
df = pd.read_csv('./ODRSF_v1.0.csv', encoding='cp1252')

  df = pd.read_csv('./ODRSF_v1.0.csv', encoding='cp1252')


In [54]:
# Remove unneeded columns
df = df.drop(columns = ['Index', 'Facility_Name', 'Source_Facility_Type', 'Provider', 'City', 'Source_Format_Address', 'CSD_UID', 'PR_UID', 'Latitude', 'Longitude'])

# Remove unit since almost all values are null
df = df.drop(columns = ['Unit'])

In [55]:
#Replace empty values of street_no with -1
df.loc[df['Street_No'] == '..', 'Street_No'] = '-1'

# Replace rest of street strings with an empty value
df.loc[df['Street_Name'] == '..', 'Street_Name'] = ''
df.loc[df['Street_Type'] == '..', 'Street_Type'] = ''
df.loc[df['Street_Direction'] == '..', 'Street_Direction'] = ''
df.loc[df['Street_Direction'] == '..', 'Street_Direction'] = ''
df.loc[df['Postal_Code'] == '..', 'Postal_Code'] = ''

In [56]:
#convert street_no to integer
df['Street_No'] = df['Street_No'].astype(int)

In [57]:
# Captialize all province/territory column
df['Prov_Terr'] = df['Prov_Terr'].str.upper()

In [58]:
# remove all non ontario provinces
df = df[df['Prov_Terr'] == 'ON']

In [59]:
unique_cities = df['CSD_Name'].unique()

**Create Dataframe for City Demographics in Ontario**

In [60]:
demographic_df = pd.read_csv('./Ontario_Demographics.csv', encoding='cp1252')

In [61]:
demographic_df = demographic_df.drop(columns = ['CENSUS_YEAR', 'DGUID', 'ALT_GEO_CODE', 'GEO_LEVEL', 'TNR_SF', 'TNR_LF', 'DATA_QUALITY_FLAG','SYMBOL', 'CHARACTERISTIC_ID', 'CHARACTERISTIC_NOTE', 'C2_COUNT_MEN+', 'C3_COUNT_WOMEN+', 'C10_RATE_TOTAL', 'C11_RATE_MEN+', 'C12_RATE_WOMEN+', 'SYMBOL.1','SYMBOL.2','SYMBOL.3','SYMBOL.4','SYMBOL.5'])

In [62]:
# Remove unnecessary information after comma in geo_name so processing can be easier
demographic_df['GEO_NAME'] = demographic_df['GEO_NAME'].str.split(',').str[0]

In [63]:
# only keep rows where the city is both in the demographic dataframe and the facility location dataframe
demographic_df = demographic_df[demographic_df['GEO_NAME'].isin(unique_cities)]

In [64]:
#get starting and ending index of rows that contain age information about the city
age_indexes = demographic_df[demographic_df['CHARACTERISTIC_NAME'].str.contains('Total - Age groups of the population - 100% data|100 years and over')].index

#get starting and ending index of rows that contain ethnicity information about the city
ethnicity_indexes = demographic_df[demographic_df['CHARACTERISTIC_NAME'].str.contains('Total visible minority population|Total - Ethnic or cultural origin for the population in private households - 25% sample data')].index

In [65]:
selected_age_dfs = []
selected_ethnicity_dfs = []

# store only data in dataframe from start and end indexes
for i in range(0, len(age_indexes), 2):
    age_df = demographic_df[age_indexes[i] + 1: age_indexes[i + 1] + 1]
    ethnicity_df = demographic_df[ethnicity_indexes[i] + 1: ethnicity_indexes[i + 1]]
    selected_age_dfs.append(age_df)
    selected_ethnicity_dfs.append(ethnicity_df)

#resulting dataframes 
result_age_df = pd.concat(selected_age_dfs, ignore_index=True)
result_ethnicity_df = pd.concat(selected_ethnicity_dfs, ignore_index=True)

#result_age_df.to_csv('output1.csv', index=False)
#result_ethnicity_df.to_csv('output2.csv', index=False)

In [72]:
# get all unique cities from list
cities = result_age_df['GEO_NAME'].unique()

Unnamed: 0,GEO_NAME,CHARACTERISTIC_NAME,C1_COUNT_TOTAL
0,South Glengarry,South Asian,120.0
1,South Glengarry,Chinese,15.0
2,South Glengarry,Black,115.0
3,South Glengarry,Filipino,40.0
4,South Glengarry,Arab,65.0
...,...,...,...
3128,Kenora,Korean,0.0
3129,Kenora,Japanese,10.0
3130,Kenora,"Visible minority, n.i.e.",0.0
3131,Kenora,Multiple visible minorities,0.0


Transpose Age Dataframe

In [67]:
# Unique age ranges - will be used as the new column in the new dataframe
ages = result_age_df['CHARACTERISTIC_NAME'].unique()

age_df = pd.DataFrame(columns=ages)

# add new column for city
age_df.insert(loc=0, column='City', value='')

In [68]:
# Populate first column cities with all city variables
age_df['City'] = cities

# Set column data
j = 0
for i in range(len(cities)):
    for column in age_df.columns[1:]:
        age_df.loc[i, column] = result_age_df.loc[j,'C1_COUNT_TOTAL']
        j += 1     

In [69]:
ethnicities = result_ethnicity_df['CHARACTERISTIC_NAME'].unique()

ethnicity_df = pd.DataFrame(columns=ethnicities)

ethnicity_df.insert(loc=0, column='City', value='')

In [70]:
ethnicity_df['City'] = cities

k = 0
for i in range(len(cities)):
    for column in ethnicity_df.columns[1:]:
        ethnicity_df.loc[i, column] = result_ethnicity_df.loc[k,'C1_COUNT_TOTAL']
        k += 1   

In [71]:
ethnicity_df

Unnamed: 0,City,South Asian,Chinese,Black,Filipino,Arab,Latin American,Southeast Asian,West Asian,Korean,Japanese,"Visible minority, n.i.e.",Multiple visible minorities,Not a visible minority
0,South Glengarry,120.0,15.0,115.0,40.0,65.0,15.0,35.0,0.0,0.0,0.0,15.0,0.0,12765.0
1,Cornwall,2360.0,165.0,775.0,300.0,240.0,240.0,165.0,160.0,50.0,30.0,35.0,85.0,41750.0
2,South Dundas,20.0,35.0,65.0,35.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,10745.0
3,North Glengarry,45.0,0.0,65.0,10.0,20.0,50.0,0.0,0.0,15.0,0.0,0.0,10.0,9670.0
4,Alfred and Plantagenet,40.0,40.0,75.0,25.0,15.0,20.0,0.0,0.0,0.0,0.0,0.0,0.0,9430.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
230,Marathon,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,345.0
231,Machin,2745.0,1020.0,1185.0,400.0,450.0,390.0,735.0,175.0,155.0,315.0,160.0,240.0,98680.0
232,Dryden,0.0,20.0,25.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,5835.0
233,Ear Falls,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,405.0
