In [None]:
'''
These scripts prepare raw downloaded CSV files for use with Open Tabulate.
It takes the raw directory as input and outputs to the processed directory.
The following steps are taken:
- Format latitude and longitude as separate columns
- Converts format to CSV 
- Encodes CSV with utf-8
- Filters out data not related to health (eg police stations)

-Sam Lumley
Dec 2021

'''

In [3]:
import pandas as pd

# If necessary, install openpyxl and geopandas
# import sys
# !conda install --yes --prefix {sys.prefix} openpyxl

# %pip install geopandas

In [5]:
# If necessary, generate province folder structure

# import os

# folders=['ab','bc','mb','nb','nl','ns','nt','nu','on','pe','qc','sk','yt']

# for items in folders:
#     os.mkdir(items)


In [7]:
# AB Calgary covid 
# convert point to lat lon
# ignore empty rows

import pandas as pd

# Load the raw Calgary vaccination-centre export; its "POINT" column is parsed below.
df = pd.read_csv('raw/AB_Calgary_covid_vaccination_centres.csv')

def strip_point(x):
    """Return the coordinate tokens of a 'POINT (a b)' style value.

    The value is stringified first. A missing cell (which stringifies to
    'nan') yields [None, None]. Otherwise the 'POINT (' characters are
    trimmed from the ends, the closing parenthesis is removed, and the
    remaining text is split on whitespace into a list of strings.
    """
    text = str(x)
    if text != 'nan':
        return text.strip('POINT (').rstrip(')').split()
    return [None, None]

# Parse every "POINT" value once; the first token is stored as the longitude
# and the second as the latitude.
pairs = [strip_point(value) for value in df["POINT"]]
LONGS = [pair[0] for pair in pairs]
LATS = [pair[1] for pair in pairs]

df["LONGITUDE"] = LONGS
df["LATITUDE"] = LATS

# Write into processed/ (the notebook header states that output goes to the
# processed directory); previously this file landed in the working directory.
df.to_csv('processed/AB_Calgary_covid_vaccination_centres.csv')

In [69]:
# AB Calgary healthcare 
# point to lat lon

import pandas as pd

# Read from raw/ (the notebook header states that input comes from the raw
# directory); previously this path pointed at the working directory.
df = pd.read_csv('raw/AB_Calgary_healthcare_facilities.csv')

def strip_point(x):
    """Split a 'POINT (a b)' style value into its coordinate tokens.

    Parameters
    ----------
    x : object
        Cell value; it is converted with str() before parsing.

    Returns
    -------
    list
        The whitespace-separated tokens (strings, in source order) found
        after trimming the 'POINT (' prefix characters and the closing
        parenthesis, or [None, None] when fewer than two tokens are found
        (for example a missing cell, which stringifies to 'nan').
    """
    text = str(x)
    # str.strip removes any of the listed characters from both ends, which
    # peels off the 'POINT (' prefix while leaving the numeric text intact.
    tokens = text.strip('POINT (').rstrip(')').split()
    # Callers index [0] and [1]; return a placeholder pair instead of letting
    # a missing or empty value raise IndexError at the call site.
    if len(tokens) < 2:
        return [None, None]
    return tokens

# Parse each "location" value once; the second token is stored as the
# longitude and the first as the latitude.
pairs = [strip_point(value) for value in df["location"]]
LONGS = [pair[1] for pair in pairs]
LATS = [pair[0] for pair in pairs]

df["LONGITUDE"] = LONGS
df["LATITUDE"] = LATS

df.to_csv('processed/AB_Calgary_healthcare_facilities.csv')

In [25]:
# British Columbia
# utf-8 encode datasets

# These datasets need no reshaping: each one is read and written straight back
# out so the processed copy is produced by pandas' CSV writer.
# The Guelph (Ontario) file is handled here because it needs the same treatment.
passthrough_copies = [
    ('raw/BC_emergency_rooms.csv', 'processed/BC_emergency_rooms.csv'),
    ('raw/BC_hospital.csv', 'processed/BC_hospital.csv'),
    ('raw/BC_pharmacies.csv', 'processed/BC_pharmacies.csv'),
    ('raw/BC_urgent_care.csv', 'processed/BC_urgent_care.csv'),
    ('raw/BC_walk-in_clinics.csv', 'processed/BC_walk-in_clinics.csv'),
    ('raw/ON_Guelph_healthcare_facilities.csv', 'processed/ON_Guelph_healthcare_facilities.csv'),
]

for source_path, target_path in passthrough_copies:
    pd.read_csv(source_path).to_csv(target_path)

In [22]:
# Manitoba

# read geojson, output csv
# split the point geometry into LONGITUDE and LATITUDE columns
# rows with a null geometry get empty coordinates

import geopandas as gpd
import pandas as pd

# Read the GeoJSON with geopandas, then wrap the GeoDataFrame in a plain
# pandas DataFrame for the column handling below.
sc1 = gpd.read_file('raw/MB_COVID-19_and_Flu_-_Vaccination_Sites.geojson')
df = pd.DataFrame(sc1)


def strip_point(x):
    """Return the coordinate tokens of a 'POINT (a b)' style value.

    The value is stringified first. Values that stringify to 'nan' or
    'None' (missing geometry) yield [None, None]. Otherwise the 'POINT ('
    characters are trimmed from the ends, the closing parenthesis is
    removed, and the remainder is split on whitespace.
    """
    text = str(x)
    if text in ('nan', 'None'):
        return [None, None]
    return text.strip('POINT (').rstrip(')').split()


# Parse each geometry once; the first token is stored as the longitude and
# the second as the latitude.
pairs = [strip_point(geom) for geom in df.geometry]
LONGS = [pair[0] for pair in pairs]
LATS = [pair[1] for pair in pairs]

df["LONGITUDE"] = LONGS
df["LATITUDE"] = LATS

df.to_csv('processed/MB_covid-19_and_flu_vaccine_sites.csv')


In [37]:
# New Brunswick
# convert point to lat lon

# Load the raw New Brunswick nursing-home export; its "Location" column is parsed below.
df = pd.read_csv('raw/NB_nursing_homes.csv')

def strip_point(x):
    """Split a parenthesised coordinate pair such as '(a b)' into two tokens.

    Parameters
    ----------
    x : object
        Cell value from the location column.

    Returns
    -------
    list
        The coordinate tokens as strings, in source order, or [None, None]
        when the value is not a string (e.g. NaN for an empty cell) or does
        not contain at least two tokens.
    """
    # Empty cells arrive as a float NaN, which has no .strip(); skip them
    # instead of raising AttributeError.
    if not isinstance(x, str):
        return [None, None]
    inner = x.strip('(').rstrip(')')
    # Treat commas as separators too, so a '(lat, lon)' value does not leave a
    # trailing comma attached to the first token.
    tokens = inner.replace(',', ' ').split()
    # Callers index [0] and [1]; never hand back a shorter list.
    if len(tokens) < 2:
        return [None, None]
    return tokens

# Parse each "Location" value once; the second token is stored as the
# longitude and the first as the latitude.
pairs = [strip_point(value) for value in df["Location"]]
LONGS = [pair[1] for pair in pairs]
LATS = [pair[0] for pair in pairs]

df["LONGITUDE"] = LONGS
df["LATITUDE"] = LATS

df.to_csv('processed/NB_nursing_homes.csv')

In [32]:
# NL healthcare and hospital datasets
# convert from xls to csv

import openpyxl

# Convert the Newfoundland and Labrador workbooks to CSV, one after the other.
# NOTE(review): both reads force the openpyxl engine, including the file with
# the .xls extension; openpyxl targets the newer xlsx format, so confirm that
# file's real format.
workbook_conversions = [
    ("raw/NL_Hospital.xlsx", 'processed/NL_hospital.csv'),
    ("raw/NL_HealthCentre.xls", 'processed/NL_healthcare_facilities.csv'),
]

for workbook_path, csv_path in workbook_conversions:
    pd.read_excel(workbook_path, engine='openpyxl').to_csv(csv_path)


In [41]:
# Processing for Nova Scotia
# Convert 'POINT' Geometry into lat and lon

import pandas as pd

# Load the raw Nova Scotia hospital export; its "the_geom" column is parsed below.
df = pd.read_csv('raw/NS_hospital.csv')

def strip_point(x):
    """Split a 'POINT (a b)' style value into its coordinate tokens.

    Parameters
    ----------
    x : object
        Cell value; it is converted with str() before parsing.

    Returns
    -------
    list
        The whitespace-separated tokens (strings, in source order) left
        after trimming the 'POINT (' prefix characters and the closing
        parenthesis, or [None, None] when fewer than two tokens are found
        (for example a missing cell, which stringifies to 'nan').
    """
    text = str(x)
    # str.strip removes any of the listed characters from both ends; digits
    # and minus signs are not in the set, so the numbers survive untouched.
    tokens = text.strip('POINT (').rstrip(')').split()
    # Callers index [0] and [1]; a missing value would otherwise surface as
    # an IndexError at the call site.
    if len(tokens) < 2:
        return [None, None]
    return tokens

# Parse each "the_geom" value once; the first token is stored as the
# longitude and the second as the latitude.
pairs = [strip_point(value) for value in df["the_geom"]]
LONGS = [pair[0] for pair in pairs]
LATS = [pair[1] for pair in pairs]

df["LONGITUDE"] = LONGS
df["LATITUDE"] = LATS

df.to_csv('processed/NS_hospital.csv')


In [14]:
# Processing for PEI Health Facilities data
# Turn the "Location 1" column into long and lat

# Load the raw PEI health-facility export; its "Location 1" column is parsed below.
df = pd.read_csv('raw/PE_healthcare_facilities.csv')

# Optional filter, currently disabled:
# df=df.loc[df["Facility Type"] != "Public Nursing Home"]

def strip_point(x):
    """Split a parenthesised coordinate pair such as '(a b)' into two tokens.

    Parameters
    ----------
    x : object
        Cell value from the location column.

    Returns
    -------
    list
        The coordinate tokens as strings, in source order, or [None, None]
        when the value is not a string (e.g. NaN for an empty cell) or does
        not contain at least two tokens.
    """
    # Empty cells arrive as a float NaN, which has no .strip(); return a
    # placeholder pair rather than raising AttributeError.
    if not isinstance(x, str):
        return [None, None]
    inner = x.strip('(').rstrip(')')
    # Commas count as separators as well, so '(lat, lon)' does not keep a
    # trailing comma on its first token.
    tokens = inner.replace(',', ' ').split()
    # Callers index [0] and [1]; never hand back a shorter list.
    if len(tokens) < 2:
        return [None, None]
    return tokens

# Parse each "Location 1" value once; the second token is stored as the
# longitude and the first as the latitude.
pairs = [strip_point(value) for value in df["Location 1"]]
LONGS = [pair[1] for pair in pairs]
LATS = [pair[0] for pair in pairs]

df["LONGITUDE"] = LONGS
df["LATITUDE"] = LATS

df.to_csv('processed/PE_healthcare_facilities.csv')

In [87]:
# Processing for NB Moncton 

#read geojson, output csv
#limit ourselves to hospitals
#Also turn 'POINT' Geometry into lat and lon

import geopandas as gpd
import pandas as pd

# Read each GeoJSON file with geopandas, split the point geometry into
# LONGITUDE/LATITUDE columns and write the result as CSV.

files = ['raw/NB_Moncton_Medical_Clinics', 'raw/NB_Moncton_Pharmacies', 'raw/NB_Moncton_Senior_Care_Facilities']


def strip_point(x):
    """Split a 'POINT (a b)' style value into its coordinate tokens.

    The value is converted with str() first. Returns the whitespace-separated
    tokens (strings, in source order) left after trimming the 'POINT ('
    prefix characters and the closing parenthesis, or [None, None] when
    fewer than two tokens are found (e.g. a null geometry, which stringifies
    to 'None').
    """
    tokens = str(x).strip('POINT (').rstrip(')').split()
    # The loop below indexes [0] and [1]; a null geometry must not raise.
    if len(tokens) < 2:
        return [None, None]
    return tokens


for file in files:

    filename = file + '.geojson'
    print(filename)
    sc1 = gpd.read_file(filename)

    df = pd.DataFrame(sc1)

    # Parse each geometry once; first token -> longitude, second -> latitude.
    pairs = [strip_point(geom) for geom in df.geometry]
    LONGS = [pair[0] for pair in pairs]
    LATS = [pair[1] for pair in pairs]

    df["LONGITUDE"] = LONGS
    df["LATITUDE"] = LATS

    # `file` still carries the 'raw/' directory; drop it so the CSV lands in
    # processed/ rather than processed/raw/. The lower-casing is kept as before.
    location = 'processed/' + file.rsplit('/', 1)[-1].lower() + '.csv'
    df.to_csv(location)

In [None]:
# ON Durham, Toronto clinics, Toronto covid test sites and York hospitals

# read geojson, output csv
# limit to hospitals
# Also turn 'POINT' Geometry into lat and lon


import geopandas as gpd
import pandas as pd

# Read each GeoJSON file with geopandas, split the point geometry into
# LONGITUDE/LATITUDE columns and write the result as CSV.

files = ['raw/ON_Durham_Healthcare_Facilities', 'raw/ON_Toronto_covid-19-immunization-clinics', 'raw/ON_Toronto_covid-19-testing-sites', 'raw/ON_York_Hospital']


def strip_point(x):
    """Split a 'POINT (a b)' style value into its coordinate tokens.

    The value is converted with str() first. Returns the whitespace-separated
    tokens (strings, in source order) left after trimming the 'POINT ('
    prefix characters and the closing parenthesis, or [None, None] when
    fewer than two tokens are found (e.g. a null geometry, which stringifies
    to 'None').
    """
    tokens = str(x).strip('POINT (').rstrip(')').split()
    # The loop below indexes [0] and [1]; a null geometry must not raise.
    if len(tokens) < 2:
        return [None, None]
    return tokens


for file in files:

    filename = file + '.geojson'
    print(filename)
    sc1 = gpd.read_file(filename)

    df = pd.DataFrame(sc1)

    # Parse each geometry once; first token -> longitude, second -> latitude.
    pairs = [strip_point(geom) for geom in df.geometry]
    LONGS = [pair[0] for pair in pairs]
    LATS = [pair[1] for pair in pairs]

    df["LONGITUDE"] = LONGS
    df["LATITUDE"] = LATS

    # `file` still carries the 'raw/' directory; drop it so the CSV lands in
    # processed/ rather than processed/raw/.
    location = 'processed/' + file.rsplit('/', 1)[-1] + '.csv'
    df.to_csv(location)

In [95]:
# QC Gatineau 
# convert POINT to lat lon

import pandas as pd

# Load the raw Gatineau public-places export; its "GEOM" column is parsed below.
df = pd.read_csv('raw/QC_Gatineau_public_places_including_hospitals.csv')

def strip_point(x):
    """Split a 'POINT (a b)' style value into its coordinate tokens.

    Parameters
    ----------
    x : object
        Cell value; it is converted with str() before parsing.

    Returns
    -------
    list
        The whitespace-separated tokens (strings, in source order) left
        after trimming the 'POINT (' prefix characters and the closing
        parenthesis, or [None, None] when fewer than two tokens are found
        (for example a missing cell, which stringifies to 'nan').
    """
    text = str(x)
    # str.strip removes any of the listed characters from both ends, which
    # peels off the 'POINT (' prefix while leaving the numeric text intact.
    tokens = text.strip('POINT (').rstrip(')').split()
    # Callers index [0] and [1]; return a placeholder pair instead of letting
    # a missing value raise IndexError at the call site.
    if len(tokens) < 2:
        return [None, None]
    return tokens

# Parse each "GEOM" value once; the second token is stored as the longitude
# and the first as the latitude.
pairs = [strip_point(value) for value in df["GEOM"]]
LONGS = [pair[1] for pair in pairs]
LATS = [pair[0] for pair in pairs]

df["LONGITUDE"] = LONGS
df["LATITUDE"] = LATS

df.to_csv('processed/QC_Gatineau_public_places_including_hospitals.csv')

In [None]:
# QC Quebec city 
# convert POINT to lat lon

import pandas as pd

# Load the raw Quebec City public-places export; its "GEOMETRIE" column is parsed below.
df = pd.read_csv('raw/QC_Quebec City_public_places_including_hospitals.csv')

def strip_point(x):
    """Split a 'POINT (a b)' style value into its coordinate tokens.

    Parameters
    ----------
    x : object
        Cell value; it is converted with str() before parsing.

    Returns
    -------
    list
        The whitespace-separated tokens (strings, in source order) left
        after trimming the 'POINT (' prefix characters and the closing
        parenthesis, or [None, None] when fewer than two tokens are found
        (for example a missing cell, which stringifies to 'nan').
    """
    text = str(x)
    # str.strip removes any of the listed characters from both ends; digits
    # and minus signs are not in the set, so the numbers survive untouched.
    tokens = text.strip('POINT (').rstrip(')').split()
    # Callers index [0] and [1]; a missing value would otherwise surface as
    # an IndexError at the call site.
    if len(tokens) < 2:
        return [None, None]
    return tokens

# Parse each "GEOMETRIE" value once; the second token is stored as the
# longitude and the first as the latitude.
pairs = [strip_point(value) for value in df["GEOMETRIE"]]
LONGS = [pair[1] for pair in pairs]
LATS = [pair[0] for pair in pairs]

df["LONGITUDE"] = LONGS
df["LATITUDE"] = LATS

# Keep only the rows whose DESCRIPTION is exactly 'Hôpitaux' before writing.
df2 = df.loc[df['DESCRIPTION'] == 'Hôpitaux']

df2.to_csv('processed/QC_Quebec_City_public_places_including_hospitals.csv')