In [1]:
import pandas as pd
import numpy as np

# Set options to show dfs in full
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)

## Clean the general LTC data

In [2]:
# Add a column with name in lower case
df_genLTC = pd.read_csv('webscrape_ltc_general_database.csv')
df_genLTC['cleaned_name'] = df_genLTC['name'].str.lower()

# Adjust names from general LTC database to match names from COVID LTC database
df_genLTC["cleaned_name"].replace({"vision '74 inc.": "vision nursing home", "finlandia hoivakoti nursing home limited": "finlandia hoivakoti nursing home", "pinecrest nursing home - bobcaygeon": "pinecrest nursing home (bobcaygeon)", "residence saint- louis": "residence saint-louis", "st patrick's home": "st. patrick's home"}, inplace=True)
df_genLTC.info()
df_genLTC.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 625 entries, 0 to 624
Data columns (total 30 columns):
index                   625 non-null int64
name                    625 non-null object
address                 625 non-null object
city_and_postal_code    625 non-null object
LHIN                    625 non-null object
licensee                625 non-null object
management              71 non-null object
home_type               624 non-null object
beds                    625 non-null object
short_stay              625 non-null object
residents_council       625 non-null object
family_council          625 non-null object
accreditation           625 non-null object
additional_info         4 non-null object
city                    625 non-null object
postal_code             625 non-null object
number_of_beds          625 non-null int64
total_inspections       625 non-null float64
5y_inspections          625 non-null float64
2y_inspections          625 non-null float64
total_complaints 

Unnamed: 0,index,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,number_of_beds,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders,cleaned_name
0,0,AFTON PARK PLACE LONG TERM CARE COMMUNITY,1200 Afton Drive,"Sarnia, N7S6L6",Erie St. Clair,S & R Nursing Homes Ltd.,,For-Profit,Home with approximately 128 beds,No,Yes,Yes,Yes,,Sarnia,N7S6L6,128,67.0,34.0,9.0,29.0,15.0,3.0,28.0,13.0,5.0,5.0,2.0,,afton park place long term care community
1,1,"ALBRIGHT GARDENS HOMES, INCORPORATED",5050 Hillside Drive,"Beamsville, L0R1B2",Hamilton Niagara Haldimand Brant (Hnhb),"Albright Gardens Homes, Incorporated",,Non-Profit,Home with approximately 231 beds,No,Yes,Yes,No,,Beamsville,L0R1B2,231,39.0,25.0,15.0,18.0,10.0,6.0,12.0,7.0,6.0,7.0,7.0,3.0,"albright gardens homes, incorporated"
2,2,ALEXANDER PLACE,329 Parkside Drive P. O. Box 50,"Waterdown, L0R2H0",Hamilton Niagara Haldimand Brant (Hnhb),Waterdown Long Term Care Centre Inc.,,For-Profit,Home with approximately 128 beds,Yes,Yes,Yes,Yes,,Waterdown,L0R2H0,128,28.0,17.0,8.0,11.0,6.0,3.0,8.0,5.0,2.0,4.0,4.0,2.0,alexander place
3,4,ALGOMA MANOR NURSING HOME,145 Dawson Street,"Thessalon, P0R1L0",North East,Algoma Manor Nursing Home,,,Home with approximately 96 beds,Yes,Yes,Yes,Yes,,Thessalon,P0R1L0,96,23.0,14.0,7.0,7.0,5.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0,algoma manor nursing home
4,5,ALGONQUIN NURSING HOME,207 Turcotte Park Road P.O. Box 270,"Mattawa, P0H1V0",North East,Algonquin Nursing Home Of Mattawa,Hôpital De Mattawa Hospital,Non-Profit,Home with approximately 73 beds,Yes,Yes,Yes,Yes,,Mattawa,P0H1V0,73,23.0,14.0,5.0,3.0,1.0,,11.0,7.0,4.0,5.0,3.0,,algonquin nursing home


In [3]:
# df_genLTC[df_genLTC['cleaned_name'].str.contains('meadows', regex=False, na=False)]

## Merge general LTC and covid LTC data

In [4]:
# Import the covid LTC data
# df_covidLTC = pd.read_csv('merged_ltc.csv') # the orginal covid LTC data scraped in March
df_covidLTC = pd.read_csv('merged_ltc_secondScrape.csv')
df_covidLTC['cleaned_name'] = df_covidLTC['LTC Home'].str.lower()

df_covidLTC.info()
df_covidLTC.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 293 entries, 0 to 292
Data columns (total 9 columns):
Unnamed: 0                  293 non-null int64
LTC Home                    293 non-null object
City                        293 non-null object
Beds                        293 non-null int64
Confirmed Resident Cases    129 non-null object
Resident Deaths             293 non-null object
Confirmed Staff Cases       129 non-null object
Status                      293 non-null object
cleaned_name                293 non-null object
dtypes: int64(2), object(7)
memory usage: 20.7+ KB


Unnamed: 0.1,Unnamed: 0,LTC Home,City,Beds,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status,cleaned_name
0,0,"Albright Gardens Homes, Incorporated",Beamsville,231,<5,0,0,Active,"albright gardens homes, incorporated"
1,1,Alexander Place,Waterdown,128,<5,0,0,Active,alexander place
2,2,Almonte Country Haven,Almonte,82,0,30,0,Active,almonte country haven
3,3,Altamont Care Community,Scarborough,159,<5,52,24,Active,altamont care community
4,4,Anson Place Care Centre,Hagersville,61,28,23,29,Active,anson place care centre


In [5]:
# Check that homes from COVID LTC database are present in the general LTC database
a = set(df_genLTC['cleaned_name'])
b = set(df_covidLTC['cleaned_name'])

def returnNotMatches(a, b):
    return [[x for x in b if x not in a]]

returnNotMatches(a, b) # All homes present in the COVID database are present in the general database

# df_covidLTC['cleaned_name'].isin(df_genLTC['cleaned_name']).value_counts()

[[]]

In [6]:
# Merge the general LTC database with the COVID LTC database
df_LTC = pd.merge(left=df_genLTC, right=df_covidLTC, how='outer', left_on='cleaned_name', right_on='cleaned_name')


In [7]:
df_LTC.info()
df_LTC.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 625 entries, 0 to 624
Data columns (total 38 columns):
index                       625 non-null int64
name                        625 non-null object
address                     625 non-null object
city_and_postal_code        625 non-null object
LHIN                        625 non-null object
licensee                    625 non-null object
management                  71 non-null object
home_type                   624 non-null object
beds                        625 non-null object
short_stay                  625 non-null object
residents_council           625 non-null object
family_council              625 non-null object
accreditation               625 non-null object
additional_info             4 non-null object
city                        625 non-null object
postal_code                 625 non-null object
number_of_beds              625 non-null int64
total_inspections           625 non-null float64
5y_inspections              625 non

Unnamed: 0.1,index,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,number_of_beds,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders,cleaned_name,Unnamed: 0,LTC Home,City,Beds,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status
0,0,AFTON PARK PLACE LONG TERM CARE COMMUNITY,1200 Afton Drive,"Sarnia, N7S6L6",Erie St. Clair,S & R Nursing Homes Ltd.,,For-Profit,Home with approximately 128 beds,No,Yes,Yes,Yes,,Sarnia,N7S6L6,128,67.0,34.0,9.0,29.0,15.0,3.0,28.0,13.0,5.0,5.0,2.0,,afton park place long term care community,,,,,,,,
1,1,"ALBRIGHT GARDENS HOMES, INCORPORATED",5050 Hillside Drive,"Beamsville, L0R1B2",Hamilton Niagara Haldimand Brant (Hnhb),"Albright Gardens Homes, Incorporated",,Non-Profit,Home with approximately 231 beds,No,Yes,Yes,No,,Beamsville,L0R1B2,231,39.0,25.0,15.0,18.0,10.0,6.0,12.0,7.0,6.0,7.0,7.0,3.0,"albright gardens homes, incorporated",0.0,"Albright Gardens Homes, Incorporated",Beamsville,231.0,<5,0.0,0.0,Active
2,2,ALEXANDER PLACE,329 Parkside Drive P. O. Box 50,"Waterdown, L0R2H0",Hamilton Niagara Haldimand Brant (Hnhb),Waterdown Long Term Care Centre Inc.,,For-Profit,Home with approximately 128 beds,Yes,Yes,Yes,Yes,,Waterdown,L0R2H0,128,28.0,17.0,8.0,11.0,6.0,3.0,8.0,5.0,2.0,4.0,4.0,2.0,alexander place,1.0,Alexander Place,Waterdown,128.0,<5,0.0,0.0,Active
3,4,ALGOMA MANOR NURSING HOME,145 Dawson Street,"Thessalon, P0R1L0",North East,Algoma Manor Nursing Home,,,Home with approximately 96 beds,Yes,Yes,Yes,Yes,,Thessalon,P0R1L0,96,23.0,14.0,7.0,7.0,5.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0,algoma manor nursing home,,,,,,,,
4,5,ALGONQUIN NURSING HOME,207 Turcotte Park Road P.O. Box 270,"Mattawa, P0H1V0",North East,Algonquin Nursing Home Of Mattawa,Hôpital De Mattawa Hospital,Non-Profit,Home with approximately 73 beds,Yes,Yes,Yes,Yes,,Mattawa,P0H1V0,73,23.0,14.0,5.0,3.0,1.0,,11.0,7.0,4.0,5.0,3.0,,algonquin nursing home,,,,,,,,


In [8]:
# Check that the number of beds from the general LTC database agree with number of beds from the COVID LTC database
df_LTC_beds = df_LTC[df_LTC.Beds.notnull()]


# There are 23 homes where the number of beds in the 2 LTC databases do not agree
df_LTC_beds['bed_match'] = np.where((df_LTC_beds['number_of_beds'] == df_LTC_beds['Beds']), 'match', 'no_match')
df_LTC_beds['bed_match'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


match       270
no_match    23 
Name: bed_match, dtype: int64

In [9]:
# Review homes where value for number of beds differ between the databases
df_LTC_beds = df_LTC_beds[df_LTC_beds['bed_match'] == 'no_match']
df_LTC_beds['diff_beds'] = df_LTC_beds['number_of_beds'] - df_LTC_beds['Beds']
df_LTC_beds[['cleaned_name', 'number_of_beds', 'Beds', 'diff_beds']]

Unnamed: 0,cleaned_name,number_of_beds,Beds,diff_beds
51,camilla care community,237,236.0,1.0
76,cedarwood village,91,90.0,1.0
79,centre d'accueil roger seguin,110,113.0,-3.0
89,chartwell pine grove long term care residence,101,96.0,5.0
95,chartwell westmount long term care residence,161,160.0,1.0
96,chartwell white eagle long term care residence,57,56.0,1.0
106,collingwood nursing home,60,64.0,-4.0
147,elmwood place,78,90.0,-12.0
183,extendicare timmins,121,180.0,-59.0
226,glen hill strathaven,199,210.0,-11.0


## Manual review of some beds numbers discrepancies
For kensington village, st. patrick's home and mount hope centre for long term care, websites support the COVID LTC values. Therefore where there is a discrepancy in the number of beds, we will take the number from the covid LTC database.

In [10]:
# Create a new column with number of beds as defined above
df_LTC['number_beds'] = np.nan
df_LTC['number_beds'] = np.where((df_LTC.Beds.isnull()), df_LTC['number_of_beds'], df_LTC['Beds'])
df_LTC.iloc[[620, 499]]

Unnamed: 0.1,index,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,number_of_beds,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders,cleaned_name,Unnamed: 0,LTC Home,City,Beds,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status,number_beds
620,646,YEE HONG CENTRE - MISSISSAUGA,5510 Mavis Road,"Mississauga, L5V2X5",Mississauga Halton,Yee Hong Centre For Geriatric Care,,Non-Profit,Home with approximately 200 beds,Yes,Yes,Yes,Yes,,Mississauga,L5V2X5,200,23.0,14.0,7.0,10.0,4.0,3.0,7.0,4.0,3.0,2.0,2.0,1.0,yee hong centre - mississauga,161.0,Yee Hong Centre - Mississauga,Mississauga,200.0,,0.0,,Inactive,200.0
499,522,SUMMIT PLACE,850-4Th Street East,"Owen Sound, N4K6A3",South West,Revera Long Term Care Inc.,,For-Profit,Home with approximately 119 beds,No,Yes,Yes,Yes,,Owen Sound,N4K6A3,119,29.0,12.0,8.0,10.0,3.0,2.0,11.0,4.0,3.0,4.0,3.0,3.0,summit place,,,,,,,,,119.0


In [11]:
# Drop columns that are redundant/unnecessary
listofcols = ['index', 'city_and_postal_code', 'beds', 'number_of_beds', 'Unnamed: 0', 'LTC Home', 'City', 'Beds']
df_LTC.drop(listofcols, axis = 1, inplace = True)
df_LTC.head()

Unnamed: 0,name,address,LHIN,licensee,management,home_type,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders,cleaned_name,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status,number_beds
0,AFTON PARK PLACE LONG TERM CARE COMMUNITY,1200 Afton Drive,Erie St. Clair,S & R Nursing Homes Ltd.,,For-Profit,No,Yes,Yes,Yes,,Sarnia,N7S6L6,67.0,34.0,9.0,29.0,15.0,3.0,28.0,13.0,5.0,5.0,2.0,,afton park place long term care community,,,,,128.0
1,"ALBRIGHT GARDENS HOMES, INCORPORATED",5050 Hillside Drive,Hamilton Niagara Haldimand Brant (Hnhb),"Albright Gardens Homes, Incorporated",,Non-Profit,No,Yes,Yes,No,,Beamsville,L0R1B2,39.0,25.0,15.0,18.0,10.0,6.0,12.0,7.0,6.0,7.0,7.0,3.0,"albright gardens homes, incorporated",<5,0.0,0.0,Active,231.0
2,ALEXANDER PLACE,329 Parkside Drive P. O. Box 50,Hamilton Niagara Haldimand Brant (Hnhb),Waterdown Long Term Care Centre Inc.,,For-Profit,Yes,Yes,Yes,Yes,,Waterdown,L0R2H0,28.0,17.0,8.0,11.0,6.0,3.0,8.0,5.0,2.0,4.0,4.0,2.0,alexander place,<5,0.0,0.0,Active,128.0
3,ALGOMA MANOR NURSING HOME,145 Dawson Street,North East,Algoma Manor Nursing Home,,,Yes,Yes,Yes,Yes,,Thessalon,P0R1L0,23.0,14.0,7.0,7.0,5.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0,algoma manor nursing home,,,,,96.0
4,ALGONQUIN NURSING HOME,207 Turcotte Park Road P.O. Box 270,North East,Algonquin Nursing Home Of Mattawa,Hôpital De Mattawa Hospital,Non-Profit,Yes,Yes,Yes,Yes,,Mattawa,P0H1V0,23.0,14.0,5.0,3.0,1.0,,11.0,7.0,4.0,5.0,3.0,,algonquin nursing home,,,,,73.0


In [12]:
df_LTC.info()
# df_LTC.to_csv(r'merge_genltc_covidltc_FINAL.csv', index = False)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 625 entries, 0 to 624
Data columns (total 31 columns):
name                        625 non-null object
address                     625 non-null object
LHIN                        625 non-null object
licensee                    625 non-null object
management                  71 non-null object
home_type                   624 non-null object
short_stay                  625 non-null object
residents_council           625 non-null object
family_council              625 non-null object
accreditation               625 non-null object
additional_info             4 non-null object
city                        625 non-null object
postal_code                 625 non-null object
total_inspections           625 non-null float64
5y_inspections              625 non-null float64
2y_inspections              625 non-null float64
total_complaints            621 non-null float64
5y_complaints               588 non-null float64
2y_complaints               5

## Import and prep ODHF data
- We are merging with the ODHF data to obtain the geodata for the LTC homes

In [13]:
# Import ODHF data for Ontario
odhf = pd.read_csv(r'ohdf_v1_ontario.csv')

odhf.info()
odhf.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3733 entries, 0 to 3732
Data columns (total 17 columns):
index                        3733 non-null int64
facility_name                3733 non-null object
source_facility_type         3705 non-null object
odhf_facility_type           3728 non-null object
provider                     3733 non-null object
unit                         0 non-null float64
street_no                    3122 non-null object
street_name                  3154 non-null object
postal_code                  3726 non-null object
city                         3724 non-null object
province                     3733 non-null object
source_format_str_address    271 non-null object
CSDname                      3540 non-null object
CSDuid                       3151 non-null float64
Pruid                        3733 non-null int64
latitude                     3159 non-null float64
longitude                    3159 non-null float64
dtypes: float64(4), int64(2), object(11)
memo

Unnamed: 0,index,facility_name,source_facility_type,odhf_facility_type,provider,unit,street_no,street_name,postal_code,city,province,source_format_str_address,CSDname,CSDuid,Pruid,latitude,longitude
0,2401,"(georgian, trillium & sunset manors) corporation of the county of simcoe",community support service,Ambulatory health care services,Province of Ontario,,1110,highway 26,L0L1X0,midhurst,on,,Springwater,3543009.0,35,44.444915,-79.744846
1,2402,147 elder street inc.,retirement home,Nursing and residential care facilities,Province of Ontario,,147,elder street,M3H5G9,toronto,on,,Toronto,3520005.0,35,43.762894,-79.454109
2,2403,2 spirited people of the 1st nations,community support service,Ambulatory health care services,Province of Ontario,,145,front street e,M5A1E3,toronto,on,,Toronto,3520005.0,35,43.649821,-79.36935
3,2404,"3 c's reintroduction centre, the",mental health and addiction organization,Nursing and residential care facilities,Province of Ontario,,226,dog lake road,P7G2G2,thunder bay,on,,Thunder Bay,3558004.0,35,48.48676,-89.321521
4,2405,RÈsidence St. FranÁois,retirement home,Nursing and residential care facilities,Province of Ontario,,790,rue principale,K0A1M0,casselman,on,,Casselman,3502044.0,35,45.316392,-75.090252


In [14]:
# Clean facility name
import unicodedata
odhf['cleaned_name'] = odhf['facility_name'].str.lower().apply(lambda val: unicodedata.normalize('NFKD', val).encode('ascii', 'ignore').decode())
odhf.head()

Unnamed: 0,index,facility_name,source_facility_type,odhf_facility_type,provider,unit,street_no,street_name,postal_code,city,province,source_format_str_address,CSDname,CSDuid,Pruid,latitude,longitude,cleaned_name
0,2401,"(georgian, trillium & sunset manors) corporation of the county of simcoe",community support service,Ambulatory health care services,Province of Ontario,,1110,highway 26,L0L1X0,midhurst,on,,Springwater,3543009.0,35,44.444915,-79.744846,"(georgian, trillium & sunset manors) corporation of the county of simcoe"
1,2402,147 elder street inc.,retirement home,Nursing and residential care facilities,Province of Ontario,,147,elder street,M3H5G9,toronto,on,,Toronto,3520005.0,35,43.762894,-79.454109,147 elder street inc.
2,2403,2 spirited people of the 1st nations,community support service,Ambulatory health care services,Province of Ontario,,145,front street e,M5A1E3,toronto,on,,Toronto,3520005.0,35,43.649821,-79.36935,2 spirited people of the 1st nations
3,2404,"3 c's reintroduction centre, the",mental health and addiction organization,Nursing and residential care facilities,Province of Ontario,,226,dog lake road,P7G2G2,thunder bay,on,,Thunder Bay,3558004.0,35,48.48676,-89.321521,"3 c's reintroduction centre, the"
4,2405,RÈsidence St. FranÁois,retirement home,Nursing and residential care facilities,Province of Ontario,,790,rue principale,K0A1M0,casselman,on,,Casselman,3502044.0,35,45.316392,-75.090252,residence st. franaois


In [15]:
# odhf[odhf['cleaned_name'].str.contains('meadows', regex=False, na=False)]

## List of homes in the LTC database that are not listed in the ODHF

In [16]:
a = set(odhf['cleaned_name'])
b = set(df_LTC['cleaned_name'])

def returnNotMatches(a, b):
    return [x for x in b if x not in a]

missing_odhf = returnNotMatches(a, b)

In [17]:
len(missing_odhf)


19

In [18]:
missing_odhf.sort()
missing_odhf

['albright gardens homes, incorporated',
 'bella senior care residences inc.',
 'bon air long term care residence',
 'caressant care cobden',
 'caressant care harriston',
 'champlain long term care residence',
 'heartwood (fka versa-care cornwall)',
 'lancaster long term care residence',
 'niagara long term care residence',
 'north renfrew long-term care services',
 'north shore health network – eldcap unit',
 'north shore health network – ltc unit',
 'santé manitouwadge health',
 "st. joseph's health centre, guelph",
 "st. joseph's villa, dundas",
 'strathcona long term care',
 'the meadows',
 'william a. "bill" george extended care facility',
 'élisabeth-bruyère residence']

## Replace cleaned names in the LTC and ODHF df

In [19]:
df_LTC['cleaned_name'] = df_LTC['cleaned_name'].replace({
    'albright gardens homes, incorporated' : 'albright gardens',
    'bella senior care residences inc.':'bella senior care residences',
    'bon air long term care residence':'chartwell bon air long term care residence',
    'caressant care cobden':'caressant care - cobden',
    'caressant care harriston':'caressant care - harriston',
    'champlain long term care residence':'chartwell champlain long term care residence',
    'heartwood (fka versa-care cornwall)':'heartwood',
    'lancaster long term care residence':'chartwell lancaster long term care residence',
    'niagara long term care residence':'chartwell niagara long term care residence',
    'north renfrew long-term care services':'north renfrew long-term care services inc.',
    'north shore health network – eldcap unit':'north shore health network - eldcap unit' ,
    'north shore health network – ltc unit' : 'north shore health network - ltc unit',
    'santé manitouwadge health':'sante manitouwadge health',
    'st. joseph\'s health centre, guelph':'st. joseph\'s health centre - guelph',
    'st. joseph\'s villa, dundas' : "st. joseph's villa (dundas)",
# strathcona long term care, MISSING SEE NOTE
    'the meadows':'revera inc. o the meadows long term care centre',
    'william a. "bill" george extended care facility': 'william a. \'bill\' george extended care facility',
    'élisabeth-bruyère residence':'elisabeth-bruyere residence'
})

odhf['cleaned_name'] = odhf['cleaned_name'].replace({'lisabeth-bruyre residence': 'elisabeth-bruyere residence',
                                             'rsidence saint-louis' : 'residence saint-louis'})

In [20]:
a = set(odhf['cleaned_name'])
b = set(df_LTC['cleaned_name'])

def returnNotMatches(a, b):
    return [x for x in b if x not in a]

missing_odhf_names = returnNotMatches(a, b)
print(len(missing_odhf_names))

missing_odhf_names.sort()
missing_odhf_names

1


['strathcona long term care']

## Manual review of discrepancy between LTC data and ODHF
### Missing from ODHF
1. Strathcona Long Term Care, https://svch.ca/long-term-care/strathcona/location/, has replaced saugeen valley nursing center in ODHF which is at the same address, https://svch.ca/saugeen-valley-nursing-centre-announces-plan-to-redevelop-87-bed-facility-in-mount-forest/#.Xr2rvRNKjfY

### Other notes:
- sante manitouwadge health: called and this is part of the hospital with 9 LTC beds

### We will change Saugeen Valley to Strathcona in ODHF before merging the LTC data with ODHF

In [21]:
# Replace saugeen with strathcona in ODHF
odhf['cleaned_name'] = odhf['cleaned_name'].replace({'saugeen valley nursing center': 'strathcona long term care'})

odhf[odhf['cleaned_name'].str.contains('strathcona', regex=False, na=False)]
# odhf.iloc[2721]



Unnamed: 0,index,facility_name,source_facility_type,odhf_facility_type,provider,unit,street_no,street_name,postal_code,city,province,source_format_str_address,CSDname,CSDuid,Pruid,latitude,longitude,cleaned_name
2721,5122,saugeen valley nursing center,long-term care home,Nursing and residential care facilities,Province of Ontario,,465,dublin street,N0G2L3,mount forest,on,,Wellington North,3523050.0,35,43.975099,-80.73934,strathcona long term care


## Merge LTC data with ODHF

In [22]:
df_LTC_odhf = pd.merge(left=df_LTC, right=odhf, 
                  how='left', left_on='cleaned_name', 
                  right_on='cleaned_name')
df_LTC_odhf.info()
df_LTC_odhf.head()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 625 entries, 0 to 624
Data columns (total 48 columns):
name                         625 non-null object
address                      625 non-null object
LHIN                         625 non-null object
licensee                     625 non-null object
management                   71 non-null object
home_type                    624 non-null object
short_stay                   625 non-null object
residents_council            625 non-null object
family_council               625 non-null object
accreditation                625 non-null object
additional_info              4 non-null object
city_x                       625 non-null object
postal_code_x                625 non-null object
total_inspections            625 non-null float64
5y_inspections               625 non-null float64
2y_inspections               625 non-null float64
total_complaints             621 non-null float64
5y_complaints                588 non-null float64
2y_complain

Unnamed: 0,name,address,LHIN,licensee,management,home_type,short_stay,residents_council,family_council,accreditation,additional_info,city_x,postal_code_x,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders,cleaned_name,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status,number_beds,index,facility_name,source_facility_type,odhf_facility_type,provider,unit,street_no,street_name,postal_code_y,city_y,province,source_format_str_address,CSDname,CSDuid,Pruid,latitude,longitude
0,AFTON PARK PLACE LONG TERM CARE COMMUNITY,1200 Afton Drive,Erie St. Clair,S & R Nursing Homes Ltd.,,For-Profit,No,Yes,Yes,Yes,,Sarnia,N7S6L6,67.0,34.0,9.0,29.0,15.0,3.0,28.0,13.0,5.0,5.0,2.0,,afton park place long term care community,,,,,128.0,2435,afton park place long term care community,long-term care home,Nursing and residential care facilities,Province of Ontario,,1200,afton drive,N7S6L6,sarnia,on,,Sarnia,3538030.0,35,42.976325,-82.361444
1,"ALBRIGHT GARDENS HOMES, INCORPORATED",5050 Hillside Drive,Hamilton Niagara Haldimand Brant (Hnhb),"Albright Gardens Homes, Incorporated",,Non-Profit,No,Yes,Yes,No,,Beamsville,L0R1B2,39.0,25.0,15.0,18.0,10.0,6.0,12.0,7.0,6.0,7.0,7.0,3.0,albright gardens,<5,0.0,0.0,Active,231.0,2439,albright gardens,long-term care home,Nursing and residential care facilities,Province of Ontario,,5050,hillside drive,L0R1B2,lincoln,on,,Lincoln,3526057.0,35,43.151877,-79.476598
2,ALEXANDER PLACE,329 Parkside Drive P. O. Box 50,Hamilton Niagara Haldimand Brant (Hnhb),Waterdown Long Term Care Centre Inc.,,For-Profit,Yes,Yes,Yes,Yes,,Waterdown,L0R2H0,28.0,17.0,8.0,11.0,6.0,3.0,8.0,5.0,2.0,4.0,4.0,2.0,alexander place,<5,0.0,0.0,Active,128.0,2442,alexander place,long-term care home,Nursing and residential care facilities,Province of Ontario,,329,parkside drive,L0R2H0,hamilton,on,,Hamilton,3525005.0,35,43.342345,-79.90082
3,ALGOMA MANOR NURSING HOME,145 Dawson Street,North East,Algoma Manor Nursing Home,,,Yes,Yes,Yes,Yes,,Thessalon,P0R1L0,23.0,14.0,7.0,7.0,5.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0,algoma manor nursing home,,,,,96.0,2449,algoma manor nursing home,long-term care home,Nursing and residential care facilities,Province of Ontario,,145,dawson street,P0R1L0,thessalon,on,,Thessalon,3557028.0,35,46.263462,-83.563249
4,ALGONQUIN NURSING HOME,207 Turcotte Park Road P.O. Box 270,North East,Algonquin Nursing Home Of Mattawa,Hôpital De Mattawa Hospital,Non-Profit,Yes,Yes,Yes,Yes,,Mattawa,P0H1V0,23.0,14.0,5.0,3.0,1.0,,11.0,7.0,4.0,5.0,3.0,,algonquin nursing home,,,,,73.0,2456,algonquin nursing home,long-term care home,Nursing and residential care facilities,Province of Ontario,,231,10 street,P0H1V0,mattawa,on,,Mattawa,3548021.0,35,46.313493,-78.719841


In [23]:
df_LTC_odhf.head()

Unnamed: 0,name,address,LHIN,licensee,management,home_type,short_stay,residents_council,family_council,accreditation,additional_info,city_x,postal_code_x,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders,cleaned_name,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status,number_beds,index,facility_name,source_facility_type,odhf_facility_type,provider,unit,street_no,street_name,postal_code_y,city_y,province,source_format_str_address,CSDname,CSDuid,Pruid,latitude,longitude
0,AFTON PARK PLACE LONG TERM CARE COMMUNITY,1200 Afton Drive,Erie St. Clair,S & R Nursing Homes Ltd.,,For-Profit,No,Yes,Yes,Yes,,Sarnia,N7S6L6,67.0,34.0,9.0,29.0,15.0,3.0,28.0,13.0,5.0,5.0,2.0,,afton park place long term care community,,,,,128.0,2435,afton park place long term care community,long-term care home,Nursing and residential care facilities,Province of Ontario,,1200,afton drive,N7S6L6,sarnia,on,,Sarnia,3538030.0,35,42.976325,-82.361444
1,"ALBRIGHT GARDENS HOMES, INCORPORATED",5050 Hillside Drive,Hamilton Niagara Haldimand Brant (Hnhb),"Albright Gardens Homes, Incorporated",,Non-Profit,No,Yes,Yes,No,,Beamsville,L0R1B2,39.0,25.0,15.0,18.0,10.0,6.0,12.0,7.0,6.0,7.0,7.0,3.0,albright gardens,<5,0.0,0.0,Active,231.0,2439,albright gardens,long-term care home,Nursing and residential care facilities,Province of Ontario,,5050,hillside drive,L0R1B2,lincoln,on,,Lincoln,3526057.0,35,43.151877,-79.476598
2,ALEXANDER PLACE,329 Parkside Drive P. O. Box 50,Hamilton Niagara Haldimand Brant (Hnhb),Waterdown Long Term Care Centre Inc.,,For-Profit,Yes,Yes,Yes,Yes,,Waterdown,L0R2H0,28.0,17.0,8.0,11.0,6.0,3.0,8.0,5.0,2.0,4.0,4.0,2.0,alexander place,<5,0.0,0.0,Active,128.0,2442,alexander place,long-term care home,Nursing and residential care facilities,Province of Ontario,,329,parkside drive,L0R2H0,hamilton,on,,Hamilton,3525005.0,35,43.342345,-79.90082
3,ALGOMA MANOR NURSING HOME,145 Dawson Street,North East,Algoma Manor Nursing Home,,,Yes,Yes,Yes,Yes,,Thessalon,P0R1L0,23.0,14.0,7.0,7.0,5.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0,algoma manor nursing home,,,,,96.0,2449,algoma manor nursing home,long-term care home,Nursing and residential care facilities,Province of Ontario,,145,dawson street,P0R1L0,thessalon,on,,Thessalon,3557028.0,35,46.263462,-83.563249
4,ALGONQUIN NURSING HOME,207 Turcotte Park Road P.O. Box 270,North East,Algonquin Nursing Home Of Mattawa,Hôpital De Mattawa Hospital,Non-Profit,Yes,Yes,Yes,Yes,,Mattawa,P0H1V0,23.0,14.0,5.0,3.0,1.0,,11.0,7.0,4.0,5.0,3.0,,algonquin nursing home,,,,,73.0,2456,algonquin nursing home,long-term care home,Nursing and residential care facilities,Province of Ontario,,231,10 street,P0H1V0,mattawa,on,,Mattawa,3548021.0,35,46.313493,-78.719841


In [24]:
list_col = ['licensee', 'management', 'index', 'facility_name', 'source_facility_type', 
            'odhf_facility_type', 'provider', 'unit', 'street_no', 'street_name',
            'postal_code_y','city_y', 'province', 'source_format_str_address', 'Pruid']
df_LTC_odhf = df_LTC_odhf.drop(list_col, axis = 1)
df_LTC_odhf = df_LTC_odhf.rename(columns = {'city_x':'city', 'postal_code_x': 'postal_code'})
df_LTC_odhf.head()
df_LTC_odhf.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 625 entries, 0 to 624
Data columns (total 33 columns):
name                        625 non-null object
address                     625 non-null object
LHIN                        625 non-null object
home_type                   624 non-null object
short_stay                  625 non-null object
residents_council           625 non-null object
family_council              625 non-null object
accreditation               625 non-null object
additional_info             4 non-null object
city                        625 non-null object
postal_code                 625 non-null object
total_inspections           625 non-null float64
5y_inspections              625 non-null float64
2y_inspections              625 non-null float64
total_complaints            621 non-null float64
5y_complaints               588 non-null float64
2y_complaints               536 non-null float64
total_critical              620 non-null float64
5y_critical               

In [25]:
# df_LTC_odhf['home_type'].value_counts()
# df_LTC_odhf['family_council'].value_counts()
# df_LTC_odhf['accreditation'].value_counts()
# df_LTC_odhf.cleaned_name.str.contains("hospital", na=False).value_counts()
# df_LTC_odhf[df_LTC_odhf['cleaned_name'].str.contains('hospital', regex=False, na=False)]

## Review LTC homes with missing values 

In [26]:
# Assess home with missing home_type
df_LTC_odhf[df_LTC_odhf['home_type'].isnull()] # algoma manor nursing home
# df_trim['home type'].value_counts()

Unnamed: 0,name,address,LHIN,home_type,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders,cleaned_name,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status,number_beds,CSDname,CSDuid,latitude,longitude
3,ALGOMA MANOR NURSING HOME,145 Dawson Street,North East,,Yes,Yes,Yes,Yes,,Thessalon,P0R1L0,23.0,14.0,7.0,7.0,5.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0,algoma manor nursing home,,,,,96.0,Thessalon,3557028.0,46.263462,-83.563249


In [27]:
# Fill in some of the missing values for lennox and addington county general hospital manually
# This home was removed from the general LTC database previously
# df_trim.at[311,'LHIN'] = 'South East'
# df_trim.at[311,'home type'] = 'Non-Profit'
# df_trim.at[311,'short stay'] = 'Yes'
# df_trim.at[311,'number_beds'] = '22'

# Fill in missing home type value for algoma manor
df_LTC_odhf.at[3, 'home_type'] = 'Non-Profit'

In [28]:
df_LTC_odhf.loc[df_LTC_odhf['cleaned_name'] == 'algoma manor nursing home']


Unnamed: 0,name,address,LHIN,home_type,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders,cleaned_name,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status,number_beds,CSDname,CSDuid,latitude,longitude
3,ALGOMA MANOR NURSING HOME,145 Dawson Street,North East,Non-Profit,Yes,Yes,Yes,Yes,,Thessalon,P0R1L0,23.0,14.0,7.0,7.0,5.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0,algoma manor nursing home,,,,,96.0,Thessalon,3557028.0,46.263462,-83.563249


Algoma manor nursing home: missing home type, https://www.northeasthealthline.ca/displayservice.aspx?id=90025

In [30]:
df_LTC_odhf.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 625 entries, 0 to 624
Data columns (total 33 columns):
name                        625 non-null object
address                     625 non-null object
LHIN                        625 non-null object
home_type                   625 non-null object
short_stay                  625 non-null object
residents_council           625 non-null object
family_council              625 non-null object
accreditation               625 non-null object
additional_info             4 non-null object
city                        625 non-null object
postal_code                 625 non-null object
total_inspections           625 non-null float64
5y_inspections              625 non-null float64
2y_inspections              625 non-null float64
total_complaints            621 non-null float64
5y_complaints               588 non-null float64
2y_complaints               536 non-null float64
total_critical              620 non-null float64
5y_critical               

## Export merged file


In [31]:
df_LTC_odhf.to_csv(r'merged_LTC_odhf.csv', index = False)