In [1]:
import pandas as pd

## Clean LTC data

In [2]:
# convert name and additional info columns to lowercase
df_ngan = pd.read_csv('df_final_ngan.csv')
df_ngan['cleaned_name'] = df_ngan['name'].str.lower()
df_ngan['additional_info'] = df_ngan['additional_info'].str.lower()

# adjust home names to match LTC COVID data
df_ngan["cleaned_name"].replace({"vision '74 inc.": "vision nursing home", "finlandia hoivakoti nursing home limited": "finlandia hoivakoti nursing home", "pinecrest nursing home - bobcaygeon": "pinecrest nursing home (bobcaygeon)", "residence saint- louis": "residence saint-louis", "st patrick's home": "st. patrick's home"}, inplace=True)
df_ngan.head()

Unnamed: 0,name,address,city and postal code,LHIN,licensee,management,home type,beds,short stay,residents council,...,total_inspections,quartiles_total_range,quartiles_total_rank,5y_inspections,quartiles_5y_range,quartiles_5y_rank,2y_inspections,quartiles_2y_range,quartiles_2y_rank,cleaned_name
0,AFTON PARK PLACE LONG TERM CARE COMMUNITY,1200 Afton Drive,"Sarnia, N7S6L6",Erie St. Clair,S & R Nursing Homes Ltd.,,For-Profit,Home with approximately 128 beds,No,Yes,...,67.0,"(46.0, 172.0]",3.0,34.0,"(23.0, 86.0]",3.0,9.0,"(7.0, 11.0]",2.0,afton park place long term care community
1,"ALBRIGHT GARDENS HOMES, INCORPORATED",5050 Hillside Drive,"Beamsville, L0R1B2",Hamilton Niagara Haldimand Brant (Hnhb),"Albright Gardens Homes, Incorporated",,Non-Profit,Home with approximately 231 beds,No,Yes,...,39.0,"(31.0, 46.0]",2.0,25.0,"(23.0, 86.0]",3.0,15.0,"(11.0, 44.0]",3.0,"albright gardens homes, incorporated"
2,ALEXANDER PLACE,329 Parkside Drive P. O. Box 50,"Waterdown, L0R2H0",Hamilton Niagara Haldimand Brant (Hnhb),Waterdown Long Term Care Centre Inc.,,For-Profit,Home with approximately 128 beds,Yes,Yes,...,28.0,"(21.0, 31.0]",1.0,17.0,"(16.0, 23.0]",2.0,8.0,"(7.0, 11.0]",2.0,alexander place
3,ALGOMA DISTRICT HOMES FOR THE AGED (ALGOMA MANOR),135 Dawson Street,"Thessalon, P0R1L0",North East,Board Of Management For The District Of Algoma,,,Home with approximately 108 beds,Yes,Yes,...,5.0,"(0.0, 21.0]",0.0,,,,,,,algoma district homes for the aged (algoma manor)
4,ALGOMA MANOR NURSING HOME,145 Dawson Street,"Thessalon, P0R1L0",North East,Algoma Manor Nursing Home,,,Home with approximately 96 beds,Yes,Yes,...,23.0,"(21.0, 31.0]",1.0,14.0,"(11.0, 16.0]",1.0,7.0,"(5.0, 7.0]",1.0,algoma manor nursing home


## Review records of homes that have been closed

In [3]:
# we see that a 20 homes are closed, and one home was merged
df_ngan['additional_info'].value_counts()
df_ngan.additional_info.str.contains("closed", na=False).value_counts()

False    631
True      20
Name: additional_info, dtype: int64

In [4]:
# drop rows with homes that are closed
df_ngan = df_ngan[~df_ngan.additional_info.str.contains("closed", na=False)].reset_index()
df_ngan['additional_info'].value_counts()

recipient no 346524 – sarsfield colonial home\r\ncomment - change in ownership effective february 19 2020. details transferred to recipient no 664183/ facility no nh1692\r\n    1
pavillon omer deslauriers, 37 beds, long term care services provided in french and english.                                                                                      1
datars bere, dearness home managing director                                                                                                                                     1
stutti@craigwielgardens.on.ca                                                                                                                                                    1
home was merged with mount hope long-term care centre effective 1st january, 2016. please see mount hope long-term care centre for information.                                  1
Name: additional_info, dtype: int64

## Manual Review of one home that was merged in Jan 2016

- Address to the merged home is just a general PO BOX to the licensee
- Will remove the merged home from the database

In [5]:
df_ngan[df_ngan['additional_info'].str.contains('hope', regex=False, na=False)]

Unnamed: 0,index,name,address,city and postal code,LHIN,licensee,management,home type,beds,short stay,...,total_inspections,quartiles_total_range,quartiles_total_rank,5y_inspections,quartiles_5y_range,quartiles_5y_rank,2y_inspections,quartiles_2y_range,quartiles_2y_rank,cleaned_name
487,504,"ST. JOSEPH'S HEALTH CARE, LONDON - MOUNT HOPE ...",200 College Avenue P.O. Box 5777,"London, N6A1Y1",South West,"St. Joseph's Health Care, London",,Non-Profit,Home with approximately 217 beds,Yes,...,68.0,"(46.0, 172.0]",3.0,17.0,"(16.0, 23.0]",2.0,,,,"st. joseph's health care, london - mount hope ..."


In [6]:
# show rows with information on the 2 homes that were merged
df_ngan[df_ngan['cleaned_name'].str.contains('mount hope', regex=False, na=False)]

Unnamed: 0,index,name,address,city and postal code,LHIN,licensee,management,home type,beds,short stay,...,total_inspections,quartiles_total_range,quartiles_total_rank,5y_inspections,quartiles_5y_range,quartiles_5y_rank,2y_inspections,quartiles_2y_range,quartiles_2y_rank,cleaned_name
360,373,MOUNT HOPE CENTRE FOR LONG TERM CARE,21 Grosvenor Street P.O. Box 5777,"London, N6A1Y6",South West,"St. Joseph's Health Care, London",,Non-Profit,Home with approximately 177 beds,Yes,...,87.0,"(46.0, 172.0]",3.0,44.0,"(23.0, 86.0]",3.0,15.0,"(11.0, 44.0]",3.0,mount hope centre for long term care
487,504,"ST. JOSEPH'S HEALTH CARE, LONDON - MOUNT HOPE ...",200 College Avenue P.O. Box 5777,"London, N6A1Y1",South West,"St. Joseph's Health Care, London",,Non-Profit,Home with approximately 217 beds,Yes,...,68.0,"(46.0, 172.0]",3.0,17.0,"(16.0, 23.0]",2.0,,,,"st. joseph's health care, london - mount hope ..."


In [7]:
df_ngan.iloc[487]

index                                                                  504
name                     ST. JOSEPH'S HEALTH CARE, LONDON - MOUNT HOPE ...
address                                   200 College Avenue P.O. Box 5777
city and postal code                                        London, N6A1Y1
LHIN                                                            South West
licensee                                  St. Joseph's Health Care, London
management                                                             NaN
home type                                                       Non-Profit
beds                                      Home with approximately 217 beds
short stay                                                             Yes
residents council                                                      Yes
family council                                                         Yes
accreditation                                                          Yes
additional_info          

In [8]:
# drop row with the merged home
df_ngan = df_ngan.drop(df_ngan.index[487])
# check to see it is gone
df_ngan[df_ngan['cleaned_name'].str.contains('mount hope', regex=False, na=False)]

Unnamed: 0,index,name,address,city and postal code,LHIN,licensee,management,home type,beds,short stay,...,total_inspections,quartiles_total_range,quartiles_total_rank,5y_inspections,quartiles_5y_range,quartiles_5y_rank,2y_inspections,quartiles_2y_range,quartiles_2y_rank,cleaned_name
360,373,MOUNT HOPE CENTRE FOR LONG TERM CARE,21 Grosvenor Street P.O. Box 5777,"London, N6A1Y6",South West,"St. Joseph's Health Care, London",,Non-Profit,Home with approximately 177 beds,Yes,...,87.0,"(46.0, 172.0]",3.0,44.0,"(23.0, 86.0]",3.0,15.0,"(11.0, 44.0]",3.0,mount hope centre for long term care


## Merge LTC and LTC covid data

In [9]:
df_kt = pd.read_csv('merged_ltc.csv')
df_kt['cleaned_name'] = df_kt['LTC Home'].str.lower()
df_kt.head()

Unnamed: 0.1,Unnamed: 0,LTC Home,City,Beds,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status,cleaned_name
0,0,Almonte Country Haven,Almonte,82,<5,28,13,Active,almonte country haven
1,1,Altamont Care Community,Scarborough,159,72,46,60,Active,altamont care community
2,2,Anson Place Care Centre,Hagersville,61,28,23,29,Active,anson place care centre
3,3,Arbour Creek Long-Term Care Centre,Hamilton,129,0,0,<5,Active,arbour creek long-term care centre
4,4,Avalon Retirement Centre,Orangeville,137,0,0,<5,Active,avalon retirement centre


In [10]:
a = set(df_ngan['cleaned_name'])
b = set(df_kt['cleaned_name'])
def returnNotMatches(a, b):
    return [[x for x in b if x not in a]]
returnNotMatches(a, b)

[[]]

In [11]:
df_merged = pd.merge(left=df_ngan, right=df_kt, how='outer', left_on='cleaned_name', right_on='cleaned_name')

In [12]:
df_merged.to_csv(r'df_ltc_final.csv', index = False)

## Import and prep ODHF data

In [13]:
odhf = pd.read_csv('odhf_v1.csv', engine='python')
df_on = odhf[odhf['province'].isin(['on'])].reset_index()

import unicodedata
df_on['cleaned_name'] = df_on['facility_name'].str.lower().apply(lambda val: unicodedata.normalize('NFKD', val).encode('ascii', 'ignore').decode())
df_on.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3733 entries, 0 to 3732
Data columns (total 19 columns):
level_0                      3733 non-null int64
index                        3733 non-null int64
facility_name                3733 non-null object
source_facility_type         3705 non-null object
odhf_facility_type           3728 non-null object
provider                     3733 non-null object
unit                         0 non-null object
street_no                    3122 non-null object
street_name                  3154 non-null object
postal_code                  3726 non-null object
city                         3724 non-null object
province                     3733 non-null object
source_format_str_address    271 non-null object
CSDname                      3540 non-null object
CSDuid                       3151 non-null float64
Pruid                        3733 non-null int64
latitude                     3159 non-null float64
longitude                    3159 non-null float

In [14]:
pd.set_option('display.max_colwidth', -1)
df_on[df_on['cleaned_name'].str.contains('health care london', regex=False, na=False)]

Unnamed: 0,level_0,index,facility_name,source_facility_type,odhf_facility_type,provider,unit,street_no,street_name,postal_code,city,province,source_format_str_address,CSDname,CSDuid,Pruid,latitude,longitude,cleaned_name
2315,4715,4716,parkwood hospital pain & symptom management - st. joseph's health care london,community support service,Ambulatory health care services,Province of Ontario,,800.0,commissioners road e,N6A4G5,london,on,,London,3539036.0,35,42.960038,-81.225435,parkwood hospital pain & symptom management - st. joseph's health care london
3022,5422,5423,st. joseph's health care london,mental health and addiction organization,Nursing and residential care facilities,Province of Ontario,,294.0,talbot street,N5P4E3,saint thomas,on,,St. Thomas,3534021.0,35,42.77878,-81.200235,st. joseph's health care london
3023,5423,5424,st. joseph's health care london - parkwood,hospital - site,Hospitals,Province of Ontario,,550.0,wellington road,N6C0A7,london,on,,London,3539036.0,35,42.955419,-81.225272,st. joseph's health care london - parkwood
3024,5424,5425,st. joseph's health care london - southwest centre for forensic mental health care,hospital - site,Hospitals,Province of Ontario,,401.0,sunset drive,N5R3C6,st thomas,on,,Central Elgin,3534020.0,35,42.74212,-81.190465,st. joseph's health care london - southwest centre for forensic mental health care
3025,5425,5426,st. joseph's health care london - st. joseph's hospital,hospital - corporation,Hospitals,Province of Ontario,,268.0,grosvenor street,N6A4V2,london,on,,London,3539036.0,35,43.000915,-81.254831,st. joseph's health care london - st. joseph's hospital
3049,5449,5450,st. joseph�s health care london � mount hope centre for long term care,long term care,Nursing and residential care facilities,Canadian Institute for Health Information,,,,N6A1Y6,london,on,,London,,35,,,st. josephs health care london mount hope centre for long term care
3050,5450,5451,st. joseph�s health care london � parkwood institute,"active acute hospital, long term care",Hospitals,Canadian Institute for Health Information,,,,N6C0A7,london,on,,London,,35,,,st. josephs health care london parkwood institute
3332,5732,5733,third age outreach program - st. joseph's health care london,community support service,Ambulatory health care services,Province of Ontario,,268.0,grosvenor street,N6A4V2,london,on,,London,3539036.0,35,43.001268,-81.255036,third age outreach program - st. joseph's health care london


In [15]:
home = df_on.iloc[[3049]]
home['cleaned_name']

3049    st. josephs health care london  mount hope centre for long term care
Name: cleaned_name, dtype: object

## List of homes in the LTC database that are not listed in the ODHF

In [16]:
a = set(df_on['cleaned_name'])
b = set(df_merged['cleaned_name'])
def returnNotMatches(a, b):
    return [x for x in b if x not in a]
missing_odhf = returnNotMatches(a, b)

In [17]:
len(missing_odhf)


24

In [18]:
missing_odhf.sort()
missing_odhf

['albright gardens homes, incorporated',
 'bella senior care residences inc.',
 'bon air long term care residence',
 'caressant care cobden',
 'caressant care harriston',
 'champlain long term care residence',
 'heartwood (fka versa-care cornwall)',
 'lady isabelle nursing home',
 'lancaster long term care residence',
 'malden park continuing care centre',
 'niagara long term care residence',
 'north renfrew long-term care services',
 'north shore health network – eldcap unit',
 'north shore health network – ltc unit',
 'people care centre',
 'residence saint-louis',
 'santé manitouwadge health',
 "st. joseph's health centre, guelph",
 "st. joseph's mother house (martha wing)",
 "st. joseph's villa, dundas",
 'strathcona long term care',
 'the meadows',
 'william a. "bill" george extended care facility',
 'élisabeth-bruyère residence']

## Replace cleaned names in the LTC and ODHF df

In [19]:
df_merged_names = df_merged['cleaned_name'].replace({
    'albright gardens homes, incorporated' : 'albright gardens',
    'bella senior care residences inc.':'bella senior care residences',
    'bon air long term care residence':'chartwell bon air long term care residence',
    'caressant care cobden':'caressant care - cobden',
    'caressant care harriston':'caressant care - harriston',
    'champlain long term care residence':'chartwell champlain long term care residence',
    'heartwood (fka versa-care cornwall)':'heartwood',
# lady isabelle MISSING, SEE NOTE 
    'lancaster long term care residence':'chartwell lancaster long term care residence',
# malden MISSING, SEE NOTE 
    'niagara long term care residence':'chartwell niagara long term care residence',
    'north renfrew long-term care services':'north renfrew long-term care services inc.',
    'north shore health network – eldcap unit':'north shore health network - eldcap unit' ,
    'north shore health network – ltc unit' : 'north shore health network - ltc unit',
    'people care centre':'peoplecare  stratford',
# residence saint-louis, OK
    'santé manitouwadge health':'sante manitouwadge health',
    'st. joseph\'s health centre, guelph':'st. joseph\'s health centre - guelph',
    'st. joseph\'s mother house (martha wing)':'st. josephs motherhouse martha wing',
    'st. joseph\'s villa, dundas' : "st. joseph's villa (dundas)",
# strathcona long term care, MISSING SEE NOTE
    'the meadows':'revera inc.  the meadows long term care centre',
    'william a. "bill" george extended care facility': 'william a. \'bill\' george extended care facility',
    'élisabeth-bruyère residence':'elisabeth-bruyere residence'
})

df_on_names = df_on['cleaned_name'].replace({'lisabeth-bruyre residence': 'elisabeth-bruyere residence',
                                             'rsidence saint-louis' : 'residence saint-louis'})

In [20]:
a = set(df_on_names)
b = set(df_merged_names)
def returnNotMatches(a, b):
    return [x for x in b if x not in a]
missing_odhf_names = returnNotMatches(a, b)
print(len(missing_odhf_names))
missing_odhf_names.sort()
missing_odhf_names

3


['lady isabelle nursing home',
 'malden park continuing care centre',
 'strathcona long term care']

## Manual review of discrepancies between LTC data and ODHF
### Missing from ODHF
1. lady isabelle nursing home: http://www.ladyisabelle.ca/Contact_Information.html, MISSING from ODHF
2. MALDEN PARK CONTINUING CARE CENTRE: seems to be attached to Windsor Regional Hospital which is in ODHF if we need the geolocation data
3. Strathcona Long Term Care, https://svch.ca/long-term-care/strathcona/location/, in ODHF have saugeen valley nursing center which is at the same address, https://svch.ca/saugeen-valley-nursing-centre-announces-plan-to-redevelop-87-bed-facility-in-mount-forest/#.Xr2rvRNKjfY

### Other notes:
- people care centre: listed in ODHF but no geodata, https://www.peoplecare.ca/
- sante manitouwadge health: seems to be attached to manitouwadge general hospital