In [168]:
import pandas as pd
import numpy as np

# Set options to show dfs in full
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', -1)

## Clean LTC data

In [132]:
# Convert name and additional info columns to lowercase
df_genLTC = pd.read_csv('df_final_ngan.csv')
df_genLTC['cleaned_name'] = df_genLTC['name'].str.lower()
df_genLTC['additional_info'] = df_genLTC['additional_info'].str.lower()

# Adjust names from general LTC database to match names from LTC COVID database
df_genLTC["cleaned_name"].replace({"vision '74 inc.": "vision nursing home", "finlandia hoivakoti nursing home limited": "finlandia hoivakoti nursing home", "pinecrest nursing home - bobcaygeon": "pinecrest nursing home (bobcaygeon)", "residence saint- louis": "residence saint-louis", "st patrick's home": "st. patrick's home"}, inplace=True)
# df_genLTC.head()

## Review records of homes that have been closed

In [133]:
# We see that 20 homes are closed, and one home was merged
df_genLTC['additional_info'].value_counts()
df_genLTC.additional_info.str.contains("closed", na=False).value_counts()

False    631
True     20 
Name: additional_info, dtype: int64

In [134]:
# Drop rows with homes that are closed
df_genLTC = df_genLTC[~df_genLTC.additional_info.str.contains("closed", na=False)].reset_index()

# review other additional info, there are 5 homes with additional info
df_genLTC.loc[df_genLTC['additional_info'].notnull()]


Unnamed: 0,index,name,address,city and postal code,LHIN,licensee,management,home type,beds,short stay,residents council,family council,accreditation,additional_info,city,postal code,number of beds,total_inspections,quartiles_total_range,quartiles_total_rank,5y_inspections,quartiles_5y_range,quartiles_5y_rank,2y_inspections,quartiles_2y_range,quartiles_2y_rank,cleaned_name
25,26,BENDALE ACRES,2920 Lawrence Avenue East,"Scarborough, M1P2T8",Central East,Toronto Long-term Care Homes And Services,,Municipal,Home with approximately 302 beds,Yes,Yes,Yes,Yes,"pavillon omer deslauriers, 37 beds, long term care services provided in french and english.",Scarborough,M1P2T8,302.0,35.0,"(31.0, 46.0]",2.0,17.0,"(16.0, 23.0]",2.0,10.0,"(7.0, 11.0]",2.0,bendale acres
114,118,CRAIGHOLME,221 Main Street R. R. #1,"Ailsa Craig, N0M1A0",South West,Craigwiel Gardens,,Non-Profit,Home with approximately 83 beds,Yes,Yes,Yes,No,stutti@craigwielgardens.on.ca,Ailsa Craig,N0M1A0,83.0,51.0,"(46.0, 172.0]",3.0,21.0,"(16.0, 23.0]",2.0,10.0,"(7.0, 11.0]",2.0,craigholme
121,126,DEARNESS HOME FOR SENIOR CITIZENS,710 Southdale Road East,"London, N6E1R8",South West,The Corporation Of The City Of London,,Municipal,Home with approximately 243 beds,Yes,Yes,Yes,Yes,"datars bere, dearness home managing director",London,N6E1R8,243.0,76.0,"(46.0, 172.0]",3.0,33.0,"(23.0, 86.0]",3.0,16.0,"(11.0, 44.0]",3.0,dearness home for senior citizens
453,469,SARSFIELD COLONIAL HOME,2861 Colonial Road P.O. Box 130,"Sarsfield, K0A3E0",Champlain,2629693 Ontario Inc.,Taminagi Inc. (As Manager),For-Profit,Home with approximately 46 beds,No,Yes,Yes,Yes,recipient no 346524 – sarsfield colonial home\r\ncomment - change in ownership effective february 19 2020. details transferred to recipient no 664183/ facility no nh1692\r\n,Sarsfield,K0A3E0,46.0,62.0,"(46.0, 172.0]",3.0,42.0,"(23.0, 86.0]",3.0,12.0,"(11.0, 44.0]",3.0,sarsfield colonial home
487,504,"ST. JOSEPH'S HEALTH CARE, LONDON - MOUNT HOPE CENTRE FOR LONG TERM CARE - MARIAN VILLA",200 College Avenue P.O. Box 5777,"London, N6A1Y1",South West,"St. Joseph's Health Care, London",,Non-Profit,Home with approximately 217 beds,Yes,Yes,Yes,Yes,"home was merged with mount hope long-term care centre effective 1st january, 2016. please see mount hope long-term care centre for information.",London,N6A1Y1,217.0,68.0,"(46.0, 172.0]",3.0,17.0,"(16.0, 23.0]",2.0,,,,"st. joseph's health care, london - mount hope centre for long term care - marian villa"


## Manual Review of one home that was merged in Jan 2016

- Address to the merged home is just a general PO BOX to the licensee
- Will remove the merged home from the database

In [135]:
# show rows with information on the 2 homes that were merged
df_genLTC[df_genLTC['cleaned_name'].str.contains('mount hope', regex=False, na=False)]

Unnamed: 0,index,name,address,city and postal code,LHIN,licensee,management,home type,beds,short stay,residents council,family council,accreditation,additional_info,city,postal code,number of beds,total_inspections,quartiles_total_range,quartiles_total_rank,5y_inspections,quartiles_5y_range,quartiles_5y_rank,2y_inspections,quartiles_2y_range,quartiles_2y_rank,cleaned_name
360,373,MOUNT HOPE CENTRE FOR LONG TERM CARE,21 Grosvenor Street P.O. Box 5777,"London, N6A1Y6",South West,"St. Joseph's Health Care, London",,Non-Profit,Home with approximately 177 beds,Yes,Yes,Yes,Yes,,London,N6A1Y6,177.0,87.0,"(46.0, 172.0]",3.0,44.0,"(23.0, 86.0]",3.0,15.0,"(11.0, 44.0]",3.0,mount hope centre for long term care
487,504,"ST. JOSEPH'S HEALTH CARE, LONDON - MOUNT HOPE CENTRE FOR LONG TERM CARE - MARIAN VILLA",200 College Avenue P.O. Box 5777,"London, N6A1Y1",South West,"St. Joseph's Health Care, London",,Non-Profit,Home with approximately 217 beds,Yes,Yes,Yes,Yes,"home was merged with mount hope long-term care centre effective 1st january, 2016. please see mount hope long-term care centre for information.",London,N6A1Y1,217.0,68.0,"(46.0, 172.0]",3.0,17.0,"(16.0, 23.0]",2.0,,,,"st. joseph's health care, london - mount hope centre for long term care - marian villa"


In [136]:
df_genLTC.iloc[487]

index                    504                                                                                                                                            
name                     ST. JOSEPH'S HEALTH CARE, LONDON - MOUNT HOPE CENTRE FOR LONG TERM CARE - MARIAN VILLA                                                         
address                  200 College Avenue P.O. Box 5777                                                                                                               
city and postal code     London, N6A1Y1                                                                                                                                 
LHIN                     South West                                                                                                                                     
licensee                 St. Joseph's Health Care, London                                                                                                  

In [137]:
# drop row with the merged home
df_genLTC = df_genLTC.drop(df_genLTC.index[487])

# check to see it is gone
df_genLTC[df_genLTC['cleaned_name'].str.contains('mount hope', regex=False, na=False)]

Unnamed: 0,index,name,address,city and postal code,LHIN,licensee,management,home type,beds,short stay,residents council,family council,accreditation,additional_info,city,postal code,number of beds,total_inspections,quartiles_total_range,quartiles_total_rank,5y_inspections,quartiles_5y_range,quartiles_5y_rank,2y_inspections,quartiles_2y_range,quartiles_2y_rank,cleaned_name
360,373,MOUNT HOPE CENTRE FOR LONG TERM CARE,21 Grosvenor Street P.O. Box 5777,"London, N6A1Y6",South West,"St. Joseph's Health Care, London",,Non-Profit,Home with approximately 177 beds,Yes,Yes,Yes,Yes,,London,N6A1Y6,177.0,87.0,"(46.0, 172.0]",3.0,44.0,"(23.0, 86.0]",3.0,15.0,"(11.0, 44.0]",3.0,mount hope centre for long term care


## Merge general LTC and covid LTC data

In [138]:
df_covidLTC = pd.read_csv('merged_ltc.csv')
df_covidLTC['cleaned_name'] = df_covidLTC['LTC Home'].str.lower()
# df_covidLTC.head()
# df_covidLTC.info()

In [139]:
# check that homes from COVID database are present in the general LTC homes database
a = set(df_genLTC['cleaned_name'])
b = set(df_covidLTC['cleaned_name'])
def returnNotMatches(a, b):
    return [[x for x in b if x not in a]]
returnNotMatches(a, b)

[[]]

In [140]:
# merge the LTC database with the LTC COVID database
df_merged = pd.merge(left=df_genLTC, right=df_covidLTC, how='outer', left_on='cleaned_name', right_on='cleaned_name')

In [141]:
df_merged.to_csv(r'merge_genltc_covidltc_FINAL.csv', index = False)

## Import and prep ODHF data

In [142]:
# Import Open Database of Healthcare Facilities csv
odhf = pd.read_csv('odhf_v1.csv', engine='python')

# Filter for facilities in ON
df_on = odhf[odhf['province'].isin(['on'])].reset_index()

# Clean facilities name
import unicodedata
df_on['cleaned_name'] = df_on['facility_name'].str.lower().apply(lambda val: unicodedata.normalize('NFKD', val).encode('ascii', 'ignore').decode())
# df_on.info()

In [143]:
# df_on[df_on['cleaned_name'].str.contains('health care london', regex=False, na=False)]

In [144]:
# home = df_on.iloc[[3049]]
# home['cleaned_name']

## List of homes in the LTC database that are not listed in the ODHF

In [145]:
a = set(df_on['cleaned_name'])
b = set(df_merged['cleaned_name'])
def returnNotMatches(a, b):
    return [x for x in b if x not in a]
missing_odhf = returnNotMatches(a, b)

In [146]:
len(missing_odhf)


24

In [147]:
missing_odhf.sort()
missing_odhf

['albright gardens homes, incorporated',
 'bella senior care residences inc.',
 'bon air long term care residence',
 'caressant care cobden',
 'caressant care harriston',
 'champlain long term care residence',
 'heartwood (fka versa-care cornwall)',
 'lady isabelle nursing home',
 'lancaster long term care residence',
 'malden park continuing care centre',
 'niagara long term care residence',
 'north renfrew long-term care services',
 'north shore health network – eldcap unit',
 'north shore health network – ltc unit',
 'people care centre',
 'residence saint-louis',
 'santé manitouwadge health',
 "st. joseph's health centre, guelph",
 "st. joseph's mother house (martha wing)",
 "st. joseph's villa, dundas",
 'strathcona long term care',
 'the meadows',
 'william a. "bill" george extended care facility',
 'élisabeth-bruyère residence']

## Replace cleaned names in the LTC and ODHF df

In [148]:
df_merged_names = df_merged['cleaned_name'].replace({
    'albright gardens homes, incorporated' : 'albright gardens',
    'bella senior care residences inc.':'bella senior care residences',
    'bon air long term care residence':'chartwell bon air long term care residence',
    'caressant care cobden':'caressant care - cobden',
    'caressant care harriston':'caressant care - harriston',
    'champlain long term care residence':'chartwell champlain long term care residence',
    'heartwood (fka versa-care cornwall)':'heartwood',
# lady isabelle MISSING, SEE NOTE 
    'lancaster long term care residence':'chartwell lancaster long term care residence',
# malden MISSING, SEE NOTE 
    'niagara long term care residence':'chartwell niagara long term care residence',
    'north renfrew long-term care services':'north renfrew long-term care services inc.',
    'north shore health network – eldcap unit':'north shore health network - eldcap unit' ,
    'north shore health network – ltc unit' : 'north shore health network - ltc unit',
    'people care centre':'peoplecare  stratford',
# residence saint-louis, OK
    'santé manitouwadge health':'sante manitouwadge health',
    'st. joseph\'s health centre, guelph':'st. joseph\'s health centre - guelph',
    'st. joseph\'s mother house (martha wing)':'st. josephs motherhouse martha wing',
    'st. joseph\'s villa, dundas' : "st. joseph's villa (dundas)",
# strathcona long term care, MISSING SEE NOTE
    'the meadows':'revera inc.  the meadows long term care centre',
    'william a. "bill" george extended care facility': 'william a. \'bill\' george extended care facility',
    'élisabeth-bruyère residence':'elisabeth-bruyere residence'
})

df_on_names = df_on['cleaned_name'].replace({'lisabeth-bruyre residence': 'elisabeth-bruyere residence',
                                             'rsidence saint-louis' : 'residence saint-louis'})

In [149]:
a = set(df_on_names)
b = set(df_merged_names)
def returnNotMatches(a, b):
    return [x for x in b if x not in a]
missing_odhf_names = returnNotMatches(a, b)
print(len(missing_odhf_names))
missing_odhf_names.sort()
missing_odhf_names

3


['lady isabelle nursing home',
 'malden park continuing care centre',
 'strathcona long term care']

## Manual review of discrepancies between LTC data and ODHF
### Missing from ODHF
1. lady isabelle nursing home: http://www.ladyisabelle.ca/Contact_Information.html, MISSING from ODHF, missing 2y_inspections, according to web search this home was closed by the ministry in 2014, in april 2020 there are plans to reopen/redevelop a LTC home here, will remove from this analysis
2. MALDEN PARK CONTINUING CARE CENTRE: seems to be closed, last inspection 2010, telephone disconnected
3. Strathcona Long Term Care, https://svch.ca/long-term-care/strathcona/location/, has replaced saugeen valley nursing center in ODHF which is at the same address, https://svch.ca/saugeen-valley-nursing-centre-announces-plan-to-redevelop-87-bed-facility-in-mount-forest/#.Xr2rvRNKjfY

### Other notes:
- people care centre stratford: listed in ODHF but no geodata, https://www.peoplecare.ca/, closed after flood in 2015 per websearch, tel number does not work, missing 5y_inspections
- sante manitouwadge health: called and this is part of the hospital with 9 LTC beds

### Therefore we will remove from LTC database
- Malden LTC
- people care stratford
- lady isabelle
- St. Joseph's martha wing since there are no predictors 

### We will change Saugeen Valley to Strathcona in ODHF before merging the LTC data with ODHF

In [150]:
# Show information/index on homes to remove
df_merged[df_merged['cleaned_name'].str.contains('malden', regex=False, na=False)]
df_merged[df_merged['cleaned_name'].str.contains('isabelle', regex=False, na=False)]
df_merged[df_merged['cleaned_name'].str.contains('people care', regex=False, na=False)]
df_merged[df_merged['cleaned_name'].str.contains('martha', regex=False, na=False)]

df_merged.iloc[[321, 297, 397, 490]]

# Drop row with malden LTC from merged LTC database
df_merged = df_merged.drop(df_merged.index[[321, 297, 397, 490]])

# Replace saugeen with strathcona in ODHF
df_on['cleaned_name'].replace({'saugeen valley nursing center': 'strathcona long term care'})

# # # df_on[df_on['cleaned_name'].str.contains('saugeen', regex=False, na=False)]
# # # df_on.iloc[2721]



0       (georgian, trillium & sunset manors) corporation of the county of simcoe                   
1       147 elder street inc.                                                                      
2       2 spirited people of the 1st nations                                                       
3       3 c's reintroduction centre, the                                                           
4       rsidence st. franois                                                                       
5       a better way retirement home corp.                                                         
6       a-way express courier service                                                              
7       aamjiwnaag first nation chippewas of sarnia (css)                                          
8       aamjiwnaang first nation chippewas of sarnia                                               
9       abbeylawn manor retirement home                                                            


## Merge LTC data with ODHF

In [151]:
df_all = pd.merge(left=df_merged, right=df_on, 
                  how='left', left_on='cleaned_name', 
                  right_on='cleaned_name')
df_all.info()
df_all.head()


<class 'pandas.core.frame.DataFrame'>
Int64Index: 626 entries, 0 to 625
Data columns (total 53 columns):
index_x                      626 non-null int64
name                         626 non-null object
address                      626 non-null object
city and postal code         626 non-null object
LHIN                         625 non-null object
licensee                     625 non-null object
management                   71 non-null object
home type                    624 non-null object
beds                         625 non-null object
short stay                   625 non-null object
residents council            625 non-null object
family council               625 non-null object
accreditation                625 non-null object
additional_info              4 non-null object
city_x                       626 non-null object
postal code                  626 non-null object
number of beds               625 non-null float64
total_inspections            626 non-null float64
quartiles_total

Unnamed: 0.1,index_x,name,address,city and postal code,LHIN,licensee,management,home type,beds,short stay,residents council,family council,accreditation,additional_info,city_x,postal code,number of beds,total_inspections,quartiles_total_range,quartiles_total_rank,5y_inspections,quartiles_5y_range,quartiles_5y_rank,2y_inspections,quartiles_2y_range,quartiles_2y_rank,cleaned_name,Unnamed: 0,LTC Home,City,Beds,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status,level_0,index_y,facility_name,source_facility_type,odhf_facility_type,provider,unit,street_no,street_name,postal_code,city_y,province,source_format_str_address,CSDname,CSDuid,Pruid,latitude,longitude
0,0,AFTON PARK PLACE LONG TERM CARE COMMUNITY,1200 Afton Drive,"Sarnia, N7S6L6",Erie St. Clair,S & R Nursing Homes Ltd.,,For-Profit,Home with approximately 128 beds,No,Yes,Yes,Yes,,Sarnia,N7S6L6,128.0,67.0,"(46.0, 172.0]",3.0,34.0,"(23.0, 86.0]",3.0,9.0,"(7.0, 11.0]",2.0,afton park place long term care community,,,,,,,,,2434.0,2435.0,afton park place long term care community,long-term care home,Nursing and residential care facilities,Province of Ontario,,1200.0,afton drive,N7S6L6,sarnia,on,,Sarnia,3538030.0,35.0,42.976325,-82.361444
1,1,"ALBRIGHT GARDENS HOMES, INCORPORATED",5050 Hillside Drive,"Beamsville, L0R1B2",Hamilton Niagara Haldimand Brant (Hnhb),"Albright Gardens Homes, Incorporated",,Non-Profit,Home with approximately 231 beds,No,Yes,Yes,No,,Beamsville,L0R1B2,231.0,39.0,"(31.0, 46.0]",2.0,25.0,"(23.0, 86.0]",3.0,15.0,"(11.0, 44.0]",3.0,"albright gardens homes, incorporated",0.0,"Albright Gardens Homes, Incorporated",Beamsville,231.0,,0.0,,Inactive,,,,,,,,,,,,,,,,,,
2,2,ALEXANDER PLACE,329 Parkside Drive P. O. Box 50,"Waterdown, L0R2H0",Hamilton Niagara Haldimand Brant (Hnhb),Waterdown Long Term Care Centre Inc.,,For-Profit,Home with approximately 128 beds,Yes,Yes,Yes,Yes,,Waterdown,L0R2H0,128.0,28.0,"(21.0, 31.0]",1.0,17.0,"(16.0, 23.0]",2.0,8.0,"(7.0, 11.0]",2.0,alexander place,,,,,,,,,2441.0,2442.0,alexander place,long-term care home,Nursing and residential care facilities,Province of Ontario,,329.0,parkside drive,L0R2H0,hamilton,on,,Hamilton,3525005.0,35.0,43.342345,-79.90082
3,4,ALGOMA MANOR NURSING HOME,145 Dawson Street,"Thessalon, P0R1L0",North East,Algoma Manor Nursing Home,,,Home with approximately 96 beds,Yes,Yes,Yes,Yes,,Thessalon,P0R1L0,96.0,23.0,"(21.0, 31.0]",1.0,14.0,"(11.0, 16.0]",1.0,7.0,"(5.0, 7.0]",1.0,algoma manor nursing home,,,,,,,,,2448.0,2449.0,algoma manor nursing home,long-term care home,Nursing and residential care facilities,Province of Ontario,,145.0,dawson street,P0R1L0,thessalon,on,,Thessalon,3557028.0,35.0,46.263462,-83.563249
4,5,ALGONQUIN NURSING HOME,207 Turcotte Park Road P.O. Box 270,"Mattawa, P0H1V0",North East,Algonquin Nursing Home Of Mattawa,Hôpital De Mattawa Hospital,Non-Profit,Home with approximately 73 beds,Yes,Yes,Yes,Yes,,Mattawa,P0H1V0,73.0,23.0,"(21.0, 31.0]",1.0,14.0,"(11.0, 16.0]",1.0,5.0,"(0.0, 5.0]",0.0,algonquin nursing home,,,,,,,,,2455.0,2456.0,algonquin nursing home,long-term care home,Nursing and residential care facilities,Province of Ontario,,231.0,10 street,P0H1V0,mattawa,on,,Mattawa,3548021.0,35.0,46.313493,-78.719841


In [152]:
# Check that the number of beds from the general LTC database agree with number of beds from the COVID LTC database
df_all2 = df_all[df_all.Beds.notnull()]
# df_all2[['number of beds', 'Beds']]

# There are 19 homes where the number of beds in the 2 LTC databases do not agree
df_all2['bed_match'] = np.where((df_all2['number of beds'] == df_all2['Beds']), 'ok', 'no_match')
df_all2['bed_match'].value_counts()
# df_all2[['cleaned_name', 'number of beds', 'Beds']]





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


ok          225
no_match    19 
Name: bed_match, dtype: int64

In [153]:
# review homes where value for number of beds differ between the databases
df_all3 = df_all2[df_all2['bed_match'] == 'no_match']
df_all3['diff_beds'] = df_all3['number of beds'] - df_all3['Beds']
df_all3[['cleaned_name', 'number of beds', 'Beds', 'diff_beds']]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,cleaned_name,number of beds,Beds,diff_beds
51,camilla care community,237.0,236.0,1.0
79,centre d'accueil roger seguin,110.0,113.0,-3.0
89,chartwell pine grove long term care residence,101.0,96.0,5.0
95,chartwell westmount long term care residence,161.0,160.0,1.0
96,chartwell white eagle long term care residence,57.0,56.0,1.0
147,elmwood place,78.0,90.0,-12.0
183,extendicare timmins,121.0,180.0,-59.0
226,glen hill strathaven,199.0,210.0,-11.0
231,golden years nursing home,90.0,88.0,2.0
279,ioof seniors home,188.0,182.0,6.0


## Manual review of some beds numbers discrepancies
- For kensington village, st. patrick's home, mount hope centre for long term care	
    - websites support the COVID LTC values
- Therefore where there is a discrepancy in the number of beds, will take the number from the covid LTC database

In [154]:
# create a new column with number of beds as defined above
df_all['number_beds'] = np.nan
df_all['number_beds'] = np.where((df_all.Beds.isnull()), df_all['number of beds'], df_all['Beds'])
df_all.iloc[[620, 499]]

Unnamed: 0.1,index_x,name,address,city and postal code,LHIN,licensee,management,home type,beds,short stay,residents council,family council,accreditation,additional_info,city_x,postal code,number of beds,total_inspections,quartiles_total_range,quartiles_total_rank,5y_inspections,quartiles_5y_range,quartiles_5y_rank,2y_inspections,quartiles_2y_range,quartiles_2y_rank,cleaned_name,Unnamed: 0,LTC Home,City,Beds,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status,level_0,index_y,facility_name,source_facility_type,odhf_facility_type,provider,unit,street_no,street_name,postal_code,city_y,province,source_format_str_address,CSDname,CSDuid,Pruid,latitude,longitude,number_beds
620,645,YEE HONG CENTRE - MARKHAM,2780 Bur Oak Avenue,"Markham, L6B1C9",Central,Yee Hong Centre For Geriatric Care,,Non-Profit,Home with approximately 200 beds,Yes,Yes,Yes,Yes,,Markham,L6B1C9,200.0,12.0,"(0.0, 21.0]",0.0,8.0,"(0.0, 11.0]",0.0,3.0,"(0.0, 5.0]",0.0,yee hong centre - markham,62.0,Yee Hong Centre - Markham,Markham,200.0,,0,,Inactive,6108.0,6109.0,yee hong centre - markham,community support service,Ambulatory health care services,Province of Ontario,,2780,bur oak avenue,L6B1C9,markham,on,,Markham,3519036.0,35.0,43.894277,-79.231158,200.0
499,521,SUMAC LODGE,1464 Blackwell Road,"Sarnia, N7S5M4",Erie St. Clair,Revera Long Term Care Inc.,,For-Profit,Home with approximately 100 beds,No,Yes,Yes,Yes,,Sarnia,N7S5M4,100.0,60.0,"(46.0, 172.0]",3.0,25.0,"(23.0, 86.0]",3.0,10.0,"(7.0, 11.0]",2.0,sumac lodge,53.0,Sumac Lodge,Sarnia,100.0,,0,,Inactive,5501.0,5502.0,sumac lodge,long-term care home,Nursing and residential care facilities,Province of Ontario,,1464,blackwell road,N7S5M4,sarnia,on,,Sarnia,3538030.0,35.0,43.01032,-82.346716,100.0


In [155]:
list_col = ['index_x', 'name', 'city and postal code', 'licensee', 'management',
            'beds', 'number of beds', 'additional_info', 'Unnamed: 0', 'LTC Home',
            'City','Beds','level_0','index_y','facility_name', 'source_facility_type', 
            'odhf_facility_type', 'provider', 'unit', 'street_no', 'street_name',
            'postal_code','city_y', 'province', 'source_format_str_address', 'Pruid'
            ]
df_trim = df_all.drop(list_col, axis = 1)
df_trim = df_trim.rename(columns = {'city_x':'city'})
df_trim.head()
df_trim.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 626 entries, 0 to 625
Data columns (total 28 columns):
address                     626 non-null object
LHIN                        625 non-null object
home type                   624 non-null object
short stay                  625 non-null object
residents council           625 non-null object
family council              625 non-null object
accreditation               625 non-null object
city                        626 non-null object
postal code                 626 non-null object
total_inspections           626 non-null float64
quartiles_total_range       626 non-null object
quartiles_total_rank        626 non-null float64
5y_inspections              626 non-null float64
quartiles_5y_range          626 non-null object
quartiles_5y_rank           626 non-null float64
2y_inspections              626 non-null float64
quartiles_2y_range          626 non-null object
quartiles_2y_rank           626 non-null float64
cleaned_name             

In [156]:
df_trim['home type'].value_counts()
df_trim['home type'].value_counts()
df_trim['family council'].value_counts()
df_trim['accreditation'].value_counts()
df_trim.cleaned_name.str.contains("hospital", na=False).value_counts()
df_trim[df_trim['cleaned_name'].str.contains('hospital', regex=False, na=False)]

Unnamed: 0,address,LHIN,home type,short stay,residents council,family council,accreditation,city,postal code,total_inspections,quartiles_total_range,quartiles_total_rank,5y_inspections,quartiles_5y_range,quartiles_5y_rank,2y_inspections,quartiles_2y_range,quartiles_2y_rank,cleaned_name,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status,CSDname,CSDuid,latitude,longitude,number_beds
11,120 Dorothy Street,North West,Non-Profit,No,No,Yes,Yes,Atikokan,P0T1C0,11.0,"(0.0, 21.0]",0.0,7.0,"(0.0, 11.0]",0.0,4.0,"(0.0, 5.0]",0.0,atikokan general hospital,,,,,Atikokan,3559001.0,48.754155,-91.597609,26.0
151,825 Mckinnon Drive,North East,Non-Profit,No,Yes,Yes,Yes,Espanola,P5E1R4,10.0,"(0.0, 21.0]",0.0,6.0,"(0.0, 11.0]",0.0,3.0,"(0.0, 5.0]",0.0,espanola general hospital (operating as espanola nursing home-eldcap),,,,,Espanola,3552026.0,46.248295,-81.783171,30.0
152,825 Mckinnon Drive,North East,Non-Profit,No,Yes,Yes,Yes,Espanola,P5E1R4,16.0,"(0.0, 21.0]",0.0,12.0,"(11.0, 16.0]",1.0,4.0,"(0.0, 5.0]",0.0,espanola general hospital (operating as espanola nursing home-ltc),,,,,Espanola,3552026.0,46.248452,-81.780832,32.0
222,500 Hogarth Avenue West,North West,Non-Profit,No,Yes,Yes,Yes,Geraldton,P0T1M0,13.0,"(0.0, 21.0]",0.0,9.0,"(0.0, 11.0]",0.0,2.0,"(0.0, 5.0]",0.0,geraldton district hospital,,,,,Greenstone,3558075.0,49.722694,-86.954712,19.0
270,278 Front Street P.O. Box 190,North East,Non-Profit,No,No,No,Yes,Hornepayne,P0M1Z0,11.0,"(0.0, 21.0]",0.0,7.0,"(0.0, 11.0]",0.0,4.0,"(0.0, 5.0]",0.0,hornepayne community hospital,,,,,Hornepayne,3557096.0,49.212827,-84.774059,12.0
284,2675 Concession Road P. O. Bag 2007,Champlain,Non-Profit,Yes,Yes,Yes,Yes,Kemptville,K0G1J0,13.0,"(0.0, 21.0]",0.0,9.0,"(0.0, 11.0]",0.0,4.0,"(0.0, 5.0]",0.0,kemptville district hospital,,,,,North Grenville,3507065.0,45.009918,-75.641348,12.0
311,8 Richmond Park Drive,,,,,,,Napanee,K7R2Z4,9.0,"(0.0, 21.0]",0.0,7.0,"(0.0, 11.0]",0.0,2.0,"(0.0, 5.0]",0.0,lennox and addington county general hospital,,,,,Greater Napanee,3511015.0,44.240025,-76.967029,
362,155 Ontario Street,Hamilton Niagara Haldimand Brant (Hnhb),Non-Profit,No,Yes,No,Yes,St. Catharines,L2R5K3,30.0,"(21.0, 31.0]",1.0,13.0,"(11.0, 16.0]",1.0,5.0,"(0.0, 5.0]",0.0,"niagara health system, welland hospital site, extended care unit",,,,,Welland,3526032.0,42.979882,-79.248564,115.0
365,125 Hogan Road P O Box 37,North West,Non-Profit,No,Yes,No,Yes,Nipigon,P0T2J0,13.0,"(0.0, 21.0]",0.0,9.0,"(0.0, 11.0]",0.0,5.0,"(0.0, 5.0]",0.0,nipigon district memorial hospital,,,,,Nipigon,3558044.0,49.015575,-88.275905,15.0
465,107 Kelly Road P.O. Box 219,North East,Non-Profit,No,Yes,No,Yes,Smooth Rock Falls,P0L2B0,16.0,"(0.0, 21.0]",0.0,9.0,"(0.0, 11.0]",0.0,4.0,"(0.0, 5.0]",0.0,smooth rock falls hospital,,,,,Smooth Rock Falls,3556048.0,49.272285,-81.610265,20.0


## Review LTC homes with missing values 

In [157]:
df_trim[df_trim['total_inspections'].isnull()]
df_trim.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 626 entries, 0 to 625
Data columns (total 28 columns):
address                     626 non-null object
LHIN                        625 non-null object
home type                   624 non-null object
short stay                  625 non-null object
residents council           625 non-null object
family council              625 non-null object
accreditation               625 non-null object
city                        626 non-null object
postal code                 626 non-null object
total_inspections           626 non-null float64
quartiles_total_range       626 non-null object
quartiles_total_rank        626 non-null float64
5y_inspections              626 non-null float64
quartiles_5y_range          626 non-null object
quartiles_5y_rank           626 non-null float64
2y_inspections              626 non-null float64
quartiles_2y_range          626 non-null object
quartiles_2y_rank           626 non-null float64
cleaned_name             

In [158]:
# Assess home with missing accreditation, beds, all profile information etc.
df_trim[df_trim['accreditation'].isnull()] # lennox and addington county general hospital

# Assess home with missing home_type
df_trim[df_trim['home type'].isnull()] # algoma manor nursing home
df_trim['home type'].value_counts()

For-Profit    354
Non-Profit    170
Municipal     100
Name: home type, dtype: int64

In [159]:
# Fill in some of the missing values for lennox and addington county general hospital manually
df_trim.at[311,'LHIN'] = 'South East'
df_trim.at[311,'home type'] = 'Non-Profit'
df_trim.at[311,'short stay'] = 'Yes'
df_trim.at[311,'number_beds'] = '22'
df_trim.at[3,'home type'] = 'Non-Profit'

In [160]:
df_trim.loc[df_trim['cleaned_name'] == 'algoma manor nursing home']
df_trim.loc[df_trim['cleaned_name'] == 'lennox and addington county general hospital']

Unnamed: 0,address,LHIN,home type,short stay,residents council,family council,accreditation,city,postal code,total_inspections,quartiles_total_range,quartiles_total_rank,5y_inspections,quartiles_5y_range,quartiles_5y_rank,2y_inspections,quartiles_2y_range,quartiles_2y_rank,cleaned_name,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status,CSDname,CSDuid,latitude,longitude,number_beds
311,8 Richmond Park Drive,South East,Non-Profit,Yes,,,,Napanee,K7R2Z4,9.0,"(0.0, 21.0]",0.0,7.0,"(0.0, 11.0]",0.0,2.0,"(0.0, 5.0]",0.0,lennox and addington county general hospital,,,,,Greater Napanee,3511015.0,44.240025,-76.967029,22.0


- lennox and addington county general hospital: missing all profile information (eg. LHIN, accreditation etc. ), https://www.southeasthealthline.ca/displayService.aspx?id=151718, 312, 22 beds convalescent (90 days) and resident long term care at a hospital, last inspection 2019, publically funded, LHIN South East
- algoma manor nursing home: missing home type, https://www.northeasthealthline.ca/displayservice.aspx?id=90025

## Import and prep quality data
### 1. Import and partition the data

In [306]:
# Import ltc quality data
qual = pd.read_csv('hqo-2020-quality.csv', engine = 'python')
qual = qual.reset_index()
qual.rename(columns={'level_0':'home_name',
                     'level_1':'Placements for referrals from all prior locations (days)', 
                     'level_2':'Placements for referrals from community (days)', 
                     'level_3':'Placements for referrals from hospital (days)', 
                     'level_4':'Antipsychotic Medication Use (%)', 
                     'level_5':'Pressure Ulcers (%)', 
                     'level_6':'Falls (%)', 
                     'level_7':'Physical Restraints Use (%)', 
                     'level_8':'Depression (%)', 
                     'level_9':'Pain (%)'},inplace=True)

# Drop empty Home Name column
qual.drop(['Home Name'], axis = 1, inplace = True)

# Drop rows without data
qual[qual.isnull().any(axis=1)] # Rows 624 to 670 do not have data
qual = qual.drop(qual.index[range(624, 671)])

# Drop row with Ontario average data
qual = qual.drop(qual.index[0])

# Clean home name 
qual['cleaned_home_name'] = qual['home_name'].str.lower().apply(lambda val: unicodedata.normalize('NFKD', val).encode('ascii', 'ignore').decode())
qual['cleaned_home_name'] = qual['cleaned_home_name'].map(lambda x: x.lstrip('0123456789'))
qual['cleaned_home_name'] = qual['cleaned_home_name'].str.replace(' ltc ',' long term care ')

# Create 2dfs, one where cleaned_home_name contains 0 or 1 or more hyphens
qual0 = qual[~qual['cleaned_home_name'].str.contains(' - ', regex=False, na=False)]
qual1 = qual[qual['cleaned_home_name'].str.contains(' - ', regex=False, na=False)]

print(len(qual0))
print(len(qual1))

# Split the home name and create a new column with what comes after the hyphen
qual1['name1'] = qual['cleaned_home_name'].str.split(' - ', 1).str[1]

qual1 = qual1.reset_index()

# # There are duplicated values in qual1
# qual1['name1'].value_counts()
# # maple view 2, scarborough 2, mississauga 2

# # There are no duplicated values in qual0
# qual0['cleaned_home_name'].value_counts()

# qual.info()
# qual.head()
# qual.tail()
# df_trim.info()



36
587


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [308]:
# Review the duplicated values in qual1
qual1[qual1['name1'].str.contains('mississauga', regex=False, na=False)]

# for each in list_dup:
#     qual1.at[each, 'name1'] = qual1.at[each, 'cleaned_home_name']


Unnamed: 0,index,home_name,Placements for referrals from all prior locations (days),Placements for referrals from community (days),Placements for referrals from hospital (days),Antipsychotic Medication Use (%),Pressure Ulcers (%),Falls (%),Physical Restraints Use (%),Depression (%),Pain (%),cleaned_home_name,name1
194,202,Extendicare (Canada) Inc. - Mississauga,327,327.0,LV,21.6,1.5,17.5,0.0,8.6,2.1,extendicare (canada) inc. - mississauga,extendicare (canada) inc. - mississauga
291,312,Mississauga LTC Facility Inc. - Mississauga LTC Facility,80,87.5,49.5,30.3,6.6,16.8,1.8,10.5,4.0,mississauga long term care facility inc. - mississauga long term care facility,mississauga long term care facility
584,621,Yee Hong Centre For Geriatric Care - Mississauga,807,839.5,LV,14.2,1.7,10.4,1.6,9.3,0.8,yee hong centre for geriatric care - mississauga,yee hong centre for geriatric care - mississauga


### 2. Match names from quality to main

In [309]:
# 27 homes in qual0 are also found in df_trim, 9 not found
print(qual0['cleaned_home_name'].isin(df_trim['cleaned_name']).value_counts())
qual0['matched'] = qual0['cleaned_home_name'].isin(df_trim['cleaned_name'])
mask = qual0['matched'] == False
in_qual0 = qual0[mask]
in_qual0


True     27
False    9 
Name: cleaned_home_name, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,home_name,Placements for referrals from all prior locations (days),Placements for referrals from community (days),Placements for referrals from hospital (days),Antipsychotic Medication Use (%),Pressure Ulcers (%),Falls (%),Physical Restraints Use (%),Depression (%),Pain (%),cleaned_home_name,matched
10,Albright Gardens Homes Inc.,342,331,436.5,26.0,3.8,19.5,5.4,34.8,2.3,albright gardens homes inc.,False
68,Centre d'accueil Roger-Séguin,112,109,153.5,11.5,1.3,16.3,6.1,30.0,0.3,centre d'accueil roger-seguin,False
164,Elm Grove Living Centre Inc.,131,139,LV,28.8,2.0,13.6,0.0,6.5,0.2,elm grove living centre inc.,False
250,Hellenic Care For Seniors (Toronto) Inc.,345,345,NR,8.7,0.6,3.8,0.2,8.4,0.6,hellenic care for seniors (toronto) inc.,False
291,Manitouwadge General Hospital,LV,NR,LV,43.6,2.4,9.7,ST,19.7,3.9,manitouwadge general hospital,False
325,North Renfrew LTC Services Inc.,LV,LV,NR,6.7,0.0,13.9,11.1,38.1,13.0,north renfrew long term care services inc.,False
524,Southbridge Care Home,23,30.5,LV,16.0,1.3,18.2,0.0,27.6,3.4,southbridge care home,False
612,Vision '74 Inc.,203,203,LV,12.8,2.6,15.4,4.3,21.2,2.5,vision '74 inc.,False
616,Wildwood Care Centre Inc.,93,93,LV,9.0,2.4,17.2,3.9,30.2,2.2,wildwood care centre inc.,False


In [211]:
from fuzzywuzzy import process

# Match home names from main with cleaned_home_name from the quality data
in_qual0_list = list()
in_match0_list = list()

for each in in_qual0['cleaned_home_name']:
    in_qual0_list.append(each)
    highest = process.extractOne(each, df_trim['cleaned_name'])
    in_match0_list.append(highest)

dictionary0 = dict(zip(in_qual0_list, in_match0_list))
dictionary0

# Review matches with the full cleaned name in the quality data
df_dictionary0 = pd.DataFrame.from_dict(dictionary0)
df_dictionary0[0].replace({"espanola general hospital (operating as espanola nursing home-eldcap)": "santé manitouwadge health",
                           "bella senior care residences inc.": "vision nursing home"}, inplace=True)

# Will remove southbridge care home as can not find a match in main
df_dictionary0 = df_dictionary0.drop('southbridge care home')

# Drop unneeded columns
df_dictionary0.drop([1,2], axis = 1, inplace = True)

# Convert back to a dictionary
dictionary0_match = df_dictionary0.to_dict()
dictionary0_match = dictionary0_match[0]
dictionary0_match



{'albright gardens homes inc.': 'albright gardens homes, incorporated',
 "centre d'accueil roger-seguin": "centre d'accueil roger seguin",
 'elm grove living centre inc.': 'elm grove living centre',
 'hellenic care for seniors (toronto) inc.': 'hellenic care for seniors (toronto)',
 'manitouwadge general hospital': 'santé manitouwadge health',
 'north renfrew long term care services inc.': 'north renfrew long-term care services',
 "vision '74 inc.": 'vision nursing home',
 'wildwood care centre inc.': 'wildwood care centre'}

In [310]:
# 454 homes in qual1 are also found in df_trim, 133 not found
print(qual1['name1'].isin(df_trim['cleaned_name']).value_counts())
qual1['matched'] = qual1['name1'].isin(df_trim['cleaned_name'])
mask = qual1['matched'] == False
in_qual1 = qual1[mask]
in_qual1.info()

True     454
False    133
Name: name1, dtype: int64
<class 'pandas.core.frame.DataFrame'>
Int64Index: 133 entries, 1 to 586
Data columns (total 14 columns):
index                                                       133 non-null int64
home_name                                                   133 non-null object
Placements for referrals from all prior locations (days)    133 non-null object
Placements for referrals from community (days)              133 non-null object
Placements for referrals from hospital (days)               133 non-null object
Antipsychotic Medication Use (%)                            133 non-null object
Pressure Ulcers (%)                                         133 non-null object
Falls (%)                                                   133 non-null object
Physical Restraints Use (%)                                 133 non-null object
Depression (%)                                              133 non-null object
Pain (%)                                    

In [332]:
# # Fizzy match home names from main with name1 from the quality data
# in_qual1_list = list()
# in_match1_list = list()

# for each in in_qual1['name1']:
#     in_qual1_list.append(each)
#     highest = process.extractOne(each, df_trim['cleaned_name'])
#     in_match1_list.append(highest)

# dictionary1 = dict(zip(in_qual1_list, in_match1_list))
# dictionary1

# Review matches with the full cleaned name in the quality data
df_dictionary1 = pd.DataFrame.from_dict(dictionary1, orient='index')
df_dictionary1 = df_dictionary1.reset_index()

# Drop unneeded columns
df_dictionary1.drop([1,2], axis = 1, inplace = True)

# Remove homes without a match in main
df_dictionary1 = df_dictionary1.drop(df_dictionary1.index[[7, 34, 82, 98]])
'belcrest nursing homes ltd.',
'drs. paul and john rekai centre', 
'odd fellow and rebekah home',
'veterans beds'

df_dictionary1 = df_dictionary1.reset_index()
df_dictionary1.drop(['level_0'], axis = 1, inplace = True)
df_dictionary1

# Replace values for incorrect fuzzy matches
df_dictionary1.loc[28, 0] = "chartwell elmira long term care residence"
df_dictionary1.loc[39, 0] = "country village homes - woodslee"
df_dictionary1.loc[52, 0] = "extendicare london"
df_dictionary1.loc[53, 0] = "extendicare maple view of sault ste. marie"
df_dictionary1.loc[55, 0] = "extendicare mississauga" 
df_dictionary1.loc[61, 0] = "pinecrest (plantagenet)"
df_dictionary1.loc[65, 0] = "extendicare scarborough"
df_dictionary1.loc[78, 0] = "hellenic home - scarborough"
df_dictionary1.loc[79, 0] = "hilltop manor nursing home (merrickville)"
df_dictionary1.loc[82, 0] = "meaford long term care - a peoplecare community"
df_dictionary1.loc[85, 0] = "victoria gardens long term care"
df_dictionary1.loc[87, 0] = "north shore health network – ltc unit"
df_dictionary1.loc[91, 0] = "peoplecare a.r. goudie kitchener"
df_dictionary1.loc[94, 0] = "the perley and rideau veterans' health centre"
df_dictionary1.loc[95, 0] = "pleasant manor retirement village"
df_dictionary1.loc[97, 0] = "errinrung long term care home"
df_dictionary1.loc[98, 0] = "queensway long term care home"
df_dictionary1.loc[99, 0] = "regency long term care home"
df_dictionary1.loc[100, 0] = "shelburne long term care home"
df_dictionary1.loc[101, 0] = "southbridge lakehead"
df_dictionary1.loc[102, 0] = "southbridge pinewood"
df_dictionary1.loc[103, 0] = "rose of sharon korean long term care"
df_dictionary1.loc[104, 0] = "strathcona long term care"
df_dictionary1.loc[113, 0] = "the elliott long term care residence"
df_dictionary1.loc[114, 0] = "parkview home long-term care"
df_dictionary1.loc[117, 0] = "vera m. davis community care centre"
df_dictionary1.loc[118, 0] = "glen hill marnwood"
df_dictionary1.loc[124, 0] = "woodingford lodge - woodstock"
df_dictionary1.loc[126, 0] = "yee hong centre - mississauga"

df_dictionary1 = df_dictionary1.set_index('index')
df_dictionary1

# Convert back to a dictionary
dictionary1_match = df_dictionary1.to_dict()
dictionary1_match = dictionary1_match[0]
dictionary1_match



{'of arbour heights': 'arbour heights',
 'nipissing manor nursing care centre': 'nipissing manor nursing care center',
 'valley view residence': 'valleyview residence',
 'algonquin nursing home of mattawa ltd.': 'algonquin nursing home',
 'centennial place long term care home': 'centennial place long-term care home',
 'moira place long term care home': 'moira place long-term care home',
 'bayhaven nursing home': 'bay haven nursing home',
 'bluewater rest home': 'blue water rest home',
 'pinecrest': 'pinecrest (kenora)',
 'brucelea haven long term care home': 'brucelea haven long term care home - corporation of the county of bruce',
 'elisabeth brueyre residence': 'élisabeth-bruyère residence',
 'arthur nursing home': 'caressant care arthur nursing home',
 'bourget': 'caressant care bourget',
 'cobden': 'caressant care cobden',
 'courtland': 'caressant care courtland',
 'fergus nursing home': 'caressant care fergus nursing home',
 'harriston': 'caressant care harriston',
 'lindsay nursi

### Manual review
- "perley and rideau veterans' health centre - standard beds" and "perley and rideau veterans' health centre - veterans beds" listed as one home in main
    - "the perley and rideau veterans' health centre"
    - some of the veterans quality data is missing so will keep the standard
- In quality data there was one home where nothing came after the hyphen, rose of sharon (ontario) retirement community - 

In [333]:
# Check for duplicates in dictionary1
dictionary1_keys = []
dictionary1_values = []
for k,v in dictionary1_match.items():
    dictionary1_keys.append(k)
    dictionary1_values.append(v)
# dictionary1_keys[0:5]
# dictionary1_values[0:5]

# There are duplicate values in dictionary1
print(len(dictionary1_values))
print(len(set(dictionary1_values)))

129
129
129
129


In [None]:
# Combine the 2 dictionaries
# len(dictionary0_match) + len(dictionary1_match)
dictionary_match = {**dictionary0_match, **dictionary1_match}
print(len(dictionary_match))
print(len(set(dictionary_match)))

In [396]:
qual_full = pd.concat([qual0, qual1])

# Final changes to quality data
qual_full["name1"].replace({'odd fellow and rebekah home': 'ioof seniors home'}, inplace=True)

# qual_full.info()
# qual_full.head()

In [397]:
# # Code to investigate homes in quality
# qual_full[qual_full['name1'].str.contains('belmont', regex=False, na=False)]['name1']

### Merge quality data with main data

In [398]:
df_all = pd.merge(right=df_trim, left=qual_full, 
                  how='outer', right_on='cleaned_name', 
                  left_on='name1')

df_all.info()
df_all

<class 'pandas.core.frame.DataFrame'>
Int64Index: 630 entries, 0 to 629
Data columns (total 41 columns):
home_name                                                   623 non-null object
Placements for referrals from all prior locations (days)    623 non-null object
Placements for referrals from community (days)              623 non-null object
Placements for referrals from hospital (days)               623 non-null object
Antipsychotic Medication Use (%)                            623 non-null object
Pressure Ulcers (%)                                         623 non-null object
Falls (%)                                                   623 non-null object
Physical Restraints Use (%)                                 623 non-null object
Depression (%)                                              623 non-null object
Pain (%)                                                    623 non-null object
cleaned_home_name                                           623 non-null object
matched        

Unnamed: 0,home_name,Placements for referrals from all prior locations (days),Placements for referrals from community (days),Placements for referrals from hospital (days),Antipsychotic Medication Use (%),Pressure Ulcers (%),Falls (%),Physical Restraints Use (%),Depression (%),Pain (%),cleaned_home_name,matched,name1,address,LHIN,home type,short stay,residents council,family council,accreditation,city,postal code,total_inspections,quartiles_total_range,quartiles_total_rank,5y_inspections,quartiles_5y_range,quartiles_5y_rank,2y_inspections,quartiles_2y_range,quartiles_2y_rank,cleaned_name,Confirmed Resident Cases,Resident Deaths,Confirmed Staff Cases,Status,CSDname,CSDuid,latitude,longitude,number_beds
0,Albright Gardens Homes Inc.,342,331,436.5,26.0,3.8,19.5,5.4,34.8,2.3,"albright gardens homes, incorporated",False,"albright gardens homes, incorporated",5050 Hillside Drive,Hamilton Niagara Haldimand Brant (Hnhb),Non-Profit,No,Yes,Yes,No,Beamsville,L0R1B2,39.0,"(31.0, 46.0]",2.0,25.0,"(23.0, 86.0]",3.0,15.0,"(11.0, 44.0]",3.0,"albright gardens homes, incorporated",,0,,Inactive,,,,,231.0
1,Atikokan General Hospital,261,LV,261,41.4,8.4,14.2,9.0,42.4,11.2,atikokan general hospital,True,atikokan general hospital,120 Dorothy Street,North West,Non-Profit,No,No,Yes,Yes,Atikokan,P0T1C0,11.0,"(0.0, 21.0]",0.0,7.0,"(0.0, 11.0]",0.0,4.0,"(0.0, 5.0]",0.0,atikokan general hospital,,,,,Atikokan,3559001.0,48.754155,-91.597609,26.0
2,Bella Senior Care Residences Inc.,91,100,29,29.2,4.5,20.9,2.2,13.0,7.4,bella senior care residences inc.,True,bella senior care residences inc.,8720 Willoughby Drive,Hamilton Niagara Haldimand Brant (Hnhb),For-Profit,No,Yes,Yes,Yes,Niagara Falls,L2G7X3,66.0,"(46.0, 172.0]",3.0,45.0,"(23.0, 86.0]",3.0,12.0,"(11.0, 44.0]",3.0,bella senior care residences inc.,,,,,,,,,161.0
3,Berkshire Care Centre,32.5,34,26,21.2,3.9,15.8,6.4,16.8,3.8,berkshire care centre,True,berkshire care centre,350 Dougall Avenue,Erie St. Clair,For-Profit,No,Yes,Yes,Yes,Windsor,N9A4P4,172.0,"(46.0, 172.0]",3.0,72.0,"(23.0, 86.0]",3.0,25.0,"(11.0, 44.0]",3.0,berkshire care centre,0,0,<5,Active,Windsor,3537039.0,42.316316,-83.04162,231.0
4,Centre d'accueil Roger-Séguin,112,109,153.5,11.5,1.3,16.3,6.1,30.0,0.3,centre d'accueil roger seguin,False,centre d'accueil roger seguin,435 Lemay Street,Champlain,Non-Profit,Yes,Yes,Yes,No,Clarence Creek,K0A1N0,51.0,"(46.0, 172.0]",3.0,30.0,"(23.0, 86.0]",3.0,20.0,"(11.0, 44.0]",3.0,centre d'accueil roger seguin,,0,,Inactive,Clarence-Rockland,3502036.0,45.510257,-75.216169,113.0
5,Copernicus Lodge,275,279,LV,29.8,2.1,10.5,4.9,23.0,2.1,copernicus lodge,True,copernicus lodge,66 Roncesvalles Avenue,Toronto Central,Non-Profit,Yes,Yes,Yes,No,Toronto,M6R3A7,24.0,"(21.0, 31.0]",1.0,18.0,"(16.0, 23.0]",2.0,9.0,"(7.0, 11.0]",2.0,copernicus lodge,0,0,<5,Active,Toronto,3520005.0,43.640208,-79.447311,228.0
6,Elm Grove Living Centre Inc.,131,139,LV,28.8,2.0,13.6,0.0,6.5,0.2,elm grove living centre,False,elm grove living centre,35 Elm Grove Avenue,Toronto Central,For-Profit,No,Yes,No,Yes,Toronto,M6K2J2,14.0,"(0.0, 21.0]",0.0,9.0,"(0.0, 11.0]",0.0,4.0,"(0.0, 5.0]",0.0,elm grove living centre,44,17,43,Active,Toronto,3520005.0,43.640316,-79.430701,126.0
7,Fairview Mennonite Home,110,137,LV,16.1,3.1,21.6,2.9,47.0,2.8,fairview mennonite home,True,fairview mennonite home,515 Langs Drive,Waterloo Wellington,Non-Profit,No,Yes,Yes,Yes,Cambridge,N3H5E4,18.0,"(0.0, 21.0]",0.0,13.0,"(11.0, 16.0]",1.0,6.0,"(5.0, 7.0]",1.0,fairview mennonite home,,,,,Cambridge,3530010.0,43.398556,-80.343146,84.0
8,Geraldton District Hospital,500,LV,504,62.8,1.4,5.2,8.8,7.5,48.0,geraldton district hospital,True,geraldton district hospital,500 Hogarth Avenue West,North West,Non-Profit,No,Yes,Yes,Yes,Geraldton,P0T1M0,13.0,"(0.0, 21.0]",0.0,9.0,"(0.0, 11.0]",0.0,2.0,"(0.0, 5.0]",0.0,geraldton district hospital,,,,,Greenstone,3558075.0,49.722694,-86.954712,19.0
9,Grove Park Home for Senior Citizens,226,226,LV,12.3,4.1,18.3,1.5,30.0,6.4,grove park home for senior citizens,True,grove park home for senior citizens,234 Cook Street,North Simcoe Muskoka,Non-Profit,No,Yes,No,Yes,Barrie,L4M4H5,22.0,"(21.0, 31.0]",1.0,14.0,"(11.0, 16.0]",1.0,8.0,"(7.0, 11.0]",2.0,grove park home for senior citizens,,,,,Barrie,3543042.0,44.406021,-79.669467,143.0


In [399]:
# # Code to investigate homes in quality
# qual[qual['cleaned_home_name'].str.contains('belmont', regex=False, na=False)]
# # for each in qual['cleaned_home_name']:
# #     if each.endswith('- '):
# #         print(each)

In [391]:
# # Code to investigate homes in main
# df_trim[df_trim['cleaned_name'].str.contains('belmont house', regex=False, na=False)]['cleaned_name']
# # df_trim[df_trim['cleaned_name'].str.contains('lakeland', regex=False, na=False)]['cleaned_name']




22    belmont house
Name: cleaned_name, dtype: object

### Manual review 
- lakeland long term care services (80 beds) in main was matched to in quality west parry sound health centre - lakeland long term care services corporation
    - lakeland long term care (eldcap) (20 beds) is not represented
- north shore health network - long term care unit in qual was matched to north shore health network – ltc unit in main	(22 beds)
    - north shore health network – eldcap (10 beds) from main unit is not represented
- espanola general hospital - espanola nursing home in qual was matched to espanola general hospital (operating as espanola nursing home-ltc) (32 beds)
    - espanola general hospital (operating as espanola nursing home-eldcap) (30 beds) from main not represented
- The following in main was not in quality:
    - seaforth long term care home
    - sherbourne place	
    - lennox and addington county general hospital	
    - belmont long term care facility	


In [400]:
df_all.to_csv(r'df_LTCdata_for_stats_FINAL.csv', index = False)