In [1]:
# Last scraped June 10, 2020

import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import re
%matplotlib inline

# Display options: show every row and column, and never wrap or truncate cell text.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# A negative max_colwidth is deprecated since pandas 1.0 and is no longer accepted by
# newer releases; None is the supported way to disable truncation. Older releases only
# accept an integer and raise ValueError for None, so fall back to the legacy -1 there.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)


In [2]:
# Download the search page that lists every LTC home. The timeout stops the notebook
# from hanging forever on an unresponsive server, and raise_for_status() turns an HTTP
# error into an immediate failure instead of a confusing parse error further down.
ltc_requests = requests.get("http://publicreporting.ltchomes.net/en-ca/Search_Selection.aspx", timeout=30)
ltc_requests.raise_for_status()
# No parser is named here, so BeautifulSoup chooses one from those installed.
ltc = BeautifulSoup(ltc_requests.text)

In [3]:
# ltc_requests.status_code
# ltc_requests.reason
# print(ltc_requests.request.headers)
# ltc_requests.headers

## Webscrape list of LTC home names and links to detailed info

In [4]:
# The home names and their profile links are the anchors of class "rsLink"
# found inside the second <ol> element of the search page.
ordered_lists = ltc.find_all("ol")
ltc_list = ordered_lists[1]
home = ltc_list.find_all("a", class_="rsLink")

In [5]:
# Pull the display text and the href attribute out of every scraped anchor.
names = [anchor.getText() for anchor in home]
links = [anchor.get('href') for anchor in home]

In [6]:
# Sanity check: the anchors, names and links all have the same length (651 records).
print(*(len(collection) for collection in (home, names, links)))

# The de-duplicated counts match the totals, so every name and every link is unique.
print(*(len(set(collection)) for collection in (names, links)))

651 651 651
651 651


In [7]:
# Prefix every scraped link with the site root to obtain a full URL.
full_links = ['http://publicreporting.ltchomes.net/en-ca/' + relative_link for relative_link in links]

## Webscrape detailed profile info for each LTC home

In [8]:
# Iterate through each of the links and scrape the characteristics of every LTC home.
# Each list below receives exactly one entry per home, in the order of full_links.
addresses = []
cities_postalcodes = []
LHIN = []
licensee = []
management = []
home_type = []
beds = []
short_stay = []
residents_council = []
family_council = []
accreditation = []
info = []

# Positions of the wanted values among the page's "Profilerow_col2" cells, and the
# lists that collect them (same order in both sequences).
profile_positions = [0, 3, 4, 5, 6, 7, 8, 9, 10, 12]
profile_lists = [LHIN, licensee, management, home_type, beds, short_stay,
                 residents_council, family_council, accreditation, info]

for counter, each in enumerate(full_links):
    # The timeout keeps a single stalled request from hanging the whole scrape;
    # raise_for_status() stops on an HTTP error instead of parsing an error page.
    response = requests.get(each, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text)
    addresses.append(soup.find("div", {"id":"ctl00_ContentPlaceHolder1_divHomeAddress"}).getText())
    cities_postalcodes.append(soup.find("div", {"id":"ctl00_ContentPlaceHolder1_divHomeCity"}).getText())
    profiles = soup.find("div", {"id":"ctl00_ContentPlaceHolder1_divHomeProfile_item_Col1"})
    profile_data = profiles.find_all("div", {"class":"Profilerow_col2"})
    try:
        # Read every value BEFORE appending anything: if a page has fewer cells than
        # expected, no list has been touched yet, so all lists stay the same length.
        values = [profile_data[position].getText() for position in profile_positions]
    except IndexError:
        # The page lacks some of the expected profile cells: record the home as
        # missing in every profile list and report its position in full_links.
        print(counter) # print out records which resulted in error
        values = [None] * len(profile_positions)
    for collected, value in zip(profile_lists, values):
        collected.append(value)

324
508


### Two homes do not have profile info, tag for removal

In [9]:
# Print the profile links of the LTC homes for which an error was raised during webscraping.
for failed_index in (
    324,  # LENNOX AND ADDINGTON COUNTY GENERAL HOSPITAL
    508,  # ST. JOSEPH'S MOTHER HOUSE (MARTHA WING)
):
    print(full_links[failed_index])

http://publicreporting.ltchomes.net/en-ca/homeprofile.aspx?Home=7089
http://publicreporting.ltchomes.net/en-ca/homeprofile.aspx?Home=C604


In [10]:
# Assemble the scraped lists into one DataFrame of Ontario LTC homes:
# one column per scraped profile field.
profile_columns = {
    'name': names,
    'address': addresses,
    'city_and_postal_code': cities_postalcodes,
    'LHIN': LHIN,
    'licensee': licensee,
    'management': management,
    'home_type': home_type,
    'beds': beds,
    'short_stay': short_stay,
    'residents_council': residents_council,
    'family_council': family_council,
    'accreditation': accreditation,
    'additional_info': info,
}
df = pd.DataFrame(profile_columns)
df.info()
df.head(20)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 651 entries, 0 to 650
Data columns (total 13 columns):
name                    651 non-null object
address                 651 non-null object
city_and_postal_code    651 non-null object
LHIN                    649 non-null object
licensee                649 non-null object
management              649 non-null object
home_type               649 non-null object
beds                    649 non-null object
short_stay              649 non-null object
residents_council       649 non-null object
family_council          649 non-null object
accreditation           649 non-null object
additional_info         649 non-null object
dtypes: object(13)
memory usage: 66.2+ KB


Unnamed: 0,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info
0,AFTON PARK PLACE LONG TERM CARE COMMUNITY,1200 Afton Drive,"Sarnia, N7S6L6",Erie St. Clair,S & R Nursing Homes Ltd.,,For-Profit,Home with approximately 128 beds,No,Yes,Yes,Yes,
1,"ALBRIGHT GARDENS HOMES, INCORPORATED",5050 Hillside Drive,"Beamsville, L0R1B2",Hamilton Niagara Haldimand Brant (Hnhb),"Albright Gardens Homes, Incorporated",,Non-Profit,Home with approximately 231 beds,No,Yes,Yes,No,
2,ALEXANDER PLACE,329 Parkside Drive P. O. Box 50,"Waterdown, L0R2H0",Hamilton Niagara Haldimand Brant (Hnhb),Waterdown Long Term Care Centre Inc.,,For-Profit,Home with approximately 128 beds,Yes,Yes,Yes,Yes,
3,ALGOMA DISTRICT HOMES FOR THE AGED (ALGOMA MANOR),135 Dawson Street,"Thessalon, P0R1L0",North East,Board Of Management For The District Of Algoma,,,Home with approximately 108 beds,Yes,Yes,Yes,No,Home Closed on 11/01/2011
4,ALGOMA MANOR NURSING HOME,145 Dawson Street,"Thessalon, P0R1L0",North East,Algoma Manor Nursing Home,,,Home with approximately 96 beds,Yes,Yes,Yes,Yes,
5,ALGONQUIN NURSING HOME,207 Turcotte Park Road P.O. Box 270,"Mattawa, P0H1V0",North East,Algonquin Nursing Home Of Mattawa,Hôpital De Mattawa Hospital,Non-Profit,Home with approximately 73 beds,Yes,Yes,Yes,Yes,
6,ALLENDALE,185 Ontario Street South,"Milton, L9T2M4",Mississauga Halton,The Regional Municipality Of Halton,,Municipal,Home with approximately 200 beds,No,Yes,Yes,Yes,
7,ALMONTE COUNTRY HAVEN,333 Country Street P.O. Box 250,"Almonte, K0A1A0",Champlain,0760444 B.C. Ltd. As General Partner On Behalf Of Omni Health Care Limited Partnership,,For-Profit,Home with approximately 82 beds,No,Yes,Yes,Yes,
8,ALTAMONT CARE COMMUNITY,92 Island Road,"Scarborough, M1C2P5",Central East,Vigour Limited Partnership On Behalf Of Vigour General Partner Inc.,,For-Profit,Home with approximately 159 beds,Yes,Yes,Yes,Yes,
9,ANSON PLACE CARE CENTRE,85 Main Street North,"Hagersville, N0A1H0",Hamilton Niagara Haldimand Brant (Hnhb),Rykka Care Centres Lp,,For-Profit,Home with approximately 61 beds,Yes,Yes,No,Yes,


In [11]:
# Split the 'city_and_postal_code' column (e.g. "Sarnia, N7S6L6") into 'city' and
# 'postal_code'. The split is made at the LAST comma so a city name that itself
# contains a comma stays intact, and both parts are stripped so the postal code does
# not keep the space that follows the comma.
city_postal_parts = df['city_and_postal_code'].str.rsplit(',', n=1)
df['city'] = city_postal_parts.str[0].str.strip()
df['postal_code'] = city_postal_parts.str[1].str.strip()

# Use a regex to extract the first run of digits in 'beds' into its own column.
# The extracted values are strings, not numbers.
df['number_of_beds'] = df['beds'].str.extract(r'(\d+)', expand=False)

df.nunique()

name                    651
address                 648
city_and_postal_code    624
LHIN                    14 
licensee                378
management              26 
home_type               4  
beds                    211
short_stay              2  
residents_council       2  
family_council          2  
accreditation           2  
additional_info         24 
city                    268
postal_code             621
number_of_beds          211
dtype: int64

In [12]:
# List every home whose address is shared with at least one other home.
shares_address = df.duplicated(subset=['address'], keep=False)
df[shares_address]

Unnamed: 0,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,number_of_beds
78,CEDARWOOD LODGE,860 Great Northern Road,"Sault Ste. Marie, P6A5K7",North East,Autumnwood Mature Lifestyle Communities Inc.,Universalcare Canada Inc.,For-Profit,Home with approximately 50 beds,No,Yes,Yes,No,,Sault Ste. Marie,P6A5K7,50
211,FOUNTAIN VIEW CARE COMMUNITY,1800 O'Connor Drive,"East York, M4A1W7",Toronto Central,2063414 Ontario Limited As General Partner Of 2063414 Investment Lp,,For-Profit,Home with approximately 158 beds,No,Yes,Yes,Yes,,East York,M4A1W7,158
246,GREAT NORTHERN NURSING CENTRE,860 Great Northern Road,"Sault Ste Marie, P6A5K7",North East,Extendicare (Canada) Inc.,Extendicare (Canada) Inc.,For-Profit,Home with approximately 95 beds,No,Yes,Yes,No,HOME CLOSED on 04/19/2013,Sault Ste Marie,P6A5K7,95
257,HARMONY HILLS CARE COMMUNITY,1800 O'Connor Drive,"Toronto, M4A1W7",Toronto Central,2063414 Ontario Limited As General Partner Of 2063414 Investment Lp,,For-Profit,Home with approximately 160 beds,No,Yes,Yes,Yes,,Toronto,M4A1W7,160
309,LAKELAND LONG TERM CARE (ELDCAP),6 Albert Street,"Parry Sound, P2A3A4",North East,West Parry Sound Health Centre,,Non-Profit,Home with approximately 20 beds,No,No,No,No,,Parry Sound,P2A3A4,20
310,LAKELAND LONG TERM CARE SERVICES,6 Albert Street,"Parry Sound, P2A3A4",North East,Lakeland Long Term Care Services Corporation,,Non-Profit,Home with approximately 90 beds,No,Yes,No,No,,Parry Sound,P2A3A4,90


### Manual review of 3 duplicated addresses involving 6 homes and tag for removal as needed
- Cedarwood: no website
- Great Northern: home closed in 2013
- Lakeland Eldcap: no website, seems to be connected to Lakeland LTC
- Lakeland LTC: http://www.lakelandltc.com/, owned and connected to WPSHC which is a hospital
- Harmony: https://www.siennaliving.ca/long-term-care/ontario/harmony-hills-care-community, last inspection Feb 2020
- Fountain: https://www.siennaliving.ca/long-term-care/ontario/fountain-view-care-community, last inspection Oct 2019


## Webscrape inspections info for each LTC home

In [13]:
# The inspection data of a home is reached by appending '&tab=1' to its profile link.
full_links_inspection = [profile_link + '&tab=1' for profile_link in full_links]
    

In [14]:
# Webscrape the inspection data of each LTC home into one DataFrame per home.
frames = []
for each in full_links_inspection:
    # The timeout keeps a single stalled request from hanging the whole scrape;
    # raise_for_status() stops on an HTTP error instead of parsing an error page.
    response = requests.get(each, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text)
    name = soup.find("div", {"class":"HomeName"}).getText()
    inspections = soup.find("div", {"id":"ctl00_ContentPlaceHolder1_divHomeProfile_item_Col3"})
    types = inspections.find_all("div", {"class":"divInspectionTypeDataCol"})
    dates = inspections.find_all("div", {"class":"divInspectionDateDataCol"})
    # Distinct comprehension variables are used so the outer loop variable `each`
    # (the URL) is never shadowed by the cells being read.
    inspection_types = [type_cell.getText() for type_cell in types]
    inspection_dates = [date_cell.getText() for date_cell in dates]
    number_of_types = len(types)
    number_of_dates = len(dates)
    # `name` and the two counts are scalars, so they repeat on every inspection row.
    df_temp = pd.DataFrame({"name": name,
                            "inspection_types": inspection_types,
                            "number_of_types": number_of_types,
                            "inspection_dates": inspection_dates,
                            "number_of_dates": number_of_dates})
    frames.append(df_temp)
    

### Create a df of raw inspection data

In [15]:
# Stack the per-home frames into a single DataFrame of raw inspection records.
df2 = pd.concat(frames)
unique_counts = df2.nunique()
print(unique_counts) # there are only 648 unique LTC home names
df2.info()

# display the first 10 inspection records
pd.set_option('display.max_rows', None)
df2.head(10)

name                648 
inspection_types    29  
number_of_types     101 
inspection_dates    2573
number_of_dates     101 
dtype: int64
<class 'pandas.core.frame.DataFrame'>
Int64Index: 23754 entries, 0 to 25
Data columns (total 5 columns):
name                23754 non-null object
inspection_types    23754 non-null object
number_of_types     23754 non-null int64
inspection_dates    23754 non-null object
number_of_dates     23754 non-null int64
dtypes: int64(2), object(3)
memory usage: 1.1+ MB


Unnamed: 0,name,inspection_types,number_of_types,inspection_dates,number_of_dates
0,AFTON PARK PLACE LONG TERM CARE COMMUNITY,Complaints Inspection,67,"Jan 17, 2020",67
1,AFTON PARK PLACE LONG TERM CARE COMMUNITY,Complaints Inspection,67,"Nov 27, 2019",67
2,AFTON PARK PLACE LONG TERM CARE COMMUNITY,Critical Incident Inspection,67,"Nov 27, 2019",67
3,AFTON PARK PLACE LONG TERM CARE COMMUNITY,Critical Incident Inspection,67,"Oct 28, 2019",67
4,AFTON PARK PLACE LONG TERM CARE COMMUNITY,Critical Incident Inspection,67,"May 01, 2019",67
5,AFTON PARK PLACE LONG TERM CARE COMMUNITY,Complaints Inspection,67,"Mar 28, 2019",67
6,AFTON PARK PLACE LONG TERM CARE COMMUNITY,Critical Incident Inspection,67,"Mar 06, 2019",67
7,AFTON PARK PLACE LONG TERM CARE COMMUNITY,Resident Quality Inspection,67,"Aug 02, 2018",67
8,AFTON PARK PLACE LONG TERM CARE COMMUNITY,Critical Incident Inspection,67,"Feb 02, 2018",67
9,AFTON PARK PLACE LONG TERM CARE COMMUNITY,Resident Quality Inspection,67,"Jun 26, 2017",67


In [16]:
# Parse the scraped date strings (e.g. "Jan 17, 2020") into datetime values.
parsed_dates = pd.to_datetime(df2['inspection_dates'])
df2['inspection_dates'] = parsed_dates

In [17]:
# Tally how many inspection records exist for each of the 29 inspection types.
inspection_type_counts = df2['inspection_types'].value_counts()
inspection_type_counts

Complaints Inspection                                                        8064
Critical Incident Inspection                                                 6700
Follow-Up Inspection                                                         1851
Resident Quality Inspection                                                  1810
Resident Quality Inspection with Order(s) of the Inspector                   1283
Critical Incident Inspection  with Order(s) of the Inspector                 1004
Complaints Inspection  with Order(s) of the Inspector                        978 
Follow-Up Inspection with Order(s) of the Inspector                          811 
Other Inspection                                                             352 
Complaints Inspection (En français)                                          190 
Critical Incident Inspection (En français)                                   182 
Mandatory Inspection                                                         103 
Other Inspection

In [18]:
# Collect the distinct home names found in the profile data (orig) and in the
# inspection data (inspec) so the two can be compared.
orig = {*df['name']}
inspec = {*df2['name']}
def returnNotMatches(a, b):
    """Return the items that the two collections do not share.

    Parameters
    ----------
    a, b : iterables that support the ``in`` operator.

    Returns
    -------
    list
        A two-element list: first the items of ``a`` absent from ``b``, then the
        items of ``b`` absent from ``a``, each in iteration order.
    """
    only_in_a = [item for item in a if item not in b]
    only_in_b = [item for item in b if item not in a]
    return [only_in_a, only_in_b]
# First list: names only in the profile data; second list: names only in the inspection data.
returnNotMatches(a=orig, b=inspec)

[["ST. JOSEPH'S MOTHER HOUSE (MARTHA WING)",
  'MARIANHILL - MARGUERITE CENTRE',
  'NORTHUMBERLAND HILLS HOSPITAL'],
 []]

### Manual review of 3 LTC homes that were not represented in the inspections data and tag for removal
- ST. JOSEPH'S MOTHER HOUSE (MARTHA WING), no inspections or profile information
- NORTHUMBERLAND HILLS HOSPITAL: No inspections, closed 2012
- MARIANHILL - MARGUERITE CENTRE: No inspections, closed 2012


## Enumerate number of inspections for each LTC home

In [19]:
# Count every inspection record per home, regardless of date or inspection type.
inspections_per_home = df2.groupby(['name']).size()
df_total = inspections_per_home.reset_index(name='total_inspections')
df_total.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 648 entries, 0 to 647
Data columns (total 2 columns):
name                 648 non-null object
total_inspections    648 non-null int64
dtypes: int64(1), object(1)
memory usage: 10.2+ KB


In [20]:
# # plot a histogram of total inspections to see how the number of inspections are distributed
# df_total.hist(column='total_inspections')

# # cut total inspections into quartiles and add a column with quartile ranges
# df_total['quartiles_total_range'] = pd.qcut(df_total['total_inspections'], q=4, precision=0)
# print(df_total['quartiles_total_range'].value_counts())

# # add column with quartile rank values
# df_total['quartiles_total_rank'] = pd.qcut(df_total['total_inspections'], q=4, labels = False, precision=0)

# df_total.head()

In [21]:
# Filter df2 and keep only rows with inspections since Jan 2015 inclusive (inspections
# in the last 5 years). The comparison is >= so that an inspection dated exactly on the
# cutoff (2015-01-01) is kept, as "inclusive" requires; a strict > would drop it.
df_5y = df2[(df2['inspection_dates'] >= '2015-01-01')]

# Create a df with the number of inspections in the last 5 years.
# The home name stays in the index here (no reset_index).
df_5y = df_5y.groupby(['name']).size().to_frame('5y_inspections')

In [22]:
# # plot a histogram of 5y inspections to see how the number of inspections are distributed
# df_5y.hist(column='5y_inspections')

# # cut total inspections into quartiles and add a column with quartile ranges
# df_5y['quartiles_5y_range'] = pd.qcut(df_5y['5y_inspections'], q=4, precision=0)
# print(df_5y['quartiles_5y_range'].value_counts())

# # add column with quartile rank values
# df_5y['quartiles_5y_rank'] = pd.qcut(df_5y['5y_inspections'], q=4, labels = False, precision=0)

# df_5y.info()
# df_5y.head()


In [23]:
# Filter df2 and keep only rows with inspections since Jan 2018 inclusive (inspections
# in the last 2 years). The comparison is >= so that an inspection dated exactly on the
# cutoff (2018-01-01) is kept, as "inclusive" requires; a strict > would drop it.
df_2y = df2[(df2['inspection_dates'] >= '2018-01-01')]

# Create a df with the number of inspections in the last 2 years.
# The home name stays in the index here (no reset_index).
df_2y = df_2y.groupby(['name']).size().to_frame('2y_inspections')

In [24]:
# # plot a histogram of 2y inspections to see how the number of inspections are distributed
# df_2y.hist(column='2y_inspections')

# # cut total inspections into quartiles and add a column with quartile ranges
# df_2y['quartiles_2y_range'] = pd.qcut(df_2y['2y_inspections'], q=4, precision=0)
# print(df_2y['quartiles_2y_range'].value_counts())

# # add column with quartile rank values
# df_2y['quartiles_2y_rank'] = pd.qcut(df_2y['2y_inspections'], q=4, labels = False, precision=0)

# df_2y.info()
# df_2y.head()

### Note that there are fewer homes with inspections in the last 5y/2y meaning that some homes have not been inspected in the last 2-5 years. Some homes may be closed. These have not yet been filtered out. 

## Enumerate the number of inspections containing the words "Complaints", "Critical Incident" and "Order(s)" for each home

In [25]:
# Count, per home, the inspections whose type mentions each keyword (all dates).
def _rows_mentioning(keyword):
    """Return the rows of df2 whose inspection type contains `keyword` as a literal substring."""
    mentions = df2["inspection_types"].str.contains(keyword, regex=False, na=False)
    return df2[mentions]


def _tally_by_home(rows, column):
    """Count `rows` per home name and return a frame with columns 'name' and `column`."""
    return rows.groupby(['name']).size().to_frame(column).reset_index()


df_complaints = _rows_mentioning('Complaints')
df_complaints_total = _tally_by_home(df_complaints, 'total_complaints')

df_critical = _rows_mentioning('Critical Incident')
df_critical_total = _tally_by_home(df_critical, 'total_critical')

df_withOrders = _rows_mentioning('Order(s)')
df_withOrders_total = _tally_by_home(df_withOrders, 'total_withOrders')


In [26]:
# Complaints, critical-incident and with-orders inspections per home in the last 5y and 2y.
def _counts_since(rows, cutoff, column):
    """Count, per home name, the `rows` dated on or after `cutoff`.

    The cutoff date itself is included (>=). Returns a frame with the columns
    'name' and `column`.
    """
    recent = rows[rows['inspection_dates'] >= cutoff]
    return recent.groupby(['name']).size().to_frame(column).reset_index()


# Rows of df2 whose inspection type contains each keyword as a literal substring.
df_complaints = df2[df2["inspection_types"].str.contains('Complaints', regex=False, na=False)]
df_critical = df2[df2["inspection_types"].str.contains('Critical Incident', regex=False, na=False)]
df_withOrders = df2[df2["inspection_types"].str.contains('Order(s)', regex=False, na=False)]

# Complaints, critical and withOrders in the last 5y
df_complaints_5y = _counts_since(df_complaints, '2015-01-01', '5y_complaints')
df_critical_5y = _counts_since(df_critical, '2015-01-01', '5y_critical')
df_withOrders_5y = _counts_since(df_withOrders, '2015-01-01', '5y_withOrders')

# Complaints, critical and withOrders in the last 2y
df_complaints_2y = _counts_since(df_complaints, '2018-01-01', '2y_complaints')
df_critical_2y = _counts_since(df_critical, '2018-01-01', '2y_critical')
df_withOrders_2y = _counts_since(df_withOrders, '2018-01-01', '2y_withOrders')


In [27]:
# # Inspect the multiple dataframes
# df_total.info()
# df_5y = df_5y.reset_index(); df_5y.info()
# df_2y = df_2y.reset_index(); df_2y.info()
# df_complaints_total.info() # 9243 complaints
# df_critical_total.info() # 7905 critical 
# df_withOrders_total.info() # 4305 orders
# df_complaints_5y.info() 
# df_critical_5y.info()
# df_withOrders_5y.info()
# df_complaints_2y.info()
# df_critical_2y.info()
# df_withOrders_2y.info()

# Merge the dataframes on home name
from functools import reduce


def _outer_merge_on_name(left, right):
    """Outer-join two frames on 'name', keeping homes that appear in only one of them."""
    return pd.merge(left, right, on='name', how='outer')


# All count frames, with the overall totals first.
list_inspections_dfs = [
    df_total, df_5y, df_2y,
    df_complaints_total, df_complaints_5y, df_complaints_2y,
    df_critical_total, df_critical_5y, df_critical_2y,
    df_withOrders_total, df_withOrders_5y, df_withOrders_2y,
]

# Fold the list left to right into a single frame.
df_inspections = reduce(_outer_merge_on_name, list_inspections_dfs)
df_inspections.info()
df_inspections.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 648 entries, 0 to 647
Data columns (total 13 columns):
name                 648 non-null object
total_inspections    648 non-null int64
5y_inspections       632 non-null float64
2y_inspections       626 non-null float64
total_complaints     638 non-null float64
5y_complaints        592 non-null float64
2y_complaints        536 non-null float64
total_critical       635 non-null float64
5y_critical          622 non-null float64
2y_critical          612 non-null float64
total_withOrders     608 non-null float64
5y_withOrders        566 non-null float64
2y_withOrders        429 non-null float64
dtypes: float64(11), int64(1), object(1)
memory usage: 70.9+ KB


Unnamed: 0,name,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders
0,AFTON PARK PLACE LONG TERM CARE COMMUNITY,67,34.0,9.0,29.0,15.0,3.0,28.0,13.0,5.0,5.0,2.0,
1,"ALBRIGHT GARDENS HOMES, INCORPORATED",39,25.0,15.0,18.0,10.0,6.0,12.0,7.0,6.0,7.0,7.0,3.0
2,ALEXANDER PLACE,28,17.0,8.0,11.0,6.0,3.0,8.0,5.0,2.0,4.0,4.0,2.0
3,ALGOMA DISTRICT HOMES FOR THE AGED (ALGOMA MANOR),5,,,1.0,,,1.0,,,1.0,,
4,ALGOMA MANOR NURSING HOME,23,14.0,7.0,7.0,5.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0


## Merge the inspections data with the profile data

In [28]:
# Attach the inspection counts to the profile rows; the left join keeps every profile row.
ltc_scrape = df.merge(right=df_inspections, how='left', left_on='name', right_on='name')

# Replace blank values (empty or whitespace-only strings) with NaN
blank_pattern = r'^\s*$'
ltc_scrape = ltc_scrape.replace(to_replace=blank_pattern, value=np.nan, regex=True)

ltc_scrape.info()
ltc_scrape.head(10)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 651 entries, 0 to 650
Data columns (total 28 columns):
name                    651 non-null object
address                 651 non-null object
city_and_postal_code    651 non-null object
LHIN                    649 non-null object
licensee                649 non-null object
management              75 non-null object
home_type               643 non-null object
beds                    649 non-null object
short_stay              649 non-null object
residents_council       649 non-null object
family_council          649 non-null object
accreditation           649 non-null object
additional_info         25 non-null object
city                    651 non-null object
postal_code             651 non-null object
number_of_beds          649 non-null object
total_inspections       648 non-null float64
5y_inspections          632 non-null float64
2y_inspections          626 non-null float64
total_complaints        638 non-null float64
5y_complaints

Unnamed: 0,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,number_of_beds,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders
0,AFTON PARK PLACE LONG TERM CARE COMMUNITY,1200 Afton Drive,"Sarnia, N7S6L6",Erie St. Clair,S & R Nursing Homes Ltd.,,For-Profit,Home with approximately 128 beds,No,Yes,Yes,Yes,,Sarnia,N7S6L6,128,67.0,34.0,9.0,29.0,15.0,3.0,28.0,13.0,5.0,5.0,2.0,
1,"ALBRIGHT GARDENS HOMES, INCORPORATED",5050 Hillside Drive,"Beamsville, L0R1B2",Hamilton Niagara Haldimand Brant (Hnhb),"Albright Gardens Homes, Incorporated",,Non-Profit,Home with approximately 231 beds,No,Yes,Yes,No,,Beamsville,L0R1B2,231,39.0,25.0,15.0,18.0,10.0,6.0,12.0,7.0,6.0,7.0,7.0,3.0
2,ALEXANDER PLACE,329 Parkside Drive P. O. Box 50,"Waterdown, L0R2H0",Hamilton Niagara Haldimand Brant (Hnhb),Waterdown Long Term Care Centre Inc.,,For-Profit,Home with approximately 128 beds,Yes,Yes,Yes,Yes,,Waterdown,L0R2H0,128,28.0,17.0,8.0,11.0,6.0,3.0,8.0,5.0,2.0,4.0,4.0,2.0
3,ALGOMA DISTRICT HOMES FOR THE AGED (ALGOMA MANOR),135 Dawson Street,"Thessalon, P0R1L0",North East,Board Of Management For The District Of Algoma,,,Home with approximately 108 beds,Yes,Yes,Yes,No,Home Closed on 11/01/2011,Thessalon,P0R1L0,108,5.0,,,1.0,,,1.0,,,1.0,,
4,ALGOMA MANOR NURSING HOME,145 Dawson Street,"Thessalon, P0R1L0",North East,Algoma Manor Nursing Home,,,Home with approximately 96 beds,Yes,Yes,Yes,Yes,,Thessalon,P0R1L0,96,23.0,14.0,7.0,7.0,5.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0
5,ALGONQUIN NURSING HOME,207 Turcotte Park Road P.O. Box 270,"Mattawa, P0H1V0",North East,Algonquin Nursing Home Of Mattawa,Hôpital De Mattawa Hospital,Non-Profit,Home with approximately 73 beds,Yes,Yes,Yes,Yes,,Mattawa,P0H1V0,73,23.0,14.0,5.0,3.0,1.0,,11.0,7.0,4.0,5.0,3.0,
6,ALLENDALE,185 Ontario Street South,"Milton, L9T2M4",Mississauga Halton,The Regional Municipality Of Halton,,Municipal,Home with approximately 200 beds,No,Yes,Yes,Yes,,Milton,L9T2M4,200,39.0,21.0,16.0,16.0,9.0,6.0,14.0,6.0,6.0,11.0,7.0,6.0
7,ALMONTE COUNTRY HAVEN,333 Country Street P.O. Box 250,"Almonte, K0A1A0",Champlain,0760444 B.C. Ltd. As General Partner On Behalf Of Omni Health Care Limited Partnership,,For-Profit,Home with approximately 82 beds,No,Yes,Yes,Yes,,Almonte,K0A1A0,82,49.0,27.0,10.0,22.0,12.0,4.0,19.0,11.0,5.0,3.0,2.0,1.0
8,ALTAMONT CARE COMMUNITY,92 Island Road,"Scarborough, M1C2P5",Central East,Vigour Limited Partnership On Behalf Of Vigour General Partner Inc.,,For-Profit,Home with approximately 159 beds,Yes,Yes,Yes,Yes,,Scarborough,M1C2P5,159,33.0,19.0,12.0,11.0,4.0,4.0,13.0,8.0,4.0,7.0,7.0,3.0
9,ANSON PLACE CARE CENTRE,85 Main Street North,"Hagersville, N0A1H0",Hamilton Niagara Haldimand Brant (Hnhb),Rykka Care Centres Lp,,For-Profit,Home with approximately 61 beds,Yes,Yes,No,Yes,,Hagersville,N0A1H0,61,22.0,12.0,6.0,11.0,5.0,3.0,6.0,4.0,3.0,1.0,,


## Remove inactive homes
- Homes with 'closed' in additional info
- Homes missing all profile information
- Homes with no inspection reports in the last 2 years

In [29]:
# Lower-case the free-text notes so that matching "closed" ignores the original casing.
# We see that 20 homes are closed, and one home was merged
ltc_scrape['additional_info'] = ltc_scrape['additional_info'].str.lower()
# ltc_scrape['additional_info'].value_counts()
mentions_closed = ltc_scrape['additional_info'].str.contains("closed", na=False)
mentions_closed.value_counts()

False    631
True     20 
Name: additional_info, dtype: int64

In [30]:
# Drop rows with homes that are closed. reset_index() renumbers the rows and keeps the
# previous row labels in a new 'index' column.
closed_mask = ltc_scrape['additional_info'].str.contains("closed", na=False)
ltc_scrape = ltc_scrape[~closed_mask].reset_index()

# Review other additional info, there are 5 homes with additional info
has_additional_info = ltc_scrape['additional_info'].notna()
ltc_scrape.loc[has_additional_info]

Unnamed: 0,index,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,number_of_beds,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders
25,26,BENDALE ACRES,2920 Lawrence Avenue East,"Scarborough, M1P2T8",Central East,Toronto Long-term Care Homes And Services,,Municipal,Home with approximately 302 beds,Yes,Yes,Yes,Yes,"pavillon omer deslauriers, 37 beds, long term care services provided in french and english.",Scarborough,M1P2T8,302,35.0,17.0,10.0,16.0,7.0,5.0,12.0,5.0,4.0,6.0,4.0,2.0
114,118,CRAIGHOLME,221 Main Street R. R. #1,"Ailsa Craig, N0M1A0",South West,Craigwiel Gardens,,Non-Profit,Home with approximately 83 beds,Yes,Yes,Yes,No,stutti@craigwielgardens.on.ca,Ailsa Craig,N0M1A0,83,54.0,24.0,13.0,17.0,10.0,5.0,30.0,8.0,4.0,5.0,5.0,5.0
121,126,DEARNESS HOME FOR SENIOR CITIZENS,710 Southdale Road East,"London, N6E1R8",South West,The Corporation Of The City Of London,,Municipal,Home with approximately 243 beds,Yes,Yes,Yes,Yes,"datars bere, dearness home managing director",London,N6E1R8,243,76.0,33.0,16.0,30.0,12.0,6.0,39.0,18.0,10.0,3.0,1.0,
453,469,SARSFIELD COLONIAL HOME,2861 Colonial Road P.O. Box 130,"Sarsfield, K0A3E0",Champlain,2629693 Ontario Inc.,Taminagi Inc. (As Manager),For-Profit,Home with approximately 46 beds,No,Yes,Yes,Yes,recipient no 346524 – sarsfield colonial home\r\ncomment - change in ownership effective february 19 2020. details transferred to recipient no 664183/ facility no nh1692\r\n,Sarsfield,K0A3E0,46,62.0,42.0,12.0,27.0,14.0,2.0,13.0,8.0,4.0,18.0,16.0,4.0
487,504,"ST. JOSEPH'S HEALTH CARE, LONDON - MOUNT HOPE CENTRE FOR LONG TERM CARE - MARIAN VILLA",200 College Avenue P.O. Box 5777,"London, N6A1Y1",South West,"St. Joseph's Health Care, London",,Non-Profit,Home with approximately 217 beds,Yes,Yes,Yes,Yes,"home was merged with mount hope long-term care centre effective 1st january, 2016. please see mount hope long-term care centre for information.",London,N6A1Y1,217,68.0,17.0,,34.0,7.0,,23.0,7.0,,6.0,1.0,


## Manual Review of one home that was merged in Jan 2016

- The address listed for the merged home is just a general PO Box for the licensee
- Will remove the merged home from the database

In [31]:
# Show the rows of the 2 homes that were merged (both names contain 'MOUNT HOPE').
mentions_mount_hope = ltc_scrape['name'].str.contains('MOUNT HOPE', regex=False, na=False)
ltc_scrape[mentions_mount_hope]

Unnamed: 0,index,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,number_of_beds,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders
360,373,MOUNT HOPE CENTRE FOR LONG TERM CARE,21 Grosvenor Street P.O. Box 5777,"London, N6A1Y6",South West,"St. Joseph's Health Care, London",,Non-Profit,Home with approximately 177 beds,Yes,Yes,Yes,Yes,,London,N6A1Y6,177,87.0,44.0,15.0,39.0,15.0,1.0,34.0,21.0,11.0,19.0,12.0,4.0
487,504,"ST. JOSEPH'S HEALTH CARE, LONDON - MOUNT HOPE CENTRE FOR LONG TERM CARE - MARIAN VILLA",200 College Avenue P.O. Box 5777,"London, N6A1Y1",South West,"St. Joseph's Health Care, London",,Non-Profit,Home with approximately 217 beds,Yes,Yes,Yes,Yes,"home was merged with mount hope long-term care centre effective 1st january, 2016. please see mount hope long-term care centre for information.",London,N6A1Y1,217,68.0,17.0,,34.0,7.0,,23.0,7.0,,6.0,1.0,


In [32]:
# Inspect the full record at row position 487.
ltc_scrape.iloc[487, :]

index                   504                                                                                                                                            
name                    ST. JOSEPH'S HEALTH CARE, LONDON - MOUNT HOPE CENTRE FOR LONG TERM CARE - MARIAN VILLA                                                         
address                 200 College Avenue P.O. Box 5777                                                                                                               
city_and_postal_code    London, N6A1Y1                                                                                                                                 
LHIN                    South West                                                                                                                                     
licensee                St. Joseph's Health Care, London                                                                                                        

In [33]:
# Drop the row for the merged home.
# Filter by name instead of dropping by position (previously ltc_scrape.index[487]):
# a positional drop is not idempotent, so re-running the cell would silently
# remove whichever home had moved into position 487.
MERGED_HOME_NAME = "ST. JOSEPH'S HEALTH CARE, LONDON - MOUNT HOPE CENTRE FOR LONG TERM CARE - MARIAN VILLA"
ltc_scrape = ltc_scrape[ltc_scrape['name'] != MERGED_HOME_NAME]

# Check to see it is gone: only the surviving MOUNT HOPE home should be listed
ltc_scrape[ltc_scrape['name'].str.contains('MOUNT HOPE', regex=False, na=False)]

Unnamed: 0,index,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,number_of_beds,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders
360,373,MOUNT HOPE CENTRE FOR LONG TERM CARE,21 Grosvenor Street P.O. Box 5777,"London, N6A1Y6",South West,"St. Joseph's Health Care, London",,Non-Profit,Home with approximately 177 beds,Yes,Yes,Yes,Yes,,London,N6A1Y6,177,87.0,44.0,15.0,39.0,15.0,1.0,34.0,21.0,11.0,19.0,12.0,4.0


In [34]:
# List the homes whose profile information is missing (LHIN is null)
ltc_scrape[ltc_scrape['LHIN'].isnull()]


Unnamed: 0,index,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,number_of_beds,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders
312,324,LENNOX AND ADDINGTON COUNTY GENERAL HOSPITAL,8 Richmond Park Drive,"Napanee, K7R2Z4",,,,,,,,,,,Napanee,K7R2Z4,,9.0,7.0,2.0,,,,2.0,2.0,,,,
491,508,ST. JOSEPH'S MOTHER HOUSE (MARTHA WING),574 Northcliffe Avenue,"Dundas, L9H7L9",,,,,,,,,,,Dundas,L9H7L9,,,,,,,,,,,,,


### The following homes are missing all profile information:
- LENNOX AND ADDINGTON COUNTY GENERAL HOSPITAL
    - Lennox and Addington County General Hospital (row 312): missing all profile information (e.g., LHIN, accreditation, etc.); 22 beds, convalescent (90 days) and resident long-term care at a hospital; last inspection 2019; publicly funded; LHIN South East; not in the quality database. See https://www.southeasthealthline.ca/displayService.aspx?id=151718
- ST. JOSEPH'S MOTHER HOUSE (MARTHA WING)
    - no inspections data

In [35]:
# Remove the two homes that have no profile information, then confirm that
# no rows with a missing LHIN remain (the result should be an empty frame)
ltc_scrape = ltc_scrape[
    ~ltc_scrape['name'].isin([
        'LENNOX AND ADDINGTON COUNTY GENERAL HOSPITAL',
        "ST. JOSEPH'S MOTHER HOUSE (MARTHA WING)",
    ])
]
ltc_scrape[ltc_scrape['LHIN'].isnull()]

Unnamed: 0,index,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,number_of_beds,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders


## Review homes without any inspections since January 2018


In [36]:
# List the homes whose 2y_inspections value is missing
ltc_scrape[ltc_scrape['2y_inspections'].isnull()]

Unnamed: 0,index,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,number_of_beds,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders
297,308,LADY ISABELLE NURSING HOME,102 Corkery Street P.O. Box 10,"Trout Creek, P0H2L0",North East,Lady Isabelle Nursing Home Ltd.,Responsive Health Management Inc.,For-Profit,Home with approximately 66 beds,Yes,Yes,Yes,No,,Trout Creek,P0H2L0,66,65.0,30.0,,15.0,6.0,,13.0,6.0,,36.0,18.0,
321,333,MALDEN PARK CONTINUING CARE CENTRE,1453 Prince Road,"Windsor, N9C3Z4",Erie St. Clair,Windsor Regional Hospital,,,Home with approximately 145 beds,Yes,Yes,Yes,Yes,,Windsor,N9C3Z4,145,2.0,,,2.0,,,,,,,,
397,411,PEOPLE CARE CENTRE,198 Mornington Street,"Stratford, N5A5G3",South West,Peoplecare Stratford Inc.,,For-Profit,Home with approximately 60 beds,Yes,Yes,No,Yes,,Stratford,N5A5G3,60,7.0,,,3.0,,,,,,1.0,,


### The following homes have no inspections data since Jan 2018:
1. LADY ISABELLE NURSING HOME 
    - Missing from ODHF and missing 2y_inspections. According to a web search, this home was closed by the ministry in 2014; as of April 2020 there are plans to reopen/redevelop an LTC home here. Will remove from this analysis. See http://www.ladyisabelle.ca/Contact_Information.html
2. MALDEN PARK CONTINUING CARE CENTRE
    - seems to be closed, last inspection 2010, telephone disconnected
3. PEOPLE CARE CENTRE Stratford
    - Listed in ODHF but without geodata; closed after a flood in 2015 per web search; telephone number does not work; missing 5y_inspections. See https://www.peoplecare.ca/

In [37]:
# Drop rows with no inspections since 2018.
# Reassign instead of using inplace=True: ltc_scrape was produced by boolean
# filtering in earlier cells, and mutating such a frame in place can raise
# SettingWithCopyWarning. Reassignment keeps the same rows without that risk.
ltc_scrape = ltc_scrape.dropna(subset=['2y_inspections'])

# Confirm that no rows with a missing 2y_inspections value remain (expect an empty frame)
ltc_scrape.loc[ltc_scrape['2y_inspections'].isna()]

Unnamed: 0,index,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,number_of_beds,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders


## Export final dataframe with LTC profile and inspections info

In [38]:
# Final check before export: column dtypes and non-null counts, then the first five rows
ltc_scrape.info()
ltc_scrape.head(5)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 625 entries, 0 to 630
Data columns (total 29 columns):
index                   625 non-null int64
name                    625 non-null object
address                 625 non-null object
city_and_postal_code    625 non-null object
LHIN                    625 non-null object
licensee                625 non-null object
management              71 non-null object
home_type               624 non-null object
beds                    625 non-null object
short_stay              625 non-null object
residents_council       625 non-null object
family_council          625 non-null object
accreditation           625 non-null object
additional_info         4 non-null object
city                    625 non-null object
postal_code             625 non-null object
number_of_beds          625 non-null object
total_inspections       625 non-null float64
5y_inspections          625 non-null float64
2y_inspections          625 non-null float64
total_complaints

Unnamed: 0,index,name,address,city_and_postal_code,LHIN,licensee,management,home_type,beds,short_stay,residents_council,family_council,accreditation,additional_info,city,postal_code,number_of_beds,total_inspections,5y_inspections,2y_inspections,total_complaints,5y_complaints,2y_complaints,total_critical,5y_critical,2y_critical,total_withOrders,5y_withOrders,2y_withOrders
0,0,AFTON PARK PLACE LONG TERM CARE COMMUNITY,1200 Afton Drive,"Sarnia, N7S6L6",Erie St. Clair,S & R Nursing Homes Ltd.,,For-Profit,Home with approximately 128 beds,No,Yes,Yes,Yes,,Sarnia,N7S6L6,128,67.0,34.0,9.0,29.0,15.0,3.0,28.0,13.0,5.0,5.0,2.0,
1,1,"ALBRIGHT GARDENS HOMES, INCORPORATED",5050 Hillside Drive,"Beamsville, L0R1B2",Hamilton Niagara Haldimand Brant (Hnhb),"Albright Gardens Homes, Incorporated",,Non-Profit,Home with approximately 231 beds,No,Yes,Yes,No,,Beamsville,L0R1B2,231,39.0,25.0,15.0,18.0,10.0,6.0,12.0,7.0,6.0,7.0,7.0,3.0
2,2,ALEXANDER PLACE,329 Parkside Drive P. O. Box 50,"Waterdown, L0R2H0",Hamilton Niagara Haldimand Brant (Hnhb),Waterdown Long Term Care Centre Inc.,,For-Profit,Home with approximately 128 beds,Yes,Yes,Yes,Yes,,Waterdown,L0R2H0,128,28.0,17.0,8.0,11.0,6.0,3.0,8.0,5.0,2.0,4.0,4.0,2.0
3,4,ALGOMA MANOR NURSING HOME,145 Dawson Street,"Thessalon, P0R1L0",North East,Algoma Manor Nursing Home,,,Home with approximately 96 beds,Yes,Yes,Yes,Yes,,Thessalon,P0R1L0,96,23.0,14.0,7.0,7.0,5.0,2.0,3.0,2.0,1.0,6.0,3.0,1.0
4,5,ALGONQUIN NURSING HOME,207 Turcotte Park Road P.O. Box 270,"Mattawa, P0H1V0",North East,Algonquin Nursing Home Of Mattawa,Hôpital De Mattawa Hospital,Non-Profit,Home with approximately 73 beds,Yes,Yes,Yes,Yes,,Mattawa,P0H1V0,73,23.0,14.0,5.0,3.0,1.0,,11.0,7.0,4.0,5.0,3.0,


In [39]:
# Write the final dataframe to CSV, leaving out the dataframe's row index
ltc_scrape.to_csv('webscrape_LTC_general_database.csv', index=False)

In [40]:
# # Test scripts by scraping a single ltc site

# # scrape profile data
# soup = requests.get(
# "http://publicreporting.ltchomes.net/en-ca/homeprofile.aspx?Home=2872") 
# soup = BeautifulSoup(soup.text)

# address = soup.find("div", {"id":"ctl00_ContentPlaceHolder1_divHomeAddress"}).getText()
# city = soup.find("div", {"id":"ctl00_ContentPlaceHolder1_divHomeCity"}).getText()
# profiles = soup.find("div", {"id":"ctl00_ContentPlaceHolder1_divHomeProfile_item_Col1"})
# profile_data = profiles.find_all("div", {"class":"Profilerow_col2"})
# profile_data
# LHIN = profile_data[0].getText()
# LHIN

# # scrape inspections data
# soup = requests.get(
# "http://publicreporting.ltchomes.net/en-ca/homeprofile.aspx?Home=2872&tab=1") # add "&tab=1" to url
# soup = BeautifulSoup(soup.text)

# inspections = soup.find("div", {"id":"ctl00_ContentPlaceHolder1_divHomeProfile_item_Col3"})
# inspection_types = inspections.find_all("div", {"class":"divInspectionTypeDataCol"})
# inspection_dates = inspections.find_all("div", {"class":"divInspectionDateDataCol"})

# print(len(inspection_types))
# print(len(inspection_dates))

In [41]:
# a = []
# for each in inspection_types:
#     a.append(each.getText())
# a

In [None]:
# NOTE(review): df_all is not defined in the part of this notebook visible here.
# Confirm where it comes from; if it is not defined earlier, this cell would
# raise NameError under Restart & Run All.
df_all.to_csv('ltc_covid_odhf_qual_FOR_STATS.csv', index=False)