In [2]:
import pandas as pd
import numpy as np
import re

In [24]:
# Columns of the raw export that are not carried into the cleaned table.
omit_col = [
    'ALT_NAME', 'PREVIOUS_NAME', 'LOCAL_NAME',
    'COMMGOVT_SUBSIDISED',
    'BUS_UNIT_NO', 'BUS_ST_ADDRESS', 'BUS_SUBURB', 'BUS_PCODE', 'BUS_STATE',
    'MAIN_EMAIL_ADDR', 'SERVICE_TYPE', 'ABN', 'APPROVED_PROVIDER',
    'ACCREDITATION', 'ACCREDITATION_PERIOD', 'CERTIFICATION',
    'NOTICE_OF_SANCTION', 'MAIN_FAX_NUM', 'NOTICE_OF_NON_COMPLIANCE',
    'AVAILABILITY',
    'ROOM_TITLE', 'ROOM_NAME', 'ROOM_TYPE', 'MAX_ROOM_OCCUPANCY',
    'NUM_OF_ROOM_TYPES', 'MAX_RAD', 'MAX_DAP',
    'PAYMENT_OPTIONS', 'PAYMENT_EXAMPLE',
    'ROOM_DESC', 'ROOM_SIZE', 'COMMON_AREA_DESC',
    'ADDITIONAL_CARE_INCL', 'ADDITIONAL_CARE_COST',
    'EXTRA_SERVICE', 'EXTRA_SERVICE_FEE_AMOUNT',
    'SPECIFIC_FEATURES', 'STREET_UNIT_NO', 'DESCRIPTION',
]

# Read the raw listing, discard the unwanted columns, then collapse rows that
# are identical across every remaining column (original row labels are kept).
rdf = (
    pd.read_csv("./data/Aged-Care-Homes-June-2018.csv")
    .drop(columns=omit_col)
    .drop_duplicates()
)

rdf.head()

Unnamed: 0,SERVICE_NAME,STREET_ST_ADDRESS,STREET_SUBURB,STREET_PCODE,STREET_STATE,MAIN_PH_NUM,WEBSITE,PARTICULAR_NEED_SERVICES
0,Juniper Cygnet,4-10 HAYMAN Road,BENTLEY,6102.0,WA,611300313000,www.juniper.org.au,Focus on socially and financially disadvantage...
3,Blue Care Labrador Aged Care Facility,83 MUIR Street,LABRADOR,4215.0,QLD,611800838929,http://www.bluecare.org.au,Focus on socially and financially disadvantage...
5,RSL Care Bolton Point - Macquarie Shores,12 The Ridgeway,BOLTON POINT,2283.0,NSW,610249503933,http://www.rslcare.com.au/bolton-point,
6,RSL Care Bolton Point - Macquarie Shores,12 The Ridgeway,BOLTON POINT,2283.0,NSW,610249503933,http://www.rslcare.com.au/bolton-point,"Caters for cultural, spiritual or ethical food..."
9,Ark Health Care Bankstown,1A HIXSON Street,BANKSTOWN,2200.0,NSW,610297919609,http://www.arkhc.com,Focus on socially and financially disadvantage...


In [26]:
def special_serv():
    """Expand ``PARTICULAR_NEED_SERVICES`` into one boolean column per need.

    Works on the module-level ``rdf`` frame in place. For each flag listed
    below a new column is added that is True on rows whose services text
    contains the associated phrase (case-sensitive regex search) and False
    otherwise; entries that are not text, such as missing values, are False.
    The ``PARTICULAR_NEED_SERVICES`` column is dropped once all flags exist.
    """
    # (new column name, phrase searched for in the services text)
    flag_terms = (
        ("sfd", "financially disadvantaged"),
        ("dem", "dementia"),
        ("atsi", "ATSI"),
        ("terminal", "terminal illness"),
        ("veteran", "veterans"),
        ("cald", "CALD backgrounds"),
        ("lgbti", "LGBTI"),
    )
    services = list(rdf["PARTICULAR_NEED_SERVICES"])
    for flag, term in flag_terms:
        matcher = re.compile(term)
        # Values are assigned by row position, so the list must stay in the
        # same order as the frame's rows.
        rdf[flag] = [
            isinstance(text, str) and matcher.search(text) is not None
            for text in services
        ]
    rdf.drop("PARTICULAR_NEED_SERVICES", axis=1, inplace=True)
# Add the special-needs flag columns to rdf (dropping the source text column),
# then display the frame's dimensions.
special_serv()
rdf.shape

(4576, 14)

In [27]:
def remove_illegals(desc):
    """Delete a fixed set of non-ASCII characters from ``desc``.

    The characters removed are the curly single quote, the two curly double
    quotes, the dash character and the accented "e" listed in the pattern.
    Returns the cleaned text, or an empty string when ``re.sub`` rejects
    ``desc`` with a TypeError (for instance a NaN float or None).
    """
    try:
        cleaned = re.sub(r'\’|\é|\–|\”|\“', '', desc)
    except TypeError:
        # Non-text input collapses to an empty string.
        cleaned = ""
    return cleaned

    
def remove_newline(desc):
    """Replace every run of newline characters in ``desc`` with one space.

    Returns the flattened text, or an empty string when ``re.sub`` rejects
    ``desc`` with a TypeError (for instance a NaN float or None).
    """
    try:
        flattened = re.sub(r'\n+', ' ', desc)
    except TypeError:
        # Non-text input collapses to an empty string.
        flattened = ""
    return flattened

# DESCRIPTION is among the columns dropped when the data is loaded, so the
# text clean-up helpers are not applied. To use them, keep DESCRIPTION and run:
#rdf["DESCRIPTION"] = rdf["DESCRIPTION"].apply(remove_illegals)
#rdf["DESCRIPTION"] = rdf["DESCRIPTION"].apply(remove_newline)

# Keep only the first row seen for each service name, renumber the rows from
# zero, and label the resulting index "id".
rdf = (
    rdf.drop_duplicates(subset=["SERVICE_NAME"])
    .reset_index(drop=True)
    .rename_axis("id")
)
rdf.shape


(2863, 14)

In [28]:
# Assemble a one-line postal address from its parts. The postcode is
# stringified element-wise first; in the displayed data it is a float such as
# 6102.0, so the text still carries a ".0" at this point.
# NOTE(review): nothing is inserted between the street address and the suburb.
# The displayed results do show a space there, so the street values presumably
# end with one -- confirm against the raw data before adding a separator.
rdf["ADDRESS"] = (
    rdf["STREET_ST_ADDRESS"]
    + rdf["STREET_SUBURB"]
    + " "
    + rdf["STREET_PCODE"].map(str)
    + " "
    + rdf["STREET_STATE"]
)

def remove_decimal(desc):
    """Strip every "." that is followed by digits, digits included.

    For example ``"6102.0"`` becomes ``"6102"``. A dot not followed by a digit
    is left alone. Returns the resulting text, or an empty string when
    ``re.sub`` rejects ``desc`` with a TypeError (for instance a NaN float).
    """
    try:
        stripped = re.sub(r'\.\d+', '', desc)
    except TypeError:
        # Non-text input collapses to an empty string.
        stripped = ""
    return stripped
# Remove every ".<digits>" run from each address (e.g. the ".0" contributed by
# the float postcode); addresses that are not text become empty strings.
rdf["ADDRESS"] = rdf["ADDRESS"].apply(remove_decimal)

# Preview the first rows of the finished table.
rdf.head()

Unnamed: 0_level_0,SERVICE_NAME,STREET_ST_ADDRESS,STREET_SUBURB,STREET_PCODE,STREET_STATE,MAIN_PH_NUM,WEBSITE,sfd,dem,atsi,terminal,veteran,cald,lgbti,ADDRESS
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
0,Juniper Cygnet,4-10 HAYMAN Road,BENTLEY,6102.0,WA,611300313000,www.juniper.org.au,True,True,False,False,False,False,False,4-10 HAYMAN Road BENTLEY 6102 WA
1,Blue Care Labrador Aged Care Facility,83 MUIR Street,LABRADOR,4215.0,QLD,611800838929,http://www.bluecare.org.au,True,True,False,True,False,False,False,83 MUIR Street LABRADOR 4215 QLD
2,RSL Care Bolton Point - Macquarie Shores,12 The Ridgeway,BOLTON POINT,2283.0,NSW,610249503933,http://www.rslcare.com.au/bolton-point,False,False,False,False,False,False,False,12 The Ridgeway BOLTON POINT 2283 NSW
3,Ark Health Care Bankstown,1A HIXSON Street,BANKSTOWN,2200.0,NSW,610297919609,http://www.arkhc.com,True,False,False,True,False,False,False,1A HIXSON Street BANKSTOWN 2200 NSW
4,Regis Kirwan,1 Emerald Street,KIRWAN,4817.0,QLD,611300998100,http://www.regis.com.au/residence/regis-kirwan/,False,False,False,False,False,False,False,1 Emerald Street KIRWAN 4817 QLD


In [29]:
# Persist the cleaned frame as CSV, writing missing values as the text "NaN".
rdf.to_csv("./data/clean/residential_basic.csv", na_rep="NaN")