# Data Cleaning & Wrangling
## Location Data

In [61]:
import pandas as pd

# Read the raw toilet map export (CSV sitting next to the notebook) into a DataFrame
raw_export_path = "toiletmapexport_250301_074429.csv"
toilet_df = pd.read_csv(raw_export_path)

# Preview the first five rows
toilet_df.head()

Unnamed: 0,FacilityID,URL,Name,FacilityType,Address1,Town,State,AddressNote,Latitude,Longitude,...,Ambulant,Accessible,LHTransfer,RHTransfer,ToiletNote,SharpsDisposal,DrinkingWater,SanitaryDisposal,MensPadDisposal,Shower
0,1,https://toiletmap.gov.au/facility/1,Sandy Beach Reserve,Park or reserve,"Lost 15525, West Road",Bassendean,WA,,-31.921836,115.950206,...,False,True,False,False,,True,True,True,False,False
1,2,https://toiletmap.gov.au/facility/2,Point Reserve,Park or reserve,"Lot 197, North Road",Bassendean,WA,,-31.90441,115.960991,...,False,True,False,False,,True,True,True,False,False
2,3,https://toiletmap.gov.au/facility/3,Success Hill Reserve,Park or reserve,"Lot 2838, Seventh Avenue",Bassendean,WA,,-31.896289,115.955781,...,False,True,False,False,,True,True,True,False,True
3,4,https://toiletmap.gov.au/facility/4,Jubilee Reserve,Park or reserve,"Lot 6322, Robinson Road",Eden Hill,WA,,-31.891474,115.940164,...,False,False,False,False,,True,False,True,False,False
4,5,https://toiletmap.gov.au/facility/5,Ashfield Reserve,Park or reserve,"Lot 12061, 2 Coulston Road",Ashfield,WA,,-31.913433,115.936477,...,False,True,False,False,,True,True,True,False,False


In [62]:
# Show every column name in the loaded export (the head() preview truncates them with "...")
toilet_df.columns

Index(['FacilityID', 'URL', 'Name', 'FacilityType', 'Address1', 'Town',
       'State', 'AddressNote', 'Latitude', 'Longitude', 'Parking',
       'ParkingAccessible', 'ParkingNote', 'KeyRequired', 'MLAK24',
       'MLAKAfterHours', 'PaymentRequired', 'AccessNote', 'AdultChange',
       'ChangingPlaces', 'BYOSling', 'ACShower', 'ACMLAK', 'AdultChangeNote',
       'BabyChange', 'BabyCareRoom', 'BabyChangeNote', 'DumpPoint',
       'DPWashout', 'DPAfterHours', 'DumpPointNote', 'OpeningHours',
       'OpeningHoursNote', 'Male', 'Female', 'Unisex', 'AllGender', 'Ambulant',
       'Accessible', 'LHTransfer', 'RHTransfer', 'ToiletNote',
       'SharpsDisposal', 'DrinkingWater', 'SanitaryDisposal',
       'MensPadDisposal', 'Shower'],
      dtype='object')

In [63]:
# Extracting needed columns.
# .copy() makes the result an independent frame: later cells assign new values
# into its columns, and assigning into a plain column selection can raise
# SettingWithCopyWarning.
needed_cols = ['FacilityID', 'Name', 'FacilityType', 'Address1', 'Town', 'State',
               'Latitude', 'Longitude', 'ParkingAccessible', 'Male', 'Female',
               'Unisex', 'AllGender', 'Accessible']
toilet_df = toilet_df[needed_cols].copy()



In [64]:
# Rounding latitude and longitude to 6 decimal places.
# DataFrame.round with a dict rounds only the listed columns and returns a new
# frame, so nothing is assigned into the column selection made earlier
# (avoids SettingWithCopyWarning); all other columns are left as they are.
toilet_df = toilet_df.round({'Latitude': 6, 'Longitude': 6})

toilet_df.head(5)

Unnamed: 0,FacilityID,Name,FacilityType,Address1,Town,State,Latitude,Longitude,ParkingAccessible,Male,Female,Unisex,AllGender,Accessible
0,1,Sandy Beach Reserve,Park or reserve,"Lost 15525, West Road",Bassendean,WA,-31.921836,115.950206,False,False,False,True,False,True
1,2,Point Reserve,Park or reserve,"Lot 197, North Road",Bassendean,WA,-31.90441,115.960991,False,False,False,True,False,True
2,3,Success Hill Reserve,Park or reserve,"Lot 2838, Seventh Avenue",Bassendean,WA,-31.896289,115.955781,True,True,True,False,False,True
3,4,Jubilee Reserve,Park or reserve,"Lot 6322, Robinson Road",Eden Hill,WA,-31.891474,115.940164,False,True,True,False,False,False
4,5,Ashfield Reserve,Park or reserve,"Lot 12061, 2 Coulston Road",Ashfield,WA,-31.913433,115.936477,True,False,False,True,False,True


In [65]:
# Keep only the rows whose State is exactly 'VIC'
is_vic = toilet_df['State'].eq('VIC')
toilet_df = toilet_df.loc[is_vic]

toilet_df.head()

Unnamed: 0,FacilityID,Name,FacilityType,Address1,Town,State,Latitude,Longitude,ParkingAccessible,Male,Female,Unisex,AllGender,Accessible
181,252,Lloyd Street,Other,Lloyd Street,Dimboola,VIC,-36.454567,142.026468,False,True,True,False,False,True
182,253,Lloyd Street,Bus station,Lloyd Street,Dimboola,VIC,-36.454121,142.026374,False,True,True,False,False,True
183,254,Charles Street,Other,Charles Street,Jeparit,VIC,-36.142494,141.988242,False,True,True,False,False,True
184,255,Roy Street,Other,Roy Street,Jeparit,VIC,-36.143049,141.987369,False,True,True,False,False,False
185,256,Western Highway,Bus station,24 Victoria Street,Nhill,VIC,-36.333924,141.649202,False,True,True,False,False,True


In [66]:
# Upper-case every string cell (non-string cells pass through untouched),
# then renumber the rows from 0 and discard the old row labels
toilet_df = (
    toilet_df
    .map(lambda cell: cell.upper() if isinstance(cell, str) else cell)
    .reset_index(drop=True)
)

toilet_df.head()

Unnamed: 0,FacilityID,Name,FacilityType,Address1,Town,State,Latitude,Longitude,ParkingAccessible,Male,Female,Unisex,AllGender,Accessible
0,252,LLOYD STREET,OTHER,LLOYD STREET,DIMBOOLA,VIC,-36.454567,142.026468,False,True,True,False,False,True
1,253,LLOYD STREET,BUS STATION,LLOYD STREET,DIMBOOLA,VIC,-36.454121,142.026374,False,True,True,False,False,True
2,254,CHARLES STREET,OTHER,CHARLES STREET,JEPARIT,VIC,-36.142494,141.988242,False,True,True,False,False,True
3,255,ROY STREET,OTHER,ROY STREET,JEPARIT,VIC,-36.143049,141.987369,False,True,True,False,False,False
4,256,WESTERN HIGHWAY,BUS STATION,24 VICTORIA STREET,NHILL,VIC,-36.333924,141.649202,False,True,True,False,False,True


In [67]:
# Show rows that repeat an earlier row's name, facility type and coordinates
# (the first occurrence of each combination is not flagged)
duplicate_key = ['Name', 'FacilityType', 'Latitude', 'Longitude']
toilet_df.loc[toilet_df.duplicated(subset=duplicate_key)]

Unnamed: 0,FacilityID,Name,FacilityType,Address1,Town,State,Latitude,Longitude,ParkingAccessible,Male,Female,Unisex,AllGender,Accessible


In [68]:
# List the distinct FacilityType values, in order of first appearance
toilet_df['FacilityType'].unique()

array(['OTHER', 'BUS STATION', 'PARK OR RESERVE', 'SPORTING FACILITY',
       'CAR PARK', 'COMMUNITY BUILDING', 'BEACH', 'SHOPPING CENTRE',
       'JETTY', 'CAMPING GROUND', 'AIRPORT', 'REST AREA', 'SWIMMING POOL',
       'CARAVAN PARK', 'TRAIN STATION', 'FOOD OUTLET', 'CEMETERY',
       'SERVICE STATION'], dtype=object)

In [69]:
# Filtering out food outlets (rows whose FacilityType is 'FOOD OUTLET').
# Row labels are not renumbered after this filter.
not_food_outlet = toilet_df['FacilityType'].ne('FOOD OUTLET')

# Renaming columns to the names used by the final table
column_renames = {
    'FacilityID': 'Location_ID',
    'Address1': 'Address',
    'Town': 'Suburb',
    'Latitude': 'Location_Lat',
    'Longitude': 'Location_Lon',
}
toilet_df = toilet_df.loc[not_food_outlet].rename(columns=column_renames)

toilet_df.head()

Unnamed: 0,Location_ID,Name,FacilityType,Address,Suburb,State,Location_Lat,Location_Lon,ParkingAccessible,Male,Female,Unisex,AllGender,Accessible
0,252,LLOYD STREET,OTHER,LLOYD STREET,DIMBOOLA,VIC,-36.454567,142.026468,False,True,True,False,False,True
1,253,LLOYD STREET,BUS STATION,LLOYD STREET,DIMBOOLA,VIC,-36.454121,142.026374,False,True,True,False,False,True
2,254,CHARLES STREET,OTHER,CHARLES STREET,JEPARIT,VIC,-36.142494,141.988242,False,True,True,False,False,True
3,255,ROY STREET,OTHER,ROY STREET,JEPARIT,VIC,-36.143049,141.987369,False,True,True,False,False,False
4,256,WESTERN HIGHWAY,BUS STATION,24 VICTORIA STREET,NHILL,VIC,-36.333924,141.649202,False,True,True,False,False,True


In [70]:
# Standardising boolean values to 'Yes' / 'No'.
# The flag columns are named explicitly rather than taken by position
# (columns[-6:]), so adding or reordering columns upstream cannot silently
# convert the wrong ones.
bool_cols = ['ParkingAccessible', 'Male', 'Female', 'Unisex', 'AllGender', 'Accessible']

# 'Yes'/'No' map to themselves so re-running this cell is harmless; without
# those keys a second run would turn every value into NaN. Any value not in
# this dict (including missing values) still becomes NaN.
yes_no = {True: 'Yes', False: 'No', 'Yes': 'Yes', 'No': 'No'}

toilet_df = toilet_df.assign(**{col: toilet_df[col].map(yes_no) for col in bool_cols})

toilet_df.head(5)

Unnamed: 0,Location_ID,Name,FacilityType,Address,Suburb,State,Location_Lat,Location_Lon,ParkingAccessible,Male,Female,Unisex,AllGender,Accessible
0,252,LLOYD STREET,OTHER,LLOYD STREET,DIMBOOLA,VIC,-36.454567,142.026468,No,Yes,Yes,No,No,Yes
1,253,LLOYD STREET,BUS STATION,LLOYD STREET,DIMBOOLA,VIC,-36.454121,142.026374,No,Yes,Yes,No,No,Yes
2,254,CHARLES STREET,OTHER,CHARLES STREET,JEPARIT,VIC,-36.142494,141.988242,No,Yes,Yes,No,No,Yes
3,255,ROY STREET,OTHER,ROY STREET,JEPARIT,VIC,-36.143049,141.987369,No,Yes,Yes,No,No,No
4,256,WESTERN HIGHWAY,BUS STATION,24 VICTORIA STREET,NHILL,VIC,-36.333924,141.649202,No,Yes,Yes,No,No,Yes


# Creating Final Table Structure

In [71]:
# Work on an independent copy: a plain `toilet_data = toilet_df` only creates a
# second name for the same object, so every column added below would also
# appear on toilet_df.
toilet_data = toilet_df.copy()

# Creating a column with dictionary of metadata.
# to_dict(orient='records') returns one {column: value} dict per row, in row
# order, without the overhead of a row-wise apply.
metadata_cols = ['Name', 'FacilityType', 'Address', 'Suburb', 'State']
toilet_data['Metadata'] = toilet_data[metadata_cols].to_dict(orient='records')

toilet_data.head(5)

Unnamed: 0,Location_ID,Name,FacilityType,Address,Suburb,State,Location_Lat,Location_Lon,ParkingAccessible,Male,Female,Unisex,AllGender,Accessible,Metadata
0,252,LLOYD STREET,OTHER,LLOYD STREET,DIMBOOLA,VIC,-36.454567,142.026468,No,Yes,Yes,No,No,Yes,"{'Name': 'LLOYD STREET', 'FacilityType': 'OTHE..."
1,253,LLOYD STREET,BUS STATION,LLOYD STREET,DIMBOOLA,VIC,-36.454121,142.026374,No,Yes,Yes,No,No,Yes,"{'Name': 'LLOYD STREET', 'FacilityType': 'BUS ..."
2,254,CHARLES STREET,OTHER,CHARLES STREET,JEPARIT,VIC,-36.142494,141.988242,No,Yes,Yes,No,No,Yes,"{'Name': 'CHARLES STREET', 'FacilityType': 'OT..."
3,255,ROY STREET,OTHER,ROY STREET,JEPARIT,VIC,-36.143049,141.987369,No,Yes,Yes,No,No,No,"{'Name': 'ROY STREET', 'FacilityType': 'OTHER'..."
4,256,WESTERN HIGHWAY,BUS STATION,24 VICTORIA STREET,NHILL,VIC,-36.333924,141.649202,No,Yes,Yes,No,No,Yes,"{'Name': 'WESTERN HIGHWAY', 'FacilityType': 'B..."


In [72]:
# Creating a column with dictionary of tags.
# to_dict(orient='records') returns one {column: value} dict per row, in row
# order, replacing the slower row-wise apply(axis=1).
tags_cols = ['ParkingAccessible', 'Male', 'Female', 'Unisex', 'AllGender', 'Accessible']
toilet_data['Tags'] = toilet_data[tags_cols].to_dict(orient='records')

# Adding a column for type of accessibility feature (same constant on every row).
# NOTE(review): the column name is spelled 'Accesibility' (single 's'); it is
# kept as-is because other cells select the column by this exact name.
toilet_data['Accesibility_Type_Name'] = 'toilets'

toilet_data.head(5)

Unnamed: 0,Location_ID,Name,FacilityType,Address,Suburb,State,Location_Lat,Location_Lon,ParkingAccessible,Male,Female,Unisex,AllGender,Accessible,Metadata,Tags,Accesibility_Type_Name
0,252,LLOYD STREET,OTHER,LLOYD STREET,DIMBOOLA,VIC,-36.454567,142.026468,No,Yes,Yes,No,No,Yes,"{'Name': 'LLOYD STREET', 'FacilityType': 'OTHE...","{'ParkingAccessible': 'No', 'Male': 'Yes', 'Fe...",toilets
1,253,LLOYD STREET,BUS STATION,LLOYD STREET,DIMBOOLA,VIC,-36.454121,142.026374,No,Yes,Yes,No,No,Yes,"{'Name': 'LLOYD STREET', 'FacilityType': 'BUS ...","{'ParkingAccessible': 'No', 'Male': 'Yes', 'Fe...",toilets
2,254,CHARLES STREET,OTHER,CHARLES STREET,JEPARIT,VIC,-36.142494,141.988242,No,Yes,Yes,No,No,Yes,"{'Name': 'CHARLES STREET', 'FacilityType': 'OT...","{'ParkingAccessible': 'No', 'Male': 'Yes', 'Fe...",toilets
3,255,ROY STREET,OTHER,ROY STREET,JEPARIT,VIC,-36.143049,141.987369,No,Yes,Yes,No,No,No,"{'Name': 'ROY STREET', 'FacilityType': 'OTHER'...","{'ParkingAccessible': 'No', 'Male': 'Yes', 'Fe...",toilets
4,256,WESTERN HIGHWAY,BUS STATION,24 VICTORIA STREET,NHILL,VIC,-36.333924,141.649202,No,Yes,Yes,No,No,Yes,"{'Name': 'WESTERN HIGHWAY', 'FacilityType': 'B...","{'ParkingAccessible': 'No', 'Male': 'Yes', 'Fe...",toilets


In [73]:
# Final table: identifier, coordinates, feature type and the two dict columns,
# in that order; every other column is dropped
final_cols = [
    'Location_ID',
    'Location_Lat',
    'Location_Lon',
    'Accesibility_Type_Name',
    'Metadata',
    'Tags',
]
toilet_data = toilet_data.loc[:, final_cols]

toilet_data.head()

Unnamed: 0,Location_ID,Location_Lat,Location_Lon,Accesibility_Type_Name,Metadata,Tags
0,252,-36.454567,142.026468,toilets,"{'Name': 'LLOYD STREET', 'FacilityType': 'OTHE...","{'ParkingAccessible': 'No', 'Male': 'Yes', 'Fe..."
1,253,-36.454121,142.026374,toilets,"{'Name': 'LLOYD STREET', 'FacilityType': 'BUS ...","{'ParkingAccessible': 'No', 'Male': 'Yes', 'Fe..."
2,254,-36.142494,141.988242,toilets,"{'Name': 'CHARLES STREET', 'FacilityType': 'OT...","{'ParkingAccessible': 'No', 'Male': 'Yes', 'Fe..."
3,255,-36.143049,141.987369,toilets,"{'Name': 'ROY STREET', 'FacilityType': 'OTHER'...","{'ParkingAccessible': 'No', 'Male': 'Yes', 'Fe..."
4,256,-36.333924,141.649202,toilets,"{'Name': 'WESTERN HIGHWAY', 'FacilityType': 'B...","{'ParkingAccessible': 'No', 'Male': 'Yes', 'Fe..."


In [75]:
# Exporting data.
# index=False keeps the DataFrame's row labels out of the file: they are
# left-over positions from before the FOOD OUTLET filter, and would otherwise
# be written as an unnamed first column. Location_ID is already in the table.
toilet_data.to_csv("toilet_CSV.csv", index=False)