# Data Cleaning & Wrangling
## Building Data

In [1]:
import pandas as pd

# Load the raw building records from the CSV export that sits next to this notebook.
BUILDINGS_CSV = "buildings-with-name-age-size-accessibility-and-bicycle-facilities.csv"
building_df = pd.read_csv(BUILDINGS_CSV)

# Preview the first five rows (head() defaults to 5).
building_df.head()

Unnamed: 0,census_year,block_id,property_id,base_property_id,building_name,street_address,clue_small_area,construction_year,refurbished_year,number_of_floors_above_ground,predominant_space_use,accessibility_type,accessibility_type_description,accessibility_rating,bicycle_spaces,has_showers,longitude,latitude,location
0,2006,324,110904,110904,,374-376 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,,,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,,,144.944839,-37.796874,"-37.79687387022628, 144.94483900952014"
1,2006,324,110905,110905,,13-15 Donovans Lane NORTH MELBOURNE 3051,North Melbourne,,,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,,,144.945039,-37.796889,"-37.796889414464786, 144.94503936105005"
2,2006,325,102911,102911,,332-344 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,,,3.0,Residential Apartment,High level of accessibility,Main Entrance is at grade and has no steps or ...,3.0,,,144.944837,-37.797965,"-37.797965401850504, 144.94483704823057"
3,2006,326,100109,100108,,388-396 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,,,3.0,Residential Apartment,High level of accessibility,Main Entrance has ramp,3.0,,,144.946825,-37.797481,"-37.79748138461932, 144.94682467313123"
4,2006,326,100110,100108,,384 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,,,2.0,Residential Apartment,Moderate level of accessibility,Entrance(s) have limited access via a small li...,2.0,,,144.946825,-37.797481,"-37.79748138461932, 144.94682467313123"


In [2]:
# Keep only the columns needed for the final accessibility table.
relevant_columns = [
    'building_name', 'street_address', 'clue_small_area',
    'number_of_floors_above_ground', 'predominant_space_use',
    'accessibility_type', 'accessibility_type_description',
    'accessibility_rating', 'longitude', 'latitude',
]

# .copy() makes the subset an independent frame. Later cells add new columns
# to building_df, and assigning into a bare column selection can raise
# SettingWithCopyWarning.
building_df = building_df[relevant_columns].copy()

building_df.head(5)

Unnamed: 0,building_name,street_address,clue_small_area,number_of_floors_above_ground,predominant_space_use,accessibility_type,accessibility_type_description,accessibility_rating,longitude,latitude
0,,374-376 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,144.944839,-37.796874
1,,13-15 Donovans Lane NORTH MELBOURNE 3051,North Melbourne,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,144.945039,-37.796889
2,,332-344 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,3.0,Residential Apartment,High level of accessibility,Main Entrance is at grade and has no steps or ...,3.0,144.944837,-37.797965
3,,388-396 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,3.0,Residential Apartment,High level of accessibility,Main Entrance has ramp,3.0,144.946825,-37.797481
4,,384 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,2.0,Residential Apartment,Moderate level of accessibility,Entrance(s) have limited access via a small li...,2.0,144.946825,-37.797481


In [3]:
# List every distinct accessibility description (NaN included) present in the data.
building_df.loc[:, 'accessibility_type_description'].unique()

array(['Building is not considered to be publicly accessible so access has not been rated',
       'Main Entrance is at grade and has no steps or ramp',
       'Main Entrance has ramp',
       'Entrance(s) have limited access via a small lip or a steep ramp',
       'All entrances have steps',
       'Configuration of entrance does not fit into any of the other categories',
       'Main entrance has steps; Alternative entrance is step free or has ramp',
       nan, 'Access has not been rated'], dtype=object)

# Creating Final Table Structure

In [4]:
import numpy as np

# Start with every known description (plus missing values) mapped to 'no' ...
ramp_mapping = dict.fromkeys(
    [
        'Building is not considered to be publicly accessible so access has not been rated',
        'Main Entrance is at grade and has no steps or ramp',
        'Main Entrance has ramp',
        'Entrance(s) have limited access via a small lip or a steep ramp',
        'All entrances have steps',
        'Configuration of entrance does not fit into any of the other categories',
        'Main entrance has steps; Alternative entrance is step free or has ramp',
        np.nan,
        'Access has not been rated',
    ],
    'no',
)

# ... then override the descriptions that count as (fully or partly) ramp access.
# NOTE(review): the "at grade ... no steps or ramp" description is mapped to 'yes';
# presumably 'ramp' here stands for step-free entry - confirm.
ramp_mapping.update({
    'Main Entrance is at grade and has no steps or ramp': 'yes',
    'Main Entrance has ramp': 'yes',
    'Entrance(s) have limited access via a small lip or a steep ramp': 'limited',
})

# Translate each description into its 'ramp' flag (yes / limited / no).
description_series = building_df['accessibility_type_description']
building_df['ramp'] = description_series.map(ramp_mapping)

building_df

Unnamed: 0,building_name,street_address,clue_small_area,number_of_floors_above_ground,predominant_space_use,accessibility_type,accessibility_type_description,accessibility_rating,longitude,latitude,ramp
0,,374-376 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,144.944839,-37.796874,no
1,,13-15 Donovans Lane NORTH MELBOURNE 3051,North Melbourne,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,144.945039,-37.796889,no
2,,332-344 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,3.0,Residential Apartment,High level of accessibility,Main Entrance is at grade and has no steps or ...,3.0,144.944837,-37.797965,yes
3,,388-396 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,3.0,Residential Apartment,High level of accessibility,Main Entrance has ramp,3.0,144.946825,-37.797481,yes
4,,384 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,2.0,Residential Apartment,Moderate level of accessibility,Entrance(s) have limited access via a small li...,2.0,144.946825,-37.797481,limited
...,...,...,...,...,...,...,...,...,...,...,...
291458,,2 Percy Street KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926420,-37.791434,no
291459,,21 Gordon Crescent KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926508,-37.791041,no
291460,,17 Gordon Crescent KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926585,-37.791123,no
291461,,15 Gordon Crescent KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926617,-37.791236,no


In [5]:
# Every known description (plus missing values) defaults to 'no' ...
alternate_entrance_mapping = dict.fromkeys(
    [
        'Building is not considered to be publicly accessible so access has not been rated',
        'Main Entrance is at grade and has no steps or ramp',
        'Main Entrance has ramp',
        'Entrance(s) have limited access via a small lip or a steep ramp',
        'All entrances have steps',
        'Configuration of entrance does not fit into any of the other categories',
        'Main entrance has steps; Alternative entrance is step free or has ramp',
        np.nan,
        'Access has not been rated',
    ],
    'no',
)

# ... except the one description that explicitly mentions an alternative entrance.
alternate_entrance_mapping[
    'Main entrance has steps; Alternative entrance is step free or has ramp'
] = 'yes'

# Translate each description into its 'alternate_entrance' flag (yes / no).
description_series = building_df['accessibility_type_description']
building_df['alternate_entrance'] = description_series.map(alternate_entrance_mapping)

building_df


Unnamed: 0,building_name,street_address,clue_small_area,number_of_floors_above_ground,predominant_space_use,accessibility_type,accessibility_type_description,accessibility_rating,longitude,latitude,ramp,alternate_entrance
0,,374-376 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,144.944839,-37.796874,no,no
1,,13-15 Donovans Lane NORTH MELBOURNE 3051,North Melbourne,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,144.945039,-37.796889,no,no
2,,332-344 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,3.0,Residential Apartment,High level of accessibility,Main Entrance is at grade and has no steps or ...,3.0,144.944837,-37.797965,yes,no
3,,388-396 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,3.0,Residential Apartment,High level of accessibility,Main Entrance has ramp,3.0,144.946825,-37.797481,yes,no
4,,384 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,2.0,Residential Apartment,Moderate level of accessibility,Entrance(s) have limited access via a small li...,2.0,144.946825,-37.797481,limited,no
...,...,...,...,...,...,...,...,...,...,...,...,...
291458,,2 Percy Street KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926420,-37.791434,no,no
291459,,21 Gordon Crescent KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926508,-37.791041,no,no
291460,,17 Gordon Crescent KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926585,-37.791123,no,no
291461,,15 Gordon Crescent KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926617,-37.791236,no,no


In [6]:
def _lowercase_text(value):
    """Return ``value`` lower-cased when it is a string; otherwise return it unchanged."""
    if isinstance(value, str):
        return value.lower()
    return value


# Apply the helper element-wise so every text cell is lower case;
# numbers and missing values pass through untouched.
building_df = building_df.map(_lowercase_text)

building_df.head()

Unnamed: 0,building_name,street_address,clue_small_area,number_of_floors_above_ground,predominant_space_use,accessibility_type,accessibility_type_description,accessibility_rating,longitude,latitude,ramp,alternate_entrance
0,,374-376 dryburgh street north melbourne 3051,north melbourne,2.0,house/townhouse,not determined or not applicable,building is not considered to be publicly acce...,0.0,144.944839,-37.796874,no,no
1,,13-15 donovans lane north melbourne 3051,north melbourne,2.0,house/townhouse,not determined or not applicable,building is not considered to be publicly acce...,0.0,144.945039,-37.796889,no,no
2,,332-344 dryburgh street north melbourne 3051,north melbourne,3.0,residential apartment,high level of accessibility,main entrance is at grade and has no steps or ...,3.0,144.944837,-37.797965,yes,no
3,,388-396 abbotsford street north melbourne 3051,north melbourne,3.0,residential apartment,high level of accessibility,main entrance has ramp,3.0,144.946825,-37.797481,yes,no
4,,384 abbotsford street north melbourne 3051,north melbourne,2.0,residential apartment,moderate level of accessibility,entrance(s) have limited access via a small li...,2.0,144.946825,-37.797481,limited,no


In [8]:
# Old column name -> name used in the final table structure.
column_renames = {
    'longitude': 'Location_Lon',
    'latitude': 'Location_Lat',
    'accessibility_type': 'accessibility_cat',
    'clue_small_area': 'suburb',
}

# Rename the columns, then tag every row with its location type in a new
# trailing column.
building_df = (
    building_df
    .rename(columns=column_renames)
    .assign(Accessibility_Type_Name='buildings')
)

building_df.head()

Unnamed: 0,building_name,street_address,suburb,number_of_floors_above_ground,predominant_space_use,accessibility_cat,accessibility_type_description,accessibility_rating,Location_Lon,Location_Lat,ramp,alternate_entrance,Accessibility_Type_Name
0,,374-376 dryburgh street north melbourne 3051,north melbourne,2.0,house/townhouse,not determined or not applicable,building is not considered to be publicly acce...,0.0,144.944839,-37.796874,no,no,buildings
1,,13-15 donovans lane north melbourne 3051,north melbourne,2.0,house/townhouse,not determined or not applicable,building is not considered to be publicly acce...,0.0,144.945039,-37.796889,no,no,buildings
2,,332-344 dryburgh street north melbourne 3051,north melbourne,3.0,residential apartment,high level of accessibility,main entrance is at grade and has no steps or ...,3.0,144.944837,-37.797965,yes,no,buildings
3,,388-396 abbotsford street north melbourne 3051,north melbourne,3.0,residential apartment,high level of accessibility,main entrance has ramp,3.0,144.946825,-37.797481,yes,no,buildings
4,,384 abbotsford street north melbourne 3051,north melbourne,2.0,residential apartment,moderate level of accessibility,entrance(s) have limited access via a small li...,2.0,144.946825,-37.797481,limited,no,buildings


In [None]:
# Creating metadata column
metadata_cols = ['street_address', 'suburb', 'predominant_space_use']
building_df['Metadata'] = 