# Data Cleaning & Wrangling
## Building Data

In [26]:
import pandas as pd

# Read data
building_df = pd.read_csv("buildings-with-name-age-size-accessibility-and-bicycle-facilities.csv")

building_df.head(5)

Unnamed: 0,census_year,block_id,property_id,base_property_id,building_name,street_address,clue_small_area,construction_year,refurbished_year,number_of_floors_above_ground,predominant_space_use,accessibility_type,accessibility_type_description,accessibility_rating,bicycle_spaces,has_showers,longitude,latitude,location
0,2006,324,110904,110904,,374-376 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,,,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,,,144.944839,-37.796874,"-37.79687387022628, 144.94483900952014"
1,2006,324,110905,110905,,13-15 Donovans Lane NORTH MELBOURNE 3051,North Melbourne,,,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,,,144.945039,-37.796889,"-37.796889414464786, 144.94503936105005"
2,2006,325,102911,102911,,332-344 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,,,3.0,Residential Apartment,High level of accessibility,Main Entrance is at grade and has no steps or ...,3.0,,,144.944837,-37.797965,"-37.797965401850504, 144.94483704823057"
3,2006,326,100109,100108,,388-396 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,,,3.0,Residential Apartment,High level of accessibility,Main Entrance has ramp,3.0,,,144.946825,-37.797481,"-37.79748138461932, 144.94682467313123"
4,2006,326,100110,100108,,384 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,,,2.0,Residential Apartment,Moderate level of accessibility,Entrance(s) have limited access via a small li...,2.0,,,144.946825,-37.797481,"-37.79748138461932, 144.94682467313123"


In [27]:
# Retrieving relevant information
building_df = building_df[['building_name', 'street_address', 'clue_small_area','number_of_floors_above_ground', 
                           'predominant_space_use','accessibility_type', 'accessibility_type_description',
                           'accessibility_rating', 'longitude', 'latitude']]

building_df.head(5)

Unnamed: 0,building_name,street_address,clue_small_area,number_of_floors_above_ground,predominant_space_use,accessibility_type,accessibility_type_description,accessibility_rating,longitude,latitude
0,,374-376 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,144.944839,-37.796874
1,,13-15 Donovans Lane NORTH MELBOURNE 3051,North Melbourne,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,144.945039,-37.796889
2,,332-344 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,3.0,Residential Apartment,High level of accessibility,Main Entrance is at grade and has no steps or ...,3.0,144.944837,-37.797965
3,,388-396 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,3.0,Residential Apartment,High level of accessibility,Main Entrance has ramp,3.0,144.946825,-37.797481
4,,384 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,2.0,Residential Apartment,Moderate level of accessibility,Entrance(s) have limited access via a small li...,2.0,144.946825,-37.797481


In [28]:
building_df['accessibility_type_description'].unique()

array(['Building is not considered to be publicly accessible so access has not been rated',
       'Main Entrance is at grade and has no steps or ramp',
       'Main Entrance has ramp',
       'Entrance(s) have limited access via a small lip or a steep ramp',
       'All entrances have steps',
       'Configuration of entrance does not fit into any of the other categories',
       'Main entrance has steps; Alternative entrance is step free or has ramp',
       nan, 'Access has not been rated'], dtype=object)

# Creating Final Table Structure

In [29]:
import numpy as np

# Create a mapping dictionary
wheelchair_mapping = {
    'Building is not considered to be publicly accessible so access has not been rated': 'no',
    'Main Entrance is at grade and has no steps or ramp': 'yes',
    'Main Entrance has ramp': 'yes',
    'Entrance(s) have limited access via a small lip or a steep ramp': 'limited',
    'All entrances have steps': 'no',
    'Configuration of entrance does not fit into any of the other categories': 'no',
    'Main entrance has steps; Alternative entrance is step free or has ramp': 'limited',
    np.nan: 'no',
    'Access has not been rated': 'no'
}

# Mapping the accessibility_type_descriptions
building_df['wheelchair'] = building_df['accessibility_type_description'].map(wheelchair_mapping)

building_df

Unnamed: 0,building_name,street_address,clue_small_area,number_of_floors_above_ground,predominant_space_use,accessibility_type,accessibility_type_description,accessibility_rating,longitude,latitude,wheelchair
0,,374-376 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,144.944839,-37.796874,no
1,,13-15 Donovans Lane NORTH MELBOURNE 3051,North Melbourne,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,144.945039,-37.796889,no
2,,332-344 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,3.0,Residential Apartment,High level of accessibility,Main Entrance is at grade and has no steps or ...,3.0,144.944837,-37.797965,yes
3,,388-396 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,3.0,Residential Apartment,High level of accessibility,Main Entrance has ramp,3.0,144.946825,-37.797481,yes
4,,384 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,2.0,Residential Apartment,Moderate level of accessibility,Entrance(s) have limited access via a small li...,2.0,144.946825,-37.797481,limited
...,...,...,...,...,...,...,...,...,...,...,...
291458,,2 Percy Street KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926420,-37.791434,no
291459,,21 Gordon Crescent KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926508,-37.791041,no
291460,,17 Gordon Crescent KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926585,-37.791123,no
291461,,15 Gordon Crescent KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926617,-37.791236,no


In [30]:
# Create a mapping dictionary for alternate entrance
alternate_entrance_mapping = {
    'Building is not considered to be publicly accessible so access has not been rated': 'no',
    'Main Entrance is at grade and has no steps or ramp': 'no',
    'Main Entrance has ramp': 'no',
    'Entrance(s) have limited access via a small lip or a steep ramp': 'no',
    'All entrances have steps': 'no',
    'Configuration of entrance does not fit into any of the other categories': 'no',
    'Main entrance has steps; Alternative entrance is step free or has ramp': 'yes',
    np.nan: 'no',
    'Access has not been rated': 'no'
}

# Mapping the accessibility_type_descriptions
building_df['alternate_entrance'] = building_df['accessibility_type_description'].map(alternate_entrance_mapping)

building_df


Unnamed: 0,building_name,street_address,clue_small_area,number_of_floors_above_ground,predominant_space_use,accessibility_type,accessibility_type_description,accessibility_rating,longitude,latitude,wheelchair,alternate_entrance
0,,374-376 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,144.944839,-37.796874,no,no
1,,13-15 Donovans Lane NORTH MELBOURNE 3051,North Melbourne,2.0,House/Townhouse,Not determined or not applicable,Building is not considered to be publicly acce...,0.0,144.945039,-37.796889,no,no
2,,332-344 Dryburgh Street NORTH MELBOURNE 3051,North Melbourne,3.0,Residential Apartment,High level of accessibility,Main Entrance is at grade and has no steps or ...,3.0,144.944837,-37.797965,yes,no
3,,388-396 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,3.0,Residential Apartment,High level of accessibility,Main Entrance has ramp,3.0,144.946825,-37.797481,yes,no
4,,384 Abbotsford Street NORTH MELBOURNE 3051,North Melbourne,2.0,Residential Apartment,Moderate level of accessibility,Entrance(s) have limited access via a small li...,2.0,144.946825,-37.797481,limited,no
...,...,...,...,...,...,...,...,...,...,...,...,...
291458,,2 Percy Street KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926420,-37.791434,no,no
291459,,21 Gordon Crescent KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926508,-37.791041,no,no
291460,,17 Gordon Crescent KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926585,-37.791123,no,no
291461,,15 Gordon Crescent KENSINGTON 3031,Kensington,1.0,House/Townhouse,,,,144.926617,-37.791236,no,no


In [31]:
# Standardising to lowercase
building_df = building_df.map(lambda x: x.lower() if isinstance(x, str) else x)

building_df.head(5)

Unnamed: 0,building_name,street_address,clue_small_area,number_of_floors_above_ground,predominant_space_use,accessibility_type,accessibility_type_description,accessibility_rating,longitude,latitude,wheelchair,alternate_entrance
0,,374-376 dryburgh street north melbourne 3051,north melbourne,2.0,house/townhouse,not determined or not applicable,building is not considered to be publicly acce...,0.0,144.944839,-37.796874,no,no
1,,13-15 donovans lane north melbourne 3051,north melbourne,2.0,house/townhouse,not determined or not applicable,building is not considered to be publicly acce...,0.0,144.945039,-37.796889,no,no
2,,332-344 dryburgh street north melbourne 3051,north melbourne,3.0,residential apartment,high level of accessibility,main entrance is at grade and has no steps or ...,3.0,144.944837,-37.797965,yes,no
3,,388-396 abbotsford street north melbourne 3051,north melbourne,3.0,residential apartment,high level of accessibility,main entrance has ramp,3.0,144.946825,-37.797481,yes,no
4,,384 abbotsford street north melbourne 3051,north melbourne,2.0,residential apartment,moderate level of accessibility,entrance(s) have limited access via a small li...,2.0,144.946825,-37.797481,limited,no


In [36]:
import numpy as np

# Create a mapping dictionary for alternate entrance
category_mapping = {
    'not determined or not applicable': np.nan,
    'low level of accessibility': 'low',
    'moderate level of accessibility': 'moderate',
    'high level of accessibility': 'high',
}

# Mapping the accessibility_type_descriptions
building_df['accessibility_category'] = building_df['accessibility_type'].map(category_mapping)

building_df


Unnamed: 0,building_name,street_address,clue_small_area,number_of_floors_above_ground,predominant_space_use,accessibility_type,accessibility_type_description,accessibility_rating,longitude,latitude,wheelchair,alternate_entrance,accessibility_category
0,,374-376 dryburgh street north melbourne 3051,north melbourne,2.0,house/townhouse,not determined or not applicable,building is not considered to be publicly acce...,0.0,144.944839,-37.796874,no,no,
1,,13-15 donovans lane north melbourne 3051,north melbourne,2.0,house/townhouse,not determined or not applicable,building is not considered to be publicly acce...,0.0,144.945039,-37.796889,no,no,
2,,332-344 dryburgh street north melbourne 3051,north melbourne,3.0,residential apartment,high level of accessibility,main entrance is at grade and has no steps or ...,3.0,144.944837,-37.797965,yes,no,high
3,,388-396 abbotsford street north melbourne 3051,north melbourne,3.0,residential apartment,high level of accessibility,main entrance has ramp,3.0,144.946825,-37.797481,yes,no,high
4,,384 abbotsford street north melbourne 3051,north melbourne,2.0,residential apartment,moderate level of accessibility,entrance(s) have limited access via a small li...,2.0,144.946825,-37.797481,limited,no,moderate
...,...,...,...,...,...,...,...,...,...,...,...,...,...
291458,,2 percy street kensington 3031,kensington,1.0,house/townhouse,,,,144.926420,-37.791434,no,no,
291459,,21 gordon crescent kensington 3031,kensington,1.0,house/townhouse,,,,144.926508,-37.791041,no,no,
291460,,17 gordon crescent kensington 3031,kensington,1.0,house/townhouse,,,,144.926585,-37.791123,no,no,
291461,,15 gordon crescent kensington 3031,kensington,1.0,house/townhouse,,,,144.926617,-37.791236,no,no,


In [37]:
# Renaming columns
building_df = building_df.rename(columns={'longitude': 'Location_Lon',
                                          'latitude': 'Location_Lat',
                                          'clue_small_area': 'suburb'})

# Adding location type
building_df['Accessibility_Type_Name'] = 'buildings'

building_df.head(5)

Unnamed: 0,building_name,street_address,suburb,number_of_floors_above_ground,predominant_space_use,accessibility_type,accessibility_type_description,accessibility_rating,Location_Lon,Location_Lat,wheelchair,alternate_entrance,accessibility_category,Accessibility_Type_Name
0,,374-376 dryburgh street north melbourne 3051,north melbourne,2.0,house/townhouse,not determined or not applicable,building is not considered to be publicly acce...,0.0,144.944839,-37.796874,no,no,,buildings
1,,13-15 donovans lane north melbourne 3051,north melbourne,2.0,house/townhouse,not determined or not applicable,building is not considered to be publicly acce...,0.0,144.945039,-37.796889,no,no,,buildings
2,,332-344 dryburgh street north melbourne 3051,north melbourne,3.0,residential apartment,high level of accessibility,main entrance is at grade and has no steps or ...,3.0,144.944837,-37.797965,yes,no,high,buildings
3,,388-396 abbotsford street north melbourne 3051,north melbourne,3.0,residential apartment,high level of accessibility,main entrance has ramp,3.0,144.946825,-37.797481,yes,no,high,buildings
4,,384 abbotsford street north melbourne 3051,north melbourne,2.0,residential apartment,moderate level of accessibility,entrance(s) have limited access via a small li...,2.0,144.946825,-37.797481,limited,no,moderate,buildings


In [39]:
# Creating metadata column
metadata_cols = ['street_address', 'suburb', 'predominant_space_use']
building_df['Metadata'] = building_df[metadata_cols].apply(lambda row: row.to_dict(), axis=1)

# Creating tags column
tags_cols = ['building_name', 'accessibility_category', 'wheelchair',
             'alternate_entrance', 'accessibility_rating',
             'accessibility_type_description']
building_df['Tags'] = building_df[tags_cols].apply(lambda row: row.to_dict(), axis=1)

building_df = building_df[['Location_Lat', 'Location_Lon', 'Accessibility_Type_Name',
                           'Metadata', 'Tags']]

building_df

Unnamed: 0,Location_Lat,Location_Lon,Accessibility_Type_Name,Metadata,Tags
0,-37.796874,144.944839,buildings,{'street_address': '374-376 dryburgh street no...,"{'building_name': nan, 'accessibility_category..."
1,-37.796889,144.945039,buildings,{'street_address': '13-15 donovans lane north ...,"{'building_name': nan, 'accessibility_category..."
2,-37.797965,144.944837,buildings,{'street_address': '332-344 dryburgh street no...,"{'building_name': nan, 'accessibility_category..."
3,-37.797481,144.946825,buildings,{'street_address': '388-396 abbotsford street ...,"{'building_name': nan, 'accessibility_category..."
4,-37.797481,144.946825,buildings,{'street_address': '384 abbotsford street nort...,"{'building_name': nan, 'accessibility_category..."
...,...,...,...,...,...
291458,-37.791434,144.926420,buildings,{'street_address': '2 percy street kensington ...,"{'building_name': nan, 'accessibility_category..."
291459,-37.791041,144.926508,buildings,{'street_address': '21 gordon crescent kensing...,"{'building_name': nan, 'accessibility_category..."
291460,-37.791123,144.926585,buildings,{'street_address': '17 gordon crescent kensing...,"{'building_name': nan, 'accessibility_category..."
291461,-37.791236,144.926617,buildings,{'street_address': '15 gordon crescent kensing...,"{'building_name': nan, 'accessibility_category..."


In [40]:
# Parse Metadata and Tags into Dictionaries
def safe_parse(s):
    """Safely convert string or NaN to a dictionary."""
    if isinstance(s, dict):
        return s
    if pd.isna(s) or str(s).strip() in ('', 'nan', 'None', '{}'):
        return {}
    try:
        s = str(s).replace("'", '"')  # Fix single quotes for JSON
        return json.loads(s)
    except json.JSONDecodeError:
        return {}
    
# Apply parsing to both DataFrames
for col in ['Metadata', 'Tags']:
    building_df[col] = building_df[col].apply(safe_parse)

building_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  building_df[col] = building_df[col].apply(safe_parse)


Unnamed: 0,Location_Lat,Location_Lon,Accessibility_Type_Name,Metadata,Tags
0,-37.796874,144.944839,buildings,{'street_address': '374-376 dryburgh street no...,"{'building_name': nan, 'accessibility_category..."
1,-37.796889,144.945039,buildings,{'street_address': '13-15 donovans lane north ...,"{'building_name': nan, 'accessibility_category..."
2,-37.797965,144.944837,buildings,{'street_address': '332-344 dryburgh street no...,"{'building_name': nan, 'accessibility_category..."
3,-37.797481,144.946825,buildings,{'street_address': '388-396 abbotsford street ...,"{'building_name': nan, 'accessibility_category..."
4,-37.797481,144.946825,buildings,{'street_address': '384 abbotsford street nort...,"{'building_name': nan, 'accessibility_category..."


In [41]:
building_df.to_json('final_buildings.json', orient='records', indent=2)