# Development Activity Monitor dataset including API

In [6]:
import requests
import pandas as pd
import os
import numpy as np

## API function

In [2]:
def fetch_data(base_url, dataset, api_key, num_records, max_offset, offset=0):
    """Download a dataset page by page and return it as one DataFrame.

    Requests ``{base_url}{dataset}/records`` repeatedly, advancing ``offset``
    by ``num_records`` after each page. Paging stops when the offset exceeds
    ``max_offset``, when a response carries no ``results``, or when a page
    comes back with fewer than ``num_records`` records.

    Parameters
    ----------
    base_url : str
        URL prefix; ``dataset`` is appended to it without a separator.
    dataset : str
        Dataset identifier placed between ``base_url`` and ``/records``.
    api_key : str
        Value sent as the ``api_key`` query parameter.
    num_records : int
        Page size, sent as ``limit``. Must be greater than zero.
    max_offset : int
        Largest offset that is still requested.
    offset : int, default 0
        Offset of the first page.

    Returns
    -------
    pandas.DataFrame
        One row per record collected; empty when nothing was fetched.

    Raises
    ------
    ValueError
        If ``num_records`` is not positive (the offset would never advance).
    Exception
        If a request fails, times out or returns an HTTP error status.
    """
    if num_records <= 0:
        raise ValueError(f'num_records must be a positive integer, got {num_records!r}')

    url = f'{base_url}{dataset}/records'
    all_records = []

    while offset <= max_offset:
        # Let requests build and encode the query string
        params = {'limit': num_records, 'offset': offset, 'api_key': api_key}

        try:
            result = requests.get(url, params=params, timeout=10)
            result.raise_for_status()
            records = result.json().get('results')
        except requests.exceptions.RequestException as e:
            # Keep the original error attached as the cause for debugging
            raise Exception(f'API request failed: {e}') from e

        if not records:
            break

        all_records.extend(records)

        # A short page means the end of the dataset was reached
        if len(records) < num_records:
            break

        # next cycle offset
        offset += num_records

    # Dataframe all data
    return pd.DataFrame(all_records)



## Creating the dataset
#### - API key is obtained from the MOP
#### - API for dataset is displayed below:
https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/development-activity-monitor/records?limit=20
#### - Fetching the dataset
#### - Display the dataset

In [3]:
# API deconstructed below.
# The key can be supplied through the MELBOURNE_OPEN_DATA_API_KEY environment variable.
# NOTE(review): the fallback literal is a credential stored in the notebook; rotate it
# and drop the fallback once the environment variable is set wherever this runs.
API_KEY = os.environ.get(
    'MELBOURNE_OPEN_DATA_API_KEY',
    'd503386bd7565cee5ef152d9dec187036f46a47236cb9ffff66a05b6',
)
BASE_URL = "https://data.melbourne.vic.gov.au/api/explore/v2.1/catalog/datasets/"
DATASET = 'development-activity-monitor'
NUM_RECORD = 20     # records requested per page
MAX_OFFSET = 1406   # largest offset that fetch_data will still request

In [4]:
# Download every page of the dataset into a single DataFrame
df = fetch_data(
    base_url=BASE_URL,
    dataset=DATASET,
    api_key=API_KEY,
    num_records=NUM_RECORD,
    max_offset=MAX_OFFSET,
)

In [5]:
# Preview the first two rows of the fetched dataset
df.head(2)

Unnamed: 0,data_format,development_key,status,year_completed,clue_small_area,clue_block,street_address,property_id,property_id_2,property_id_3,...,hospital_flr,recreation_flr,publicdispaly_flr,community_flr,car_spaces,bike_spaces,town_planning_application,longitude,latitude,geopoint
0,Pre May 16,X000479,COMPLETED,2006,North Melbourne,342,191-201 Abbotsford Street NORTH MELBOURNE VIC ...,100023,,,...,0,0,0,0,0,0,0,144.94503,-37.802822,"{'lon': 144.9450298, 'lat': -37.80282184}"
1,Pre May 16,X000459,COMPLETED,2005,North Melbourne,333,218-224 Abbotsford Street NORTH MELBOURNE VIC ...,100119,,,...,0,0,0,0,0,0,0,144.945947,-37.802049,"{'lon': 144.9459475, 'lat': -37.80204879}"


## Steps for cleaning the dataset:
#### - Check for Null values
#### - General information such as Shape and description
#### - Checking for Duplicates

In [7]:
# Count the missing (NaN/None) values in each column
df.isna().sum()

data_format                       0
development_key                   0
status                            0
year_completed                  377
clue_small_area                   0
clue_block                        0
street_address                    0
property_id                       0
property_id_2                  1239
property_id_3                  1367
property_id_4                  1393
property_id_5                  1404
floors_above                      0
resi_dwellings                    0
studio_dwe                        0
one_bdrm_dwe                      0
two_bdrm_dwe                      0
three_bdrm_dwe                    0
student_apartments                0
student_beds                      0
student_accommodation_units       0
institutional_accom_beds          0
hotel_rooms                       0
serviced_apartments               0
hotels_serviced_apartments        0
hostel_beds                       0
childcare_places                  0
office_flr                  

In [8]:
# Shape of the dataset as (number of rows, number of columns)
df.shape

(1407, 42)

In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns
df.describe()

Unnamed: 0,clue_block,floors_above,resi_dwellings,studio_dwe,one_bdrm_dwe,two_bdrm_dwe,three_bdrm_dwe,student_apartments,student_beds,student_accommodation_units,...,storage_flr,education_flr,hospital_flr,recreation_flr,publicdispaly_flr,community_flr,car_spaces,bike_spaces,longitude,latitude
count,1407.0,1407.0,1407.0,1407.0,1407.0,1407.0,1407.0,1407.0,1407.0,1407.0,...,1407.0,1407.0,1407.0,1407.0,1407.0,1407.0,1407.0,1407.0,1407.0,1407.0
mean,551.705046,13.852168,78.78678,1.314854,23.115849,29.083866,4.879886,12.68941,13.662402,17.790334,...,71.891258,260.282161,179.448472,121.103056,15.769012,19.739161,49.086709,51.73774,144.952397,-37.810723
std,507.43941,15.688088,156.933143,10.708723,63.554807,75.944719,15.724167,72.064789,89.849474,96.187654,...,724.76088,2203.75906,3719.272921,1690.7678,435.004756,226.245831,115.252495,129.339036,0.01595,0.012219
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,144.901272,-37.849229
25%,114.5,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,144.941898,-37.820009
50%,430.0,8.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,144.954708,-37.81106
75%,790.0,19.0,82.5,0.0,3.0,9.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,41.0,35.0,144.962986,-37.802463
max,2547.0,100.0,1139.0,181.0,537.0,627.0,185.0,783.0,909.0,909.0,...,14000.0,40832.0,100000.0,58935.0,16073.0,5332.0,1275.0,2068.0,144.990561,-37.776195


In [10]:
# Checking for Duplicates
# Count the rows whose 'development_key' repeats a value seen in an earlier row.
# Do not filter on isna() first: the column has no nulls, so that would only ever
# inspect an empty frame and always report 0.
duplicate_key_count = df['development_key'].duplicated().sum()
duplicate_key_count

0

In [11]:
# Mask of the rows with no 'year_completed' value, and the shape of that subset
bool_list_2 = pd.isna(df['year_completed'])
df.loc[bool_list_2].shape

(377, 42)

In [12]:
# Show the rows that have no 'year_completed' value
df[bool_list_2]

Unnamed: 0,data_format,development_key,status,year_completed,clue_small_area,clue_block,street_address,property_id,property_id_2,property_id_3,...,hospital_flr,recreation_flr,publicdispaly_flr,community_flr,car_spaces,bike_spaces,town_planning_application,longitude,latitude,geopoint
348,Post May 16,X0004409,APPLIED,,West Melbourne (Industrial),502,207 Kensington Road WEST MELBOURNE VIC 3003,105226,105227,,...,0,0,0,0,422,146,TP-2018-512,144.916507,-37.800872,"{'lon': 144.9165074, 'lat': -37.80087212}"
349,Post May 16,X0005801,APPLIED,,Parkville,928,11-49 Galada Avenue PARKVILLE VIC 3052,653559,612700,,...,0,0,0,0,174,148,TPM-2016-18/A,144.940200,-37.781500,"{'lon': 144.9402, 'lat': -37.7815}"
350,Post May 16,X0007401,APPLIED,,Carlton,206,"225-227 Lygon Street, Carlton, 3053",106120,,,...,0,0,0,0,0,10,TP-2019-524,144.966741,-37.800046,"{'lon': 144.966741, 'lat': -37.80004561}"
351,Post May 16,X0011803,APPLIED,,Melbourne (CBD),11,"566-580 Flinders Street, Melbourne, 3000",103984,,,...,0,0,0,0,0,20,TP-2021-704,144.955336,-37.820711,"{'lon': 144.9553364, 'lat': -37.82071126}"
352,Post May 16,X0012600,APPLIED,,East Melbourne,613,"204-208 Albert Street, East Melbourne 3002",100363,,,...,0,0,0,0,27,42,TP-2022-23,144.984330,-37.810136,"{'lon': 144.9843298, 'lat': -37.81013567}"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1402,Post May 16,X0011801,UNDER CONSTRUCTION,,Carlton,245,86-94 Pelham Street CARLTON VIC 3053,107554,,,...,0,0,0,0,56,137,TP-2015-742/C,144.964800,-37.802300,"{'lon': 144.9648, 'lat': -37.8023}"
1403,Post May 16,X0009410,UNDER CONSTRUCTION,,East Melbourne,620,"250-260 Albert Street, East Melbourne, 3002",100361,,,...,0,0,0,0,35,25,TP-2020-574,144.982577,-37.809940,"{'lon': 144.9825767, 'lat': -37.80993999}"
1404,Post Oct 22,X0014017,UNDER CONSTRUCTION,,Southbank,807,84-90 Queens Bridge Street SOUTHBANK VIC 3006,108056,110440,,...,0,0,0,0,111,153,TPM-2013-30/D,144.959900,-37.825000,"{'lon': 144.9599, 'lat': -37.825}"
1405,Post Oct 22,X0013802,UNDER CONSTRUCTION,,East Melbourne,621,364-370 Albert Street EAST MELBOURNE VIC 3002,100355,,,...,0,0,0,0,58,39,TP-2019-835/B,144.978950,-37.809569,"{'lon': 144.9789497, 'lat': -37.80956912}"


## Conclusion:
### - Null Values:
        - There are null values in 'year_completed' because those developments are not yet completed.
        - This feature does not need further work.
### - Shape:
        - The dataset has 1407 rows and 42 columns.
### - Duplicates:
        - No duplicates in dataset.

## Selecting features for the new dataset

In [17]:
# Look at the available columns again before choosing which ones to keep
df.head(2)

Unnamed: 0,data_format,development_key,status,year_completed,clue_small_area,clue_block,street_address,property_id,property_id_2,property_id_3,...,hospital_flr,recreation_flr,publicdispaly_flr,community_flr,car_spaces,bike_spaces,town_planning_application,longitude,latitude,geopoint
0,Pre May 16,X000479,COMPLETED,2006,North Melbourne,342,191-201 Abbotsford Street NORTH MELBOURNE VIC ...,100023,,,...,0,0,0,0,0,0,0,144.94503,-37.802822,"{'lon': 144.9450298, 'lat': -37.80282184}"
1,Pre May 16,X000459,COMPLETED,2005,North Melbourne,333,218-224 Abbotsford Street NORTH MELBOURNE VIC ...,100119,,,...,0,0,0,0,0,0,0,144.945947,-37.802049,"{'lon': 144.9459475, 'lat': -37.80204879}"


In [13]:
# Selecting columns by name rather than by position, so the selection stays
# correct even if the API returns its fields in a different order.
selected_columns = [
    'data_format',
    'status',
    'year_completed',
    'clue_small_area',
    'resi_dwellings',
    'studio_dwe',
    'one_bdrm_dwe',
    'two_bdrm_dwe',
    'three_bdrm_dwe',
    'student_apartments',
]
# .copy() keeps `data` independent of `df`
data = df.loc[:, selected_columns].copy()

In [14]:
# Preview the first six rows of the reduced dataset
data.head(6)

Unnamed: 0,data_format,status,year_completed,clue_small_area,resi_dwellings,studio_dwe,one_bdrm_dwe,two_bdrm_dwe,three_bdrm_dwe,student_apartments
0,Pre May 16,COMPLETED,2006,North Melbourne,17,0,0,0,0,0
1,Pre May 16,COMPLETED,2005,North Melbourne,12,0,0,0,0,0
2,Pre May 16,COMPLETED,2013,West Melbourne (Residential),3,0,0,0,3,0
3,Pre May 16,COMPLETED,2014,West Melbourne (Residential),28,0,17,11,0,0
4,Pre May 16,COMPLETED,2007,North Melbourne,0,0,0,0,0,0
5,Pre May 16,COMPLETED,2002,Carlton,14,0,0,0,0,0


In [15]:
# Total number of dwellings of each type for every (status, small area) pair
(
    data
    .groupby(['status', 'clue_small_area'])[
        [
            'resi_dwellings',
            'studio_dwe',
            'one_bdrm_dwe',
            'two_bdrm_dwe',
            'three_bdrm_dwe',
            'student_apartments',
        ]
    ]
    .sum()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,resi_dwellings,studio_dwe,one_bdrm_dwe,two_bdrm_dwe,three_bdrm_dwe,student_apartments
status,clue_small_area,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
APPLIED,Carlton,87,3,26,29,29,206
APPLIED,Docklands,1424,229,528,576,91,0
APPLIED,East Melbourne,0,0,0,0,0,0
APPLIED,Kensington,397,33,87,219,58,0
APPLIED,Melbourne (CBD),821,0,370,405,46,436
APPLIED,Melbourne (Remainder),83,0,5,24,54,0
APPLIED,North Melbourne,1419,312,521,495,91,0
APPLIED,Parkville,125,0,44,74,7,0
APPLIED,Port Melbourne,0,0,0,0,0,0
APPLIED,South Yarra,16,0,0,5,11,0


In [16]:
# Save the reduced dataset to 'data_dwelling.csv' in the current working directory
# for offline use (the DataFrame index is written as the first column).
data.to_csv('data_dwelling.csv')