# Convert Suburb Names to Postcodes

In [1]:
import pandas as pd
import requests
import json

## Fetches Postcode Data and Saves as a JSON File.

In [4]:
BASE_URL = 'http://v0.postcodeapi.com.au/suburbs/{}.json'
HEADERS = {
    'Accept': 'application/json; indent=4'
}
# Seconds to wait on a single request. Without a timeout, one stalled
# connection would block the whole download loop indefinitely.
REQUEST_TIMEOUT = 10

all_data = []

# Fetch data for postcodes from 3000 to 4000 (inclusive). One session is
# shared so the connection can be reused across the requests.
with requests.Session() as session:
    for postcode in range(3000, 4001):
        try:
            response = session.get(
                BASE_URL.format(postcode),
                headers=HEADERS,
                timeout=REQUEST_TIMEOUT,
            )
        except requests.RequestException as error:
            # A network failure for one postcode must not discard the data
            # already collected for the others.
            print(f"Failed to fetch data for postcode: {postcode} ({error})")
            continue

        if response.status_code == 200:
            data = response.json()
            all_data.extend(data)
        else:
            print(f"Failed to fetch data for postcode: {postcode}")

# Save the collected data as a JSON file
with open('../data/landing/postcodes_3000_to_4000.json', 'w', encoding='utf-8') as file:
    json.dump(all_data, file, ensure_ascii=False, indent=4)

In [5]:
# Load the postcode records from the JSON file into a DataFrame; later cells
# read its 'name', 'postcode', 'latitude' and 'longitude' columns.
df = pd.read_json('../data/landing/postcodes_3000_to_4000.json')

## Process Historical Rental Price Data
Extract the suburb names and save them to a separate CSV file for later use.

In [7]:
# Take the 'Name' column of the historical rental price dataset and write it,
# without the index, to its own CSV file
(
    pd.read_csv('../data/raw/historical_rental_price_by_suburb.csv')['Name']
    .to_csv('../data/raw/suburb_postcode.csv', index=False)
)

# Reload the file just written; at this point it holds suburb names only
df_hist = pd.read_csv('../data/raw/suburb_postcode.csv')

## Split "Name" Column and Expand it into Separate Rows.

In [8]:
def expand_names_by_dash(df: pd.DataFrame) -> pd.DataFrame:
    """
    Splits the "Name" column of a DataFrame by "-" and expands into separate rows.

    A row whose "Name" is a string containing "-" is replaced by one row per
    dash-separated part. Each part is stripped of surrounding whitespace and
    empty parts are skipped. These new rows carry only the "Name" value, so
    any other columns are missing (NaN) for them. All remaining rows,
    including rows whose "Name" is missing or not a string, are kept unchanged.

    Parameters:
    df: Input dataframe with a "Name" column.

    Returns:
    pd.DataFrame: Expanded dataframe with a fresh 0..n-1 index. An empty input
    yields an empty dataframe that keeps the input's columns.
    """

    rows_list = []
    for _, row in df.iterrows():
        name = row['Name']
        # Only strings can be split; NaN or other values pass through untouched
        # instead of raising TypeError on the membership test.
        if isinstance(name, str) and '-' in name:
            parts = (part.strip() for part in name.split('-'))
            rows_list.extend({'Name': part} for part in parts if part)
        else:
            rows_list.append(row.to_dict())

    if not rows_list:
        # Keep the column schema so callers can still select df['Name'].
        return pd.DataFrame(columns=df.columns)
    return pd.DataFrame(rows_list)


In [9]:
# Break dash-joined names into one row each, then keep only the rows whose
# "Name" is not exactly 'Group Total'
df_hist = expand_names_by_dash(df_hist).loc[
    lambda frame: frame['Name'] != 'Group Total'
]
df_hist.head()

Unnamed: 0,Name
0,Albert Park
1,Middle Park
2,West St Kilda
3,Armadale
4,Carlton North


## Rename Values in the "Name" Column According to the name_changes Dictionary.

In [10]:
# Rename specified values in the "Name" column of the DataFrame based on the name_changes dictionary.
# Keys are names as they occur in df_hist; values are the replacements that the
# later merge compares against the "name" column of df.
# NOTE(review): the key 'Wanagaratta' presumably mirrors a misspelling in the
# rental dataset rather than being a typo here -- confirm against the source CSV.
name_changes = {
    'St Kilda Rd': 'St Kilda Road Central',
    'East St Kilda': 'St Kilda East',
    'West St Kilda': 'St Kilda West',
    'East Hawthorn': 'Hawthorn East',
    'East Brunswick': 'Brunswick East',
    'West Brunswick': 'Brunswick West',
    'Yarra Ranges': 'Yarra Junction',
    'Mt Eliza': 'Mount Eliza',
    'Wanagaratta': 'Wangaratta South',
    'CBD': 'Melbourne',
    'Mt Martha': 'Mount Martha',
    'Bendigo East': 'East Bendigo',
    'Newcombe': 'Newcomb'
}

# Only values that exactly equal a key are replaced; every other name is left as is.
df_hist['Name'] = df_hist['Name'].replace(name_changes)
df_hist

Unnamed: 0,Name
0,Albert Park
1,Middle Park
2,St Kilda West
3,Armadale
4,Carlton North
...,...
222,Traralgon
223,Wangaratta South
224,Warragul
225,Warrnambool


## Merge the DataFrames and Select Specific Columns

In [11]:
# Left-join the postcode records onto the suburb names (df_hist "Name" against
# df "name"), so every df_hist row is kept even when nothing matches
df_merge = pd.merge(df_hist, df, how='left', left_on='Name', right_on='name')

# Narrow the result to the two name columns plus postcode and coordinates
df_merge = df_merge.loc[:, ['Name', 'name', 'postcode', 'latitude', 'longitude']]

df_merge.head()

Unnamed: 0,Name,name,postcode,latitude,longitude
0,Albert Park,Albert Park,3206,-37.8445,144.953
1,Middle Park,Middle Park,3206,-37.8512,144.9621
2,St Kilda West,St Kilda West,3182,-37.8579,144.9715
3,Armadale,Armadale,3143,-37.8576,145.0202
4,Carlton North,Carlton North,3054,-37.7847,144.9632


## Reverse the Name Changes Made Earlier.

In [12]:
# Invert name_changes so that each replacement name points back to the name it replaced
reversed_name_changes = dict(zip(name_changes.values(), name_changes.keys()))

# Put the earlier names back into "Name"; the "name" column is left untouched.
# NOTE(review): a "Name" that already equalled one of the replacement names
# before the renaming step would also be rewritten here -- confirm none exist.
df_merge['Name'] = df_merge['Name'].replace(to_replace=reversed_name_changes)

df_merge.head()

Unnamed: 0,Name,name,postcode,latitude,longitude
0,Albert Park,Albert Park,3206,-37.8445,144.953
1,Middle Park,Middle Park,3206,-37.8512,144.9621
2,West St Kilda,St Kilda West,3182,-37.8579,144.9715
3,Armadale,Armadale,3143,-37.8576,145.0202
4,Carlton North,Carlton North,3054,-37.7847,144.9632


In [13]:
# Write the merged suburb/postcode table to CSV without the index. This is the
# same path the names-only file was written to earlier, so that file is overwritten.
df_merge.to_csv('../data/raw/suburb_postcode.csv', index=False)