In [None]:
import pandas as pd
import numpy as np
import plotly
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import matplotlib.pyplot as plt

In [None]:
# Load the HDB resale-flat transactions; the relative path is resolved
# against the notebook's working directory.
hdb_data = pd.read_csv(
    "ResaleflatpricesbasedonregistrationdatefromJan2017onwards.csv"
)

In [None]:
# Preview the first five rows of the loaded frame.
hdb_data.head(5)

In [None]:
# TODO: get coordinates and plot on a map
# TODO: psf in each town

In [None]:
# List the distinct town names present in the data.
hdb_data["town"].unique()

In [None]:
# Conversion factor from square metres to square feet, named so the
# multiplication below is not a bare magic number.
SQFT_PER_SQM = 10.764

# Derive the floor area in square feet, then the resale price per square foot (psf).
hdb_data["floor_area_sqft"] = hdb_data["floor_area_sqm"] * SQFT_PER_SQM
hdb_data["resale_price_psf"] = hdb_data["resale_price"] / hdb_data["floor_area_sqft"]

In [None]:
# Summary statistics of the frame, now including the derived
# floor_area_sqft and resale_price_psf columns.
hdb_data.describe()

In [None]:
# Show the available column names for reference when building the plots below.
hdb_data.columns

In [None]:
# Resale price per square foot over time, one colour per town.
# `labels` is keyed on the columns that are actually plotted (month,
# resale_price_psf, town); keys for columns that are not part of the figure
# have no effect on the axis or legend titles.
fig = px.scatter(
    hdb_data,
    x="month",
    y="resale_price_psf",
    color="town",
    labels={
        "month": "Month",
        "resale_price_psf": "Resale Price (PSF)",
        "town": "Town",
    },
)

# Horizontal legend placed just above the plotting area, right-aligned.
fig.update_layout(legend=dict(
    orientation="h",
    yanchor="bottom",
    y=1.02,
    xanchor="right",
    x=1
))

# Show the plot
fig.show()


In [None]:
# Resale price per square foot for each flat type, one colour per town.
fig = px.scatter(data_frame=hdb_data, x="flat_type", y="resale_price_psf", color="town")

# Horizontal legend anchored by its bottom-right corner just above the plot
# (written with plotly's underscore shorthand for nested layout properties).
fig.update_layout(
    legend_orientation="h",
    legend_yanchor="bottom",
    legend_y=1.02,
    legend_xanchor="right",
    legend_x=1,
)

fig.show()


In [None]:
# Rows whose remaining_lease string sorts after "80 years".
# NOTE(review): this is a string (lexicographic) comparison, not a numeric one
# on the year count — confirm that is the intended filter.
hdb_data.loc[hdb_data["remaining_lease"] > "80 years"]

In [None]:
# All transactions recorded for the town of SERANGOON.
hdb_data.loc[hdb_data["town"] == "SERANGOON"]

In [None]:
# Rows whose remaining_lease string sorts after "97 years".
# NOTE(review): this is a string (lexicographic) comparison, not a numeric one
# on the year count — confirm that is the intended filter.
hdb_data.loc[hdb_data["remaining_lease"] > "97 years"]

### Function to bucket remaining lease column

In [None]:
def remaining_lease_bucket(row, bucket_size=5):
    """Map a remaining-lease string to a bucket label such as "60 to 65 years".

    Only the leading whole-year count is used; anything after it (for example
    a months suffix) is ignored. Buckets are open on the left and closed on the
    right, i.e. ``start < years <= start + bucket_size``, so 61..65 all map to
    "60 to 65 years" and 66 maps to "65 to 70 years".

    The bucket is computed arithmetically rather than by scanning a fixed
    40..100 range, so year counts outside that range (e.g. 40 or less) still
    get a label instead of silently producing None.

    Parameters
    ----------
    row : str
        Lease text whose first whitespace-separated token is the year count,
        e.g. "61 years 04 months". None / NaN is treated as missing.
    bucket_size : int, default 5
        Width of each bucket in years; must be positive.

    Returns
    -------
    str or None
        "<start> to <end> years", or None when ``row`` is missing.

    Raises
    ------
    ValueError
        If ``bucket_size`` is not positive, or the first token is not an integer.
    """
    if bucket_size <= 0:
        raise ValueError("bucket_size must be a positive number of years")

    # Missing values (None or float NaN) have no bucket.
    if row is None or (isinstance(row, float) and row != row):
        return None

    # Split the year out only
    remaining_year = int(row.split()[0])

    # (years - 1) // size puts exact multiples (e.g. 65) in the lower bucket,
    # matching the `start < years <= end` rule; clamp so 0 years lands in the
    # first bucket instead of a negative one.
    start = max(((remaining_year - 1) // bucket_size) * bucket_size, 0)
    end = start + bucket_size
    return f"{start} to {end} years"

In [None]:
# Label every transaction with its remaining-lease bucket.
hdb_data["remaining_year_bucket"] = hdb_data["remaining_lease"].map(remaining_lease_bucket)

In [None]:
# Display the frame to inspect the newly added remaining_year_bucket column.
hdb_data

In [None]:
# Resale price per square foot against the remaining-lease bucket, one colour per town.
fig = px.scatter(
    data_frame=hdb_data,
    x="remaining_year_bucket",
    y="resale_price_psf",
    color="town",
)

# Horizontal legend anchored by its bottom-right corner just above the plot.
fig.update_layout(
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    }
)

# Order the bucket labels along the x-axis in descending category order.
fig.update_xaxes(categoryorder="category descending")

fig.show()


In [None]:
# Summary statistics again — NOTE(review): presumably to check the
# resale_price_psf range before fixing the y-axis limits of the box plot below.
hdb_data.describe()

In [None]:
# Distribution of resale price per square foot within each remaining-lease
# bucket, split by town.
fig = px.box(
    data_frame=hdb_data,
    x="remaining_year_bucket",
    y="resale_price_psf",
    color="town",
)

# Order the bucket labels along the x-axis in descending category order.
fig.update_xaxes(categoryorder="category descending")

# One layout update: horizontal legend above the plot, plus a fixed y-axis window.
fig.update_layout(
    legend={
        "orientation": "h",
        "yanchor": "bottom",
        "y": 1.02,
        "xanchor": "right",
        "x": 1,
    },
    yaxis_range=[-100, 1500],
)

fig.show()


### Function for Resale Price PSF over time per town per street name (interactive)

> Box and whiskers plot/MinMax

In [None]:
def resale_psf_over_time(town):
    """Show an interactive box plot of resale price psf over time for one town.

    Filters the module-level ``hdb_data`` frame to rows whose ``town`` equals
    the argument and draws one box per month, coloured by ``street_name``.

    Parameters
    ----------
    town : str
        Town name to match against the ``town`` column.

    Returns
    -------
    None
        The figure is displayed via ``fig.show()``.
    """
    # Filter once; the street names are taken from the `color` column by plotly,
    # so no separate list of streets is needed.
    unique_town_data = hdb_data[hdb_data.town == town]

    fig = px.box(unique_town_data, x='month', y='resale_price_psf', color='street_name')
    fig.update_layout(title_text=f"Resale Price over time of {town}")

    fig.show()

In [None]:
# Render one box-plot figure per town; this takes time to load.
for town in hdb_data["town"].unique():
    resale_psf_over_time(town)

### Function for Resale Price PSF over flat_type, block, storey_range, flat_model, remaining_year_bucket, month (static)

In [None]:
# Columns compared against resale price psf:
# flat_type, block, storey_range, flat_model, remaining_year_bucket, month

def multi_comparison_plots(town):
    """Draw static 3x2 scatter grids of resale price psf for each street in a town.

    For every distinct ``street_name`` in the rows of the module-level
    ``hdb_data`` frame matching ``town``, one matplotlib figure is shown with
    six panels, each plotting ``resale_price_psf`` against one of the selected
    columns.

    NOTE: a later cell in this notebook defines another function with this
    same name (a plotly version); once that cell runs, this definition is
    shadowed.

    Parameters
    ----------
    town : str
        Town name to match against the ``town`` column.

    Returns
    -------
    None
        Figures are displayed via ``plt.show()``.
    """
    # Filter once and derive the street list from the filtered frame.
    unique_town_data = hdb_data[hdb_data.town == town]
    unique_street_names = unique_town_data.street_name.unique()

    selected_columns = ["flat_type", "block", "storey_range", "flat_model", "remaining_year_bucket", "month"]
    y = "resale_price_psf"

    for u_street_name in unique_street_names:

        unique_street_data = unique_town_data[unique_town_data.street_name == u_street_name]

        fig, axs = plt.subplots(3, 2, figsize=(12, 10))

        for col, ax in zip(selected_columns, axs.ravel()):
            ax.set_title(f"{town}-{u_street_name}")
            ax.scatter(unique_street_data[col], unique_street_data[y], s=5, alpha=0.5)
            ax.set_xlabel(col)
            ax.set_ylabel(y)
            ax.grid(True)
            ax.tick_params(axis="x", rotation=90, labelsize=5)

        # No pyplot-level title here: plt.title() targets the current (last)
        # axes and would replace that panel's "<town>-<street>" title.
        plt.tight_layout()
        plt.show()
        # Release the figure so looping over many streets does not accumulate them.
        plt.close(fig)

multi_comparison_plots('SERANGOON')

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def multi_comparison_plots(town):
    """Draw interactive 3x2 scatter grids of resale price psf for each street in a town.

    For every distinct ``street_name`` in the rows of the module-level
    ``hdb_data`` frame matching ``town``, one plotly figure is shown with six
    subplots, each plotting ``resale_price_psf`` against one of the selected
    columns. Subplots in the same row share their y-axis.

    Parameters
    ----------
    town : str
        Town name to match against the ``town`` column.

    Returns
    -------
    None
        Figures are displayed via ``fig.show()``.
    """
    # Filter data for the selected town (once) and list its streets
    unique_town_data = hdb_data[hdb_data.town == town]
    unique_street_names = unique_town_data.street_name.unique()

    selected_columns = ["flat_type", "block", "storey_range", "flat_model", "remaining_year_bucket", "month"]
    y = "resale_price_psf"

    # Loop over each unique street name
    for u_street_name in unique_street_names:
        # Filter data for the current street
        unique_street_data = unique_town_data[unique_town_data.street_name == u_street_name]

        # Create a 3x2 grid for subplots, titled by the column on each x-axis
        fig = make_subplots(
            rows=3, cols=2,
            shared_yaxes=True,
            vertical_spacing=0.1,
            subplot_titles=selected_columns
        )

        # Fill the grid row by row: index i -> (row, column), both 1-based
        for i, col in enumerate(selected_columns):
            row = i // 2 + 1
            col_num = i % 2 + 1
            fig.add_trace(
                go.Scatter(
                    x=unique_street_data[col],
                    y=unique_street_data[y],
                    mode='markers',
                    marker=dict(size=5, opacity=0.5),
                ), row=row, col=col_num
            )

        # Update layout with titles and size
        fig.update_layout(
            height=1200,
            width=1000,
            title_text=f"{town} - {u_street_name}",
            showlegend=False,
        )

        # Rotate tick labels on every subplot's x-axis; the layout-level
        # `xaxis_tickangle` only addresses the first subplot's axis.
        fig.update_xaxes(tickangle=90)
        # Label the y-axis on the left-hand column (rows share their y-axis).
        fig.update_yaxes(title_text=y, col=1)

        # Show the plot
        fig.show()

# Call the function using TOWN (printed for reference)
print(hdb_data.town.unique())
multi_comparison_plots('SERANGOON')


### Median Prices in Each Town & Each Street (interactive)

In [None]:
def median_price_line(town):
    """Show a line chart of monthly median resale price psf per street in a town.

    Filters the module-level ``hdb_data`` frame to the given town, takes the
    median ``resale_price_psf`` for each (town, street_name, month) group, and
    draws one line per street.

    Parameters
    ----------
    town : str
        Town name to match against the ``town`` column.

    Returns
    -------
    None
        The figure is displayed via ``fig.show()``.
    """
    # Filter to the town first so the groupby only processes that town's rows,
    # instead of aggregating the whole dataset on every call.
    town_hdb_data = hdb_data[hdb_data["town"] == town]

    town_median_hdb_data = (
        town_hdb_data
        .groupby(["town", "street_name", "month"])["resale_price_psf"]
        .median()
        .reset_index()
    )

    fig = px.line(town_median_hdb_data,
                  x="month",
                  y="resale_price_psf",
                  color="street_name",
                  line_group="street_name",
                  labels={'resale_price_psf': 'Median Resale Price PSF', 'month': 'Month'})

    # Customize the legend to display horizontally and adjust its position
    fig.update_layout(legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    ))

    # Show the plot
    fig.show()

hdb_data.town.unique()

In [None]:
# Draw the median-price line chart for every town in the data.
for town in hdb_data["town"].unique():
    median_price_line(town)