## Loading the housing transaction data

In [38]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from typing import Any

In [2]:
data = pd.read_parquet("data/HMA.parquet")
data["type"] = data["type"].astype(str).replace({
    "kt": "Kerrostalo",
    "rt": "Rivitalo",
    "ok": "Omakotitalo"
})

data["elevator"] = data["elevator"].astype(str).replace({
    "on": True,
    "ei": False,
})

data["plot_ownership"] = data["plot_ownership"].astype(str).replace({
    "oma": "Owned",
    "vuokra": "Leased",
})


  data["elevator"] = data["elevator"].astype(str).replace({


In [10]:
df = data["type"].value_counts().reset_index()
fig = px.pie(
    data_frame=df,
    values="count",
    names="type",
    title="Building type",
    hole=0.3,
    color="type",
    color_discrete_map={
        "Kerrostalo": "#264653",
        "Rivitalo": "#e9c46a",
        "Omakotitalo": "#e76f51",
    },
)
fig.show()

In [117]:
fig = px.scatter(
    data_frame=data,
    x="area_m2",
    y="price_per_m2_e",
    template="simple_white",
    log_x=True,
    color="type",
    color_discrete_map={
        "Kerrostalo": "#e9c46a",
        "Rivitalo": "#e76f51",
        "Omakotitalo": "#264653",
    },
    hover_data={
        "price_per_m2_e":":.0f",
        "area_m2": ":.0f",
        "type": False
        },
    hover_name="type",
    labels=dict(price_per_m2_e="Price per sqm (€)", area_m2="Area (m<sup>2</sup>)", type="Building type")
)
fig.update_traces(hovertemplate='Area: %{x} m<sup>2</sup><br>Price Per sqm:%{y}€') #
fig.update_traces(hovertemplate=None, selector={'name':'Europe'}) 
fig.update_xaxes(showgrid=True)
fig.update_yaxes(showgrid=True)
fig.show()

In [96]:
df = data["plot_ownership"].value_counts().reset_index()
fig = go.Figure(go.Pie(
    name = "",
    values = df["count"],
    labels = df["plot_ownership"],
    text = df["plot_ownership"],
    hovertemplate = "%{label}: <br>Count: %{count}",
    marker={
        "colors":["#264653","#e76f51"]
    },
    title="Plot Ownership"
))
fig.update_layout(showlegend=False)
fig.show()

In [97]:
df = data["elevator"].replace({
    True: "Has Elevator",
    False:"No Elevator"
    }).value_counts().reset_index()
fig = go.Figure(go.Pie(
    name = "",
    values = df["count"],
    labels = df["elevator"],
    text = df["elevator"],
    hovertemplate = "%{label}: <br>Count: %{count}",
    marker={
        "colors":["#264653","#e76f51"]
    },
    title="Elevator"
))
fig.update_layout(showlegend=False)
fig.show()

In [98]:
data

Unnamed: 0,region,rooms,type,area_m2,price_e,price_per_m2_e,construction_year,floor,elevator,quality,plot_ownership,energy_grade,municipality
0,Ruoholahti,1h+kk,Kerrostalo,2300.0,170000.0,7391.0,1926,3/5,True,tyyd.,Owned,F2018,Helsinki
1,Taka-töölö,"1h,kk",Kerrostalo,2400.0,155000.0,6458.0,1940,2/3,False,huono,Owned,E2018,Helsinki
2,Taka-töölö,"1h, kk, kph",Kerrostalo,2700.0,169000.0,6259.0,1937,4/7,True,tyyd.,Owned,D2018,Helsinki
3,Munkkivuori,1h+kk,Kerrostalo,2800.0,164000.0,5857.0,1957,2/3,False,tyyd.,Owned,E2018,Helsinki
4,"Laajasalo, yliskylä",1h+kk+kph,Kerrostalo,2950.0,128200.0,4346.0,1974,2/8,True,tyyd.,Owned,E2018,Helsinki
...,...,...,...,...,...,...,...,...,...,...,...,...,...
762,Asola,"4h, k, wc, kph,...",Kerrostalo,8550.0,253800.0,2968.0,2007,2/5,True,hyvä,Owned,D2007,Vantaa
763,Kuninkaanmäki,"Oh,k ja ruokati...",Omakotitalo,23600.0,620000.0,2627.0,2016,2/2,False,hyvä,Owned,C2013,Vantaa
765,Havukoski,4h+k+kh+s+ter.,Rivitalo,9900.0,241000.0,2434.0,1979,1/2,False,hyvä,Owned,B2018,Vantaa
766,Tikkurila,4h+k+s,Kerrostalo,8700.0,223000.0,2563.0,1988,3/3,False,tyyd.,Owned,D2013,Vantaa


In [166]:
df = data.energy_grade.value_counts().reset_index()
fig = px.bar(
    data_frame=df,
    x="energy_grade",
    y="count",
    template="simple_white",
    hover_data={
        "energy_grade":False,
        "count": ":.0f",
        },
    hover_name="energy_grade",
    labels=dict(energy_grade="Energy Grade"),
)
fig.update_traces(marker_color='#e9c46a')
fig.update_xaxes(showgrid=True)
fig.show()

In [163]:
df = data.construction_year.round(-1).value_counts().sort_index().reset_index()
fig = px.bar(
    data_frame=df,
    y="count",
    x="construction_year",
    template="simple_white",
    hover_data={
        "construction_year":False,
        "count": ":.0f",
        },
    hover_name="construction_year",
    labels=dict(construction_year="Construction decade"),
)
fig.update_traces(marker_color='#e9c46a')
fig.update_xaxes(showgrid=True)
fig.show()

In [164]:
df

Unnamed: 0,construction_year,count
0,1850,3
1,1870,2
2,1880,1
3,1890,9
4,1900,19
5,1910,74
6,1920,48
7,1930,197
8,1940,248
9,1950,173


In [155]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54 entries, 0 to 53
Data columns (total 3 columns):
 #   Column             Non-Null Count  Dtype   
---  ------             --------------  -----   
 0   type               54 non-null     object  
 1   construction_year  54 non-null     category
 2   value              54 non-null     int64   
dtypes: category(1), int64(1), object(1)
memory usage: 1.7+ KB


In [159]:
df = pd.crosstab(
    index=data.type,
    columns=data.construction_year.round(decimals=-1),
).reset_index().melt(id_vars="type")
df["construction_year"] = df["construction_year"].astype("str")
fig = px.histogram(
    data_frame=df,
    y="value",
    x="construction_year",
    color="type",
    template="simple_white",
    barmode='group',
    color_discrete_map={
        "Kerrostalo": "#e9c46a",
        "Rivitalo": "#e76f51",
        "Omakotitalo": "#264653",
    },
    hover_name="type",
    labels=dict(construction_year="Construction Decade", value="Count", type="Building type")
)
fig.update_xaxes(showgrid=True)
fig.show()