In [1]:
import pandas as pd

def _whole_floats_to_int(column):
    """Return `column` as nullable Int64 when it is a float column of whole numbers.

    Missing values are ignored when checking for whole numbers; any other
    column is returned unchanged.
    """
    is_whole_float = column.dtype == float and (column.dropna() % 1 == 0).all()
    return column.astype("Int64") if is_whole_float else column


text_columns = [
    'property_ID',
    'locality_name',
    'type',
    'subtype',
    'state_of_building',
]

df = pd.read_csv("final_cleaned_data.csv")

# Whole-number float columns -> nullable Int64 (keeps missing values as <NA>).
df = df.apply(_whole_floats_to_int)

# Object columns holding text -> pandas "string" dtype, all in one call.
df = df.astype({name: 'string' for name in text_columns})

# A listing without a price cannot be used in the price analysis below.
df = df.dropna(subset=["price (€)"])

# Persist the typed, price-complete table and show the resulting dtypes.
df.to_csv("cleaned_data_int.csv", index=False)
display(df.dtypes)

property_ID                      string[python]
locality_name                    string[python]
postal_code                               Int64
type                             string[python]
subtype                          string[python]
price (€)                                 Int64
number_of_bedrooms                        Int64
living_area (m²)                          Int64
equiped_kitchen (yes:1, no:0)             int64
furnished (yes:1, no:0)                   int64
open_fire (yes:1, no:0)                   int64
terrace (yes:1, no:0)                     int64
terrace_area (m²)                         Int64
garden (yes:1, no:0)                      int64
number_facades                            Int64
swimming_pool (yes:1, no:0)               int64
state_of_building                string[python]
dtype: object

In [2]:
import plotly.express as px
# Treat postal_code as a categorical label rather than a number.
# The nullable "string" dtype keeps a missing postal code as <NA>; plain `str`
# would turn it into the literal text "<NA>", which groupby would then report
# as if it were a real postal code.
df['postal_code'] = df['postal_code'].astype("string")

# Average price per postal code, rounded to 2 decimals. Rows without a postal
# code are left out by groupby's default dropna=True.
avg_price_by_location = df.groupby("postal_code", as_index=False)["price (€)"].mean().round(2)

fig = px.scatter(
    avg_price_by_location,
    x="postal_code",
    y="price (€)",
    title="Average Price by Postal Code",
    labels={"postal_code": "Postal Code", "price (€)": "Average Price (€)"},
)

# Postal codes are digit-only strings, which plotly's axis auto-detection reads
# as numbers (linear axis). Force a categorical axis so each code is its own
# evenly spaced position.
fig.update_xaxes(type="category")

fig.show()

print(avg_price_by_location.head())

  postal_code  price (€)
0        1000  534727.16
1        1020  407177.78
2        1030  480358.67
3        1040   552675.0
4        1050  774616.82


In [3]:
import plotly.express as px

# Mean price for each bedroom count.
price_by_bedrooms = df.groupby("number_of_bedrooms", as_index=False)["price (€)"]
avg_price_by_bedrooms = price_by_bedrooms.mean()

# Scatter of the per-group means with a linear regression line on top.
# trendline="ols" relies on statsmodels being installed (pip install statsmodels).
fig = px.scatter(
    avg_price_by_bedrooms,
    x="number_of_bedrooms",
    y="price (€)",
    trendline="ols",
    title="Price correlation based on number of bedrooms",
    labels={
        "number_of_bedrooms": "Number of Bedrooms",
        "price (€)": "Price (€)",
    },
)
fig.show()

print(avg_price_by_bedrooms.head())

   number_of_bedrooms      price (€)
0                   0   209335.72973
1                   1   237236.24837
2                   2  316360.700075
3                   3  406625.487518
4                   4  499243.585197


In [4]:
import plotly.express as px

# Mean price for every distinct living-area value (m²).
price_by_living_area = df.groupby("living_area (m²)", as_index=False)["price (€)"]
avg_price_by_living_area = price_by_living_area.mean()

# One point per living-area value, placed at that group's mean price.
fig = px.scatter(
    avg_price_by_living_area,
    x="living_area (m²)",
    y="price (€)",
    title="Price correlation based on living area (m²)",
    labels={
        "living_area (m²)": "Living area (m²)",
        "price (€)": "Price (€)",
    },
)
fig.show()

print(avg_price_by_living_area.head())

   living_area (m²)      price (€)
0                 1  337195.833333
1                12       156000.0
2                13       156000.0
3                14       156000.0
4                15       232500.0


In [5]:
import plotly.express as px

# Mean price for properties without (0) and with (1) a swimming pool.
avg_price_by_swimming_pool = (
    df.groupby("swimming_pool (yes:1, no:0)", as_index=False)["price (€)"]
    .mean()
)

# Bar per flag value; the mean price is written above each bar as whole euros
# with thousands separators.
fig = px.bar(
    avg_price_by_swimming_pool,
    x="swimming_pool (yes:1, no:0)",
    y="price (€)",
    text="price (€)",
    title="Price correlation based on swimming pool (yes:1, no:0)",
    labels={
        "swimming_pool (yes:1, no:0)": "Swimming pool (yes:1, no:0)",
        "price (€)": "Price (€)",
    },
).update_traces(texttemplate="€%{text:,.0f}", textposition="outside")
fig.show()

print(avg_price_by_swimming_pool.head())

   swimming_pool (yes:1, no:0)      price (€)
0                            0  372860.071147
1                            1  802791.007833


In [6]:
import plotly.express as px

# Mean price for properties without (0) and with (1) a garden.
avg_price_by_garden = (
    df.groupby("garden (yes:1, no:0)", as_index=False)["price (€)"]
    .mean()
)

# Bar per flag value; the mean price is written above each bar as whole euros
# with thousands separators.
fig = px.bar(
    avg_price_by_garden,
    x="garden (yes:1, no:0)",
    y="price (€)",
    text="price (€)",
    title="Price correlation based on garden (yes:1, no:0)",
    labels={
        "garden (yes:1, no:0)": "Garden (yes:1, no:0)",
        "price (€)": "Price (€)",
    },
).update_traces(texttemplate="€%{text:,.0f}", textposition="outside")
fig.show()

print(avg_price_by_garden.head())

   garden (yes:1, no:0)      price (€)
0                     0  333581.127105
1                     1  426127.061679


In [7]:
import plotly.express as px

# Mean price for each number of facades.
avg_price_by_number_facades = (
    df.groupby("number_facades", as_index=False)["price (€)"]
    .mean()
)

# Bar per facade count; the mean price is written above each bar as whole
# euros with thousands separators.
fig = px.bar(
    avg_price_by_number_facades,
    x="number_facades",
    y="price (€)",
    text="price (€)",
    title="Price correlation based on number_facades",
    labels={
        "number_facades": "Number Facades",
        "price (€)": "Price (€)",
    },
).update_traces(texttemplate="€%{text:,.0f}", textposition="outside")
fig.show()

print(avg_price_by_number_facades.head())

   number_facades      price (€)
0               1  180428.571429
1               2  341637.873252
2               3   374339.83272
3               4  500110.635477


In [8]:
import plotly.express as px

# Mean price for every distinct terrace-area value (m²).
price_by_terrace_area = df.groupby("terrace_area (m²)", as_index=False)["price (€)"]
avg_price_by_terrace_area = price_by_terrace_area.mean()

# One point per terrace-area value, placed at that group's mean price.
fig = px.scatter(
    avg_price_by_terrace_area,
    x="terrace_area (m²)",
    y="price (€)",
    title="Price correlation based on terrace area (m²)",
    labels={
        "terrace_area (m²)": "Terrace Area (m²)",
        "price (€)": "Price (€)",
    },
)
fig.show()

print(avg_price_by_terrace_area.head())

   terrace_area (m²)      price (€)
0                  1  273842.857143
1                  2  246455.384615
2                  3  295396.737374
3                  4   307142.46789
4                  5  299107.539326


In [None]:
# Mean price for properties without (0) and with (1) a terrace.
avg_price_by_terrace = (
    df.groupby("terrace (yes:1, no:0)", as_index=False)["price (€)"]
    .mean()
)

# Bar per flag value; the mean price is written above each bar as whole euros
# with thousands separators.
fig = px.bar(
    avg_price_by_terrace,
    x="terrace (yes:1, no:0)",
    y="price (€)",
    text="price (€)",
    title="Price correlation based on terrace (yes:1, no:0)",
    labels={
        "terrace (yes:1, no:0)": "Terrace (yes:1, no:0)",
        "price (€)": "Price (€)",
    },
).update_traces(texttemplate="€%{text:,.0f}", textposition="outside")
fig.show()

print(avg_price_by_terrace.head())

   terrace (yes:1, no:0)      price (€)
0                      0  316890.761076
1                      1  400070.237276


In [10]:
# Mean price for properties without (0) and with (1) an open fire.
avg_price_by_open_fire = (
    df.groupby("open_fire (yes:1, no:0)", as_index=False)["price (€)"]
    .mean()
)

# Bar per flag value; the mean price is written above each bar as whole euros
# with thousands separators.
fig = px.bar(
    avg_price_by_open_fire,
    x="open_fire (yes:1, no:0)",
    y="price (€)",
    text="price (€)",
    title="Price correlation based on open_fire (yes:1, no:0)",
    labels={
        "open_fire (yes:1, no:0)": "Open Fire (yes:1, no:0)",
        "price (€)": "Price (€)",
    },
).update_traces(texttemplate="€%{text:,.0f}", textposition="outside")
fig.show()

print(avg_price_by_open_fire.head())

   open_fire (yes:1, no:0)      price (€)
0                        0  379618.757978
1                        1  642863.636364


In [11]:
# Mean price for unfurnished (0) and furnished (1) properties.
avg_price_by_furnished = (
    df.groupby("furnished (yes:1, no:0)", as_index=False)["price (€)"]
    .mean()
)

# Bar per flag value; the mean price is written above each bar as whole euros
# with thousands separators.
fig = px.bar(
    avg_price_by_furnished,
    x="furnished (yes:1, no:0)",
    y="price (€)",
    text="price (€)",
    title="Price correlation based on furnished (yes:1, no:0)",
    labels={
        "furnished (yes:1, no:0)": "Furnished (yes:1, no:0)",
        "price (€)": "Price (€)",
    },
).update_traces(texttemplate="€%{text:,.0f}", textposition="outside")
fig.show()

print(avg_price_by_furnished.head())

   furnished (yes:1, no:0)      price (€)
0                        0  382827.798952
1                        1  421808.962712


In [12]:
# Mean price for properties without (0) and with (1) an equipped kitchen.
avg_price_by_eqiuped_kitchen = (
    df.groupby("equiped_kitchen (yes:1, no:0)", as_index=False)["price (€)"]
    .mean()
)

# Bar per flag value; the mean price is written above each bar as whole euros
# with thousands separators.
fig = px.bar(
    avg_price_by_eqiuped_kitchen,
    x="equiped_kitchen (yes:1, no:0)",
    y="price (€)",
    text="price (€)",
    title="Price correlation based on equiped_kitchen (yes:1, no:0)",
    labels={
        "equiped_kitchen (yes:1, no:0)": "Equiped Kitchen (yes:1, no:0)",
        "price (€)": "Price (€)",
    },
).update_traces(texttemplate="€%{text:,.0f}", textposition="outside")
fig.show()

print(avg_price_by_eqiuped_kitchen.head())

   equiped_kitchen (yes:1, no:0)      price (€)
0                              0  359861.985618
1                              1  439267.549139


In [None]:
# NOTE(review): this cell repeats the equipped-kitchen analysis of the cell
# directly above it (same grouping, same chart, same variable); it can be
# deleted without losing any result.

# Mean price for properties without (0) and with (1) an equipped kitchen.
kitchen_prices = df.groupby("equiped_kitchen (yes:1, no:0)", as_index=False)["price (€)"]
avg_price_by_eqiuped_kitchen = kitchen_prices.mean()

fig = px.bar(
    avg_price_by_eqiuped_kitchen,
    x="equiped_kitchen (yes:1, no:0)",
    y="price (€)",
    text="price (€)",
    title="Price correlation based on equiped_kitchen (yes:1, no:0)",
    labels={
        "equiped_kitchen (yes:1, no:0)": "Equiped Kitchen (yes:1, no:0)",
        "price (€)": "Price (€)",
    },
)
# Write the mean price above each bar as whole euros with thousands separators.
fig.update_traces(texttemplate="€%{text:,.0f}", textposition="outside")
fig.show()

print(avg_price_by_eqiuped_kitchen.head())