# Aarhusbolig Visualised
We will explore the Aarhusbolig dataset with different plot types in order to explain the dataset and draw insights from it.


In [2]:
# if you have not installed following packages, please install them by running:
#!pip install numpy pandas matplotlib plotly

In [3]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go

In [4]:
# if you have this error:
# ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed
# run this command:
# ! pip install nbformat

## 3.1 Load the Data


In [5]:
# load the Aarhusbolig data
aarhusbolig = pd.read_csv(
    r"C:\Users\marja\Desktop\google\Aarhus_Bolig\data\Gold\tenancies_gold.csv"
)
aarhusbolig.head()

Unnamed: 0,CompanyId,DepartmentId,TenancyId,TenancyType,ApartmentType,IsAvailable,IsStandard,UnrestrictedRentalPeriod,MovingPercentage,Rooms,...,Water,Modernization,Communications,Improvement Rights,Maintanence,IsPetAllowed,HasElevator,HasAdvantage,HasFloorPlan,CompanyName
0,698,1,230,Familiebolig,Lejlighed,False,True,False,12.72,2,...,0.0,0.0,0.0,0.0,0.0,False,False,False,False,Boligforeningen Århus Omegn
1,698,1,230,Familiebolig,Lejlighed,False,True,False,12.72,2,...,0.0,0.0,0.0,0.0,0.0,False,False,False,False,Boligforeningen Århus Omegn
2,698,1,230,Familiebolig,Lejlighed,False,True,False,12.72,2,...,0.0,0.0,0.0,0.0,0.0,False,False,False,False,Boligforeningen Århus Omegn
3,698,1,230,Familiebolig,Lejlighed,False,True,False,12.72,2,...,0.0,0.0,0.0,0.0,0.0,False,False,False,False,Boligforeningen Århus Omegn
4,698,1,333,Familiebolig,Lejlighed,False,True,False,12.72,2,...,0.0,0.0,260.0,0.0,0.0,False,False,False,False,Boligforeningen Århus Omegn


In [6]:
# Delete the columns that are not needed
aarhusbolig.drop(columns=["DepartmentId", "TenancyId"], inplace=True)

## 3.2 Visualize Categorical Distributions
In this section, we will visualize some of the categorical variables of the dataset: `company`, `type` and `rooms`

In [7]:
# change this to any of the other variable names to perform the plot
variable_name = (
    "CompanyName"  # Change this to 'TenancyType' or 'Rooms' for other variables
)

In [8]:
# find the count of each value in the category
values, counts = (
    aarhusbolig[variable_name].value_counts().index,
    aarhusbolig[variable_name].value_counts(),
)

In [9]:
# create the bar plot
fig = px.bar(x=values, y=counts, title=f"Distribution of {variable_name}")
fig.update_layout(
    title={
        "text": f"Distribution of {variable_name}",
        "font": {"size": 20, "color": "darkblue"},
    },
    xaxis_title=variable_name,
    yaxis_title="Count",
    xaxis_tickangle=-45,  # Rotate x-axis labels for better readability
    xaxis=dict(title_font=dict(size=14, color="darkgreen")),
    yaxis=dict(title_font=dict(size=14, color="darkgreen")),
)
fig.show()

## 3.3 Visualize Location Data
In this section, we will visualize the location data (`long`, `lat`) of the listed housings in the dataset.
- Create a geo scatter plot, that includes the Aarhus area (e.g. using `go.Scattermapbox`)
- Add the location data to the scatter plot
- Add axis labels and plot title
- Color the data points according to one of the categorical variables: `company`, `type` and `rooms`
- *Optional*: Change the size of the data points according to the size of the housing `size`
- *Optional*: Center the map on the average `long`, `lat` location
- *Optional*: Create two extra plots with coloring for the other categorical variables
- *Optional*: Add text on hower to the datapoints that holds some information that you think is valuable. Could be rent, size, number of rooms, etc.

In [10]:
# get the location data
# Get the location data and prepare for visualization
# Define the variable to use for coloring
coloring_variable = "CompanyName"

# Get the location data, along with additional data needed for plotting
aarhusbolig_locations = aarhusbolig[
    ["Lng", "Lat", coloring_variable, "Size", "TotalRent", "Rooms"]
]

# Drop rows with N/A values
aarhusbolig_locations = aarhusbolig_locations.dropna(how="any")

In [11]:
# Color based on category
# Convert to categorical codes for coloring
aarhusbolig_locations["color"] = (
    aarhusbolig_locations[coloring_variable].astype("category").cat.codes
)

# Create hover text column with rent, size and number of rooms
aarhusbolig_locations["display_text"] = (
    aarhusbolig_locations["TotalRent"]
    .astype(str)
    .apply(lambda x: f"Rent: {x} DKK</br>")
    + aarhusbolig_locations["Size"].astype(str).apply(lambda x: f"Size: {x} m2</br>")
    + aarhusbolig_locations["Rooms"].astype(str).apply(lambda x: f"Rooms: {x}")
)

In [12]:
# plot locations on scattergeo plot
# Pick a value that scales the size of the markers
size_scale = 15

# Setup marker size and coloring
markers = dict(
    size=aarhusbolig_locations["Size"] / size_scale,
    color=aarhusbolig_locations["color"],
)

# Setup scatter on a mapbox plot
trace = go.Scattermapbox(
    lat=aarhusbolig_locations["Lat"],
    lon=aarhusbolig_locations["Lng"],
    mode="markers",
    marker=markers,
    hoverinfo="text+lat+lon",
    text=aarhusbolig_locations["display_text"],
)

# Define center of map by mean of latitude and longitude
center_coordinates = dict(
    lat=aarhusbolig_locations["Lat"].mean(), lon=aarhusbolig_locations["Lng"].mean()
)

# Change layout
layout = go.Layout(
    title="Locations of Aarhusbolig listings",
    autosize=True,
    hovermode="closest",
    mapbox=dict(
        center=center_coordinates,
        zoom=10,
        style="carto-positron",
    ),
)

fig = go.Figure(data=[trace], layout=layout)
fig.show()


*scattermapbox* is deprecated! Use *scattermap* instead. Learn more at: https://plotly.com/python/mapbox-to-maplibre/



## 3.4 Pricing Distributions and Correlations
In this section we will look at distribution of pricing, along with visualizing the correlation of the rent, or other price variable, to another variable.

### 3.4.1 Price Distribution for heat, electricity and water
We want to visualize the distribution of the cost of `heat`, `electricity`, `water` and `tv_internet`.
- Create a histogram plot displaying one of the
    - Add axis labels and plot title
- Create a histogram containing all the variables
    - Add axis labels and plot title
    - Add plot legends

In [13]:
# plot histogram of a single variable
variable_name = "Heating"  # Change to 'Electricity', 'Water', or 'Communications'

fig = go.Figure()
fig.add_trace(go.Histogram(x=aarhusbolig[variable_name].values, name=variable_name))

fig.update_layout(
    title_text=f"Distribution of {variable_name}",
    xaxis_title_text="Price (DKK)",
    yaxis_title_text="Count",
    barmode="stack",
)
fig.show()

In [14]:
# plot histogram containing all the four variables
# plot histogram of a single variable
variable_names = [
    "Heating",
    "Electricity",
    "Water",
    "Communications",
]  # Change to 'Electricity', 'Water', or 'Communications'

fig = go.Figure()
for variable in variable_names:
    fig.add_trace(go.Histogram(x=aarhusbolig[variable].values, name=variable))

fig.update_layout(
    title_text=f'Distribution of {", ".join(variable_names)}',
    xaxis_title_text="Price (DKK)",
    yaxis_title_text="Count",
    barmode="stack",
)
fig.show()

### 3.4.2 Compare rent to other variables
We want to visualize how the `rent` changes, depending on one of the other variables: `heat`, `electricity`, `water`, `deposit`, `tv_internet` or `size_m2`.

- Create a scatter plot with `rent` on the y-axis and the other variable on the x-axis
- Add title and axis labels
- Color the data points according to the number of rooms in the housing, remember to add legend

In [15]:
# Create a scatter plot with rent on the y-axis and another variable on the x-axis
comparing_variable = "Size"  # Change to 'Heating', 'Electricity', 'Water', 'Deposit', or 'Communications'
coloring_variable = "Rooms"

# Plot the rent vs. the comparing variable, colored according to number of rooms
fig = px.scatter(
    aarhusbolig, x=comparing_variable, y="TotalRent", color=coloring_variable
)
fig.update_layout(
    title=f"Rent vs. {comparing_variable} for housing, colored by {coloring_variable}",
    xaxis_title=comparing_variable,
    yaxis_title="Rent (DKK)",
)
fig.show()