# Analysis
We first load the data saved in the preprocessing steps.

In [None]:
import pandas as pd
import os
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import helpers
from settings import data_folder, preprocessed_folder
%load_ext autoreload
%autoreload 2

# Project helper called once before any figure is drawn.
# NOTE(review): presumably configures the global plot styling — confirm in helpers.set_plotting.
helpers.set_plotting() 

In [None]:
# Human-readable labels for the measure codes M-01 ... M-07, one entry per code.
# The embedded "\n" splits each label over two lines.
# NOTE(review): this list is not referenced in the visible cells of the notebook.
measure_name = [
    "Envelope \nrenovation (M-01)",
    "Wood/pellets\nheating (M-02)",
    "Wood heating\n <70kW (M-03)",
    "Wood heating\n >70kW (M-04)",
    "Air/water heat \npump (M-05)",
    "Elec. heat \npump (M-06)",
    "Connection to dec.\n heating (M-07)",
]
# Load database
# NOTE(review): the cells below read the columns "Nr. HFM 2015", "Alpine",
# "Typology" and "Jahr Auszahlung 1" from this object.
db_with_terrain_class = helpers.load_database()


Number of renovations per measure (according to Nr. HFM 2015)

In [None]:
# Bar chart of the number of supported measures per measure code ("Nr. HFM 2015").
# `.size()` is the built-in per-group row count: it returns the same Series as
# `.apply(len)` without calling a Python function once per group, and matches
# the `.size()` calls used by the other cells of this notebook.
db_with_terrain_class.groupby("Nr. HFM 2015").size().plot(kind="bar")
plt.ylabel("Number of supported measures")
helpers.save("nb_measures")


# Which regions get most of the subsidies?

We are interested in which type of region (urban/rural/alpine/intermediate) receives the largest amount of subsidies. We split the analysis into two parts:

- The building envelope renovation: it corresponds to the measure M-01
- The heating system replacement: it corresponds to the measures M-02, ..., M-07. For this analysis, we don't distinguish these measures.
## Building envelope renovation

We start with the number of supported renovations in each region for the measure M-01.

In [None]:
# Only measure M-01
# Keep only the rows of measure M-01 (independent copy of the selection).
is_m01 = db_with_terrain_class["Nr. HFM 2015"] == "M-01"
m01 = db_with_terrain_class[is_m01].copy()

# Row count for every (Alpine, Typology) pair; the count column keeps the
# name of the selected column, "Nr. HFM 2015".
m01_counts = m01.groupby(["Alpine", "Typology"])["Nr. HFM 2015"].size().reset_index()

# Wide layout: one row per Alpine value, one column per typology, in a fixed order.
typology_order = ["Rural", "Intermediate", "Urban"]
per_region = pd.pivot_table(
    m01_counts, values="Nr. HFM 2015", index="Alpine", columns="Typology"
).reindex(columns=typology_order)

# Plotting
ax = per_region.plot(kind="bar", ylabel="Number of building renovation (M-01)")
helpers.set_ylim(ax, ymax=14000)
helpers.save("nb_alpine_type")


We then look at its progression over the years. The number of supported building envelope renovations increases until 2019 and reaches a plateau after that year.

In [None]:
# Number of M-01 rows per (Alpine, Typology, "Jahr Auszahlung 1").
renovation_label = "Building envelope renovation (M-01)"
yearly_counts = m01.groupby(["Alpine", "Typology", "Jahr Auszahlung 1"]).size()
per_region_year = yearly_counts.reset_index().rename(
    columns={"Jahr Auszahlung 1": "Year", 0: renovation_label}
)

# Parse the year with the "%Y" format and write it back as a four-digit string.
year_as_date = pd.to_datetime(per_region_year["Year"], format="%Y")
per_region_year["Year"] = year_as_date.dt.strftime("%Y")

hue_order = ["Rural", "Intermediate", "Urban"]

# Plotting: one panel per Alpine value, one line per typology.
g = sns.relplot(
    data=per_region_year,
    kind="line",
    x="Year",
    y=renovation_label,
    col="Alpine",
    hue="Typology",
    hue_order=hue_order,
    height=6,
)
helpers.set_ylim(g, ymax=4500, seaborn=True)
helpers.save("nb_alpine_type_peryear")


## Heating system replacement

We perform the same analysis for the measures M-02,...,M-07. 

In [None]:
# Only measures M-02,...,M-07
# Keep only the heating-system measures M-02,...,M-07 (independent copy).
heating_codes = ["M-02", "M-03", "M-04", "M-05", "M-06", "M-07"]
m02_m07 = db_with_terrain_class[
    db_with_terrain_class["Nr. HFM 2015"].isin(heating_codes)
].copy()

# Row count per (Alpine, Typology). `.size()` replaces `.apply(len)`: same
# counts, no per-group Python call, and consistent with the M-01 cells.
# After reset_index() the unnamed count column is still labelled 0.
per_region = m02_m07.groupby(["Alpine", "Typology"]).size().reset_index()
# Plotting
ax = (
    per_region.pivot_table(columns="Typology", index="Alpine", values=0)
    .reindex(columns=["Rural", "Intermediate", "Urban"])
    .plot(kind="bar", ylabel="Renovated heating system (M-02,...,M-07)")
)
helpers.set_ylim(ax, ymax=14000)
helpers.save("subside_heating_alpine_type")


The progression over the years shows an exponential increase in the number of supported measures concerning the replacement of heating systems. This progression is observed in all regions.

In [None]:
# Number of M-02,...,M-07 rows per (Alpine, Typology, "Jahr Auszahlung 1").
group_keys = ["Alpine", "Typology", "Jahr Auszahlung 1"]
column_names = {
    "Jahr Auszahlung 1": "Year",
    0: "Number of renovated heating system (M-02,...,M-07)",
}
per_region_year = m02_m07.groupby(group_keys).size().reset_index()
per_region_year = per_region_year.rename(columns=column_names)

# Parse the year with the "%Y" format and store it back as a four-digit string.
parsed_year = pd.to_datetime(per_region_year["Year"], format="%Y")
per_region_year["Year"] = parsed_year.dt.strftime("%Y")

hue_order = ["Rural", "Intermediate", "Urban"]

# Plotting: one panel per Alpine value, one line per typology.
g = sns.relplot(
    data=per_region_year,
    x="Year",
    y="Number of renovated heating system (M-02,...,M-07)",
    kind="line",
    col="Alpine",
    hue="Typology",
    hue_order=hue_order,
    height=6,
)
helpers.set_ylim(g, ymax=4500, seaborn=True)
helpers.save("nb_heating_alpine_type_peryear")


## Relative numbers
The previous analysis gives absolute numbers. Obviously, the number of supported measures is expected to be higher in urban regions, where the population is larger. Visualising the relative number per capita can provide additional information. See ['additional plots'](#Additional-plots) at the end of the notebook for a deeper analysis.

In [None]:
# Get the number of inhabitants/region
hab_alpin_typ_sum = helpers.get_nb_hab_per_region()
hab_alpin_typ_sum


### Envelope renovation
We first consider the measure M-01.

In [None]:
# Compute the ratio
per_region_per_hab = (
    m01.groupby(["Alpine", "Typology"]).apply(len) / hab_alpin_typ_sum
).reset_index()
# Reshaping data
per_region_per_hab = per_region_per_hab.pivot_table(
    columns="Typology", index="Alpine", values=0
).reindex(columns=["Rural", "Intermediate", "Urban"])
# Plotting
ax = per_region_per_hab.plot(kind="bar",figsize=(10,6))
helpers.set_ylim(ax, 0.008)
helpers.set_title(
    "# re-insulated buildings/capita", size=24
)
helpers.save("nb_alpine_type_per_capita")


### Heating system
We consider the measures M-02,...,M-07

In [None]:
# Compute the ratio
per_region = (
    m02_m07.groupby(["Alpine", "Typology"]).apply(len) / hab_alpin_typ_sum
).reset_index()
# Reshaping data
per_region = per_region.pivot_table(
    columns="Typology", index="Alpine", values=0
).reindex(columns=["Rural", "Intermediate", "Urban"])
# Plotting
ax = per_region.plot(kind="bar")
helpers.set_ylim(ax, 0.008)
helpers.set_label(ylabel="Renovated heating system per capita (M-02,...,M-07)", size=14)
helpers.save("nb_heating_alpine_type_per_capita")


# Additional plots 

We check if the plots obtained in the first part change if we consider the number of rooms rather than the number of buildings. This analysis might take into account that the renovated buildings are larger in urban locations. 

In [None]:
# Load the regbl (all the buildings in CH)
regbl = pd.read_pickle(os.path.join(preprocessed_folder, "rebgl.pickle")).astype(
    {"EGID": "Int64"}
)

# Repeat some rows since some values of the EGID column of the db_with_terrain_class database contains multiple EGID in the same cell.
db_with_terrain_class_exploded = helpers.explode_db(db_with_terrain_class)

# Combine regbl with our database and identify which buildings have done which measure.
combined_regbl = pd.merge(regbl, db_with_terrain_class_exploded, on="EGID", how="left")

combined_regbl = helpers.add_renov_indicator(combined_regbl)
list_considered_buildings = [
    "Usage d'habitation",
    "Maison avec usage annexe",
    "Part. à usage d'hab.",
    np.nan,
]
# Filter the data by keeping only existing buildings and those with habitation purpose
combined_regbl_sub = combined_regbl[
    (combined_regbl.Statut_bat == "existant")
    & (combined_regbl.Cat_bat.isin(list_considered_buildings))
].copy()


We observe that buildings in urban areas have more rooms. Hence, the rate of supported measures (expressed in rooms) per capita is largest in urban areas.

In [None]:
measure = "M-01"
ratio = False  # to change to True to get the number per capita

# Sum of WAZIM (the room count used in this section) for every
# (Alpine, Typology, indicator) combination.
nb_renovated_rooms = (
    combined_regbl_sub.groupby(["Alpine", "Typology", measure])
    .WAZIM.sum()
    .reset_index()
)

# Keep only the groups whose indicator equals 1 and index them by region.
per_region = (
    nb_renovated_rooms[nb_renovated_rooms[measure] == 1]
    .drop(columns=[measure])
    .set_index(["Alpine", "Typology"])
)
if ratio:
    per_hab = per_region.div(hab_alpin_typ_sum, axis=0)
    # The longer "... expressed in rooms per capita" label that used to be
    # assigned first was immediately overwritten; only the effective one is kept.
    ylabel = "# re-insulated rooms/capita"
else:
    per_hab = per_region
    ylabel = "Number of building envelope renovations (M-01)\n expressed in rooms"

# Reshape data
per_region = per_hab.pivot_table(
    columns="Typology", index="Alpine", values="WAZIM"
).reindex(columns=["Rural", "Intermediate", "Urban"])
# NOTE(review): sns.set() changes the global seaborn/matplotlib settings, so it
# also affects every figure drawn after this cell (and earlier cells if re-run).
sns.set(font_scale=2)
ax = per_region.plot(
    kind="bar", title=ylabel
)
# Single branch on `ratio`: the y-limit is only fixed for the absolute numbers.
if ratio:
    helpers.save("nb_insulatedroom_per_capita")
else:
    helpers.set_ylim(ax, ymax=220000)
    helpers.save("nb_insulatedroom")

We compute the same quantity for the heating system replacement.

In [None]:
measure = "Heating_renov"
ratio = False  # to change to get the number per capita

# Sum of WAZIM for every (Alpine, Typology, indicator) combination.
rooms_by_group = combined_regbl_sub.groupby(["Alpine", "Typology", measure])["WAZIM"].sum()
nb_renovated_rooms = rooms_by_group.reset_index()

# Keep only the groups whose indicator equals 1 and index them by region.
has_measure = nb_renovated_rooms[measure] == 1
per_region = (
    nb_renovated_rooms[has_measure]
    .drop(columns=[measure])
    .set_index(["Alpine", "Typology"])
)

# Absolute numbers by default; divided by the inhabitants when `ratio` is set.
if not ratio:
    per_hab = per_region
    ylabel = "Renovated heating system (M-02,...,M-07)\n expressed in rooms"
else:
    per_hab = per_region.div(hab_alpin_typ_sum, axis=0)
    ylabel = "Renovated heating system (M-02,...,M-07)\n expressed in rooms per capita"

# Reshape data: one row per Alpine value, one column per typology.
typology_columns = ["Rural", "Intermediate", "Urban"]
per_region = per_hab.pivot_table(
    values="WAZIM", index="Alpine", columns="Typology"
).reindex(columns=typology_columns)

ax = per_region.plot(kind="bar", title=ylabel)

# The y-limit is only fixed for the absolute numbers.
if ratio:
    helpers.save("nb_renov_per_room_per_capita")
else:
    helpers.set_ylim(ax, ymax=100000)
    helpers.save("nb_renov_per_room")