# Analysis
We first load the data saved in the preprocessing steps.

In [None]:
import pandas as pd
import os
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from plot_maps import single_map,map_with_progress_bar
import helpers
from functools import partial
from settings import data_folder, preprocessed_folder

%load_ext autoreload
%autoreload 2

# Presumably applies the project-wide plotting style used by every figure
# below; the details live in helpers.set_plotting — TODO confirm.
helpers.set_plotting() 

In [None]:
# Tick labels for the seven supported measures, in the order M-01 ... M-07.
# The embedded "\n" characters are intentional line breaks for the axis labels.
measure_name = [
    "Envelope \nrenovation (M-01)",
    "Wood/pellets\nheating (M-02)",
    "Wood heating\n <70kW (M-03)",
    "Wood heating\n >70kW (M-04)",
    "Air/water heat \npump (M-05)",
    "Elec. heat \npump (M-06)",
    "Connection to dec.\n heating (M-07)",
]

# Database produced by the preprocessing steps (loaded through the helper).
db_with_terrain_class = helpers.load_database()

# Envelope renovation only (independent copy, safe to add columns to).
m01 = db_with_terrain_class.loc[
    lambda frame: frame["Nr. HFM 2015"] == "M-01"
].copy()

# All heating-replacement measures (independent copy as well).
heating_codes = ["M-02", "M-03", "M-04", "M-05", "M-06", "M-07"]
m02_m07 = db_with_terrain_class.loc[
    lambda frame: frame["Nr. HFM 2015"].isin(heating_codes)
].copy()


# Which measure is the most effective?

We first check the difference in energy consumption and CO2 reduction with and without the supported measure, per subsidy amount. Before going further, we preprocess the data.

In [None]:
# Keep only the rows belonging to one of the seven analysed measures.
all_codes = ["M-01", "M-02", "M-03", "M-04", "M-05", "M-06", "M-07"]
m01_m07 = db_with_terrain_class.loc[
    lambda frame: frame["Nr. HFM 2015"].isin(all_codes)
].copy()

# --- Energy -----------------------------------------------------------------
# Lifetime effect relative to a non-energetic measure (maintenance) minus the
# lifetime effect relative to the HFM 2015 reference, both in MWh.
col_no_measure = "Energiewirkung über die Massnahmenlebensdauer ggü. nicht energetischer Massnahme (Instandhaltung) (MWh)"
col_with_measure = (
    "Energiewirkung über die Massnahmenlebensdauer ggü. Referenz HFM 2015 (MWh)"
)
m01_m07["diff_conso"] = m01_m07[col_no_measure].sub(m01_m07[col_with_measure])
# x1000 converts MWh to kWh before dividing by the committed subsidy (CHF).
m01_m07["diff_conso_per_chf"] = (
    m01_m07["diff_conso"].mul(1000).div(m01_m07["Verpflichteter Beitrag"])
)

# --- CO2 --------------------------------------------------------------------
# Same two reference scenarios, expressed in tonnes of CO2.
col_no_measure = "CO2-Wirkung über die Massnahmenlebensdauer ggü. nicht energetischer Massnahme (Instandhaltung) (t CO2)"
col_with_measure = (
    "CO2-Wirkung über die Massnahmenlebensdauer ggü. Referenz HFM 2015 (t CO2)"
)
m01_m07["diff_co2"] = m01_m07[col_no_measure].sub(m01_m07[col_with_measure])
# x1000 converts tonnes to kg before dividing by the committed subsidy (CHF).
m01_m07["diff_co2_per_chf"] = (
    m01_m07["diff_co2"].mul(1000).div(m01_m07["Verpflichteter Beitrag"])
)


### Energy consumption
We first compute the median for each supported measure. Measures M-07 and M-04 provide the largest decrease in energy consumption for the same amount invested (subsidy).

In [None]:
# One bar per measure: the bar height is the median energy saving per CHF,
# with a 95% confidence interval drawn by seaborn.
ax = sns.barplot(
    x="Nr. HFM 2015",
    y="diff_conso_per_chf",
    data=m01_m07,
    estimator=np.median,
    ci=95,
    order=["M-01", "M-02", "M-03", "M-04", "M-05", "M-06", "M-07"],
)
# Swap the raw measure codes for their readable names.
ax.set_xticklabels(measure_name, fontdict={"fontsize": 15})

helpers.set_label(
    ylabel="Difference in consumption with/without supported \n measure per subsidy amount (median, kWh/CHF)",
    xlabel="Supported measure",
)
helpers.save("diff_conso_per_chf_permeasure_median")


We show how the values vary between cantons.

In [None]:
# Median energy saving per CHF for every (measure, canton) pair.
median_val = (
    m01_m07.groupby(["Nr. HFM 2015", "Kanton"])["diff_conso_per_chf"]
    .median()
    .reset_index()
)

# Sort once (ascending inside each measure) and drop pairs without a median;
# the first/last four rows of each measure are then the extremes.
medians_sorted = median_val.sort_values(
    ["Nr. HFM 2015", "diff_conso_per_chf"]
).dropna()

# Four cantons with the smallest median per measure -> ranks 1..4.
smallest_4 = medians_sorted.groupby("Nr. HFM 2015").head(4).reset_index(drop=True)
smallest_4["rank"] = smallest_4.groupby("Nr. HFM 2015")["diff_conso_per_chf"].rank(
    method="first", ascending=True
)
smallest_4["stats"] = "Cantons with smallest median"

# Four cantons with the largest median per measure -> ranks 23..26
# (26 cantons in total, the top 4 occupy the last four ranks).
biggest_4 = medians_sorted.groupby("Nr. HFM 2015").tail(4).reset_index(drop=True)
biggest_4["rank"] = biggest_4.groupby("Nr. HFM 2015")["diff_conso_per_chf"].rank(
    method="first", ascending=True
) + (26 - 4)
biggest_4["stats"] = "Cantons with largest median"

# Stack both extremes. The final ordering (rank first, then measure) is what
# the annotation loop below relies on to pair bar i with row i.
ranked = (
    pd.concat((smallest_4, biggest_4), axis=0, ignore_index=True)
    .rename(columns={"Kanton": "Canton"})
    .sort_values(["rank", "Nr. HFM 2015", "diff_conso_per_chf"])
    .reset_index(drop=True)
    .astype({"rank": "int"})
)

# Plotting: one group of bars per measure, one hue level per rank.
fig, ax = plt.subplots(figsize=(15, 6))
sns.barplot(
    x="Nr. HFM 2015",
    y="diff_conso_per_chf",
    hue="rank",
    data=ranked,
    order=["M-01", "M-02", "M-03", "M-04", "M-05", "M-06", "M-07"],
    palette="magma",
    ax=ax,
)
ax.set_xticklabels(measure_name)

# Write the canton abbreviation slightly above each bar.
for bar_idx, bar in enumerate(ax.patches):
    label_x = bar.get_x() + bar.get_width() / 2.0
    label_y = bar.get_height() + 5
    ax.annotate(
        text=ranked.at[bar_idx, "Canton"],
        xy=(label_x, label_y),
        ha="center",
        va="center",
        rotation=90,
        size=13,
        weight="semibold",
        fontname="Verdana",
    )

helpers.set_label(
    ylabel="Difference in consumption with/without  supported \n measure per subsidy amount (kWh/CHF)",
    xlabel="Supported measure",
)
helpers.save("diff_conso_per_chf_permeasure_median_top4")


### CO2 reduction
We perform the same computation as before, but with the difference in CO2/subsidy. As before, measures M-04 and M-07 provide the largest decrease in CO2 production for the same investment. Note that we do not take into account whether the installations being upgraded are particularly old in one category, which could explain the differences.

In [None]:
# One bar per measure: the bar height is the median CO2 reduction per CHF,
# with a 95% confidence interval drawn by seaborn.
ax = sns.barplot(
    x="Nr. HFM 2015",
    y="diff_co2_per_chf",
    data=m01_m07,
    estimator=np.median,
    ci=95,
    order=["M-01", "M-02", "M-03", "M-04", "M-05", "M-06", "M-07"],
)
# Swap the raw measure codes for their readable names.
ax.set_xticklabels(measure_name, fontdict={"fontsize": 13})

helpers.set_label(
    ylabel="Difference in CO2 production with/without measure\n per subsidy amount (median, kg CO2/CHF)",
    xlabel="Supported measure",
)
helpers.save("diff_co2_per_chf_permeasure_median")


We compare the values between the cantons

In [None]:
# Median CO2 reduction per CHF for every (measure, canton) pair.
median_val = (
    m01_m07.groupby(["Nr. HFM 2015", "Kanton"])["diff_co2_per_chf"]
    .median()
    .reset_index()
)

# Sort once (ascending inside each measure) and drop pairs without a median;
# the first/last four rows of each measure are then the extremes.
medians_sorted = median_val.sort_values(
    ["Nr. HFM 2015", "diff_co2_per_chf"]
).dropna()

# Four cantons with the smallest median per measure -> ranks 1..4.
smallest_4 = medians_sorted.groupby("Nr. HFM 2015").head(4).reset_index(drop=True)
smallest_4["rank"] = smallest_4.groupby("Nr. HFM 2015")["diff_co2_per_chf"].rank(
    method="first", ascending=True
)
smallest_4["stats"] = "Cantons with smallest median"

# Four cantons with the largest median per measure -> ranks 23..26
# (26 cantons in total, the top 4 occupy the last four ranks).
biggest_4 = medians_sorted.groupby("Nr. HFM 2015").tail(4).reset_index(drop=True)
biggest_4["rank"] = biggest_4.groupby("Nr. HFM 2015")["diff_co2_per_chf"].rank(
    method="first", ascending=True
) + (26 - 4)
biggest_4["stats"] = "Cantons with largest median"

# Stack both extremes. The final ordering (rank first, then measure) is what
# the annotation loop below relies on to pair bar i with row i.
ranked = (
    pd.concat((smallest_4, biggest_4), axis=0, ignore_index=True)
    .rename(columns={"Kanton": "Canton"})
    .sort_values(["rank", "Nr. HFM 2015", "diff_co2_per_chf"])
    .reset_index(drop=True)
    .astype({"rank": "int"})
)

# Plotting: one group of bars per measure, one hue level per rank.
fig, ax = plt.subplots(figsize=(15, 6))
sns.barplot(
    x="Nr. HFM 2015",
    y="diff_co2_per_chf",
    hue="rank",
    data=ranked,
    order=["M-01", "M-02", "M-03", "M-04", "M-05", "M-06", "M-07"],
    palette="magma",
    ax=ax,
)
ax.set_xticklabels(measure_name)

# Write the canton abbreviation slightly above each bar.
for bar_idx, bar in enumerate(ax.patches):
    label_x = bar.get_x() + bar.get_width() / 2.0
    label_y = bar.get_height() + 2
    ax.annotate(
        text=ranked.at[bar_idx, "Canton"],
        xy=(label_x, label_y),
        ha="center",
        va="center",
        rotation=90,
        size=13,
        weight="semibold",
        fontname="Verdana",
    )

helpers.set_label(
    ylabel="Difference in CO2 production with/without supported \n measure per subsidy amount (median, kg CO2/CHF)",
    xlabel="Supported measure",
)
helpers.save("diff_co2_per_chf_permeasure_median_top4")


## Which installation provides the largest power for the same amount of subsidies?
### Heating replacement measures
We compute the subsidy per kW of installed power for the heating measures (M-02, ..., M-07).

In [None]:
# NOTE: at this point db_heating is the same object as m02_m07 (no copy), so
# the two edits below are also visible through m02_m07, as before.
db_heating = m02_m07

# A missing unit count is treated as a single installation.
# The column is assigned back rather than filled with a chained
# `df[col].fillna(..., inplace=True)`: that chained form acts on a temporary
# object and silently does nothing under pandas Copy-on-Write.
n_units_col = "Anzahl Stückholz-/Pelletsfeuerungen mit Tagesbehälter"
db_heating[n_units_col] = db_heating[n_units_col].fillna(1)

# Set to 25 kW for measure M-02 (according to HFM 2015 Table 10)
db_heating.loc[db_heating["Nr. HFM 2015"] == "M-02", "Thermische Nennleistung"] = 25

# Remove missing values (just 2 for measure M-03). From here on db_heating is
# an independent copy, so the new columns are not added to m02_m07.
db_heating = db_heating[db_heating["Thermische Nennleistung"].notna()].copy()

# Total installed power = nominal thermal power x number of units.
db_heating["puissance"] = (
    db_heating["Thermische Nennleistung"] * db_heating[n_units_col]
)
# Committed subsidy per unit of installed power.
db_heating["subside_per_kw"] = (
    db_heating["Verpflichteter Beitrag"] / db_heating["puissance"]
)


We use several visualizations for the data. We first compute the min,median,max.

In [None]:
# Minimum, median and maximum subsidy per kW for each heating measure.
stat_spec = {"subside_per_kw": ["min", "median", "max"]}
subside = db_heating.groupby(["Nr. HFM 2015"]).agg(stat_spec)

# Long format: one row per (measure, statistic); `variable` holds the
# statistic name taken from the second column level.
data = (
    subside.melt(col_level=1, ignore_index=False)
    .reset_index()
    .rename(columns={"value": "Subsides per kW"})
)

# Grouped bars: one group per measure, one bar per statistic.
ax = sns.barplot(x="Nr. HFM 2015", y="Subsides per kW", hue="variable", data=data)
helpers.set_label(ylabel="Subsidy/kW", ax=ax)

# Switch the y axis to a log scale through the project helper.
helpers.set_log_yscale(17, ax)

helpers.set_label(xlabel="Supported measures", ax=ax)

# measure_name[0] is M-01, which is not a heating measure.
ax.set_xticklabels(measure_name[1:])
helpers.save("subside_heating_permeasure_log_scale")


With quantiles instead of min and max

In [None]:
# 5% quantile, median and 95% quantile of the subsidy per kW.
statistics = [
    partial(np.quantile, q=0.05),
    "median",
    partial(np.quantile, q=0.95),
]
subside = (
    db_heating.groupby(["Nr. HFM 2015"])
    .agg({"subside_per_kw": statistics})
    # Give the three statistic columns flat, readable names.
    .set_axis(["quantile 0.05", "median", "quantile 0.95"], axis=1)
)

plt.rcParams["figure.figsize"] = (12, 6)
sns.set(font_scale=1.5)

# Long format: one row per (measure, statistic).
data = (
    subside.melt(ignore_index=False)
    .reset_index()
    .rename(columns={"value": "Subsides per kW"})
)

# Grouped bars: one group per measure, one bar per statistic.
ax = sns.barplot(x="Nr. HFM 2015", y="Subsides per kW", hue="variable", data=data)
helpers.set_label(ylabel="Subsidy/kW", ax=ax)
helpers.set_label(xlabel="Supported measures", ax=ax)

# measure_name[0] is M-01, which is not a heating measure.
ax.set_xticklabels(measure_name[1:])
helpers.save("subside_heating_permeasure_log_scale_quantiles")


Same plot, but just with the median

In [None]:
# Slightly smaller font for this figure only (restored at the end).
sns.set(font_scale=1.4)

# Median subsidy per kW with a 95% confidence interval, one bar per measure.
ax = sns.barplot(
    x="Nr. HFM 2015",
    y="subside_per_kw",
    data=db_heating,
    order=["M-02", "M-03", "M-04", "M-05", "M-06", "M-07"],
    estimator=np.median,
    ci=95,
)
helpers.set_label(ylabel="Subsidy/kW", ax=ax)
helpers.set_label(xlabel="Supported measures", ax=ax)

# measure_name[0] is M-01, which is not a heating measure.
ax.set_xticklabels(measure_name[1:])
helpers.save("subside_heating_permeasure_median")

# Back to the font scale used by the other figures.
sns.set(font_scale=1.5)


Explicit numbers

In [None]:
year = 2021  # set to None if you want the entire duration

# Restrict to the selected payment year, or keep every year when year is None.
df = db_heating if year is None else db_heating[db_heating["Jahr Auszahlung 1"] == year]

# Min / median / max subsidy per kW for each measure, rounded to one decimal.
statistics = ["min", "median", "max"]
df.groupby(["Nr. HFM 2015"]).agg({"subside_per_kw": statistics}).round(1)


A boxplot variation of the previous plot.

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(12, 6))

# Distribution of the subsidy per kW, one box per heating measure.
sns.boxplot(
    data=db_heating,
    x="Nr. HFM 2015",
    y="subside_per_kw",
    order=["M-02", "M-03", "M-04", "M-05", "M-06", "M-07"],
    ax=ax,
)
helpers.set_label(ylabel="Subsidy/kW", ax=ax)

# Switch the y axis to a log scale through the project helper.
helpers.set_log_yscale(17, ax)
helpers.set_label(xlabel="Supported measures", ax=ax)

# measure_name[0] is M-01, which is not a heating measure.
ax.set_xticklabels(measure_name[1:], fontdict={"fontsize": 14})
helpers.save("subside_heating_permeasure_boxplot")


Progression over the years of the amount of subsidies/kW

In [None]:
sns.set(font_scale=1.3)

# Min / median / max subsidy per kW for every (measure, payment year).
subside = (
    db_heating.groupby(["Nr. HFM 2015", "Jahr Auszahlung 1"])
    .agg({"subside_per_kw": ["min", "median", "max"]})
    .reset_index()
    .rename(columns={"Jahr Auszahlung 1": "Year"})
)

hue_order = ["min", "median", "max"]

# Turn the year into a string so it is plotted as a category, not a number.
subside["Year"] = pd.to_datetime(subside["Year"], format="%Y").dt.strftime("%Y")
subside = subside.set_index(["Year", "Nr. HFM 2015"])

# Long format: one row per (year, measure, statistic).
data = (
    subside.melt(col_level=1, ignore_index=False)
    .reset_index()
    .rename(columns={"value": "Subsides per kW"})
)

# One panel per heating measure (M-02 ... M-07), sharing the y axis.
fig, axs = plt.subplots(2, 3, figsize=(16, 10), sharey=True)
axs = axs.flatten()
for measure_no, panel in enumerate(axs, start=2):
    sns.lineplot(
        data=data[data["Nr. HFM 2015"] == f"M-0{measure_no}"],
        x="Year",
        y="Subsides per kW",
        hue="variable",
        hue_order=hue_order,
        ax=panel,
    )

# Second pass: labels, log scale and titles; only the first panel keeps its legend.
heating_titles = measure_name[1:]
for panel_idx, panel in enumerate(axs):
    helpers.set_label(ylabel="Subsidy/kW", ax=panel)
    helpers.set_log_yscale(17, panel)
    panel.set_title(heating_titles[panel_idx])
    if panel_idx > 0:
        panel.get_legend().remove()

helpers.save("subside_heating_permeasure_peryear_log_scale")


Same plot, but using boxplot visualization

In [None]:
sns.set(font_scale=1.3)

# Work on a renamed copy so the plot columns carry readable names.
subside = db_heating.rename(
    columns={"Jahr Auszahlung 1": "Year", "subside_per_kw": "Subsides per kW"}
)
# Turn the year into a string so it matches the categorical `order` below.
subside["Year"] = pd.to_datetime(subside["Year"], format="%Y").dt.strftime("%Y")

# One panel per heating measure (M-02 ... M-07), sharing the y axis.
fig, axs = plt.subplots(2, 3, figsize=(16, 10), sharey=True)
axs = axs.flatten()
for measure_no, panel in enumerate(axs, start=2):
    sns.boxplot(
        data=subside[subside["Nr. HFM 2015"] == f"M-0{measure_no}"],
        x="Year",
        y="Subsides per kW",
        order=["2017", "2018", "2019", "2020", "2021"],
        ax=panel,
    )

# Second pass: labels, log scale and titles.
heating_titles = measure_name[1:]
for panel_idx, panel in enumerate(axs):
    helpers.set_label(ylabel="Subsidy/kW", ax=panel)
    helpers.set_log_yscale(17, panel)
    panel.set_title(heating_titles[panel_idx])

helpers.save("subside_heating_permeasure_peryear_log_scale_boxplot")


## Building envelope replacement
We compute the subsidies/m2. This is similar to subsidies/kW computed before.

In [None]:
# Check missing values -> none
# The three insulated-surface columns (facade, roof, wall/floor against ground).
surface_col = [
    "Wärmegedämmte Fläche Fassade",
    "Wärmegedämmte Fläche Dach",
    "Wärmegedämmte Fläche Wand und Boden gegen Erdreich",
]
# A row counts as "missing" only when all three surfaces are NaN.
all_surfaces_missing = m01[surface_col].isna().all(axis=1)
print(
    "Number of rows with no surface area values specified:",
    all_surfaces_missing.sum(),
)


In [None]:
# Alias, not a copy: the columns added below are also added to m01.
not_missing_surface = m01

# Total insulated surface = sum of the three surface columns (NaN counts as 0).
not_missing_surface["surface"] = not_missing_surface[surface_col].sum(axis=1)

# Committed subsidy per unit of insulated surface.
not_missing_surface["subside_per_m2"] = not_missing_surface[
    "Verpflichteter Beitrag"
].div(not_missing_surface["surface"])


Minimum, maximum and median subsidy/m2 over 2017-2021

In [None]:
# Min / max / median subsidy per m2 over all rows of the envelope measure.
not_missing_surface.agg(
    {
        "subside_per_m2": ["min", "max", "median"],
    }
)


In 2021

In [None]:
# Same statistics, restricted to the rows paid out in 2021.
not_missing_surface.loc[lambda frame: frame["Jahr Auszahlung 1"] == 2021].agg(
    {"subside_per_m2": ["min", "max", "median"]}
)


In [None]:
fig, ax = plt.subplots(1, 1, figsize=(8, 6))

# Single box: distribution of the subsidy per m2 over all years.
sns.boxplot(data=not_missing_surface, y="subside_per_m2", ax=ax)
helpers.set_label(ylabel="Subsidy/m2", ax=ax)

# Switch the y axis to a log scale through the project helper.
helpers.set_log_yscale(17, ax, offset=0.5)
helpers.set_label(xlabel="All years", ax=ax)
helpers.save("subside_envelope_boxplot")


Breakdown per year

In [None]:
# Min / median / max subsidy per m2 for every payment year.
subside = (
    not_missing_surface.groupby(["Jahr Auszahlung 1"])
    .agg({"subside_per_m2": ["min", "median", "max"]})
    .reset_index()
    .rename(columns={"Jahr Auszahlung 1": "Year"})
)

# Turn the year into a string so it is plotted as a category, not a number.
subside["Year"] = pd.to_datetime(subside["Year"], format="%Y").dt.strftime("%Y")
subside = subside.set_index(["Year"])

# One line per statistic; the long-format frame is built inline.
g = sns.relplot(
    data=subside.melt(col_level=1, ignore_index=False)
    .reset_index()
    .rename(columns={"value": "Subsides per m2"}),
    kind="line",
    x="Year",
    y="Subsides per m2",
    hue="variable",
    aspect=1.3,
    facet_kws={"sharey": True, "sharex": True},
)
ax = g.ax
helpers.set_label(ylabel="Subsidy/m2", ax=ax)

# Switch the y axis to a log scale through the project helper.
helpers.set_log_yscale(17, ax, offset=0.5)

# Reposition the figure-level legend created by relplot.
leg = g._legend
leg.set_bbox_to_anchor([1, 0.9])
helpers.save("subside_envelope_peryear_log_scale")


Alternative representation

In [None]:
fig, ax = plt.subplots(1, 1, figsize=(10, 6))

# One box per payment year for the subsidy per m2.
sns.boxplot(
    data=not_missing_surface,
    x="Jahr Auszahlung 1",
    y="subside_per_m2",
    ax=ax,
)
helpers.set_label(ylabel="Subsidy/m2", ax=ax)

# Switch the y axis to a log scale through the project helper.
helpers.set_log_yscale(17, ax, offset=0.5)
helpers.set_label(xlabel="Year", ax=ax)
helpers.save("subside_envelope_peryear_boxplot")


## Breakdown/Canton
Until now we only considered the data per year at the national level. In this section, we compute some statistics per canton and per year.

We first start with measures M-02,...,M-07

In [None]:
measure = "M-07"  # select measure among M-02,...,M-07

# Min / max / median subsidy per kW and number of rows for every
# (measure, canton) pair; the top column level is dropped so the columns are
# simply "min", "max", "median", "size".
subside_per_kw_tot = (
    db_heating.groupby(["Nr. HFM 2015", "Kanton"])
    .agg({"subside_per_kw": ["min", "max", "median", "size"]})
    .droplevel(0, axis=1)
    .reset_index()
)

# Keep the selected measure, attach the canton geometries through the
# project helper and make sure the statistics are plain floats.
data = helpers.merge_swiss_map(
    subside_per_kw_tot[subside_per_kw_tot["Nr. HFM 2015"] == measure]
).astype({"min": float, "max": float, "median": float})

# The result is stored as `canton_map` rather than `map` so that the builtin
# `map` is not shadowed for the rest of the session.
canton_map = single_map(
    data,
    "median",
    measure,
    ["min", "max", "median", "size"],
    col_name="Median",
    cols_hover_name=["min", "max", "median", "n"],
    vmax=2650,
    text_div=f"Median subsidies/kW/Canton for measure {measure} over 5 years (2017-2021). Minimum and maximum subsidies/kW/Canton as well as the number of supported measure (n) is given.",
    name_map="2017-2021",
)
helpers.save_map(f"subside_per_kw_entire_duration_{measure}", canton_map)
canton_map


For measure M-01, we have

In [None]:
measure = "M-01"

# Min / max / median subsidy per m2 and number of rows for every canton; the
# top column level is dropped so the columns are "min", "max", "median", "size".
subside_per_m2 = (
    not_missing_surface.groupby(["Kanton"])
    .agg({"subside_per_m2": ["min", "max", "median", "size"]})
    .droplevel(0, axis=1)
    .reset_index()
)

# Attach the canton geometries through the project helper and make sure the
# statistics are plain floats.
data = helpers.merge_swiss_map(subside_per_m2).astype(
    {"min": float, "max": float, "median": float}
)
text_div = f"Median subsidies/m2/Canton for measure {measure} over 5 years (2017-2021). Minimum and maximum subsidies/m2/Canton as well as the number of supported measure (n) is given."

# The result is stored as `canton_map` rather than `map` so that the builtin
# `map` is not shadowed for the rest of the session.
canton_map = single_map(
    data,
    "median",
    measure,
    ["min", "max", "median", "size"],
    col_name="Median",
    cols_hover_name=["min", "max", "median", "n"],
    vmax=100,
    text_div=text_div,
    name_map="2017-2021",
)
helpers.save_map(f"subside_per_m2_entire_duration_{measure}", canton_map)
canton_map


### Over the years
For the measures M-02,...M-07, we have

In [None]:
# Min / max / median subsidy per kW and number of rows for every
# (measure, payment year, canton) triple. Dropping the top column level
# leaves the plain statistic names as column labels.
kw_stats = {"subside_per_kw": ["min", "max", "median", "size"]}
subside_per_kw_per_year = (
    db_heating.groupby(["Nr. HFM 2015", "Jahr Auszahlung 1", "Kanton"])
    .agg(kw_stats)
    .droplevel(0, axis=1)
    .reset_index()
)

In [None]:
measure = "M-07"

# Project helper; presumably expands the table to every (canton, year)
# combination for the selected measure — TODO confirm in helpers.
tmp = helpers.tensor_index_canton_year(
    subside_per_kw_per_year,
    subside_per_kw_per_year["Jahr Auszahlung 1"].unique(),
    measure,
)

# Attach the canton geometries (inner join) and force float statistics.
subside_per_kw_per_year_map = helpers.merge_swiss_map(tmp, "inner").astype(
    {"min": float, "max": float, "median": float}
)

# Keep only the columns needed by the map and give them display names.
cols = ["NAME", "size", "min", "max", "median", "Jahr Auszahlung 1", "geometry"]
data = (
    subside_per_kw_per_year_map[cols]
    .rename(columns={"size": "n", "Jahr Auszahlung 1": "Year", "NAME": "Canton"})
    .copy()
)

# Plotting
fig = map_with_progress_bar(
    data,
    "median",
    {"min": ":.2f", "max": ":.2f", "n": ":d"},
    vmax=2850,
    height=None,
    width=None,
)
text_div = f"Median subsidies/kW/Canton/per year for measure {measure}. Minimum and maximum subsidies/kW/Canton/per year as well as the number of supported measure (n) is given. The same scale is used across supported measures."

# Stored as `legend_map` rather than `map` so that the builtin `map` is not
# shadowed for the rest of the session.
legend_map = helpers.add_legend_map(fig, text_div)

# Saving
helpers.save_map(f"subside_per_kw_{measure}", legend_map)
fig


Measure M-01

In [None]:
measure = "M-01"
# NOTE(review): name_abbr is not used in this cell; kept in case a later
# cell relies on it.
name_abbr = helpers.get_name_abbr()

# Min / max / median subsidy per m2 and number of rows for every
# (payment year, canton) pair.
subside_per_m2_per_year = (
    not_missing_surface.groupby(["Jahr Auszahlung 1", "Kanton"])
    .agg({"subside_per_m2": ["min", "max", "median", "size"]})
    .droplevel(0, axis=1)
    .reset_index()
)

# Project helper; presumably expands the table to every (canton, year)
# combination. No measure filter is passed here — TODO confirm in helpers.
tmp = helpers.tensor_index_canton_year(
    subside_per_m2_per_year,
    subside_per_m2_per_year["Jahr Auszahlung 1"].unique(),
    None,
)

# Attach the canton geometries, force float statistics, keep only the
# columns needed by the map and give them display names.
cols = ["NAME", "size", "min", "max", "median", "Jahr Auszahlung 1", "geometry"]
data = (
    helpers.merge_swiss_map(tmp)
    .astype({"min": float, "max": float, "median": float})[cols]
    .rename(columns={"size": "n", "Jahr Auszahlung 1": "Year", "NAME": "Canton"})
    .copy()
)

fig = map_with_progress_bar(
    data,
    "median",
    {"min": ":.2f", "max": ":.2f", "n": ":d"},
    width=None,
    height=None,
    vmax=120,
)
text_div = f"Median subsidies/m2/Canton/per year for measure {measure}. Minimum and maximum subsidies/m2/Canton/per year as well as the number of supported measure (n) is given. The same scale is used across supported measures."

# Stored as `legend_map` rather than `map` so that the builtin `map` is not
# shadowed for the rest of the session.
legend_map = helpers.add_legend_map(fig, text_div)
helpers.save_map(f"subside_per_m2_{measure}", legend_map)
fig


### Subsidy per capita
Instead of looking at subsidies/m2/canton or subsidies/kW/canton, we check the subsidies/capita/canton

In [None]:
# Load population in each canton.
# The result is merged on "Kanton" and its "Pop" column is used as the
# denominator of the per-capita figures computed below.
nb_hab_per_canton = helpers.get_nb_hab_per_canton()


In [None]:
# Total committed subsidy ("sum") and number of rows ("size") for every
# (canton, measure) pair.
subside_per_canton_per_pop = (
    m01_m07.groupby(["Kanton", "Nr. HFM 2015"])
    .agg({"Verpflichteter Beitrag": ["sum", "size"]})
    .droplevel(0, axis=1)
    .reset_index()
)

# A pair without any supported measure gets a missing total instead of 0.
no_measure = subside_per_canton_per_pop["size"] == 0
subside_per_canton_per_pop.loc[no_measure, "sum"] = np.nan

# Attach the number of inhabitants of each canton ("Pop").
subside_per_canton_per_pop = subside_per_canton_per_pop.merge(
    nb_hab_per_canton, on="Kanton"
)

# Subsidies per inhabitant.
subside_per_canton_per_pop["Subsidies/capita"] = (
    subside_per_canton_per_pop["sum"] / subside_per_canton_per_pop["Pop"]
)

# Number of supported measures per 1000 inhabitants.
subside_per_canton_per_pop["n/capita"] = (
    subside_per_canton_per_pop["size"] / subside_per_canton_per_pop["Pop"] * 1000
)

# Display names for the two aggregated columns.
subside_per_canton_per_pop = subside_per_canton_per_pop.rename(
    columns={"sum": "Subsides", "size": "n"}
)
subside_per_canton_per_pop


Plot the map (select measure between M-01,...,M-07)

In [None]:
measure = "M-07"  # Select here the measure

# Rows of the per-capita table that belong to the selected measure.
data = subside_per_canton_per_pop[subside_per_canton_per_pop["Nr. HFM 2015"] == measure]

# Attach the canton geometries (left join) and force a float colour column.
data = helpers.merge_swiss_map(data, how="left").astype({"Subsidies/capita": float})
text_div = f"Total subsidies/capita/Canton for measure {measure} over 5 years (2017-2021). Hovering over a canton gives also the total subsidies and the number of supported measures."

# The result is stored as `canton_map` rather than `map` so that the builtin
# `map` is not shadowed for the rest of the session.
canton_map = single_map(
    data,
    "Subsidies/capita",
    measure,
    ["Subsidies/capita", "Subsides", "n", "n/capita"],
    cols_hover_name=[
        "Total Subsidies/hab",
        "Total subsides (CHF)",
        "Number of supported measures",
        "Number of supported measures/1000 hab.",
    ],
    text_div=text_div,
    vmax=134,
    name_map="2017-2021",
)
helpers.save_map(f"subside_per_capita_entire_duration_{measure}", canton_map)
canton_map


All supported measures together

In [None]:
# Compute the subsidies/canton (all measures combined)
# NOTE(review): this reuses the name of the per-measure table built earlier
# but with different columns ("Subside/capita", no "Nr. HFM 2015"), so the
# per-measure map cell cannot be re-run after this one without recomputing
# the per-measure table first.
subside_per_canton_per_pop = (
    db_with_terrain_class.groupby(["Kanton"])
    .agg({"Verpflichteter Beitrag": ["sum", "size"]})
    .droplevel(0, axis=1)
    .reset_index()
)

# Add the number of inhabitants in each canton
subside_per_canton_per_pop = subside_per_canton_per_pop.merge(
    nb_hab_per_canton, on="Kanton"
)

# Divide by the population size of each canton
subside_per_canton_per_pop["Subside/capita"] = (
    subside_per_canton_per_pop["sum"] / subside_per_canton_per_pop["Pop"]
)
subside_per_canton_per_pop = subside_per_canton_per_pop.rename(
    columns={"sum": "Subsides", "size": "n"}
)
data = subside_per_canton_per_pop

# Attach the canton geometries (left join) and force a float colour column.
data = helpers.merge_swiss_map(data, how="left").astype(
    {"Subside/capita": float}
)
# Plain string: there is nothing to interpolate in this caption.
text_div = "Total subside/capita/Canton over 5 years (2017-2021) (all supported measures combined). Hovering over a canton gives also the total subsidy amount and the number of supported measures."

# NOTE(review): `measure` is not set in this cell; it carries over from a
# previously executed cell although this map combines all measures — confirm
# what single_map uses it for.
# The result is stored as `canton_map` rather than `map` so that the builtin
# `map` is not shadowed for the rest of the session.
canton_map = single_map(
    data,
    "Subside/capita",
    measure,
    ["Subside/capita", "Subsides", "n"],
    cols_hover_name=["Total Subsides/hab", "Total subsides (CHF)", "n"],
    text_div=text_div,
    vmax=247,
    caption="Subside/capita (all measures combined)",
)
helpers.save_map("subside_per_capita_entire_duration", canton_map)
canton_map


## Efficiency/Canton

In the previous section, we explored the amount of subsidy required to obtain the same power (or surface) in each canton and per year. However, this does not tell us how widely the supported measure is actually used.

In this section, we compare the subsidy/m2 (or subsidy/kW, depending on the selected measure) with the number of buildings that are renovated with the funds. The number of buildings is expressed as a number of rooms, since a renovation might concern a small building as well as a large one, and both would count as 1 if we considered only the number of buildings.

We first need the number of rooms per building. This is found in the RegBL.

In [None]:
# Load the regbl (all the buildings in CH); EGID becomes a nullable integer
# so it can be used as the merge key below.
regbl_path = os.path.join(preprocessed_folder, "rebgl.pickle")
regbl = pd.read_pickle(regbl_path).astype({"EGID": "Int64"})

# Some EGID cells of db_with_terrain_class hold several EGIDs; the helper
# repeats those rows so that each row carries a single EGID.
db_with_terrain_class_exploded = helpers.explode_db(db_with_terrain_class)

# Left join: every building of the regbl is kept, with the measure(s) that
# concern it when there are any.
combined_regbl = regbl.merge(db_with_terrain_class_exploded, on="EGID", how="left")
combined_regbl = helpers.add_renov_indicator(combined_regbl)

# Building categories considered as habitation (missing category included).
hab_to_keep = [
    "Usage d'habitation",
    "Maison avec usage annexe",
    "Part. à usage d'hab.",
    np.nan,
]

# Keep only existing buildings that have a habitation purpose.
is_existing = combined_regbl.Statut_bat == "existant"
is_habitation = combined_regbl.Cat_bat.isin(hab_to_keep)
combined_regbl_sub = combined_regbl[is_existing & is_habitation].copy()


We consider the measure M-01 as an example.

In [None]:
measure = "M-01"  # Change here if you want other measures

# Cumulative: one table per year from 2017 to 2021, stacked vertically.
ratio_all = [
    helpers.precentage_renovated_rooms(measure, combined_regbl_sub, yr, cumul=True)
    for yr in range(2017, 2022)
]
ratio_cumul = pd.concat(ratio_all, axis=0)

# Not cumulative: same years, each year taken on its own.
ratio_all = [
    helpers.precentage_renovated_rooms(measure, combined_regbl_sub, yr, cumul=False)
    for yr in range(2017, 2022)
]
ratio_per_year = pd.concat(ratio_all, axis=0)


In [None]:
def merge_ratio_db(db, ratio):
    """Join per-canton/year subsidy statistics with renovation ratios.

    `db` is matched to `ratio` (after resetting its index) on payment year
    and canton, missing statistics are replaced by a sentinel, the canton
    population table is attached and the result is sorted by canton, then year.

    Args:
        db: frame with "Jahr Auszahlung 1", "Kanton", "min", "max", "median"
            and "size" columns.
        ratio: frame providing "year", "Canton" and "renov_rooms" once its
            index has been reset.

    Returns:
        The merged frame, sorted by "Kanton" then "Jahr Auszahlung 1".
    """
    merged = db.merge(
        ratio.reset_index(),
        left_on=["Jahr Auszahlung 1", "Kanton"],
        right_on=["year", "Canton"],
    )

    # Workaround: a large negative sentinel pushes missing values outside the
    # plotted range, so they do not appear on the plot.
    stat_cols = ["min", "max", "median", "size", "renov_rooms"]
    merged[stat_cols] = merged[stat_cols].fillna(-1000)

    # Attach the population of each canton.
    population = helpers.get_nb_hab_per_canton()
    merged = merged.merge(population, on="Kanton", how="left")

    # Same two successive sorts as before: by year, then by canton and year.
    merged = merged.sort_values("Jahr Auszahlung 1")
    return merged.sort_values(["Kanton", "Jahr Auszahlung 1"])


# Per-kW statistics restricted to the selected measure (only meaningful for
# the heating measures, but always computed).
sub_tmp = subside_per_kw_per_year.loc[
    lambda frame: frame["Nr. HFM 2015"] == measure
]

# M-01 is expressed per m2, every other measure per kW.
if measure == "M-01":
    data = subside_per_m2_per_year
else:
    data = sub_tmp

# Smallest real median, taken before the sentinel is introduced by
# merge_ratio_db, so it can serve as a lower axis bound.
d_min = data["median"].min()

df1 = merge_ratio_db(data, ratio_cumul)  # cumulative ratios
df2 = merge_ratio_db(data, ratio_per_year)  # per-year ratios


Generate image

In [None]:
import plotly.express as px

cumul = "Cumulative"  # To change if needed, either Cumulative or ""
is_cumulative = cumul == "Cumulative"

# M-01 is expressed per m2, every other measure per kW.
unit = "m2" if measure == "M-01" else "kW"

# Pick the data, the title and the y-axis label that match the selected mode.
if is_cumulative:
    df = df1
    msg = f"Efficiency of the supported measure {measure}. <br>Gives the (median) subsidy/{unit} over time and the proportion of renovated habitable buildings (expressed in number of rooms) in each canton."
    renov_label = "Percentage of rooms that <br>are concerned by the measure since 2017"
else:
    df = df2
    msg = f"Efficiency of the supported measure {measure}. <br>Gives the (median) subsidy/{unit} over time and the renovation rate /year (expressed in number of rooms of habitable buildings/year) in each canton."
    renov_label = "Percentage of rooms that <br>are concerned by the measure per year"

# Animated bubble chart: one bubble per canton (size = population), one
# frame per year. The axis ranges start at the smallest real median and at 0
# so that the -1000 sentinel used for missing values stays outside the view.
fig = px.scatter(
    df,
    "median",
    "renov_rooms",
    "Kanton",
    size="Pop",
    size_max=55,
    animation_frame="year",
    range_x=[d_min / 1.1, df["median"].max() * 1.1],
    range_y=[0, df["renov_rooms"].max() * 1.1],
    height=800,
    labels={
        "median": f"Median subsidies/{unit}",
        "renov_rooms": renov_label,
        "Kanton": "Canton",
        # Fixed typo in the displayed label ("Poluation" -> "Population").
        "Pop": "Population size",
    },
    title=msg,
    category_orders={"year": [2017, 2018, 2019, 2020, 2021]},
)
fig.write_html(
    os.path.join("../figure", f"subside_{measure}_per_canton_{cumul}.html"),
    include_plotlyjs="cdn",
)
fig
