In [1]:
import os

import pandas as pd
import numpy as np
import plotly.express as px

from scipy.stats import pearsonr

# Table of Contents

1. [Data loading](#data-loading)
2. [General analysis](#general-analysis)
    - [System sizes](#system-sizes)
    - [System investments](#system-investments)
3. [Comparison](#comparison)
    - [Yearly savings](#yearly-savings)
    - [Energy costs](#energy-costs)
4. [Correlations](#correlations)
    - [On absolute savings](#on-absolute-savings)
    - [On relative savings](#on-relative-savings)

# Data loading

In [2]:
# Database connection string, taken from the DB_URI environment variable
# (None when the variable is not set).
URI = os.environ.get("DB_URI")

In [3]:
# Load the master table (one row of features per load profile), indexed and
# sorted by the profile id.
master = (
    pd.read_sql("SELECT * FROM vea_industrial_load_profiles.master", URI)
    .set_index("id")
    .sort_index()
)

# In the authors' database, the following cost columns are already present in
# master. They would cause problems later on (presumably a name clash with the
# same-named result columns when merging -- TODO confirm), so they are dropped
# here. If you created your own features with "create_master_features.py", the
# columns do not exist and nothing is dropped.
cols_to_drop = ["energy_costs_eur", "capacity_costs_eur", "total_costs_eur"]
# errors="ignore" skips columns that are absent instead of raising KeyError.
master = master.drop(columns=cols_to_drop, errors="ignore")

master.head()

Unnamed: 0_level_0,sector_group_id,sector_group,capacity_price_over_2500h_eur_per_kw,energy_price_over_2500h_eur_per_kwh,capacity_price_under_2500h_eur_per_kw,energy_price_under_2500h_eur_per_kwh,zip_code,grid_level,number_of_peak_load,peak_load_kw,mean_load_kw,variance_kw,total_energy_kwh,full_load_hours_h,is_over_2500h,std_kw
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,B,Mining and quarrying,94.6,0.006,14.67,0.0379,85,MS,1,2227.36,646.151302,423413.418622,5675793.04,2548.215394,True,650.70225
1,B,Mining and quarrying,96.11,0.0052,9.92,0.0397,91,MS,1,1096.0,208.6949,109792.561757,1833176.0,1672.605839,False,331.349607
2,B,Mining and quarrying,96.11,0.0052,9.92,0.0397,92,MS,1,816.0,129.444604,36662.22051,1137041.4,1393.433088,False,191.473812
3,B,Mining and quarrying,96.11,0.0052,9.92,0.0397,85,MS,1,701.6,114.201389,19954.089684,1003145.0,1429.79618,False,141.258946
4,C,Manufacturing industry,101.04,0.0078,12.6,0.0432,30,MS,2,3480.0,1845.483607,610080.465591,16210728.0,4658.255172,True,781.076479


In [4]:
# Result rows of the baseline scenario (names containing "base").
baseline = pd.read_sql("SELECT * FROM vea_results.overview WHERE name LIKE '%%base%%'", URI)
# The token before the first underscore of the name (e.g. "0_baseline") is
# used as the integer profile id and becomes the sorted index.
baseline_ids = baseline["name"].str.split("_").str[0].astype(int)
baseline = baseline.assign(id=baseline_ids).set_index("id").sort_index()
baseline.head()

Unnamed: 0_level_0,name,energy_costs_eur,grid_energy_costs_eur,grid_capacity_costs_eur,grid_capacity_kw,storage_invest_eur,storage_annuity_eur,storage_capacity_kwh,inverter_invest_eur,inverter_annuity_eur,inverter_capacity_kw,solar_invest_eur,solar_annuity_eur,solar_capacity_kwp,total_yearly_costs_eur,total_annuity_eur,total_invest_eur
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,0_baseline,1021643.0,271558.090967,41249.388603,2227.36,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1334450.0,0.0,0.0
1,1_baseline,329971.7,12033.847188,132976.873344,1096.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,474982.4,0.0,0.0
2,2_baseline,204667.5,7464.085529,99004.679424,816.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,311136.2,0.0,0.0
3,3_baseline,180566.1,6585.12529,85124.611622,701.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,272275.8,0.0,0.0
4,4_baseline,2917931.0,884063.074775,55353.7152,3480.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3857348.0,0.0,0.0


In [5]:
# Result rows of the storage + PV scenario (names ending in "storage_pv").
storage_pv = pd.read_sql("SELECT * FROM vea_results.overview WHERE name LIKE '%%storage_pv'", URI)
# The token before the first underscore of the name (e.g. "0_storage_pv") is
# used as the integer profile id and becomes the sorted index.
storage_pv_ids = storage_pv["name"].str.split("_").str[0].astype(int)
storage_pv = storage_pv.assign(id=storage_pv_ids).set_index("id").sort_index()
storage_pv.head()

Unnamed: 0_level_0,name,energy_costs_eur,grid_energy_costs_eur,grid_capacity_costs_eur,grid_capacity_kw,storage_invest_eur,storage_annuity_eur,storage_capacity_kwh,inverter_invest_eur,inverter_annuity_eur,inverter_capacity_kw,solar_invest_eur,solar_annuity_eur,solar_capacity_kwp,total_yearly_costs_eur,total_annuity_eur,total_invest_eur
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
0,0_storage_pv,286747.016555,76218.886318,21044.444499,1136.345422,3166214.0,246412.117268,11109.523595,583418.0,45404.77904,3241.210945,5702094.0,254598.072836,4751.745351,930425.3,546415.0,9451727.0
1,1_storage_pv,94527.878586,3447.368713,44484.739952,366.644769,1210266.0,94189.494088,4246.546065,205016.3,15955.487538,1138.979243,1938530.0,86555.230259,1615.441973,339160.2,196700.2,3353812.0
2,2_storage_pv,47226.400185,1722.31533,24815.730172,204.532108,820471.2,63853.559607,2878.846361,137084.8,10668.688954,761.582198,1301573.0,58115.148013,1084.644441,206401.8,132637.4,2259129.0
3,3_storage_pv,58093.209878,2118.620635,33883.509811,279.269062,522548.9,40667.613591,1833.504853,94260.56,7335.872306,523.669757,963558.3,43022.804184,802.965268,185121.6,91026.29,1580368.0
4,4_storage_pv,846922.126789,256597.078286,29153.994733,1832.865261,8148609.0,634169.34644,28591.610654,1574782.0,122558.173572,8748.790369,15738040.0,702702.388803,13115.03568,2592103.0,1459430.0,25461430.0


# General analysis

In [6]:
# Count all optimized profiles and those with a non-zero inverter investment.
# NOTE(review): the variable names say "pv", the filter is on the inverter
# investment and the printed label says "storage" -- confirm which is meant.
total_profiles_analyzed = storage_pv.shape[0]
has_inverter_invest = storage_pv["inverter_invest_eur"] > 0
profiles_using_pv = storage_pv[has_inverter_invest]
n_profiles_using_pv = profiles_using_pv.shape[0]

share_in_percent = (n_profiles_using_pv / total_profiles_analyzed) * 100

print(f"{total_profiles_analyzed=}")
print(f"{n_profiles_using_pv=}")
print(f"Percentage of profiles using storage: {share_in_percent:.2f} %")

total_profiles_analyzed=5353
n_profiles_using_pv=5352
Percentage of profiles using storage: 99.98 %


## System sizes

In [7]:
# Descriptive statistics of the installed component sizes, one markdown table
# per component. Each Series is a renamed copy, so the source frame keeps its
# original column labels.
storage_size = profiles_using_pv["storage_capacity_kwh"].rename("Storage capacity")
inverter_size = profiles_using_pv["inverter_capacity_kw"].rename("Inverter capacity")
solar_size = profiles_using_pv["solar_capacity_kwp"].rename("PV system capacity")

size_tables = [
    size.describe().drop("count").to_markdown()
    for size in (storage_size, inverter_size, solar_size)
]
# Tables are separated by one blank line.
print("\n\n".join(size_tables))

# Box plot of the three capacities with human-readable labels.
size_labels = {
    "storage_capacity_kwh": "Storage",
    "inverter_capacity_kw": "Inverter",
    "solar_capacity_kwp": "PV system",
}
system_size_fig_df = profiles_using_pv.rename(columns=size_labels)
system_size_fig = px.box(
    data_frame=system_size_fig_df,
    x=["Inverter", "Storage", "PV system"],
    title="System sizes",
)
system_size_fig.update_layout(
    xaxis_title="Capacity in kWh (storage) / kW (inverter, PV)",
    yaxis_title="",
)
# Restrict the visible axis range; larger values fall outside the view.
system_size_fig.update_xaxes(range=[0, 15e3])
system_size_fig.show()

|      |   Storage capacity |
|:-----|-------------------:|
| mean |      4801.66       |
| std  |     12574.7        |
| min  |         0.00960694 |
| 25%  |       381.078      |
| 50%  |      1236.63       |
| 75%  |      4278.57       |
| max  |    319210          |

|      |   Inverter capacity |
|:-----|--------------------:|
| mean |        1559.49      |
| std  |        4038.63      |
| min  |           0.0149096 |
| 25%  |         130.358     |
| 50%  |         418.926     |
| 75%  |        1429.01      |
| max  |      100689         |

|      |   PV system capacity |
|:-----|---------------------:|
| mean |             2316.46  |
| std  |             6176.91  |
| min  |                0     |
| 25%  |              192.557 |
| 50%  |              626.708 |
| 75%  |             2060.9   |
| max  |           160548     |


## System investments

In [8]:
# Descriptive statistics of the component investments, one markdown table per
# component. Series.rename returns a new, relabelled Series, so the columns of
# profiles_using_pv are not renamed as a side effect (assigning .name directly
# on a column taken from the frame can mutate the frame's cached column).
storage_invest = profiles_using_pv["storage_invest_eur"].rename("Absolute storage investment")
print(storage_invest.describe().drop("count").to_markdown())

print("")
inverter_invest = profiles_using_pv["inverter_invest_eur"].rename("Absolute inverter investment")
print(inverter_invest.describe().drop("count").to_markdown())

print("")
solar_invest = profiles_using_pv["solar_invest_eur"].rename("Absolute PV system investment")
print(solar_invest.describe().drop("count").to_markdown())

# Box plot of the three investments with human-readable labels.
system_invest_fig_df = profiles_using_pv.rename(columns={
    "storage_invest_eur": "Storage",
    "inverter_invest_eur": "Inverter",
    "solar_invest_eur": "PV system"})
system_invest_fig = px.box(
    data_frame=system_invest_fig_df,
    x=["Inverter", "Storage", "PV system"],
    title="System investments")
system_invest_fig.update_layout(xaxis_title="System investments in €", yaxis_title="")
# Restrict the visible axis range; larger values fall outside the view.
system_invest_fig.update_xaxes(range=[0, 7e6])
system_invest_fig.show()

|      |   Absolute storage investment |
|:-----|------------------------------:|
| mean |                   1.36847e+06 |
| std  |                   3.58378e+06 |
| min  |                   2.73798     |
| 25%  |              108607           |
| 50%  |              352438           |
| 75%  |                   1.21939e+06 |
| max  |                   9.0975e+07  |

|      |   Absolute inverter investment |
|:-----|-------------------------------:|
| mean |                280708          |
| std  |                726954          |
| min  |                     2.68372    |
| 25%  |                 23464.4        |
| 50%  |                 75406.6        |
| 75%  |                257223          |
| max  |                     1.8124e+07 |

|      |   Absolute PV system investment |
|:-----|--------------------------------:|
| mean |                     2.77975e+06 |
| std  |                     7.41229e+06 |
| min  |                     0           |
| 25%  |                231068      

# Comparison

In [9]:
# Absolute difference (baseline minus storage+PV) for every numeric result
# column, aligned on the profile id. Positive cost values are savings.
baseline_values = baseline.drop(columns="name")
storage_pv_values = storage_pv.drop(columns="name")

# Rows without a total-cost difference are profiles that could not be
# optimized; they are removed.
abs_diff = (baseline_values - storage_pv_values).dropna(subset="total_yearly_costs_eur")

abs_diff.head()

Unnamed: 0_level_0,energy_costs_eur,grid_energy_costs_eur,grid_capacity_costs_eur,grid_capacity_kw,storage_invest_eur,storage_annuity_eur,storage_capacity_kwh,inverter_invest_eur,inverter_annuity_eur,inverter_capacity_kw,solar_invest_eur,solar_annuity_eur,solar_capacity_kwp,total_yearly_costs_eur,total_annuity_eur,total_invest_eur
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,734895.7,195339.204649,20204.944104,1091.014578,-3166214.0,-246412.117268,-11109.523595,-583418.0,-45404.77904,-3241.210945,-5702094.0,-254598.072836,-4751.745351,404024.9,-546415.0,-9451727.0
1,235443.8,8586.478475,88492.133392,729.355231,-1210266.0,-94189.494088,-4246.546065,-205016.3,-15955.487538,-1138.979243,-1938530.0,-86555.230259,-1615.441973,135822.2,-196700.2,-3353812.0
2,157441.1,5741.770199,74188.949252,611.467892,-820471.2,-63853.559607,-2878.846361,-137084.8,-10668.688954,-761.582198,-1301573.0,-58115.148013,-1084.644441,104734.4,-132637.4,-2259129.0
3,122472.9,4466.504654,51241.101812,422.330938,-522548.9,-40667.613591,-1833.504853,-94260.56,-7335.872306,-523.669757,-963558.3,-43022.804184,-802.965268,87154.21,-91026.29,-1580368.0
4,2071009.0,627465.996489,26199.720467,1647.134739,-8148609.0,-634169.34644,-28591.610654,-1574782.0,-122558.173572,-8748.790369,-15738040.0,-702702.388803,-13115.03568,1265245.0,-1459430.0,-25461430.0


In [10]:
# Relative difference: (baseline - storage+PV) / baseline, per result column.
# Columns whose baseline value is zero (e.g. the investment columns) yield
# -inf / NaN here.
baseline_numeric = baseline.drop(columns="name")
storage_pv_numeric = storage_pv.drop(columns="name")
rel_diff = (baseline_numeric - storage_pv_numeric) / baseline_numeric

# Rows without a relative total-cost difference are profiles that could not
# be optimized; they are removed.
rel_diff = rel_diff.dropna(subset="total_yearly_costs_eur")

rel_diff

Unnamed: 0_level_0,energy_costs_eur,grid_energy_costs_eur,grid_capacity_costs_eur,grid_capacity_kw,storage_invest_eur,storage_annuity_eur,storage_capacity_kwh,inverter_invest_eur,inverter_annuity_eur,inverter_capacity_kw,solar_invest_eur,solar_annuity_eur,solar_capacity_kwp,total_yearly_costs_eur,total_annuity_eur,total_invest_eur
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,0.719328,0.719328,0.489824,0.489824,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,0.302765,-inf,-inf
1,0.713527,0.713527,0.665470,0.665470,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,0.285952,-inf,-inf
2,0.769253,0.769253,0.749348,0.749348,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,0.336619,-inf,-inf
3,0.678272,0.678272,0.601954,0.601954,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,0.320095,-inf,-inf
4,0.709753,0.709753,0.473315,0.473315,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,0.328009,-inf,-inf
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5354,0.663513,0.663513,0.424624,0.424624,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,0.316672,-inf,-inf
5355,0.529893,0.529893,0.492824,0.492824,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,0.222315,-inf,-inf
5356,0.734541,0.734541,0.520725,0.520725,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,0.370800,-inf,-inf
5357,0.640727,0.640727,0.572371,0.572371,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,-inf,0.306970,-inf,-inf


## Yearly savings

In [11]:
# Summary statistics of the absolute yearly savings. Series.rename returns a
# new, relabelled Series; assigning .name directly on the column taken from
# abs_diff can relabel the frame's cached column, and abs_diff is reused by
# later cells.
abs_yearly_savings = abs_diff["total_yearly_costs_eur"].rename("Total yearly savings in Eur")
print(abs_yearly_savings.describe().drop("count").to_markdown())

# Box plot of the same quantity.
abs_tot_yearly_savings_fig_df = abs_diff.rename(columns={"total_yearly_costs_eur": "Savings"})
abs_tot_yearly_savings_fig = px.box(
    data_frame=abs_tot_yearly_savings_fig_df,
    x="Savings",
    title="Total yearly savings")
abs_tot_yearly_savings_fig.update_layout(xaxis_title="Total yearly savings in €", yaxis_title="")
# Restrict the visible axis range; larger values fall outside the view.
abs_tot_yearly_savings_fig.update_xaxes(range=[0, 500e3])
abs_tot_yearly_savings_fig.show()

|      |   Total yearly savings in Eur |
|:-----|------------------------------:|
| mean |              209562           |
| std  |              542538           |
| min  |                   0           |
| 25%  |               20843.7         |
| 50%  |               60860.6         |
| 75%  |              184155           |
| max  |                   1.31677e+07 |


In [12]:
# Summary statistics of the relative yearly savings, converted from a
# fraction to percent.
rel_yearly_savings = rel_diff["total_yearly_costs_eur"].mul(100)
rel_yearly_savings.name = "Relative yearly savings in %"
rel_savings_table = rel_yearly_savings.describe().drop("count").to_markdown()
print(rel_savings_table)

# Box plot of the same quantity (all columns scaled to percent).
rel_yearly_savings_fig_df = rel_diff.mul(100).rename(
    columns={"total_yearly_costs_eur": "Savings"}
)
rel_yearly_savings_fig = px.box(
    data_frame=rel_yearly_savings_fig_df,
    x="Savings",
    title="Relative yearly savings",
)
rel_yearly_savings_fig.update_layout(
    xaxis_title="Relative yearly savings in %",
    yaxis_title="",
)
# Restrict the visible axis range to 10-50 %.
rel_yearly_savings_fig.update_xaxes(range=[10, 50])
rel_yearly_savings_fig.show()

|      |   Relative yearly savings in % |
|:-----|-------------------------------:|
| mean |                      30.0379   |
| std  |                       6.95214  |
| min  |                       0.284183 |
| 25%  |                      25.9579   |
| 50%  |                      30.914    |
| 75%  |                      34.8399   |
| max  |                      75.114    |


## Energy costs

In [13]:
# Summary statistics of the relative savings on energy costs, in percent.
rel_yearly_energy_savings = 100 * rel_diff["energy_costs_eur"]
# Label fixed: the original repeated the word "savings".
rel_yearly_energy_savings.name = "Relative yearly energy savings in %"
print(rel_yearly_energy_savings.describe().drop("count").to_markdown())

# Box plot of the same quantity (all columns scaled to percent).
rel_yearly_energy_savings_fig_df = (rel_diff * 100).rename(columns={"energy_costs_eur": "Savings"})
rel_yearly_energy_savings_fig = px.box(
    data_frame=rel_yearly_energy_savings_fig_df,
    x="Savings",
    # Title fixed: it was copied from the total-savings figure, although this
    # plot shows energy-cost savings.
    title="Relative yearly energy savings")
rel_yearly_energy_savings_fig.update_layout(xaxis_title="Relative yearly energy savings in %", yaxis_title="")
# Restrict the visible axis range to 40-85 %.
rel_yearly_energy_savings_fig.update_xaxes(range=[40, 85])
rel_yearly_energy_savings_fig.show()

|      |   Relative yearly energy savings savings in % |
|:-----|----------------------------------------------:|
| mean |                                     62.5425   |
| std  |                                      9.65148  |
| min  |                                      0.229135 |
| 25%  |                                     59.7934   |
| 50%  |                                     64.2741   |
| 75%  |                                     67.9581   |
| max  |                                    100        |


# Correlations

## On absolute savings

In [14]:
# Merge the absolute savings with the load-profile features from master
# (left join on the profile id index) and add three ratio features that
# describe the shape of each load profile.
abs_diff_with_master = pd.merge(
    left=abs_diff, right=master, how="left", left_index=True, right_index=True
).assign(
    std_by_mean=lambda frame: frame["std_kw"] / frame["mean_load_kw"],
    std_by_peak=lambda frame: frame["std_kw"] / frame["peak_load_kw"],
    peak_by_mean=lambda frame: frame["peak_load_kw"] / frame["mean_load_kw"],
)

In [15]:
# Columns excluded from the correlation matrix: descriptive profile
# attributes and the solar result columns.
descriptive_cols = ["grid_level", "zip_code", "sector_group_id", "sector_group"]
solar_cols = ["solar_invest_eur", "solar_annuity_eur", "solar_capacity_kwp"]
cols_to_drop = descriptive_cols + solar_cols

# Pairwise correlation matrix of the remaining columns, shown as a heatmap.
abs_corr_input = abs_diff_with_master.drop(columns=cols_to_drop)
abs_correlations_df = abs_corr_input.corr()
px.imshow(abs_correlations_df, title="Correlation coefficients for total yearly savings")

In [16]:
# Correlation of every variable with the total yearly savings, rounded to two
# decimals and ordered from strongest positive to strongest negative.
abs_corr_fig_df = (
    abs_correlations_df[["total_yearly_costs_eur"]]
    .round(2)
    .sort_values("total_yearly_costs_eur", ascending=False)
)

# Bar chart with one bar per variable; text_auto prints the value on each bar.
abs_corr_fig = px.bar(
    data_frame=abs_corr_fig_df,
    y="total_yearly_costs_eur",
    text_auto=True,
    title="Correlation between different load profile characteristics and total yearly savings",
)
abs_corr_fig.update_layout(
    yaxis_title="Correlation coefficient",
    xaxis_title="Variable",
)
abs_corr_fig.show()

In [17]:
# Pearson correlation and p-value of every variable against the total yearly
# savings, computed on the rows of abs_diff_with_master without any NaN.
# The NaN filtering is loop-invariant, so it is done once up front.
abs_complete_rows = abs_diff_with_master.dropna()
abs_savings = abs_complete_rows["total_yearly_costs_eur"]

# Collect one record per variable and build the frame once, instead of
# growing it cell by cell with a manual row counter.
abs_pearson_records = []
for var in abs_correlations_df.index:
    corr, p_value = pearsonr(x=abs_complete_rows[var], y=abs_savings)
    abs_pearson_records.append({"var": var, "corr": corr, "p_value": p_value})

# Explicit columns keep the frame well-formed even if no variable was tested.
df = pd.DataFrame(abs_pearson_records, columns=["var", "corr", "p_value"])

df.sort_values("corr", ascending=False, ignore_index=True)

Unnamed: 0,var,corr,p_value
0,total_yearly_costs_eur,1.0,0.0
1,energy_costs_eur,0.987918,0.0
2,mean_load_kw,0.976211,0.0
3,total_energy_kwh,0.976211,0.0
4,grid_energy_costs_eur,0.971282,0.0
5,peak_load_kw,0.868799,0.0
6,std_kw,0.764125,0.0
7,grid_capacity_kw,0.751099,0.0
8,variance_kw,0.523374,0.0
9,full_load_hours_h,0.464247,1.190128e-274


## On relative savings

In [18]:
# Merge the relative savings with the load-profile features from master
# (left join on the profile id index) and add three ratio features that
# describe the shape of each load profile.
rel_diff_with_master = pd.merge(
    left=rel_diff, right=master, how="left", left_index=True, right_index=True
).assign(
    std_by_mean=lambda frame: frame["std_kw"] / frame["mean_load_kw"],
    std_by_peak=lambda frame: frame["std_kw"] / frame["peak_load_kw"],
    peak_by_mean=lambda frame: frame["peak_load_kw"] / frame["mean_load_kw"],
)

In [19]:
# Columns excluded from the correlation matrix: descriptive profile
# attributes and the solar result columns.
descriptive_cols = ["grid_level", "zip_code", "sector_group_id", "sector_group"]
solar_cols = ["solar_invest_eur", "solar_annuity_eur", "solar_capacity_kwp"]
cols_to_drop = descriptive_cols + solar_cols

# Pairwise correlation matrix of the remaining columns, shown as a heatmap.
rel_corr_input = rel_diff_with_master.drop(columns=cols_to_drop)
rel_correlations_df = rel_corr_input.corr()
px.imshow(rel_correlations_df, title="Correlation coefficients for relative yearly savings")

In [20]:
# Correlation of every variable with the relative yearly savings, rounded to
# two decimals, ordered from strongest positive to strongest negative;
# variables without a defined correlation (NaN) are left out.
rel_corr_fig_df = (
    rel_correlations_df[["total_yearly_costs_eur"]]
    .round(2)
    .sort_values("total_yearly_costs_eur", ascending=False)
    .dropna()
)

# Bar chart with one bar per variable; text_auto prints the value on each bar.
rel_corr_fig = px.bar(
    data_frame=rel_corr_fig_df,
    y="total_yearly_costs_eur",
    text_auto=True,
    title="Correlation between different load profile characteristics and relative yearly savings",
)
rel_corr_fig.update_layout(
    yaxis_title="Correlation coefficient",
    xaxis_title="Variable",
)
rel_corr_fig.show()

In [21]:
# Pearson correlation and p-value of every variable against the relative
# yearly savings. Storage/inverter columns and columns containing NaN or
# +/-inf values are skipped.
non_finite_values = [np.nan, np.inf, -np.inf]
rel_savings = rel_diff_with_master["total_yearly_costs_eur"]

# Collect one record per variable and build the frame once, instead of
# growing it cell by cell with a manual row counter.
rel_pearson_records = []
for var in rel_correlations_df.index:
    if "storage" in var or "inverter" in var:
        continue

    values = rel_diff_with_master[var]
    # `x in series` tests the index labels, not the values, so the previous
    # `np.inf in ...` / `np.nan in ...` checks never inspected the data.
    # Series.isin is the check that actually looks at the values.
    if values.isin(non_finite_values).any():
        continue

    corr, p_value = pearsonr(x=values, y=rel_savings)
    rel_pearson_records.append({"var": var, "corr": corr, "p_value": p_value})

# Explicit columns keep the frame well-formed even if every variable was skipped.
df = pd.DataFrame(rel_pearson_records, columns=["var", "corr", "p_value"])

df.sort_values("corr", ascending=False, ignore_index=True)

Unnamed: 0,var,corr,p_value
0,total_yearly_costs_eur,1.0,0.0
1,energy_costs_eur,0.756091,0.0
2,grid_energy_costs_eur,0.756091,0.0
3,grid_capacity_costs_eur,0.463623,1.685139e-283
4,grid_capacity_kw,0.463623,1.685139e-283
5,energy_price_under_2500h_eur_per_kwh,0.241584,6.1952e-72
6,energy_price_over_2500h_eur_per_kwh,0.181878,4.965285e-41
7,full_load_hours_h,0.156639,9.55062e-31
8,is_over_2500h,0.130394,9.897516e-22
9,capacity_price_over_2500h_eur_per_kw,0.124149,7.824683999999999e-20


In [23]:
# Histogram of the relative yearly savings, coloured by the is_over_2500h flag.
px.histogram(
    data_frame=rel_diff_with_master,
    x="total_yearly_costs_eur",
    color="is_over_2500h",
)