In [85]:
import os

import pandas as pd
import numpy as np
import plotly.express as px

from scipy.stats import pearsonr

# Table of Contents

1. [Data loading](#data-loading)
2. [General analysis](#general-analysis)
    - [System sizes](#system-sizes)
    - [System investments](#system-investments)
3. [Comparison](#comparison)
    - [Total savings](#total-savings)
    - [Capacity costs savings](#capacity-costs-savings)
4. [Correlations](#correlations)
    - [On absolute savings](#on-absolute-savings)
    - [On relative savings](#on-relative-savings)
    - [Scatterplots](#scatterplots)

# Data loading

In [86]:
# Database connection string, taken from the DB_URI environment variable.
# Evaluates to None when the variable is not set.
URI = os.environ.get("DB_URI")

In [87]:
# Load the master table (one row per load profile) and index it by profile id.
master = (
    pd.read_sql("SELECT * FROM vea_industrial_load_profiles.master", URI)
    .set_index("id")
    .sort_index()
)

# In the author's database the following cost columns are already present in
# master. Keeping them leads to problems later on (e.g. "energy_costs_eur" is
# also a column of the results overview), so they are removed here.
# If you created your own features with "create_master_features.py", the
# columns do not exist; errors="ignore" makes the drop a no-op in that case.
cols_to_drop = ["energy_costs_eur", "capacity_costs_eur", "total_costs_eur"]
master = master.drop(columns=cols_to_drop, errors="ignore")

master.head()

Unnamed: 0_level_0,sector_group_id,sector_group,capacity_price_over_2500h_eur_per_kw,energy_price_over_2500h_eur_per_kwh,capacity_price_under_2500h_eur_per_kw,energy_price_under_2500h_eur_per_kwh,zip_code,grid_level,number_of_peak_load,peak_load_kw,mean_load_kw,variance_kw,total_energy_kwh,full_load_hours_h,is_over_2500h,std_kw
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
0,B,Mining and quarrying,94.6,0.006,14.67,0.0379,85,MS,1,2227.36,646.151302,423413.418622,5675793.04,2548.215394,True,650.70225
1,B,Mining and quarrying,96.11,0.0052,9.92,0.0397,91,MS,1,1096.0,208.6949,109792.561757,1833176.0,1672.605839,False,331.349607
2,B,Mining and quarrying,96.11,0.0052,9.92,0.0397,92,MS,1,816.0,129.444604,36662.22051,1137041.4,1393.433088,False,191.473812
3,B,Mining and quarrying,96.11,0.0052,9.92,0.0397,85,MS,1,701.6,114.201389,19954.089684,1003145.0,1429.79618,False,141.258946
4,C,Manufacturing industry,101.04,0.0078,12.6,0.0432,30,MS,2,3480.0,1845.483607,610080.465591,16210728.0,4658.255172,True,781.076479


In [88]:
# Ids (index labels of master) of all profiles whose "is_over_2500h" flag is False.
is_under_2500h = master["is_over_2500h"].eq(False)
profiles_under_2500h = master.index[is_under_2500h.to_numpy()]

In [89]:
# Baseline results: rows of the overview table whose name contains "base".
# The profile id is the part of "name" before the first underscore
# (e.g. "11_baseline" -> 11); only profiles under 2500 h are kept.
baseline = (
    pd.read_sql("SELECT * FROM vea_results.overview WHERE name LIKE '%%base%%'", URI)
    .assign(id=lambda frame: frame["name"].str.split("_").str[0].astype(int))
    .set_index("id")
    .loc[lambda frame: frame.index.isin(profiles_under_2500h)]
    .sort_index()
)
baseline.head()

Unnamed: 0_level_0,name,energy_costs_eur,grid_energy_costs_eur,grid_capacity_costs_eur,grid_capacity_kw,storage_invest_eur,storage_annuity_eur,storage_capacity_kwh,inverter_invest_eur,inverter_annuity_eur,inverter_capacity_kw,solar_invest_eur,solar_annuity_eur,solar_capacity_kwp,total_yearly_costs_eur,total_annuity_eur,total_invest_eur
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,1_baseline,329971.68,12033.847188,132976.873344,1096.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,474982.4,0.0,0.0
2,2_baseline,204667.452,7464.085529,99004.679424,816.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,311136.2,0.0,0.0
3,3_baseline,180566.1,6585.12529,85124.611622,701.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,272275.8,0.0,0.0
11,11_baseline,725850.45,32070.976283,385032.6312,2110.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1142954.0,0.0,0.0
18,18_baseline,40066.89786,3705.580104,7671.378578,89.26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,51443.86,0.0,0.0


In [122]:
# Storage-only results: rows of the overview table whose name ends with
# "storage_only". The profile id is the part of "name" before the first
# underscore; only profiles under 2500 h are kept.
#
# The whole pipeline is one chain so that "invest_per_tac" is added via
# assign() to a fresh frame instead of being set on a boolean-filtered slice
# (which can raise pandas' SettingWithCopyWarning).
storage = (
    pd.read_sql("SELECT * FROM vea_results.overview WHERE name LIKE '%%storage_only'", URI)
    .assign(id=lambda frame: frame["name"].str.split("_").str[0].astype(int))
    .set_index("id")
    .loc[lambda frame: frame.index.isin(profiles_under_2500h)]
    .assign(
        # Battery system investment (storage + inverter) relative to the
        # baseline total yearly costs; the division aligns on the profile id.
        invest_per_tac=lambda frame: (
            (frame["storage_invest_eur"] + frame["inverter_invest_eur"])
            / baseline["total_yearly_costs_eur"]
        )
    )
    .sort_index()
)
storage.head()

Unnamed: 0_level_0,name,energy_costs_eur,grid_energy_costs_eur,grid_capacity_costs_eur,grid_capacity_kw,storage_invest_eur,storage_annuity_eur,storage_capacity_kwh,inverter_invest_eur,inverter_annuity_eur,inverter_capacity_kw,solar_invest_eur,solar_annuity_eur,solar_capacity_kwp,total_yearly_costs_eur,total_annuity_eur,total_invest_eur,invest_per_tac
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
1,1_storage_only,329310.116681,12009.720415,112571.591043,927.818956,72705.633117,5658.350233,255.107485,30272.58794,2355.978453,168.181044,0.0,0.0,0.0,461905.8,8014.328685,102978.221057,0.216804
2,2_storage_only,204374.029623,7453.384611,77439.89217,638.262276,76183.2931,5929.000763,267.3098,31992.790344,2489.854017,177.737724,0.0,0.0,0.0,297686.2,8418.85478,108176.083444,0.347681
3,3_storage_only,180091.219844,6567.806727,75835.912954,625.042224,27228.142451,2119.043045,95.537342,13780.399758,1072.466119,76.557776,0.0,0.0,0.0,265686.4,3191.509164,41008.542208,0.150614
11,11_storage_only,724413.938321,32007.505451,283208.83584,1552.0,796350.0,61976.314826,2794.210526,100440.0,7816.790433,558.0,0.0,0.0,0.0,1109423.0,69793.105259,896790.0,0.784625
18,18_storage_only,39989.713463,3698.441718,4841.311113,56.33087,7472.947443,581.585664,26.220868,5927.243386,461.290516,32.92913,0.0,0.0,0.0,49572.34,1042.87618,13400.190829,0.260482


# General analysis

In [91]:
# Every row of `storage` is one analyzed profile; a profile counts as
# "using storage" when its inverter investment is positive.
profiles_using_storage = storage.loc[storage["inverter_invest_eur"].gt(0)]
total_profiles_analyzed = storage.shape[0]
n_profiles_using_storage = profiles_using_storage.shape[0]

print(f"{total_profiles_analyzed=}")
print(f"{n_profiles_using_storage=}")
print(f"Percentage of profiles using storage: {((n_profiles_using_storage / total_profiles_analyzed) * 100):.2f} %")

total_profiles_analyzed=1709
n_profiles_using_storage=1708
Percentage of profiles using storage: 99.94 %


## System sizes

In [92]:
# Summary statistics (without the count row) of the installed capacities,
# restricted to the profiles that use storage.
storage_size = profiles_using_storage["storage_capacity_kwh"].rename("Storage capacity")
print(storage_size.describe().drop("count").to_markdown())

print("")
inverter_size = profiles_using_storage["inverter_capacity_kw"].rename("Inverter capacity")
print(inverter_size.describe().drop("count").to_markdown())


# Box plot of both capacities; rename() already returns a new frame, so the
# source frame is left untouched.
size_labels = {
    "storage_capacity_kwh": "Storage",
    "inverter_capacity_kw": "Inverter",
}
fig_df = profiles_using_storage.rename(columns=size_labels)
fig = px.box(
    data_frame=fig_df,
    x=["Inverter", "Storage"],
    title="Battery system sizes",
)
fig.update_layout(xaxis_title="Capacity in kWh (storage) / kW (inverter)", yaxis_title="")
# Zoom in on the bulk of the distribution; outliers beyond 300 are cut off.
fig.update_xaxes(range=[0, 300])
fig.show()

|      |   Storage capacity |
|:-----|-------------------:|
| mean |        125.791     |
| std  |        358.369     |
| min  |          0.0118421 |
| 25%  |         18.7696    |
| 50%  |         45.7277    |
| 75%  |        111.885     |
| max  |       8539.07      |

|      |   Inverter capacity |
|:-----|--------------------:|
| mean |            100.511  |
| std  |            640.563  |
| min  |              0.045  |
| 25%  |             18.4184 |
| 50%  |             38.84   |
| 75%  |             82.0855 |
| max  |          25586.3    |


## System investments

In [93]:
# Summary statistics (without the count row) of the investments,
# restricted to the profiles that use storage.
storage_invest = profiles_using_storage["storage_invest_eur"].rename("Absolute storage investment")
print(storage_invest.describe().drop("count").to_markdown())

print("")
inverter_invest = profiles_using_storage["inverter_invest_eur"].rename("Absolute inverter investment")
print(inverter_invest.describe().drop("count").to_markdown())


# Box plot of both investments; rename() already returns a new frame, so the
# source frame is left untouched.
invest_labels = {
    "storage_invest_eur": "Storage",
    "inverter_invest_eur": "Inverter",
}
fig_df = profiles_using_storage.rename(columns=invest_labels)
fig = px.box(
    data_frame=fig_df,
    x=["Inverter", "Storage"],
    title="Battery system investments",
)
fig.update_layout(xaxis_title="Storage system investments in €", yaxis_title="")
# Zoom in on the bulk of the distribution; outliers beyond 75000 are cut off.
fig.update_xaxes(range=[0, 75000])
fig.show()

|      |   Absolute storage investment |
|:-----|------------------------------:|
| mean |               35850.4         |
| std  |              102135           |
| min  |                   3.375       |
| 25%  |                5349.35        |
| 50%  |               13032.4         |
| 75%  |               31887.1         |
| max  |                   2.43364e+06 |

|      |   Absolute inverter investment |
|:-----|-------------------------------:|
| mean |                18091.9         |
| std  |               115301           |
| min  |                    8.1         |
| 25%  |                 3315.31        |
| 50%  |                 6991.2         |
| 75%  |                14775.4         |
| max  |                    4.60554e+06 |


In [125]:
# Investment relative to the baseline total yearly costs, expressed in percent.
# Only the plotted column is scaled: multiplying the whole frame by 100 would
# also "multiply" the string column "name" (string repetition) and rescale
# every unrelated numeric column.
fig_df = storage[["invest_per_tac"]] * 100
fig = px.box(fig_df, "invest_per_tac", title="Investment per total yearly energy costs")
fig.update_xaxes(range=[0, 100], title="Ratio in %")

# Comparison

In [95]:
# Absolute difference baseline - storage per profile and column
# (positive values mean the storage scenario is lower than the baseline).
abs_diff = (
    baseline.drop(columns="name")
    .sub(storage.drop(columns="name"))
    # drop those that could not be optimized
    .dropna(subset="total_yearly_costs_eur")
)

abs_diff.head()

Unnamed: 0_level_0,energy_costs_eur,grid_capacity_costs_eur,grid_capacity_kw,grid_energy_costs_eur,inverter_annuity_eur,inverter_capacity_kw,inverter_invest_eur,invest_per_tac,solar_annuity_eur,solar_capacity_kwp,solar_invest_eur,storage_annuity_eur,storage_capacity_kwh,storage_invest_eur,total_annuity_eur,total_invest_eur,total_yearly_costs_eur
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,661.563319,20405.282301,168.181044,24.126773,-2355.978453,-168.181044,-30272.58794,,0.0,0.0,0.0,-5658.350233,-255.107485,-72705.633117,-8014.328685,-102978.221057,13076.643708
2,293.422377,21564.787254,177.737724,10.700918,-2489.854017,-177.737724,-31992.790344,,0.0,0.0,0.0,-5929.000763,-267.3098,-76183.2931,-8418.85478,-108176.083444,13450.055769
3,474.880156,9288.698668,76.557776,17.318563,-1072.466119,-76.557776,-13780.399758,,0.0,0.0,0.0,-2119.043045,-95.537342,-27228.142451,-3191.509164,-41008.542208,6589.388223
11,1436.511679,101823.79536,558.0,63.470832,-7816.790433,-558.0,-100440.0,,0.0,0.0,0.0,-61976.314826,-2794.210526,-796350.0,-69793.105259,-896790.0,33530.672611
18,77.184397,2830.067465,32.92913,7.138386,-461.290516,-32.92913,-5927.243386,,0.0,0.0,0.0,-581.585664,-26.220868,-7472.947443,-1042.87618,-13400.190829,1871.514068


In [96]:
# merge savings onto master (with features)
abs_diff_with_master = pd.merge(left=abs_diff, right=master, how="left", left_index=True, right_index=True)
abs_diff_with_master.head()

abs_diff_with_master["std_by_mean"] = abs_diff_with_master["std_kw"] / abs_diff_with_master["mean_load_kw"]
abs_diff_with_master["std_by_peak"] = abs_diff_with_master["std_kw"] / abs_diff_with_master["peak_load_kw"]
abs_diff_with_master["peak_by_mean"] = abs_diff_with_master["peak_load_kw"] / abs_diff_with_master["mean_load_kw"]

In [97]:
# Relative difference (baseline - storage) / baseline per profile and column.
# Columns whose baseline value is 0 (e.g. the investment columns) end up as
# -inf or NaN, as visible in the output below.
baseline_values = baseline.drop(columns="name")
rel_diff = (
    baseline_values.sub(storage.drop(columns="name"))
    .div(baseline_values)
    # drop those that could not be optimized
    .dropna(subset="total_yearly_costs_eur")
)

rel_diff

Unnamed: 0_level_0,energy_costs_eur,grid_capacity_costs_eur,grid_capacity_kw,grid_energy_costs_eur,inverter_annuity_eur,inverter_capacity_kw,inverter_invest_eur,invest_per_tac,solar_annuity_eur,solar_capacity_kwp,solar_invest_eur,storage_annuity_eur,storage_capacity_kwh,storage_invest_eur,total_annuity_eur,total_invest_eur,total_yearly_costs_eur
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
1,0.002005,0.153450,0.153450,0.002005,-inf,-inf,-inf,,,,,-inf,-inf,-inf,-inf,-inf,0.027531
2,0.001434,0.217816,0.217816,0.001434,-inf,-inf,-inf,,,,,-inf,-inf,-inf,-inf,-inf,0.043229
3,0.002630,0.109119,0.109119,0.002630,-inf,-inf,-inf,,,,,-inf,-inf,-inf,-inf,-inf,0.024201
11,0.001979,0.264455,0.264455,0.001979,-inf,-inf,-inf,,,,,-inf,-inf,-inf,-inf,-inf,0.029337
18,0.001926,0.368913,0.368913,0.001926,-inf,-inf,-inf,,,,,-inf,-inf,-inf,-inf,-inf,0.036380
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5349,0.001966,0.336584,0.336584,0.001966,-inf,-inf,-inf,,,,,-inf,-inf,-inf,-inf,-inf,0.073225
5350,0.002545,0.163913,0.163913,0.002545,-inf,-inf,-inf,,,,,-inf,-inf,-inf,-inf,-inf,0.026960
5353,0.002452,0.184117,0.184117,0.002452,-inf,-inf,-inf,,,,,-inf,-inf,-inf,-inf,-inf,0.019726
5357,0.002540,0.181818,0.181818,0.002540,-inf,-inf,-inf,,,,,-inf,-inf,-inf,-inf,-inf,0.022003


In [98]:
# merge savings onto master (with features)
rel_diff_with_master = pd.merge(left=rel_diff, right=master, how="left", left_index=True, right_index=True)
rel_diff_with_master.head()

rel_diff_with_master["std_by_mean"] = rel_diff_with_master["std_kw"] / rel_diff_with_master["mean_load_kw"]
rel_diff_with_master["std_by_peak"] = rel_diff_with_master["std_kw"] / rel_diff_with_master["peak_load_kw"]
rel_diff_with_master["peak_by_mean"] = rel_diff_with_master["peak_load_kw"] / rel_diff_with_master["mean_load_kw"]

## Total savings

In [99]:
# Summary statistics (without the count row) of the absolute yearly savings.
abs_yearly_savings = abs_diff["total_yearly_costs_eur"].rename("Total yearly savings in eur")
print(abs_yearly_savings.describe().drop("count").to_markdown())

# Box plot of the same quantity; rename() returns a new frame.
fig_df = abs_diff.rename(columns={"total_yearly_costs_eur": "Savings"})
fig = px.box(
    data_frame=fig_df,
    x=["Savings"],
    title="Total yearly savings",
)
fig.update_layout(xaxis_title="Total yearly savings in €", yaxis_title="")
# Zoom in on the bulk of the distribution; outliers beyond 15000 are cut off.
fig.update_xaxes(range=[0, 15e3])
fig.show()

|      |   Total yearly savings in eur |
|:-----|------------------------------:|
| mean |                       5562.79 |
| std  |                      16297.7  |
| min  |                          0    |
| 25%  |                       1051.1  |
| 50%  |                       2468.1  |
| 75%  |                       5465.09 |
| max  |                     514178    |


In [100]:
# Summary statistics (without the count row) of the relative yearly savings in percent.
rel_yearly_savings = rel_diff["total_yearly_costs_eur"].mul(100).rename("Relative yearly savings in %")
print(rel_yearly_savings.describe().drop("count").to_markdown())
print("-----------------------")

# Count and share of profiles exceeding each savings threshold. rel_diff holds
# fractions, so the percentage thresholds are converted with pct / 100.
for pct in (1, 2, 3):
    n_profiles_above = int((rel_diff["total_yearly_costs_eur"] > pct / 100).sum())
    perc_profiles_above = (n_profiles_above / len(rel_diff)) * 100
    print(f"Number of profiles with more than {pct}% yearly savings: {n_profiles_above}")
    print(f"Percentage of profiles with more than {pct}% yearly savings: {perc_profiles_above:.2f} %")

# Box plot of the relative savings in percent.
fig_df = rel_diff.mul(100).rename(columns={"total_yearly_costs_eur": "Savings"})
fig = px.box(
    data_frame=fig_df,
    x="Savings",
    title="Relative yearly savings")
fig.update_layout(xaxis_title="Relative yearly savings in %", yaxis_title="")
# Zoom in on the bulk of the distribution; outliers beyond 15 % are cut off.
fig.update_xaxes(range=[0, 15])
fig.show()

|      |   Relative yearly savings in % |
|:-----|-------------------------------:|
| mean |                       4.17191  |
| std  |                       4.60534  |
| min  |                       0.244344 |
| 25%  |                       1.69312  |
| 50%  |                       2.93126  |
| 75%  |                       5.04405  |
| max  |                      75.0611   |
-----------------------
Number of profiles with more than 1% yearly savings: 1548
Percentage of profiles with more than 1% yearly savings: 90.63 %
Number of profiles with more than 2% yearly savings: 1151
Percentage of profiles with more than 2% yearly savings: 67.39 %
Number of profiles with more than 3% yearly savings: 835
Percentage of profiles with more than 3% yearly savings: 48.89 %


## Capacity costs savings

In [101]:
# Summary statistics (without the count row) of the absolute grid capacity
# cost savings. The values come from abs_diff and are therefore in EUR, so the
# series is labelled accordingly (it was previously labelled "in %").
abs_cap_costs_yearly_savings = abs_diff["grid_capacity_costs_eur"].rename(
    "Total yearly grid capacity costs savings in eur"
)
print(abs_cap_costs_yearly_savings.describe().drop("count").to_markdown())

# Box plot of the same quantity.
total_cap_saving_fig_df = abs_diff.rename(columns={"grid_capacity_costs_eur": "Savings"})
total_cap_savings_fig = px.box(
    data_frame=total_cap_saving_fig_df,
    x="Savings",
    title="Yearly capacity costs savings")
total_cap_savings_fig.update_layout(xaxis_title="Savings in €", yaxis_title="")
# Zoom in on the bulk of the distribution; outliers beyond 10000 are cut off.
total_cap_savings_fig.update_xaxes(range=[0, 10e3])
total_cap_savings_fig.show()

|      |   Total yearly grid capacity costs savings in % |
|:-----|------------------------------------------------:|
| mean |                                  9489.26        |
| std  |                                 31444.6         |
| min  |                                     0           |
| 25%  |                                  1822.06        |
| 50%  |                                  3932.63        |
| 75%  |                                  9154.88        |
| max  |                                     1.06138e+06 |


In [102]:
# Summary statistics (without the count row) of the relative grid capacity
# cost savings in percent.
rel_cap_costs_yearly_savings = rel_diff["grid_capacity_costs_eur"].mul(100).rename(
    "Relative yearly grid capacity costs savings in %"
)
print(rel_cap_costs_yearly_savings.describe().drop("count").to_markdown())

# Box plot of the same quantity (the frame is built in one step; no empty
# placeholder DataFrame is needed first).
rel_cap_savings_fig_df = rel_diff.mul(100).rename(columns={"grid_capacity_costs_eur": "Savings"})
rel_cap_savings_fig = px.box(
    data_frame=rel_cap_savings_fig_df,
    x="Savings",
    title="Relative yearly capacity cost savings")
rel_cap_savings_fig.update_layout(xaxis_title="Savings in %", yaxis_title="")
# Zoom in on the bulk of the distribution; values beyond 75 % are cut off.
rel_cap_savings_fig.update_xaxes(range=[0, 75])
rel_cap_savings_fig.show()

|      |   Relative yearly grid capacity costs savings in % |
|:-----|---------------------------------------------------:|
| mean |                                           24.8865  |
| std  |                                           14.5741  |
| min  |                                            0.12364 |
| 25%  |                                           14.1304  |
| 50%  |                                           22.3893  |
| 75%  |                                           33.1419  |
| max  |                                           99.9732  |


# Correlations

## On absolute savings

In [103]:
# Columns excluded from the correlation matrix: descriptive master columns
# and the solar result columns.
master_cols = ["grid_level", "zip_code", "sector_group_id", "sector_group"]
solar_cols = ["solar_invest_eur", "solar_annuity_eur", "solar_capacity_kwp"]
cols_to_drop = master_cols + solar_cols

abs_features = abs_diff_with_master.drop(columns=cols_to_drop)
abs_correlations_df = abs_features.corr()
px.imshow(abs_correlations_df, title="Correlation coefficients for total yearly savings")

In [104]:
# Correlation of every variable with the absolute total yearly savings,
# rounded to two decimals and ordered from highest to lowest.
fig_df = (
    abs_correlations_df[["total_yearly_costs_eur"]]
    .round(2)
    .sort_values("total_yearly_costs_eur", ascending=False)
)
fig = px.bar(
    data_frame=fig_df,
    y="total_yearly_costs_eur",
    text_auto=True,
    title="Correlation between different load profile characteristics and total yearly savings",
)
fig.update_layout(yaxis_title="Correlation coefficient", xaxis_title="Variable")

## On relative savings

In [108]:
# Columns excluded from the correlation matrix: descriptive master columns
# and the solar result columns.
master_cols = ["grid_level", "zip_code", "sector_group_id", "sector_group"]
solar_cols = ["solar_invest_eur", "solar_annuity_eur", "solar_capacity_kwp"]
cols_to_drop = master_cols + solar_cols

rel_features = rel_diff_with_master.drop(columns=cols_to_drop)
rel_correlations_df = rel_features.corr()
px.imshow(rel_correlations_df, title="Correlation coefficients for relative yearly savings")

In [109]:
# Correlation of every variable with the relative total yearly savings,
# rounded to two decimals, ordered from highest to lowest; variables without
# a defined correlation (NaN) are left out of the chart.
fig_df = (
    rel_correlations_df[["total_yearly_costs_eur"]]
    .round(2)
    .sort_values("total_yearly_costs_eur", ascending=False)
    .dropna()
)
fig = px.bar(
    data_frame=fig_df,
    y="total_yearly_costs_eur",
    text_auto=True,
    title="Correlation between different load profile characteristics and relative yearly savings",
)
fig.update_layout(yaxis_title="Correlation coefficient", xaxis_title="Variable")

In [110]:
# Pearson correlation (with p-value) between the relative total yearly savings
# and every variable of the correlation matrix.
target = rel_diff_with_master["total_yearly_costs_eur"]

records = []
for feature in rel_correlations_df.index:
    # Columns describing the storage system / inverter are skipped.
    if "storage" in feature or "inverter" in feature:
        continue

    values = rel_diff_with_master[feature]
    # pearsonr needs finite input. Note that `np.inf in values` would test the
    # index labels of the Series, not its values, so the values are checked
    # explicitly with isin().
    if values.isin([np.nan, np.inf, -np.inf]).any():
        continue

    corr, p_value = pearsonr(x=values, y=target)
    records.append({"var": feature, "corr": corr, "p_value": p_value})

# Build the frame once from the collected records instead of growing it row by
# row; the explicit columns keep the sort below valid even without records.
df = pd.DataFrame(records, columns=["var", "corr", "p_value"])

df.sort_values("corr", ascending=False, ignore_index=True)


An input array is constant; the correlation coefficient is not defined.



Unnamed: 0,var,corr,p_value
0,total_yearly_costs_eur,1.0,0.0
1,grid_capacity_costs_eur,0.770037,0.0
2,grid_capacity_kw,0.770037,0.0
3,std_by_mean,0.46984,1.63474e-94
4,peak_by_mean,0.435124,7.742472e-80
5,capacity_price_over_2500h_eur_per_kw,0.321727,1.986941e-42
6,energy_price_under_2500h_eur_per_kwh,0.301869,2.556074e-37
7,capacity_price_under_2500h_eur_per_kw,0.137331,1.208779e-08
8,energy_price_over_2500h_eur_per_kwh,0.046163,0.05646211
9,number_of_peak_load,-0.030318,0.2104443


## Scatterplots

In [None]:
cap_vs_savings_fig_df = rel_diff_with_master.copy()

# Capacity price applicable to each profile: the "over 2500 h" price where the
# flag is set, the "under 2500 h" price otherwise. The column is not plotted
# below but stays available in the figure frame.
is_over = cap_vs_savings_fig_df["is_over_2500h"]
cap_vs_savings_fig_df.loc[is_over, "capacity_price_eur"] = cap_vs_savings_fig_df.loc[
    is_over, "capacity_price_over_2500h_eur_per_kw"
]
cap_vs_savings_fig_df.loc[~is_over, "capacity_price_eur"] = cap_vs_savings_fig_df.loc[
    ~is_over, "capacity_price_under_2500h_eur_per_kw"
]

# Relative savings are fractions; convert to percent for the y-axis.
cap_vs_savings_fig_df["total_yearly_costs_eur"] *= 100

# Keep only profiles that use storage and cut off outliers with std / mean >= 2.
uses_storage = cap_vs_savings_fig_df.index.isin(profiles_using_storage.index)
moderate_variability = cap_vs_savings_fig_df["std_by_mean"] < 2
cap_vs_savings_fig_df = cap_vs_savings_fig_df[uses_storage & moderate_variability]

# The x-axis shows std_by_mean, so title and axis label describe that ratio
# (they previously referred to grid capacity prices, which are not plotted).
# NOTE(review): if capacity price was the intended x variable, switch
# x to "capacity_price_eur" and restore the old labels instead.
cap_vs_savings_fig = px.scatter(
    data_frame=cap_vs_savings_fig_df,
    x="std_by_mean",
    y="total_yearly_costs_eur",
    title="Load variability (std / mean) vs relative yearly savings",
    color="is_over_2500h",
    trendline="ols",
    trendline_color_override="black")
cap_vs_savings_fig.update_xaxes(title="Standard deviation / mean load")
cap_vs_savings_fig.update_yaxes(title="Relative yearly savings in %")
cap_vs_savings_fig.update_layout(showlegend=False)
cap_vs_savings_fig.show()