# First notebook to code fitting of parametric forecasts

In [1]:
from create_parametric_forecasts import ParametricForecasts
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [3]:
path_to_quantile_fc = 'C:\\Users\\fh6281\\PycharmProjects\\GermanBuildingDate\\02_forecast\\storage_quantile_fc\\file_fc_PatchTST_SFH3_2025-07-16_13-47-24_freq15.csv'
pf = ParametricForecasts()
pf.load_quantile_forecasts(path_to_quantile_fc)
#pf.quantile_forecasts

In [5]:
pf.sort_quantiles()
#pf.quantile_forecasts

Sorting quantiles in ascending order...


# Playground

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import interp1d
from sklearn.mixture import GaussianMixture
from scipy import stats
from scipy.stats import norm
import os

In [2]:
#path = 'C:\\Users\\fh6281\\PycharmProjects\\GermanBuildingDate\\03_optimization\\file_fc_PatchTST_SFH3_2025-07-16_13-47-24_freq15.csv'
path = 'C:\\Users\\fh6281\\PycharmProjects\\GermanBuildingDate\\02_forecast\\storage_quantile_fc\\file_fc_PatchTST_SFH3_2025-07-16_13-47-24_freq15.csv'

In [3]:
class ParametricForecasts:
    def __init__(self):
        """
        Initialize the ParametricForecasts class.

        Args:
            path (str): Path to the file containing quantile forecasts.
            distribution (str): Type of distribution to fit ('normal' or 'sum2gaussian').
        """
        self.quantile_forecasts = None
        self.param_forecasts = None
        self.csv_path = None
        self.implemented_distributions = ['sum2gaussian']


    def load_quantile_forecasts(self, csv_path, timerange=None):
        """Load quantile forecasts from the specified path and group them according to their creation timestamp."""

        self.csv_path = csv_path

        # 1) Load CSV, parse timestamps
        df = pd.read_csv(
            csv_path,
            parse_dates=['timestamp','time_fc_created'],
            index_col='timestamp'          # if you want 'timestamp' as the DataFrame index
        )

        # Change the order of the columns to have P_TOT before quantiles
        cols = df.columns.tolist()
    
        cols = ['building', 'P_TOT'] + [col for col in cols if col not in  ['building', 'P_TOT']]
        df = df[cols]

        # remove the quantile_ prefix from the quantile columns
        df.columns = df.columns.str.replace('quantile_', '', regex=False)

        # 2) Filter by timerange if provided. Keep all rows where 'time_fc_created' is within the specified range.
        if timerange is not None:
            start_time, end_time = pd.to_datetime(timerange[0]), pd.to_datetime(timerange[1])
            df = df[(df['time_fc_created'] >= start_time) & (df['time_fc_created'] <= end_time)]

        
        
        # 2) Group by the forecast‐creation time
        groups = {
            created_time: group.copy()
            for created_time, group in df.groupby('time_fc_created')
        }
        
        self.quantile_forecasts = {}

        for created_time, subdf in groups.items():
            subdf = subdf.drop(columns=['time_fc_created'])
            self.quantile_forecasts[created_time] = subdf





    def fit_distribution(self, name):
        """Fit the specified distribution to the quantile forecasts."""
        if self.quantile_forecasts is None:
            raise ValueError("Quantile forecasts not loaded. Call load_quantile_forecasts() first.")
        
        if name not in self.implemented_distributions:
            raise ValueError(f"Distribution '{name}' is not implemented. Available distributions: {self.implemented_distributions}")
        
        if name == 'sum2gaussian':
            self.fit_sum2gaussian()

    
    def fit_sum2gaussian(self):

        self.param_forecasts = {}

        for created_time, subdf in self.quantile_forecasts.items():
            
            # get a subdf excluding the 'building' and 'P_TOT' columns
            df_quantiles = subdf.drop(columns=['building', 'P_TOT'])
            quantile_probabilites = df_quantiles.columns.astype(float)


            self.param_forecasts[created_time] = pd.DataFrame(index=df_quantiles.index, columns=['w1', 'mu1', 'std1', 'w2', 'mu2', 'std2'])   

            for t, quants in df_quantiles.iterrows():
                # Step 1: Create an interpolator to map continuous probabilities to continuous values
                inv_cdf = interp1d(quantile_probabilites, quants, kind='linear', fill_value='extrapolate')

                # Step 2: Generate synthethic samples from the inverse CDF via interpolation
                np.random.seed(42)
                synthetic_probs = np.random.uniform(0.0, 1.0, 10000)
                synthetic_values = inv_cdf(synthetic_probs)

                # Step 3: Fit Gaussian Mixture Model (GMM) to the synthetic samples
                gmm = GaussianMixture(n_components=2, random_state=42, covariance_type='full')
                gmm.fit(synthetic_values.reshape(-1, 1))

                # Step 4: Store the GMM parameters in a DataFrame
                self.param_forecasts[created_time].loc[t, 'w1'] = gmm.weights_[0]
                self.param_forecasts[created_time].loc[t, 'mu1'] = gmm.means_[0, 0]
                self.param_forecasts[created_time].loc[t, 'std1'] = np.sqrt(gmm.covariances_[0, 0, 0]) # Transform covariance to standard deviation
                self.param_forecasts[created_time].loc[t, 'w2'] = gmm.weights_[1]
                self.param_forecasts[created_time].loc[t, 'mu2'] = gmm.means_[1, 0]
                self.param_forecasts[created_time].loc[t, 'std2'] = np.sqrt(gmm.covariances_[1, 0, 0]) # Transform covariance to standard deviation


    def store_parametric_forecasts(self):
        """Store the parametric forecasts to a CSV file."""
        if self.param_forecasts is None:
            raise ValueError("Parametric forecasts not generated. Call fit_distribution() first.")
        
        # Convert the dictionary of DataFrames to a single DataFrame
        if not self.param_forecasts:
            raise ValueError("No parametric forecasts to save.")
        combined_df = pd.concat(self.param_forecasts, axis=0)
        combined_df.index = combined_df.index.set_names(['time_fc_created', 'timestamp'])

        # add curent timestamp to the filename
        current_time = pd.Timestamp.now().strftime('%Y-%m-%d_%H-%M')
        # Create the filename and directory based on the original CSV path
        filename = os.path.basename(self.csv_path).replace('file_fc', 'file_fc_parametric')
        filename = filename.replace('.csv', f'_CreationTime{current_time}.csv')
        directory = os.path.dirname(self.csv_path).replace('storage_quantile_fc', 'storage_param_fc')
        
        # Save to CSV
        combined_df.to_csv(directory + '/' + filename, index=True)
        print(f"Parametric forecasts saved to {directory}")


      


    

    def sort_quantiles(self):
        """Sort the quantiles in ascending order."""
        for created_time, subdf in self.quantile_forecasts.items():
            df_quantiles = subdf.filter(regex='^quantile_')
            for index, row in df_quantiles.iterrows():
                sorted_row = row.sort_values()
                # Update the DataFrame with the sorted values
                self.quantile_forecasts[created_time].loc[index, df_quantiles.columns] = sorted_row.values
                


In [4]:
pf = ParametricForecasts()
pf.load_quantile_forecasts(path, timerange=None)  # ['2020-05-13 06:15:00+00:00', '2020-05-13 06:45:00+00:00']
#pf.quantile_forecasts[list(pf.quantile_forecasts.keys())[0]]
len(pf.quantile_forecasts)


28

In [5]:
pf.sort_quantiles()
pf.quantile_forecasts[list(pf.quantile_forecasts.keys())[0]]


Unnamed: 0_level_0,building,P_TOT,0.01,0.02,0.03,0.04,0.05,0.06,0.07,0.08,...,0.9,0.91,0.92,0.93,0.94,0.95,0.96,0.97,0.98,0.99
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-05-13 06:15:00+00:00,SFH3,-1658.738249,-5333.4053,-9732.7310,-5029.9000,-2910.13800,-3703.5598,-3728.15230,-3924.83940,-2612.71340,...,384.411070,31.47174,1920.61990,1403.065700,2383.66850,691.87700,1599.12700,719.84920,8815.102000,10281.54200
2020-05-13 06:30:00+00:00,SFH3,-2056.013586,-1768.7485,-2766.3655,-2560.3484,-1355.18070,-2160.4640,-2018.36990,-1652.48510,-2017.19750,...,654.816350,-787.88430,-556.90015,-580.514800,-1185.79460,-502.83157,-1004.80530,848.20917,-675.653800,1008.09894
2020-05-13 06:45:00+00:00,SFH3,-2435.954031,-2269.1010,-2160.1040,-2892.4263,-2884.04100,-2881.7776,-2952.48360,-2532.22460,-3214.89580,...,-420.227480,-1035.40670,-1801.16540,-709.137700,51.53711,-1335.70650,-1143.94470,-1063.50120,-917.387450,-694.37463
2020-05-13 07:00:00+00:00,SFH3,-2834.008850,-2347.1530,-3734.8027,-2770.1733,-3707.12960,-3529.8820,-2592.61080,-3067.35400,-2807.21530,...,201.459050,-246.04010,-1072.68190,-792.116300,-1296.43710,-797.60370,38.60388,-1082.78750,-503.205540,-326.76550
2020-05-13 07:15:00+00:00,SFH3,-2879.560260,-3383.9230,-3857.6372,-2948.1330,-2235.61060,-3134.1950,-3068.44120,-3012.17000,-2979.26500,...,-930.008600,-855.26490,-849.18740,265.528260,-1605.87020,-1232.22020,-311.61304,-667.97840,-510.433470,-661.55930
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-05-14 05:00:00+00:00,SFH3,-369.257306,-1466.3691,-1487.0508,-1729.5812,68.09076,-1041.0454,-1156.62010,-223.42358,-245.27423,...,-31.522156,369.11760,525.89655,1736.426300,820.01630,1259.02610,1352.46190,817.24750,1269.591100,2504.01590
2020-05-14 05:15:00+00:00,SFH3,-567.394781,-1434.6311,-791.1844,-1999.3706,-654.70496,-1074.5382,-637.51886,-884.26420,-1164.47790,...,1370.276400,1135.12920,1207.01610,1197.408000,1036.99460,177.54901,1825.08960,1059.61570,637.388100,27.61139
2020-05-14 05:30:00+00:00,SFH3,-786.521705,-1705.6101,-1262.1174,-1684.9755,-1128.39940,-1096.8840,-578.22314,-1564.87700,-2073.99200,...,-583.715000,302.24982,469.84406,229.154360,1613.62550,1197.13870,1325.82230,1871.42040,42.617554,78.34674
2020-05-14 05:45:00+00:00,SFH3,-976.972762,-1023.2340,-912.8257,-1451.9517,-1890.09800,-1442.3835,-1190.54540,-1045.54250,-1661.20170,...,672.944030,129.52972,-136.20905,103.679750,577.37054,511.22772,410.72820,527.03436,369.650200,225.88800


In [6]:
pf.quantile_forecasts

{Timestamp('2020-05-13 06:15:00+0000', tz='UTC'):                           building        P_TOT       0.01       0.02  \
 timestamp                                                               
 2020-05-13 06:15:00+00:00     SFH3 -1658.738249 -5333.4053 -9732.7310   
 2020-05-13 06:30:00+00:00     SFH3 -2056.013586 -1768.7485 -2766.3655   
 2020-05-13 06:45:00+00:00     SFH3 -2435.954031 -2269.1010 -2160.1040   
 2020-05-13 07:00:00+00:00     SFH3 -2834.008850 -2347.1530 -3734.8027   
 2020-05-13 07:15:00+00:00     SFH3 -2879.560260 -3383.9230 -3857.6372   
 ...                            ...          ...        ...        ...   
 2020-05-14 05:00:00+00:00     SFH3  -369.257306 -1466.3691 -1487.0508   
 2020-05-14 05:15:00+00:00     SFH3  -567.394781 -1434.6311  -791.1844   
 2020-05-14 05:30:00+00:00     SFH3  -786.521705 -1705.6101 -1262.1174   
 2020-05-14 05:45:00+00:00     SFH3  -976.972762 -1023.2340  -912.8257   
 2020-05-14 06:00:00+00:00     SFH3 -1314.484946 -2416.3845  -9

In [7]:
pf.fit_distribution('sum2gaussian')

In [8]:
pf.store_parametric_forecasts()

Parametric forecasts saved to C:\Users\fh6281\PycharmProjects\GermanBuildingDate\02_forecast\storage_param_fc


In [None]:
pf.param_forecasts

{Timestamp('2020-05-13 06:15:00+0000', tz='UTC'):                                  w1          mu1         std1        w2  \
 timestamp                                                                 
 2020-05-13 06:15:00+00:00  0.977438 -1447.802011  1615.415259  0.022562   
 2020-05-13 06:30:00+00:00   0.42179  -1892.43233   594.428184   0.57821   
 2020-05-13 06:45:00+00:00  0.628727 -2016.347908   528.521881  0.371273   
 2020-05-13 07:00:00+00:00  0.452051  -2488.93251   614.093255  0.547949   
 2020-05-13 07:15:00+00:00  0.487155 -2535.414973   591.649503  0.512845   
 ...                             ...          ...          ...       ...   
 2020-05-14 05:00:00+00:00  0.465875  -720.759799   615.750548  0.534125   
 2020-05-14 05:15:00+00:00   0.65571  -510.414824   476.572055   0.34429   
 2020-05-14 05:30:00+00:00  0.381847 -1039.381624   448.695222  0.618153   
 2020-05-14 05:45:00+00:00  0.461972 -1251.619326   431.433071  0.538028   
 2020-05-14 06:00:00+00:00  0.420933 -1

In [33]:
pf.param_forecasts[list(pf.param_forecasts.keys())[0]]

Unnamed: 0_level_0,w1,mu1,std1,w2,mu2,std2
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2020-05-13 06:15:00+00:00,0.977438,-1447.802011,1615.415259,0.022562,9440.699431,1804.178269
2020-05-13 06:30:00+00:00,0.42179,-1892.43233,594.428184,0.57821,-903.770641,774.227828
2020-05-13 06:45:00+00:00,0.628727,-2016.347908,528.521881,0.371273,-867.983009,501.641421
2020-05-13 07:00:00+00:00,0.452051,-2488.93251,614.093255,0.547949,-1209.40357,570.186332
2020-05-13 07:15:00+00:00,0.487155,-2535.414973,591.649503,0.512845,-1149.970353,504.47391
...,...,...,...,...,...,...
2020-05-14 05:00:00+00:00,0.465875,-720.759799,615.750548,0.534125,388.110894,746.178037
2020-05-14 05:15:00+00:00,0.65571,-510.414824,476.572055,0.34429,678.169935,541.980224
2020-05-14 05:30:00+00:00,0.381847,-1039.381624,448.695222,0.618153,163.042375,601.072206
2020-05-14 05:45:00+00:00,0.461972,-1251.619326,431.433071,0.538028,68.760378,448.384554


In [None]:
# Test the data structure
first_forecast = list(pf.quantile_forecasts.values())[0]
print("Column names:", first_forecast.columns.tolist())
print("\nQuantile columns:", first_forecast.filter(regex='^quantile_').columns.tolist())

In [10]:
pf.quantile_forecasts

{Timestamp('2020-05-13 06:15:00+0000', tz='UTC'):                           building        P_TOT  quantile_0.01  quantile_0.02  \
 timestamp                                                                       
 2020-05-13 06:15:00+00:00     SFH3 -1658.738249     -9732.7310     -5333.4053   
 2020-05-13 06:30:00+00:00     SFH3 -2056.013586     -3307.4202     -2990.7502   
 2020-05-13 06:45:00+00:00     SFH3 -2435.954031     -4045.3870     -3214.8958   
 2020-05-13 07:00:00+00:00     SFH3 -2834.008850     -3734.8027     -3707.1296   
 2020-05-13 07:15:00+00:00     SFH3 -2879.560260     -4196.6133     -3857.6372   
 ...                            ...          ...            ...            ...   
 2020-05-14 05:00:00+00:00     SFH3  -369.257306     -1933.5383     -1755.6257   
 2020-05-14 05:15:00+00:00     SFH3  -567.394781     -1999.3706     -1761.6089   
 2020-05-14 05:30:00+00:00     SFH3  -786.521705     -2073.9920     -1751.6750   
 2020-05-14 05:45:00+00:00     SFH3  -976.972762 

In [None]:
import pandas as pd

def load_and_split_by_creation(csv_path: str):
    # 1) Load CSV, parse timestamps
    df = pd.read_csv(
        csv_path,
        parse_dates=['timestamp','time_fc_created'],
        index_col='timestamp'          # if you want 'timestamp' as the DataFrame index
    )
    
    # 2) Group by the forecast‐creation time
    groups = {
        created_time: group.copy()
        for created_time, group in df.groupby('time_fc_created')
    }
    return groups

# ─── Example usage ─────────────────────────────────────────────────────────────

csv_file = path
by_creation = load_and_split_by_creation(csv_file)

# Suppose you want to inspect the forecast created at a particular time:
fc_splitted = {}
for created_time, subdf in by_creation.items():
    # print("Forecast created at:", created_time)
    # print(subdf.head(), "\n")
    # drop the 'time_fc_created' column from each subdf
    subdf = subdf.drop(columns=['time_fc_created'])
    fc_splitted[created_time] = subdf


In [None]:
fc_splitted

{Timestamp('2020-05-13 06:15:00+0000', tz='UTC'):                           building  quantile_0.01  quantile_0.02  \
 timestamp                                                          
 2020-05-13 06:15:00+00:00     SFH3     -5333.4053     -9732.7310   
 2020-05-13 06:30:00+00:00     SFH3     -1768.7485     -2766.3655   
 2020-05-13 06:45:00+00:00     SFH3     -2269.1010     -2160.1040   
 2020-05-13 07:00:00+00:00     SFH3     -2347.1530     -3734.8027   
 2020-05-13 07:15:00+00:00     SFH3     -3383.9230     -3857.6372   
 ...                            ...            ...            ...   
 2020-05-14 05:00:00+00:00     SFH3     -1466.3691     -1487.0508   
 2020-05-14 05:15:00+00:00     SFH3     -1434.6311      -791.1844   
 2020-05-14 05:30:00+00:00     SFH3     -1705.6101     -1262.1174   
 2020-05-14 05:45:00+00:00     SFH3     -1023.2340      -912.8257   
 2020-05-14 06:00:00+00:00     SFH3     -2416.3845      -991.0830   
 
                            quantile_0.03  quantile_