In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import pickle
import pandas as pd
import numpy as np
import os
import json

In [2]:
def get_exact_time_lag_fast(df, value_column, timestamp_column='timestamp_utc', lag_hours=168):
    """
    Optimized version of get_exact_time_lag using vectorized operations.
    
    Parameters:
    -----------
    df : pandas.DataFrame
        DataFrame containing the time series data
    value_column : str
        Name of the column containing values to be lagged
    timestamp_column : str
        Name of the column containing timestamps
    lag_hours : int
        Number of hours to look back for the lag
    
    Returns:
    --------
    pandas.Series
        Series containing the lagged values
    """
    # Calculate target timestamps
    target_timestamps = df[timestamp_column] - pd.Timedelta(hours=lag_hours)
    
    # Create a merged dataframe to find matches
    reference_df = pd.DataFrame({
        'reference_time': df[timestamp_column],
        'value': df[value_column]
    }).sort_values('reference_time')
    
    # Use searchsorted to find the insertion points
    idx = np.searchsorted(reference_df['reference_time'], target_timestamps, side='right') - 1
    
    # Handle cases where idx is -1 (target time before any reference time)
    idx = np.where(idx < 0, 0, idx)
    
    # Get the matched values
    return reference_df['value'].iloc[idx].values

In [3]:
df_bbidding = pd.read_csv('bidding_training.csv')
df_bbidding["day_ahead_price"] = df_bbidding["price_x"].rename("day_ahead_price")
df_bbidding["market_price"] = df_bbidding["price_y"].rename("market_price")
df_bbidding["timestamp_utc"] = pd.to_datetime(df_bbidding["timestamp_utc"])

In [4]:
# Modified code to apply to your DataFrame
df_bbidding1 = df_bbidding[["timestamp_utc","market_price","day_ahead_price","volume",
                           "settlement_period","cos_hour","cos_day","1","2","3","4","5",
                           "6","7","8","9","imbalance_price"]].copy()

lag_configs = {
    "market_price_lag96h": ("market_price", 96),
    "imbalance_price_lag96h": ("imbalance_price", 96),
    "volume_lag96h": ("volume", 96),
    "day_ahead_price_lag1week": ("day_ahead_price", 168)
}

for new_col, (source_col, hours) in lag_configs.items():
    df_bbidding1[new_col] = get_exact_time_lag_fast(
        df_bbidding1, 
        value_column=source_col,
        lag_hours=hours
    )

In [5]:
# dfbidding1 drop first week
df_bbidding1 = df_bbidding1.loc[df_bbidding1["timestamp_utc"] > "2021-02-26 23:00:00+00:00"]

In [6]:
# Original dataset
X = df_bbidding1[["market_price_lag96h","imbalance_price_lag96h","day_ahead_price_lag1week","volume_lag96h",
                  "cos_hour","cos_day","1","2","3","4","5","6","7","8","9"]].values
y = df_bbidding1["day_ahead_price"].values  # Convert to numpy

# Step 1: Split into training and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Step 2: Further split the training set into train and validation sets (80% train, 20% validation)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, shuffle=False)

# Step 3: Standardize the data (use only training data to fit the scaler)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [7]:
import torch
import torch.nn as nn

# Define the MLP model
class MLP(nn.Module):
    def __init__(self, input_dim):
        super(MLP, self).__init__()
        # Layer sizes from the best trial
        layer_sizes = [256, 448, 192, 96]
        dropout_rates = [0.12338360578207397, 0.2192742565593194, 0.15708417985889997, 0.253419888887539]

        # Define the layers
        self.fc1 = nn.Linear(input_dim, layer_sizes[0])
        self.fc2 = nn.Linear(layer_sizes[0], layer_sizes[1])
        self.fc3 = nn.Linear(layer_sizes[1], layer_sizes[2])
        self.fc4 = nn.Linear(layer_sizes[2], layer_sizes[3])
        self.fc5 = nn.Linear(layer_sizes[3], 1)  # Output layer

        # Dropouts
        self.dropout1 = nn.Dropout(dropout_rates[0])
        self.dropout2 = nn.Dropout(dropout_rates[1])
        self.dropout3 = nn.Dropout(dropout_rates[2])
        self.dropout4 = nn.Dropout(dropout_rates[3])

        # Activation function (Swish)
        self.swish = nn.SiLU()
    
    def forward(self, x):
        x = torch.relu(self.fc1(x))  # First layer with ReLU
        x = self.dropout1(x)         # First dropout
        x = self.swish(self.fc2(x))  # Second layer with Swish
        x = self.dropout2(x)         # Second dropout
        x = self.swish(self.fc3(x))  # Third layer with Swish
        x = self.dropout3(x)         # Third dropout
        x = self.swish(self.fc4(x))  # Fourth layer with Swish
        x = self.dropout4(x)         # Fourth dropout
        x = self.fc5(x)              # Output layer (no activation for raw outputs)
        return x



In [8]:
# y_train = y_train.values
# y_test = y_test.values

In [9]:
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
X_val_tensor = torch.tensor(X_val, dtype=torch.float32)
y_val_tensor = torch.tensor(y_val, dtype=torch.float32).view(-1, 1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(-1, 1)

# Step 4: Create TensorDataset and DataLoader for training, validation, and testing
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# DataLoader
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=False)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)

# Step 5: Initialize the model, loss function, and optimizer
input_dim = X_train_tensor.shape[1]
model = MLP(input_dim)
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [10]:
import torch.nn as nn

# Define MAE loss
mae_loss = nn.L1Loss()

# Training parameters
num_epochs = 500
patience = 15  # Number of epochs to wait for improvement
min_relative_improvement = 0.000001  # 0.1% improvement threshold
best_val_loss = float('inf')
early_stopping_counter = 0

# Lists to store metrics
train_losses = []
val_losses = []
test_losses = []

for epoch in range(num_epochs):
    # Training phase
    model.train()
    epoch_loss = 0
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = mae_loss(outputs, y_batch)  # Use MAE loss
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    
    avg_train_loss = epoch_loss / len(train_loader)
    train_losses.append(avg_train_loss)
    
    # Validation phase
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val_tensor)
        val_loss = mae_loss(val_outputs, y_val_tensor).item()  # Use MAE loss
        val_losses.append(val_loss)
        
        test_outputs = model(X_test_tensor)
        test_loss = mae_loss(test_outputs, y_test_tensor).item()  # Use MAE loss
        test_losses.append(test_loss)
    
    # Early stopping and relative improvement check
    if val_loss < best_val_loss:
        relative_improvement = (best_val_loss - val_loss) / best_val_loss
        if relative_improvement < min_relative_improvement and epoch > 10:
            print(f"Stopping due to small relative improvement: {relative_improvement:.6f}")
            break
        
        best_val_loss = val_loss
        early_stopping_counter = 0
        # Save best model
        torch.save(model.state_dict(), 'MLP_day_ahead_price.pth')

    else:
        early_stopping_counter += 1
    
    # Early stopping check
    if early_stopping_counter >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs")
        break
    
    # Print progress
    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], "
              f"Train Loss: {avg_train_loss:.4f}, "
              f"Val Loss: {val_loss:.4f}, "
              f"Test Loss: {test_loss:.4f}")

# Load best model after training
model.load_state_dict(torch.load('MLP_day_ahead_price.pth'))

# Final evaluation
model.eval()
with torch.no_grad():
    final_test_outputs = model(X_test_tensor)
    final_test_loss = mae_loss(final_test_outputs, y_test_tensor).item()  # Use MAE loss
print(f"Final Test Loss: {final_test_loss:.4f}")


Epoch [10/500], Train Loss: 40.8978, Val Loss: 54.1151, Test Loss: 79.2119
Epoch [20/500], Train Loss: 39.5800, Val Loss: 54.2998, Test Loss: 68.1533
Epoch [30/500], Train Loss: 38.6693, Val Loss: 52.6772, Test Loss: 62.1616
Epoch [40/500], Train Loss: 38.0505, Val Loss: 53.8031, Test Loss: 58.2042
Early stopping triggered after 50 epochs
Final Test Loss: 56.1786


  model.load_state_dict(torch.load('MLP_day_ahead_price.pth'))


In [11]:
current_dir = os.getcwd()

path_df = os.path.abspath(os.path.join(current_dir, '..', 'basic_files'))
df_total_solar = pd.read_csv(os.path.join(path_df, 'solar_total_production.csv'))
df_total_solar.generation_mw = df_total_solar.generation_mw *0.5
df_total_wind = pd.read_csv(os.path.join(path_df, 'wind_total_production.csv'))
df_total_wind.generation_mw = df_total_wind.generation_mw *0.5 - df_total_wind.boa
df_imbalance_price = pd.read_csv(os.path.join(path_df, 'imbalance_price.csv'))
df_day_ahead_price = pd.read_csv(os.path.join(path_df, 'day_ahead_price.csv'))
df_market_price = pd.read_csv(os.path.join(path_df, 'market_index.csv'))

# Get the path to the 'logs' directory in the parent directory
path = os.path.abspath(os.path.join(current_dir, '..', 'logs'))
files = os.listdir(path)
txt_files = [file for file in files if file.endswith('.txt')]
data = []
for file in txt_files:
    with open(os.path.join(path, file), 'r') as f:
        try:
            json_data = json.load(f)
            data.append(json_data)
        except json.JSONDecodeError:
            print(f"Failed to decode JSON from file: {file}")
date_name = []
for i in range(len(data)):
    date_name.append(data[i]["prediction_date"])


In [12]:
# Extrahiere Daten
dataframe_list = []

for entry in data:
    prediction_date = entry['prediction_date']
    
    # Iteriere durch jedes 'submission' Element
    for submission in entry['solution']['submission']:
        timestamp = submission['timestamp']
        probabilistic_forecast = submission['probabilistic_forecast']
        
        # Extrahiere die Werte von 'probabilistic_forecast' und füge sie der Liste hinzu
        row = {
            'prediction_date': prediction_date,
            'timestamp': timestamp,
            '1': probabilistic_forecast.get('10', None),
            '2': probabilistic_forecast.get('20', None),
            '3': probabilistic_forecast.get('30', None),
            '4': probabilistic_forecast.get('40', None),
            '5': probabilistic_forecast.get('50', None),
            '6': probabilistic_forecast.get('60', None),
            '7': probabilistic_forecast.get('70', None),
            '8': probabilistic_forecast.get('80', None),
            '9': probabilistic_forecast.get('90', None)
        }
        dataframe_list.append(row)

# Erstelle DataFrame
df_api_new = pd.DataFrame(dataframe_list)
df_api_new = df_api_new.groupby("timestamp").last().reset_index()
df_api_new

Unnamed: 0,timestamp,prediction_date,1,2,3,4,5,6,7,8,9
0,2024-10-03T22:00:00+00:00,2024-10-04,1355,487,963,1544,1330,867,326,1402,1357
1,2024-10-03T22:30:00+00:00,2024-10-04,788,584,1148,1320,1007,565,923,797,487
2,2024-10-03T23:00:00+00:00,2024-10-04,571,748,1001,1460,740,1017,1533,597,991
3,2024-10-03T23:30:00+00:00,2024-10-04,1476,1194,1512,334,1054,1572,744,1497,463
4,2024-10-04T00:00:00+00:00,2024-10-04,1352,1390,912,662,1505,1093,814,487,1340
...,...,...,...,...,...,...,...,...,...,...,...
909,2024-10-27T20:30:00+00:00,2024-10-27,278,337,389,433,480,518,562,610,645
910,2024-10-27T21:00:00+00:00,2024-10-27,250,337,390,439,477,516,547,586,618
911,2024-10-27T21:30:00+00:00,2024-10-27,284,368,406,458,489,502,546,567,596
912,2024-10-27T22:00:00+00:00,2024-10-27,309,389,429,457,483,493,505,517,548


In [13]:
from datetime import datetime, timedelta
df_api_new = df_api_new.rename(columns={
    "timestamp": "datetime"
})
df_api_new["datetime"] = pd.to_datetime(df_api_new["datetime"])
min_date = df_api_new.datetime.min() - timedelta(minutes=30)
datetimes = pd.date_range(end=min_date, periods=336, freq='30min')
datetimes = pd.date_range(end=min_date, periods=336, freq='30min')
df_half_hourly = pd.DataFrame({"datetime": datetimes})
df_half_hourly["datetime"] = pd.to_datetime(df_half_hourly["datetime"])
df_api_new = pd.merge(df_half_hourly, df_api_new, left_on='datetime', right_on='datetime', how='outer')
df_api_new

Unnamed: 0,datetime,prediction_date,1,2,3,4,5,6,7,8,9
0,2024-09-26 22:00:00+00:00,,,,,,,,,,
1,2024-09-26 22:30:00+00:00,,,,,,,,,,
2,2024-09-26 23:00:00+00:00,,,,,,,,,,
3,2024-09-26 23:30:00+00:00,,,,,,,,,,
4,2024-09-27 00:00:00+00:00,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...
1245,2024-10-27 20:30:00+00:00,2024-10-27,278.0,337.0,389.0,433.0,480.0,518.0,562.0,610.0,645.0
1246,2024-10-27 21:00:00+00:00,2024-10-27,250.0,337.0,390.0,439.0,477.0,516.0,547.0,586.0,618.0
1247,2024-10-27 21:30:00+00:00,2024-10-27,284.0,368.0,406.0,458.0,489.0,502.0,546.0,567.0,596.0
1248,2024-10-27 22:00:00+00:00,2024-10-27,309.0,389.0,429.0,457.0,483.0,493.0,505.0,517.0,548.0


In [14]:
df_day_ahead_price.timestamp_utc = pd.to_datetime(df_day_ahead_price.timestamp_utc)
df_market_price.timestamp_utc = pd.to_datetime(df_market_price.timestamp_utc)
df_imbalance_price.timestamp_utc = pd.to_datetime(df_imbalance_price.timestamp_utc)
df_api_new_merged = pd.merge(df_api_new,df_day_ahead_price, left_on='datetime', right_on='timestamp_utc', how='left')
df_api_new_merged = pd.merge(df_api_new_merged,df_market_price, left_on='datetime', right_on='timestamp_utc', how='left')
df_api_new_merged = pd.merge(df_api_new_merged,df_imbalance_price, left_on='datetime', right_on='timestamp_utc', how='left')
df_api_new_merged["day_ahead_price"] = df_api_new_merged["price_x"].rename("day_ahead_price")
df_api_new_merged["market_price"] = df_api_new_merged["price_y"].rename("market_price")
df_api_new_merged["settlement_period"] = df_api_new_merged["settlement_period_x"].rename("settlement_period")
df_api_new_merged["cos_hour"] = np.cos(2*np.pi*df_api_new_merged["datetime"].dt.hour/24)
df_api_new_merged["cos_day"] = np.cos(2*np.pi*df_api_new_merged["datetime"].dt.day/7)
df_api_new_merged

Unnamed: 0,datetime,prediction_date,1,2,3,4,5,6,7,8,...,price_y,volume,timestamp_utc,settlement_date,settlement_period,imbalance_price,day_ahead_price,market_price,cos_hour,cos_day
0,2024-09-26 22:00:00+00:00,,,,,,,,,,...,21.21,1485.30,2024-09-26 22:00:00+00:00,2024-09-26,47.0,68.929966,7.20,21.21,0.866025,-0.222521
1,2024-09-26 22:30:00+00:00,,,,,,,,,,...,3.46,1364.20,2024-09-26 22:30:00+00:00,2024-09-26,48.0,-28.480000,7.20,3.46,0.866025,-0.222521
2,2024-09-26 23:00:00+00:00,,,,,,,,,,...,3.15,1688.95,2024-09-26 23:00:00+00:00,2024-09-27,1.0,-19.020000,3.87,3.15,0.965926,-0.222521
3,2024-09-26 23:30:00+00:00,,,,,,,,,,...,-3.68,1661.10,2024-09-26 23:30:00+00:00,2024-09-27,2.0,-18.000000,3.87,-3.68,0.965926,-0.222521
4,2024-09-27 00:00:00+00:00,,,,,,,,,,...,-0.52,1824.60,2024-09-27 00:00:00+00:00,2024-09-27,3.0,56.450000,0.45,-0.52,1.000000,0.623490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1254,2024-10-27 20:30:00+00:00,2024-10-27,278.0,337.0,389.0,433.0,480.0,518.0,562.0,610.0,...,,,NaT,,,,,,0.500000,0.623490
1255,2024-10-27 21:00:00+00:00,2024-10-27,250.0,337.0,390.0,439.0,477.0,516.0,547.0,586.0,...,,,NaT,,,,,,0.707107,0.623490
1256,2024-10-27 21:30:00+00:00,2024-10-27,284.0,368.0,406.0,458.0,489.0,502.0,546.0,567.0,...,,,NaT,,,,,,0.707107,0.623490
1257,2024-10-27 22:00:00+00:00,2024-10-27,309.0,389.0,429.0,457.0,483.0,493.0,505.0,517.0,...,,,NaT,,,,,,0.866025,0.623490


In [15]:
df_api_new_merged1 = df_api_new_merged[["datetime","market_price","day_ahead_price","volume","settlement_period","cos_hour","cos_day","1","2","3","4","5","6","7","8","9","imbalance_price"]].copy()
df_api_new_merged1.loc[:,"market_price_lag96h"] = df_api_new_merged1["market_price"].shift(192)
df_api_new_merged1.loc[:,"imbalance_price_lag96h"] = df_api_new_merged1["imbalance_price"].shift(192)
df_api_new_merged1.loc[:,"day_ahead_price_lag1week"] = df_api_new_merged1["day_ahead_price"].shift(336)
df_api_new_merged1.loc[:,"volume_lag96h"] = df_api_new_merged1["volume"].shift(192)
df_api_new_merged1.dropna(inplace=True)
df_api_new_merged1

Unnamed: 0,datetime,market_price,day_ahead_price,volume,settlement_period,cos_hour,cos_day,1,2,3,...,5,6,7,8,9,imbalance_price,market_price_lag96h,imbalance_price_lag96h,day_ahead_price_lag1week,volume_lag96h
336,2024-10-03 22:00:00+00:00,71.32,78.47,1322.45,47.0,0.866025,-0.900969,1355.0,487.0,963.0,...,1330.0,867.0,326.0,1402.0,1357.0,61.00,12.85,-1.4600,7.20,1457.25
337,2024-10-03 22:30:00+00:00,71.42,78.47,1035.65,48.0,0.866025,-0.900969,788.0,584.0,1148.0,...,1007.0,565.0,923.0,797.0,487.0,50.00,-3.95,-1.4600,7.20,1367.60
338,2024-10-03 23:00:00+00:00,66.94,71.36,1378.55,1.0,0.965926,-0.900969,571.0,748.0,1001.0,...,740.0,1017.0,1533.0,597.0,991.0,93.50,-2.74,5.0000,3.87,1473.20
339,2024-10-03 23:30:00+00:00,60.03,71.36,1539.90,2.0,0.965926,-0.900969,1476.0,1194.0,1512.0,...,1054.0,1572.0,744.0,1497.0,463.0,93.50,3.85,5.0000,3.87,1799.05
340,2024-10-04 00:00:00+00:00,65.21,72.80,1818.40,3.0,1.000000,-0.900969,1352.0,1390.0,912.0,...,1505.0,1093.0,814.0,487.0,1340.0,93.00,0.03,-2.4304,0.45,1689.75
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1009,2024-10-21 19:30:00+00:00,71.47,79.79,2221.80,42.0,0.258819,1.000000,263.0,343.0,406.0,...,482.0,501.0,514.0,547.0,590.0,65.00,86.49,71.5200,91.30,2070.40
1010,2024-10-21 20:00:00+00:00,87.87,77.99,2156.50,43.0,0.500000,1.000000,266.0,345.0,405.0,...,485.0,504.0,516.0,551.0,592.0,99.00,81.51,68.0000,85.25,1475.55
1011,2024-10-21 20:30:00+00:00,86.70,77.99,2183.15,44.0,0.500000,1.000000,257.0,341.0,397.0,...,482.0,508.0,515.0,550.0,585.0,99.00,82.97,67.7000,85.25,1298.75
1012,2024-10-21 21:00:00+00:00,71.16,74.71,1754.15,45.0,0.707107,1.000000,249.0,338.0,389.0,...,478.0,508.0,514.0,553.0,587.0,0.94,80.73,70.5000,91.58,1364.60


In [16]:
X_test = df_api_new_merged1[["market_price_lag96h","imbalance_price_lag96h","day_ahead_price_lag1week","volume_lag96h",
                    "cos_hour","cos_day","1","2","3","4","5","6","7","8","9"]].values
X_test = scaler.transform(X_test)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

model.eval()
with torch.no_grad():
    final_test_outputs = model(X_test_tensor)
    final_test_outputs = final_test_outputs.numpy().flatten()

df_api_new_merged1["day_ahead_price_predictions"] = final_test_outputs
df_api_new_merged1

Unnamed: 0,datetime,market_price,day_ahead_price,volume,settlement_period,cos_hour,cos_day,1,2,3,...,6,7,8,9,imbalance_price,market_price_lag96h,imbalance_price_lag96h,day_ahead_price_lag1week,volume_lag96h,day_ahead_price_predictions
336,2024-10-03 22:00:00+00:00,71.32,78.47,1322.45,47.0,0.866025,-0.900969,1355.0,487.0,963.0,...,867.0,326.0,1402.0,1357.0,61.00,12.85,-1.4600,7.20,1457.25,128.894226
337,2024-10-03 22:30:00+00:00,71.42,78.47,1035.65,48.0,0.866025,-0.900969,788.0,584.0,1148.0,...,565.0,923.0,797.0,487.0,50.00,-3.95,-1.4600,7.20,1367.60,81.991608
338,2024-10-03 23:00:00+00:00,66.94,71.36,1378.55,1.0,0.965926,-0.900969,571.0,748.0,1001.0,...,1017.0,1533.0,597.0,991.0,93.50,-2.74,5.0000,3.87,1473.20,83.215240
339,2024-10-03 23:30:00+00:00,60.03,71.36,1539.90,2.0,0.965926,-0.900969,1476.0,1194.0,1512.0,...,1572.0,744.0,1497.0,463.0,93.50,3.85,5.0000,3.87,1799.05,79.186150
340,2024-10-04 00:00:00+00:00,65.21,72.80,1818.40,3.0,1.000000,-0.900969,1352.0,1390.0,912.0,...,1093.0,814.0,487.0,1340.0,93.00,0.03,-2.4304,0.45,1689.75,93.924911
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1009,2024-10-21 19:30:00+00:00,71.47,79.79,2221.80,42.0,0.258819,1.000000,263.0,343.0,406.0,...,501.0,514.0,547.0,590.0,65.00,86.49,71.5200,91.30,2070.40,130.463608
1010,2024-10-21 20:00:00+00:00,87.87,77.99,2156.50,43.0,0.500000,1.000000,266.0,345.0,405.0,...,504.0,516.0,551.0,592.0,99.00,81.51,68.0000,85.25,1475.55,132.365021
1011,2024-10-21 20:30:00+00:00,86.70,77.99,2183.15,44.0,0.500000,1.000000,257.0,341.0,397.0,...,508.0,515.0,550.0,585.0,99.00,82.97,67.7000,85.25,1298.75,134.650955
1012,2024-10-21 21:00:00+00:00,71.16,74.71,1754.15,45.0,0.707107,1.000000,249.0,338.0,389.0,...,508.0,514.0,553.0,587.0,0.94,80.73,70.5000,91.58,1364.60,133.737350


In [17]:
#plot imbalance price and predictions
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_api_new_merged1["datetime"], y=df_api_new_merged1["day_ahead_price"], mode='lines', name='day_ahead_price'))
fig.add_trace(go.Scatter(x=df_api_new_merged1["datetime"], y=df_api_new_merged1["day_ahead_price_predictions"], mode='lines', name='day_ahead_price Predictions'))
fig.update_layout(title='day_ahead_price and Predictions', xaxis_title='Timestamp', yaxis_title='day_ahead_price Price')
fig.show()

In [18]:
#MAE calculation for imbalance price predictions
mae = np.mean(np.abs(df_api_new_merged1["day_ahead_price"] - df_api_new_merged1["day_ahead_price_predictions"]))
print(f"MAE for day_ahead_price predictions: {mae:.4f}")

MAE for day_ahead_price predictions: 53.6012


In [19]:
df_api_new_merged1.drop(columns=["datetime"]).corr()["day_ahead_price"]

market_price                   0.459050
day_ahead_price                1.000000
volume                         0.023106
settlement_period              0.237255
cos_hour                      -0.332983
cos_day                       -0.124310
1                             -0.084179
2                             -0.124610
3                             -0.120535
4                             -0.126306
5                             -0.131031
6                             -0.115995
7                             -0.120230
8                             -0.105632
9                             -0.107018
imbalance_price                0.572484
market_price_lag96h           -0.013696
imbalance_price_lag96h         0.100072
day_ahead_price_lag1week       0.128432
volume_lag96h                  0.139725
day_ahead_price_predictions    0.257556
Name: day_ahead_price, dtype: float64

In [20]:
df_api_new_merged1.columns

Index(['datetime', 'market_price', 'day_ahead_price', 'volume',
       'settlement_period', 'cos_hour', 'cos_day', '1', '2', '3', '4', '5',
       '6', '7', '8', '9', 'imbalance_price', 'market_price_lag96h',
       'imbalance_price_lag96h', 'day_ahead_price_lag1week', 'volume_lag96h',
       'day_ahead_price_predictions'],
      dtype='object')

In [21]:
df_day_ahead_demand = pd.read_csv('D:/Users/paulh\Desktop/Domäneprojekt2/Energy_production_price_prediction/day_ahead_demand_forecast.csv')
df_margin_forecast = pd.read_csv('D:/Users/paulh\Desktop/Domäneprojekt2/Energy_production_price_prediction/margin_forecast.csv')
df_margin_forecast.forecast_date = pd.to_datetime(df_margin_forecast.forecast_date)
df_day_ahead_demand.timestamp_utc = pd.to_datetime(df_day_ahead_demand.timestamp_utc)

In [22]:
df_api_new_merged2 = pd.merge(df_api_new_merged1,df_day_ahead_demand, left_on='datetime', right_on='timestamp_utc', how='left')
df_api_new_merged2["date"] = df_api_new_merged2["datetime"].dt.date
df_api_new_merged2["date"] = pd.to_datetime(df_api_new_merged2["date"])
df_api_new_merged2 = pd.merge(df_api_new_merged2,df_margin_forecast, left_on='date', right_on='forecast_date', how='left')

In [23]:
df_api_new_merged2.dropna(inplace=True)
df_api_new_merged2 = df_api_new_merged2.groupby("timestamp_utc").last().reset_index()
df_api_new_merged2

Unnamed: 0,timestamp_utc,datetime,market_price,day_ahead_price,volume,settlement_period_x,cos_hour,cos_day,1,2,...,settlement_date,settlement_period_y,boundary,publish_time_utc_x,transmission_system_demand,national_demand,date,forecast_date,publish_time_utc_y,margin
0,2024-10-04 00:00:00+00:00,2024-10-04 00:00:00+00:00,65.21,72.80,1818.40,3.0,1.000000,-0.900969,1352.0,1390.0,...,2024-10-04,3,N,2024-10-03T20:45:00Z,22051.0,21551.0,2024-10-04,2024-10-04,2024-10-02T15:00:00Z,9842.0
1,2024-10-04 00:30:00+00:00,2024-10-04 00:30:00+00:00,64.79,72.80,1784.30,4.0,1.000000,-0.900969,1279.0,410.0,...,2024-10-04,4,N,2024-10-03T20:45:00Z,21886.0,21386.0,2024-10-04,2024-10-04,2024-10-02T15:00:00Z,9842.0
2,2024-10-04 01:00:00+00:00,2024-10-04 01:00:00+00:00,64.26,65.30,1647.20,5.0,0.965926,-0.900969,1223.0,378.0,...,2024-10-04,5,N,2024-10-04T00:46:00Z,21486.0,20986.0,2024-10-04,2024-10-04,2024-10-02T15:00:00Z,9842.0
3,2024-10-04 01:30:00+00:00,2024-10-04 01:30:00+00:00,65.61,65.30,1406.60,6.0,0.965926,-0.900969,848.0,1046.0,...,2024-10-04,6,N,2024-10-04T00:46:00Z,21471.0,20971.0,2024-10-04,2024-10-04,2024-10-02T15:00:00Z,9842.0
4,2024-10-04 02:00:00+00:00,2024-10-04 02:00:00+00:00,58.19,71.46,1149.45,7.0,0.866025,-0.900969,1469.0,543.0,...,2024-10-04,7,N,2024-10-04T00:46:00Z,20947.0,20447.0,2024-10-04,2024-10-04,2024-10-02T15:00:00Z,9842.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
661,2024-10-21 19:30:00+00:00,2024-10-21 19:30:00+00:00,71.47,79.79,2221.80,42.0,0.258819,1.000000,263.0,343.0,...,2024-10-21,42,N,2024-10-21T18:15:00Z,32190.0,30136.0,2024-10-21,2024-10-21,2024-10-19T21:00:00Z,16904.0
662,2024-10-21 20:00:00+00:00,2024-10-21 20:00:00+00:00,87.87,77.99,2156.50,43.0,0.500000,1.000000,266.0,345.0,...,2024-10-21,43,N,2024-10-21T18:15:00Z,30604.0,28749.0,2024-10-21,2024-10-21,2024-10-19T21:00:00Z,16904.0
663,2024-10-21 20:30:00+00:00,2024-10-21 20:30:00+00:00,86.70,77.99,2183.15,44.0,0.500000,1.000000,257.0,341.0,...,2024-10-21,44,N,2024-10-21T18:15:00Z,29396.0,27437.0,2024-10-21,2024-10-21,2024-10-19T21:00:00Z,16904.0
664,2024-10-21 21:00:00+00:00,2024-10-21 21:00:00+00:00,71.16,74.71,1754.15,45.0,0.707107,1.000000,249.0,338.0,...,2024-10-21,45,N,2024-10-21T20:45:00Z,27294.0,26052.0,2024-10-21,2024-10-21,2024-10-19T21:00:00Z,16904.0


In [24]:
tesz

NameError: name 'tesz' is not defined

In [25]:
df_api_new_merged2_train = df_api_new_merged2[:400]
df_api_new_merged2_test = df_api_new_merged2[400:]

In [26]:
import torch
import torch.nn as nn

class SimpleModel(nn.Module):
    def __init__(self, input_dim):
        super(SimpleModel, self).__init__()
        
        # Zwei Hidden Layers
        self.fc1 = nn.Linear(input_dim, 128)  # Erster Hidden Layer
        self.fc2 = nn.Linear(128, 64)         # Zweiter Hidden Layer
        
        # Ausgangsschicht
        self.fc3 = nn.Linear(64, 1)           # Ausgangsschicht
        
        # Dropout und Aktivierungsfunktion
        self.dropout = nn.Dropout(0.2)
        self.swish = nn.SiLU()

    def forward(self, x):
        # Durch die Hidden Layers
        x = self.swish(self.fc1(x))  # Erster Hidden Layer
        x = self.dropout(x)
        x = self.swish(self.fc2(x))  # Zweiter Hidden Layer
        x = self.dropout(x)
        
        # Ausgang
        x = self.fc3(x)  # Ausgangsschicht
        
        return x


In [27]:
cobined_X_train = df_api_new_merged2_train[["day_ahead_price_predictions","national_demand","transmission_system_demand","margin"]]
cobined_y_train = df_api_new_merged2_train["day_ahead_price"]

cobined_X_test = df_api_new_merged2_test[["day_ahead_price_predictions","national_demand","transmission_system_demand","margin"]]
cobined_y_test = df_api_new_merged2_test["day_ahead_price"]

cobined_X_train = scaler.fit_transform(cobined_X_train)
cobined_X_test = scaler.transform(cobined_X_test)

with open('scaler_MLP_extension_day_ahead.pkl', 'wb') as f:
    pickle.dump(scaler, f)

In [28]:
combined_X_train_tensor = torch.tensor(cobined_X_train, dtype=torch.float32)
combined_y_train_tensor = torch.tensor(cobined_y_train.values, dtype=torch.float32).view(-1, 1)
combined_X_test_tensor = torch.tensor(cobined_X_test, dtype=torch.float32)
combined_y_test_tensor = torch.tensor(cobined_y_test.values, dtype=torch.float32).view(-1, 1)

# Step 4: Create TensorDataset and DataLoader for training and testing
combined_train_dataset = TensorDataset(combined_X_train_tensor, combined_y_train_tensor)
combined_test_dataset = TensorDataset(combined_X_test_tensor, combined_y_test_tensor)

# DataLoader
combined_train_loader = DataLoader(dataset=combined_train_dataset, batch_size=16, shuffle=False)
combined_test_loader = DataLoader(dataset=combined_test_dataset, batch_size=16, shuffle=False)

# Step 5: Initialize the model, loss function, and optimizer
input_dim = combined_X_train_tensor.shape[1]
model = SimpleModel(input_dim)
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Define MAE loss
mae_loss = nn.L1Loss()

# Training parameters
num_epochs = 100

In [29]:
train_losses = []
test_losses = []

for epoch in range(num_epochs):
    # Training phase
    model.train()
    epoch_loss = 0
    for X_batch, y_batch in combined_train_loader:
        optimizer.zero_grad()
        outputs = model(X_batch)
        loss = mae_loss(outputs, y_batch)  # Use MAE loss
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    
    avg_train_loss = epoch_loss / len(combined_train_loader)
    train_losses.append(avg_train_loss)
    
    # Test phase
    model.eval()
    with torch.no_grad():
        test_outputs = model(combined_X_test_tensor)
        test_loss = mae_loss(test_outputs, combined_y_test_tensor).item()  # Use MAE loss
        test_losses.append(test_loss)
    
    # Print progress
    if (epoch + 1) % 5 == 0:
        print(f"Epoch [{epoch+1}/{num_epochs}], "
              f"Train Loss: {avg_train_loss:.4f}, "
              f"Test Loss: {test_loss:.4f}")
        
torch.save(model.state_dict(), 'MLP_extension_day_ahead.pth')

# Final evaluation
model.eval()
with torch.no_grad():
    final_test_outputs = model(combined_X_test_tensor)
    final_test_loss = mae_loss(final_test_outputs, combined_y_test_tensor).item()  # Use MAE loss
print(f"Final Test Loss: {final_test_loss:.4f}")


Epoch [5/100], Train Loss: 81.2195, Test Loss: 73.7997
Epoch [10/100], Train Loss: 80.7718, Test Loss: 73.3224
Epoch [15/100], Train Loss: 80.0472, Test Loss: 72.5541
Epoch [20/100], Train Loss: 78.8381, Test Loss: 71.3296
Epoch [25/100], Train Loss: 77.0326, Test Loss: 69.5168
Epoch [30/100], Train Loss: 74.7209, Test Loss: 67.1038
Epoch [35/100], Train Loss: 71.5493, Test Loss: 64.1045
Epoch [40/100], Train Loss: 67.5544, Test Loss: 60.7306
Epoch [45/100], Train Loss: 63.1426, Test Loss: 56.9665
Epoch [50/100], Train Loss: 57.9115, Test Loss: 52.7486
Epoch [55/100], Train Loss: 52.2559, Test Loss: 48.0079
Epoch [60/100], Train Loss: 45.1959, Test Loss: 42.7133
Epoch [65/100], Train Loss: 39.0474, Test Loss: 36.9881
Epoch [70/100], Train Loss: 32.5701, Test Loss: 31.3315
Epoch [75/100], Train Loss: 27.8031, Test Loss: 27.1899
Epoch [80/100], Train Loss: 24.5450, Test Loss: 24.5013
Epoch [85/100], Train Loss: 23.7042, Test Loss: 22.9215
Epoch [90/100], Train Loss: 22.3533, Test Loss: 2

In [30]:
model.eval()
with torch.no_grad():
    final_test_outputs = model(combined_X_test_tensor)
    final_test_outputs = final_test_outputs.numpy().flatten()

df_api_new_merged2_test["day_ahead_price_predictions_2"] = final_test_outputs



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [31]:
df_api_new_merged2_test

Unnamed: 0,timestamp_utc,datetime,market_price,day_ahead_price,volume,settlement_period_x,cos_hour,cos_day,1,2,...,settlement_period_y,boundary,publish_time_utc_x,transmission_system_demand,national_demand,date,forecast_date,publish_time_utc_y,margin,day_ahead_price_predictions_2
400,2024-10-16 09:00:00+00:00,2024-10-16 09:00:00+00:00,91.70,86.94,1612.10,21.0,-0.707107,-0.222521,1187.0,1260.0,...,21,N,2024-10-16T08:46:00Z,33501.0,32021.0,2024-10-16,2024-10-16,2024-10-14T21:00:00Z,13209.0,87.072289
401,2024-10-16 09:30:00+00:00,2024-10-16 09:30:00+00:00,91.84,86.94,1853.60,22.0,-0.707107,-0.222521,1223.0,1312.0,...,22,N,2024-10-16T08:46:00Z,33493.0,32013.0,2024-10-16,2024-10-16,2024-10-14T21:00:00Z,13209.0,87.017097
402,2024-10-16 10:00:00+00:00,2024-10-16 10:00:00+00:00,94.58,88.71,1878.35,23.0,-0.866025,-0.222521,1255.0,1378.0,...,23,N,2024-10-16T08:46:00Z,33533.0,32053.0,2024-10-16,2024-10-16,2024-10-14T21:00:00Z,13209.0,87.274467
403,2024-10-16 10:30:00+00:00,2024-10-16 10:30:00+00:00,95.17,88.71,2009.30,24.0,-0.866025,-0.222521,1281.0,1430.0,...,24,N,2024-10-16T08:46:00Z,33572.0,32092.0,2024-10-16,2024-10-16,2024-10-14T21:00:00Z,13209.0,87.475113
404,2024-10-16 11:00:00+00:00,2024-10-16 11:00:00+00:00,97.75,89.47,1846.55,25.0,-0.965926,-0.222521,1316.0,1449.0,...,25,N,2024-10-16T08:46:00Z,33880.0,32400.0,2024-10-16,2024-10-16,2024-10-14T21:00:00Z,13209.0,89.498344
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
661,2024-10-21 19:30:00+00:00,2024-10-21 19:30:00+00:00,71.47,79.79,2221.80,42.0,0.258819,1.000000,263.0,343.0,...,42,N,2024-10-21T18:15:00Z,32190.0,30136.0,2024-10-21,2024-10-21,2024-10-19T21:00:00Z,16904.0,78.744644
662,2024-10-21 20:00:00+00:00,2024-10-21 20:00:00+00:00,87.87,77.99,2156.50,43.0,0.500000,1.000000,266.0,345.0,...,43,N,2024-10-21T18:15:00Z,30604.0,28749.0,2024-10-21,2024-10-21,2024-10-19T21:00:00Z,16904.0,71.915565
663,2024-10-21 20:30:00+00:00,2024-10-21 20:30:00+00:00,86.70,77.99,2183.15,44.0,0.500000,1.000000,257.0,341.0,...,44,N,2024-10-21T18:15:00Z,29396.0,27437.0,2024-10-21,2024-10-21,2024-10-19T21:00:00Z,16904.0,67.556808
664,2024-10-21 21:00:00+00:00,2024-10-21 21:00:00+00:00,71.16,74.71,1754.15,45.0,0.707107,1.000000,249.0,338.0,...,45,N,2024-10-21T20:45:00Z,27294.0,26052.0,2024-10-21,2024-10-21,2024-10-19T21:00:00Z,16904.0,63.244022


In [32]:
#plot imbalance price and predictions
fig = go.Figure()
fig.add_trace(go.Scatter(x=df_api_new_merged2_test["timestamp_utc"], y=df_api_new_merged2_test["day_ahead_price"], mode='lines', name='day_ahead_price'))
fig.add_trace(go.Scatter(x=df_api_new_merged2_test["timestamp_utc"], y=df_api_new_merged2_test["day_ahead_price_predictions_2"], mode='lines', name='day_ahead_price Predictions'))

In [33]:
#MAE calculation for imbalance price predictions
mae = np.mean(np.abs(df_api_new_merged2_test["day_ahead_price"] - df_api_new_merged2_test["day_ahead_price_predictions_2"]))
mae

20.980847531655677

In [34]:
df_api_new_merged2_test.day_ahead_price.corr(df_api_new_merged2_test.day_ahead_price_predictions_2)

0.40011607984925457

In [35]:
df_api_new_merged2_test.columns

Index(['timestamp_utc', 'datetime', 'market_price', 'day_ahead_price',
       'volume', 'settlement_period_x', 'cos_hour', 'cos_day', '1', '2', '3',
       '4', '5', '6', '7', '8', '9', 'imbalance_price', 'market_price_lag96h',
       'imbalance_price_lag96h', 'day_ahead_price_lag1week', 'volume_lag96h',
       'day_ahead_price_predictions', 'settlement_date', 'settlement_period_y',
       'boundary', 'publish_time_utc_x', 'transmission_system_demand',
       'national_demand', 'date', 'forecast_date', 'publish_time_utc_y',
       'margin', 'day_ahead_price_predictions_2'],
      dtype='object')

In [36]:
#correlation of day_ahead price with every other feature

df_api_new_merged2_test.drop(columns=["datetime","timestamp_utc","publish_time_utc_x","publish_time_utc_y","forecast_date","boundary","settlement_date"]).corr()["day_ahead_price"]

market_price                     0.879251
day_ahead_price                  1.000000
volume                           0.311159
settlement_period_x              0.215999
cos_hour                        -0.223931
cos_day                         -0.511324
1                                0.016275
2                               -0.049474
3                               -0.053590
4                               -0.061665
5                               -0.049642
6                               -0.036269
7                               -0.019980
8                                0.003853
9                                0.028839
imbalance_price                  0.669219
market_price_lag96h             -0.193040
imbalance_price_lag96h           0.174814
day_ahead_price_lag1week         0.254057
volume_lag96h                   -0.119286
day_ahead_price_predictions      0.323696
settlement_period_y              0.215999
transmission_system_demand       0.516814
national_demand                  0

In [37]:
df_api_new_merged2_train.drop(columns=["datetime","timestamp_utc","publish_time_utc_x","publish_time_utc_y","forecast_date","boundary","settlement_date"]).corr()["day_ahead_price"]

market_price                   0.225837
day_ahead_price                1.000000
volume                        -0.056094
settlement_period_x            0.254632
cos_hour                      -0.416987
cos_day                        0.117433
1                             -0.167084
2                             -0.182620
3                             -0.169609
4                             -0.173727
5                             -0.181057
6                             -0.163089
7                             -0.173848
8                             -0.157236
9                             -0.163722
imbalance_price                0.562994
market_price_lag96h            0.178042
imbalance_price_lag96h         0.044392
day_ahead_price_lag1week       0.035650
volume_lag96h                  0.368316
day_ahead_price_predictions    0.245695
settlement_period_y            0.254632
transmission_system_demand     0.573296
national_demand                0.608243
date                          -0.075084
