In [23]:
import math
import pickle
from datetime import datetime
from datetime import timedelta

import numpy as np
import pandas as pd
import torch
from matplotlib import pyplot as plt
from prophet import Prophet
from prophet.plot import add_changepoints_to_plot
from prophet.serialize import model_to_json, model_from_json
from prophet.utilities import regressor_coefficients
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from torch.utils.data import DataLoader, TensorDataset

In [17]:
# Input locations: point these at your test data files and your model file.
# NOTE(review): "Avtice-losses.csv" looks like a misspelling of "Active" --
# confirm against the file on disk before renaming anything.
activelosses = "../data/Avtice-losses.csv"
renewablegen = "../data/Forecast-renewable-generation.csv"
forecasttemp = "../data/Forecast-temperature.csv"
ntc = "../data/NTC.csv"
# NOTE(review): `modell` is not referenced by the cells visible below, which
# load "group2_seq2seq.pkl" instead -- confirm which model file is intended.
modell = "modell_group_02_v01.json"

In [18]:
# Preprocess data

In [13]:
def _load_time_indexed_csv(path, time_column, skiprows=0, shift=None):
    """Read a CSV, parse ``time_column`` as datetimes and index by the first column.

    ``shift`` is an optional ``pd.Timedelta`` added to the parsed timestamps.
    Rows whose index value repeats an earlier one are dropped (first one kept).
    """
    frame = pd.read_csv(path, skiprows=skiprows)
    stamps = pd.to_datetime(frame[time_column])
    frame[time_column] = stamps if shift is None else stamps + shift
    frame = frame.set_index(frame.columns[0])
    return frame[~frame.index.duplicated(keep='first')]


def preprocess_and_merge_data(activelosses1, renewablegen1, forecasttemp1, ntc1):
    """Load the four input CSVs, fill gaps and merge everything on an hourly grid.

    Parameters
    ----------
    activelosses1 : str or path-like
        Active-losses CSV. Its first line is skipped; the remaining table must
        have ``Zeitstempel`` as first column and a ``kWh`` column.
    renewablegen1, forecasttemp1, ntc1 : str or path-like
        CSVs whose first column is named ``datetime``.

    Returns
    -------
    pandas.DataFrame
        Outer join (on timestamp) of the hourly summed active losses
        (``kWh`` plus the derived ``MWh``), the renewable forecast, the
        temperature forecast and the NTC values. The timestamp index is turned
        into an ordinary column by ``reset_index()``.

    Notes
    -----
    * Polynomial interpolation is delegated to pandas, which needs SciPy for it.
    * The temperature row for 2019-01-01 00:00 is copied from 01:00 only when
      that timestamp exists, so data from other periods no longer raises
      ``KeyError``.
    """
    # Active losses: timestamps are moved back by 15 minutes
    # (presumably they mark the END of each interval -- TODO confirm),
    # laid on a regular 15-minute grid, gap-filled and summed per hour.
    losses = _load_time_indexed_csv(
        activelosses1, 'Zeitstempel', skiprows=1, shift=pd.Timedelta(minutes=-15)
    )
    losses = losses.assign(MWh=losses['kWh'] / 1000)
    losses = losses.resample('15min').asfreq()
    losses = losses.interpolate(method='polynomial', order=2)
    losses_hourly = losses.resample('h').sum()

    # Renewable generation forecast: hourly grid, quadratic gap filling.
    renew = _load_time_indexed_csv(renewablegen1, 'datetime')
    renew = renew.resample('h').asfreq().interpolate(method='polynomial', order=2)

    # Temperature forecast: hourly grid, order-1 polynomial gap filling.
    temp_hourly = _load_time_indexed_csv(forecasttemp1, 'datetime')
    temp_hourly = temp_hourly.resample('h').asfreq().interpolate(method='polynomial', order=1)

    # Dataset-specific patch: give 2019-01-01 00:00 the values of 01:00.
    # Guarded so that data not containing that hour is left untouched.
    patch_source = pd.Timestamp('2019-01-01 01:00:00')
    if patch_source in temp_hourly.index:
        temp_hourly.loc[pd.Timestamp('2019-01-01 00:00:00')] = temp_hourly.loc[patch_source]

    # NTC values: hourly grid, quadratic gap filling.
    ntc_hourly = _load_time_indexed_csv(ntc1, 'datetime')
    ntc_hourly = ntc_hourly.resample('h').asfreq().interpolate(method='polynomial', order=2)

    # Outer joins keep every timestamp present in any of the four sources.
    merged_df = losses_hourly
    for other in (renew, temp_hourly, ntc_hourly):
        merged_df = pd.merge(merged_df, other, left_index=True, right_index=True, how='outer')

    return merged_df.reset_index()
    
    

# Build the merged hourly frame from the four configured input files and show it.
df = preprocess_and_merge_data(
    activelosses1=activelosses,
    renewablegen1=renewablegen,
    forecasttemp1=forecasttemp,
    ntc1=ntc,
)
display(df)

Unnamed: 0,index,kWh,MWh,solar_fore_de [MW],solar_fore_it [MW],wind_fore_de [MW],wind_fore_it [MW],temperature_fore_ch,temperature_fore_fr,temperature_fore_de,temperature_fore_it,CH_AT,CH_DE,CH_FR,CH_IT,AT_CH,DE_CH,FR_CH,IT_CH
0,2019-01-01 00:00:00,139525.003695,139.525004,0.0,0.0,21344.8514,4302.6977,4.1067,5.972900,7.426800,4.028100,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
1,2019-01-01 01:00:00,129716.036003,129.716036,0.0,0.0,23052.3310,4596.5916,4.1067,5.972900,7.426800,4.028100,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
2,2019-01-01 02:00:00,133398.074458,133.398074,0.0,0.0,24969.9701,4478.5564,3.7155,5.900717,7.246083,3.811433,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
3,2019-01-01 03:00:00,135133.851731,135.133852,0.0,0.0,27082.9626,4323.3712,3.3243,5.828533,7.065367,3.594767,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
4,2019-01-01 04:00:00,131699.424059,131.699424,0.0,0.0,26890.9717,4231.8283,2.9331,5.756350,6.884650,3.378100,700.0,4000.0,1200.0,2513.0,1200.0,800.0,3000.0,1910.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26299,2021-12-31 19:00:00,171707.317615,171.707318,0.0,0.0,36997.7200,1108.4000,8.5300,9.960000,10.790000,9.590000,1200.0,4000.0,1400.0,4069.0,1200.0,800.0,3200.0,1810.0
26300,2021-12-31 20:00:00,159462.903412,159.462903,0.0,0.0,35666.9300,1077.9700,8.0000,9.400000,10.630000,9.110000,1200.0,4000.0,1400.0,4069.0,1200.0,800.0,3200.0,1810.0
26301,2021-12-31 21:00:00,155109.519730,155.109520,0.0,0.0,34383.8800,1048.2800,7.5000,8.880000,10.510000,8.670000,1200.0,4000.0,1400.0,3953.0,1200.0,800.0,3200.0,1810.0
26302,2021-12-31 22:00:00,171370.276941,171.370277,0.0,0.0,33075.2500,1078.7800,6.9700,8.510000,10.320000,8.140000,1200.0,4000.0,1400.0,3780.0,1200.0,800.0,3200.0,1810.0


In [None]:
# Keep only the MWh column (kWh is the same quantity times 1000) and give the
# timestamp column produced by reset_index() a descriptive name.
# errors='ignore' keeps this cell re-runnable: on a second run 'kWh' is already
# gone and a plain drop would raise KeyError (rename ignores missing labels).
df = (
    df.drop(columns=['kWh'], errors='ignore')
      .rename(columns={'index': 'datetime'})
)
df['datetime'] = pd.to_datetime(df['datetime'])
df.head()

In [None]:
dat = df.copy()

# Calendar features are encoded against FIXED category lists, so the one-hot
# layout (12 months + 7 weekdays + 24 hours) does not depend on which values
# happen to occur in the data. With plain get_dummies a test set that misses,
# say, some months would produce fewer feature columns than a full-year set.
# On data that contains every month, weekday and hour the result is unchanged.
weekday_categories = list(range(7))        # 0 = Monday, 6 = Sunday
month_categories = list(range(1, 13))
hour_categories = [pd.Timestamp(2000, 1, 1, hour).time() for hour in range(24)]

dat['weekday'] = pd.Categorical(dat['datetime'].dt.weekday, categories=weekday_categories)
dat['month'] = pd.Categorical(dat['datetime'].dt.month, categories=month_categories)
# NOTE: timestamps that are not on a full hour match no category and end up
# with all 24 time_* indicators equal to zero.
dat['time'] = pd.Categorical(dat['datetime'].dt.time, categories=hour_categories)

# One-hot encode month, weekday and time of day (one column per category,
# including categories that do not occur in the data).
df_encoded = pd.get_dummies(dat, columns=['month', 'weekday', "time"], prefix=['month', 'weekday', 'time'])

# The raw timestamp is not a model feature.
df_encoded = df_encoded.drop(["datetime"], axis=1)

In [None]:
data = df_encoded

# Window geometry: each sample uses the previous 7 days of hourly features to
# predict the next 24 hourly MWh values.
input_seq_length = 7 * 24  # Previous 7 days
output_seq_length = 24     # Next 24 hours

# Split features and target once instead of once per window.
feature_frame = data.drop(columns=['MWh'])
target_series = data['MWh']

input_sequences = []
output_sequences = []

# Windows start every `output_seq_length` rows, so the forecast blocks of
# consecutive samples do not overlap; the last start still leaves room for a
# full input window plus a full output window.
last_start = len(data) - input_seq_length - output_seq_length
for start in range(0, last_start + 1, output_seq_length):
    split = start + input_seq_length
    input_sequences.append(feature_frame.iloc[start:split].values)
    output_sequences.append(target_series.iloc[split:split + output_seq_length].values)

# Stack into arrays:
#   input_sequences  -> (num_samples, input_seq_length, num_features)
#   output_sequences -> (num_samples, output_seq_length)
input_sequences = np.array(input_sequences)
output_sequences = np.array(output_sequences)


In [None]:

# Cast to float32 first: the encoded frame may mix bool/int/float columns, and
# the tensors below are created as float32.
input_test = input_sequences.astype(np.float32)
output_test = output_sequences.astype(np.float32)

# Wrap the arrays as tensors and pair each input window with its target window.
input_test_tensor = torch.tensor(input_test, dtype=torch.float32)
output_test_tensor = torch.tensor(output_test, dtype=torch.float32)
test_dataset = TensorDataset(input_test_tensor, output_test_tensor)

# Number of windows evaluated per forward pass; adjust to the available memory.
batch_size = 64

# shuffle=False keeps the batches in chronological window order, so the
# concatenated predictions line up with the order of `output_sequences`.
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Path of the pickled model to evaluate; replace with your own file if needed.
model_path = "group2_seq2seq.pkl"

# SECURITY: pickle.load can execute arbitrary code stored in the file -- only
# load model files that come from a source you trust.
# NOTE(review): unpickling presumably requires the model's class definition to
# be importable in this kernel -- confirm before a Restart & Run All.
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

In [None]:
model.eval()

# Evaluate on the device the model's weights already live on, so the batches
# can be moved next to the model without relying on a `device` variable left
# over from another notebook/session.
device = next(model.parameters()).device

abs_error_sum = 0.0   # sum of |prediction - target| over every element seen
n_elements = 0        # number of elements contributing to abs_error_sum
all_predictions = []  # per-batch prediction arrays
all_y_test = []       # per-batch target arrays

with torch.no_grad():
    for batch_input, batch_target in test_loader:
        batch_input = batch_input.to(device)
        batch_target = batch_target.to(device)

        # Forward pass
        predictions = model(batch_input)

        all_predictions.append(predictions.cpu().numpy())
        all_y_test.append(batch_target.cpu().numpy())

        # Accumulate element-wise so that a smaller final batch is weighted by
        # its actual size (averaging per-batch means would over-weight it).
        abs_error = torch.abs(predictions - batch_target)
        abs_error_sum += abs_error.sum().item()
        n_elements += abs_error.numel()

if n_elements == 0:
    raise ValueError("test_loader produced no samples; cannot compute the MAE")

# Mean absolute error over the entire test set
average_mae = abs_error_sum / n_elements

print(f'Average MAE on Test Set: {average_mae:.4f}')


# Concatenate predictions from all batches into a single numpy array
all_predictions = np.concatenate(all_predictions, axis=0)