In [1]:
import math
from datetime import datetime
from datetime import timedelta
import pandas as pd
from prophet import Prophet
from prophet.plot import add_changepoints_to_plot
from prophet.utilities import regressor_coefficients
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error
from matplotlib import pyplot as plt
from prophet.serialize import model_to_json, model_from_json

In [2]:
# Configuration: point these at your test data files and the serialized model.
# Input CSV files consumed by preprocess_and_merge_data.
activelosses = "../data/Avtice-losses.csv"
renewablegen = "../data/Forecast-renewable-generation.csv"
forecasttemp = "../data/Forecast-temperature.csv"
ntc = "../data/NTC.csv"
# JSON file holding the serialized model.
modell = "modell_group_02_v01.json"

In [3]:
# Preprocess data

In [4]:
def _load_time_indexed_csv(path, time_col, skiprows=0, shift=None):
    """Read a CSV and return it indexed by its first column with unique labels.

    Args:
        path: Location of the CSV file.
        time_col: Name of the column to parse with ``pd.to_datetime``.
        skiprows: Number of leading lines to skip before the header row.
        shift: Optional ``pd.Timedelta`` subtracted from the parsed timestamps.

    Returns:
        DataFrame indexed by the file's first column; where an index label
        repeats, only the first row is kept.
    """
    frame = pd.read_csv(path, skiprows=skiprows)
    stamps = pd.to_datetime(frame[time_col])
    frame[time_col] = stamps if shift is None else stamps - shift
    frame = frame.set_index(frame.columns[0])
    # resample(...).asfreq() needs unique index labels, so drop repeats here.
    return frame[~frame.index.duplicated(keep='first')]


def preprocess_and_merge_data(activelosses1, renewablegen1, forecasttemp1, ntc1):
    """Load the four input CSV files, regularize them to hourly data and merge them.

    Processing steps:
      * Active losses: the first line of the file is skipped, 'Zeitstempel' is
        parsed and moved back by 15 minutes (presumably the stamps mark the end
        of each interval -- TODO confirm), an 'MWh' column is derived as
        'kWh' / 1000, the series is put on a 15-minute grid, gaps are filled by
        second-order polynomial interpolation, and the result is summed per hour.
      * Renewable forecast and NTC: 'datetime' is parsed, the data is put on
        an hourly grid and gaps are filled by second-order polynomial
        interpolation.
      * Temperature forecast: same as above; in addition, if the row
        2019-01-01 01:00 exists it is copied to 2019-01-01 00:00.

    Args:
        activelosses1: Path to the active-losses CSV ('Zeitstempel', 'kWh').
        renewablegen1: Path to the renewable-generation forecast CSV ('datetime').
        forecasttemp1: Path to the temperature forecast CSV ('datetime').
        ntc1: Path to the NTC CSV ('datetime').

    Returns:
        DataFrame with the merged index turned back into a regular column
        (outer join on the timestamp index of all four inputs).

    Raises:
        KeyError: If one of the expected columns is missing from a file.
    """
    # Active losses arrive at quarter-hour resolution.
    losses = _load_time_indexed_csv(
        activelosses1, 'Zeitstempel', skiprows=1, shift=pd.Timedelta(minutes=15)
    )
    losses = losses.assign(MWh=losses['kWh'] / 1000)
    # '15min' / 'h' replace the deprecated '15T' / 'H' frequency aliases.
    losses = losses.resample('15min').asfreq().interpolate(method='polynomial', order=2)
    losses_hourly = losses.resample('h').sum()

    renew_hourly = (
        _load_time_indexed_csv(renewablegen1, 'datetime')
        .resample('h').asfreq()
        .interpolate(method='polynomial', order=2)
    )

    temp_hourly = (
        _load_time_indexed_csv(forecasttemp1, 'datetime')
        .resample('h').asfreq()
        .interpolate(method='polynomial', order=2)
    )
    # Copy the 01:00 values to 00:00 on 2019-01-01. Only do so when the source
    # row exists, so that data from other periods does not raise a KeyError.
    backfill_source = pd.Timestamp('2019-01-01 01:00:00')
    if backfill_source in temp_hourly.index:
        temp_hourly.loc[pd.Timestamp('2019-01-01 00:00:00')] = temp_hourly.loc[backfill_source]

    ntc_hourly = (
        _load_time_indexed_csv(ntc1, 'datetime')
        .resample('h').asfreq()
        .interpolate(method='polynomial', order=2)
    )

    # Outer joins keep every timestamp that appears in any of the inputs.
    merged_df = losses_hourly
    for other in (renew_hourly, temp_hourly, ntc_hourly):
        merged_df = pd.merge(merged_df, other, left_index=True, right_index=True, how='outer')

    return merged_df.reset_index()

# Build the merged frame from the four CSV paths set in the configuration cell.
df = preprocess_and_merge_data(
    activelosses,
    renewablegen,
    forecasttemp,
    ntc,
)

In [49]:
def preprocess_data(df):
    """Reduce the merged frame to a time-sorted 'ds' / 'MWh' table.

    The input frame is left untouched; all work happens on a derived copy.

    Args:
        df: DataFrame with an 'index' column holding timestamps and an 'MWh'
            column. Any further columns only influence which rows are dropped.

    Returns:
        DataFrame with the columns 'ds' (timestamps parsed from 'index') and
        'MWh', sorted by 'ds'. Rows that contain a missing value in any column
        of the input are excluded.
    """
    data = (
        # assign() works on a copy, so the caller's frame does not gain a 'ds' column.
        df.assign(ds=pd.to_datetime(df['index'], format='%Y-%m-%d %H:%M:%S'))
        .drop(columns=['index'])
        .sort_values(by='ds')
        # Incomplete rows are removed before the column selection, so gaps in
        # the other columns also remove the corresponding 'MWh' rows.
        .dropna()
    )

    return data[["ds", "MWh"]]


# Reduce the merged frame to the 'ds' / 'MWh' columns and render the result.
df_final = preprocess_data(df)
display(df_final)

Unnamed: 0,ds,MWh
0,2019-01-01 00:00:00,139.525004
1,2019-01-01 01:00:00,129.716036
2,2019-01-01 02:00:00,133.398074
3,2019-01-01 03:00:00,135.133852
4,2019-01-01 04:00:00,131.699424
...,...,...
26299,2021-12-31 19:00:00,171.707318
26300,2021-12-31 20:00:00,159.462903
26301,2021-12-31 21:00:00,155.109520
26302,2021-12-31 22:00:00,171.370277


In [43]:
# Load the serialized model from the path set in the configuration cell.
# `modell` is the name defined there; `modellort` is never defined in this notebook.
with open(modell, 'r') as fin:
    m = model_from_json(fin.read())  # Load model

In [None]:
# Predict with the loaded model and evaluate the results below: