In [None]:
%run dataImport.ipynb
%run Plots.ipynb
%run HelperFunctions.ipynb
%run tsMetrics.ipynb
from tabulate import tabulate
import pandas as pd
import numpy as np

In [None]:
# The underlying dataset could not be published for data-privacy reasons,
# so the three CSV files below have to be supplied locally.
df_or, df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in ("time_series.csv", "train_df.csv", "test_df.csv")
)
# Forecast horizon: number of time steps every framework below has to predict.
# NOTE(review): the original note says "predict quarterly" -- presumably 360
# steps correspond to one quarter; confirm against the sampling rate.
time_horizon = 360

In [None]:
# Raw (not yet normalized) target values and their range. These are captured
# before any scaling so forecasts can later be mapped back to the original
# scale via denormalize().
energy_demand = df_or["Aktueller_Strombezug_kW"].to_numpy()
min_energy, max_energy = energy_demand.min(), energy_demand.max()

In [None]:
def normalize(df, var):
    """Min-max scale column ``var`` of ``df`` in place.

    The column is rescaled with its own minimum and maximum, i.e.
    ``(x - min) / (max - min)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the column; the column is overwritten.
    var : str
        Name of the column to rescale.

    Returns
    -------
    None
        The result is written back to ``df[var]``.

    Notes
    -----
    A constant column has a value range of zero; the plain formula would
    compute 0/0 and fill the column with NaN. Such a column is mapped to 0.0
    instead (missing values stay missing).
    """
    column = df[var]
    lowest = column.min()
    span = column.max() - lowest
    if span == 0:
        # Constant column: avoid the 0/0 -> NaN result of the plain formula.
        df[var] = column.where(column.isna(), 0.0).astype(float)
        return
    df[var] = (column - lowest) / span

def denormalize(ynorm, maximum, minimum):
    """Undo min-max scaling.

    Maps a normalized value (or array of values) ``ynorm`` back to the
    original scale described by ``minimum`` and ``maximum`` and returns it.
    """
    value_range = maximum - minimum
    return ynorm * value_range + minimum

# Scale all three frames with the bounds of the full series (min_energy /
# max_energy). normalize() would scale each frame by its *own* min/max, but
# every forecast below is mapped back with denormalize(..., max_energy,
# min_energy); using different bounds for scaling and un-scaling distorts the
# forecasts whenever the train/test extremes differ from the full series.
# NOTE(review): assumes df_train/df_test are slices of df_or -- confirm.
# The column is replaced by a new Series (no in-place arithmetic), so the raw
# `energy_demand` array taken from df_or earlier is left untouched.
target_range = max_energy - min_energy
for frame in (df_or, df_train, df_test):
    frame["Aktueller_Strombezug_kW"] = (
        frame["Aktueller_Strombezug_kW"] - min_energy
    ) / target_range

In [None]:
# One run per seed (1..10) for every AutoML framework below.
seeds = list(range(1, 11))

# FLAML

In [None]:
# Parse the timestamp column of the training frame into datetime values.
df_train["Datetime"] = pd.to_datetime(df_train["Datetime"], infer_datetime_format=True)

In [None]:
from flaml import AutoML
from datetime import datetime as dt
import matplotlib.pyplot as plt



import os

# FLAML experiment: fit one AutoML model per seed, forecast the test horizon,
# collect metrics/configs/forecasts, save one plot per run and dump the
# collected results to plots/FLAML/.

# One metrics row per run.
df_metrics_FLAML = pd.DataFrame(columns=['MSE', 'RMSE', 'MAE', 'MAPE', "Time"])
FLAML_configs = []      # best hyper-parameter config of each run
FLAML_estimators = []   # best estimator of each run, aligned with FLAML_configs
forecasts_f = []        # denormalized forecast of each run

# Create the output folder up front so a long fit is not lost on savefig.
os.makedirs("plots/FLAML", exist_ok=True)

# Seed-independent values are computed once.
# Ground truth on the original scale: the last `time_horizon` raw observations.
y_FLAML = energy_demand[-time_horizon:]
tick_positions = ['2023-01-16', '2023-01-31', '2023-02-15', '2023-02-28', 
                '2023-03-15', '2023-03-31', '2023-04-15']

counter = 0
for seed in seeds:
    np.random.seed(seed)
    # initialize instance
    automl = AutoML()
    # settings for fitting the model
    settings = {
        "time_budget": 1800,
        "metric": "rmse",  # add metric
        "task": "ts_forecast",  # define task
        "gpu_per_trial": 1,  # add gpu usage
        "period": time_horizon,
        "seed": seed  # for replication
    }

    # start time measurement
    start_time_FLAML = dt.now()

    # fit the model
    automl.fit(dataframe=df_train, label="Aktueller_Strombezug_kW", **settings, cache=False)

    # end time measurement
    end_time_FLAML = dt.now()
    elapsed_seconds_FLAML = (end_time_FLAML - start_time_FLAML).total_seconds()
    elapsed_min_FLAML = divmod(elapsed_seconds_FLAML, 60)
    time_passed_FLAML = f"{int(elapsed_min_FLAML[0])}m{int(elapsed_min_FLAML[1])}s"
    print("Total fitting time: ", f"{int(elapsed_min_FLAML[0])}m{int(elapsed_min_FLAML[1])}s")

    # Test features: everything except the target column.
    x_test = df_test.drop("Aktueller_Strombezug_kW", axis=1)
    date_FLAML = np.array(x_test["Datetime"].to_numpy(), dtype='datetime64')

    # Make the predictions and map them back to the original scale in one
    # vectorized step. (The former element-wise `for i in ...` loop also
    # overwrote the seed loop variable.)
    y_pred_FLAML = denormalize(
        np.asarray(automl.predict(x_test), dtype=float), max_energy, min_energy
    )
    forecasts_f.append(y_pred_FLAML)

    # calculate the metrics
    metric_FLAML, mse, rmse, mae, mape = calculate_metrics(y_FLAML, y_pred_FLAML)
    row = {"MSE": mse, 'RMSE': rmse, 'MAE': mae, "MAPE": mape, "Time": time_passed_FLAML}
    # save the metrics
    df_metrics_FLAML.loc[counter] = row

    # plot actual vs. forecast and save the figure
    name = "plots/FLAML/FLAML" + str(counter)
    plt.figure(figsize=(20, 8))
    plt.plot(date_FLAML, y_FLAML, label='Energy Demand')
    plt.plot(date_FLAML, y_pred_FLAML, label='FLAML Forecast')
    plt.xlabel('Date')
    plt.ylabel('Aktueller_Strombezug_kW')
    plt.xticks(tick_positions, tick_positions)
    plt.legend()
    plt.savefig(name)

    counter += 1

    # Remember the winning estimator together with its config for this run.
    best_config = automl.best_config
    FLAML_configs.append(best_config)
    FLAML_estimators.append(automl.best_estimator)

# save the metrics in a csv for later calculations
df_metrics_FLAML.to_csv('plots/FLAML/FLAML_Metrics.csv', index=False)

# Save the best estimator and config of every run. Previously the estimator of
# the *last* run was written in front of every key of every config.
filename = "plots/FLAML/bestConfigs.txt"
with open(filename, 'w') as file:
    for estimator, config in zip(FLAML_estimators, FLAML_configs):
        file.write(f"best_estimator: {estimator}\n")
        # best_config may be empty/None if no model was trained in the budget
        for key, value in (config or {}).items():
            file.write(f"{key}: {value}\n")
            file.write('\n')
        file.write("---------------------")
        file.write("\n")

# Save the denormalized forecasts, one block per run.
filename = "plots/FLAML/forecasts.txt"
with open(filename, 'w') as file:
    for forcast in forecasts_f:
        file.write(str(forcast))
        file.write('\n')
        file.write("---------------------")
        file.write('\n')

# Autogluon

In [None]:
# AutoGluon works on one frame holding train and test rows back to back.
dfa = pd.concat([df_train, df_test], ignore_index=True)

# AutoGluon identifies each time series by an id column; there is only one
# series here, so every row gets the same id (1).
le = dfa.shape[0]
idx = [1 for _ in range(le)]
dfa["index"] = idx

# Parse the timestamps into datetime values.
dfa["Datetime"] = pd.to_datetime(dfa["Datetime"], infer_datetime_format=True)


In [None]:
from autogluon.timeseries import TimeSeriesDataFrame, TimeSeriesPredictor
from datetime import datetime as dt
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import os

# AutoGluon experiment: fit one predictor per seed, forecast the held-out
# horizon, collect metrics and leaderboards, save one plot per run and write
# the collected results to plots/auto/.

model_columns = ["model","score_test","score_val","pred_time_test","pred_time_val","fit_time_marginal","fit_order"]
df_metrics_auto = pd.DataFrame(columns=['MSE', 'RMSE', 'MAE', 'MAPE', "Time"])
leaderboards_auto = []  # one leaderboard frame per run, concatenated at the end

# Create the output folder up front so a long fit is not lost on savefig.
os.makedirs("plots/auto", exist_ok=True)

# Seed-independent values are computed once.
# Ground truth on the original scale: the last `time_horizon` raw observations.
y_auto = energy_demand[-time_horizon:]
date_auto = np.array(df_test["Datetime"].to_numpy(), dtype='datetime64')
tick_positions = ['2023-01-16', '2023-01-31', '2023-02-15', '2023-02-28', 
                '2023-03-15', '2023-03-31', '2023-04-15']

counter = 0

for seed in seeds:
    np.random.seed(seed)  # set seed
    data_auto = TimeSeriesDataFrame.from_data_frame(
        dfa,
        timestamp_column="Datetime",
        id_column="index"
    )

    prediction_length_a = time_horizon
    # Hold out the last `prediction_length_a` steps as test data.
    train_data_a, test_data_a = data_auto.train_test_split(prediction_length_a)

    predictor_a = TimeSeriesPredictor(
        prediction_length=prediction_length_a,
        target="Aktueller_Strombezug_kW",
        eval_metric="MSE"
    )

    start_time_auto = dt.now()

    predictor_a.fit(
        train_data_a,
        time_limit=1800,  # half an hour
        presets="best_quality",  # get best model quality
        # Pass the seed to AutoGluon itself; np.random.seed alone does not
        # reach it, so the per-seed runs would not actually differ by seed.
        random_seed=seed
    )

    end_time_auto = dt.now()
    elapsed_seconds_auto = (end_time_auto - start_time_auto).total_seconds()
    elapsed_min_auto = divmod(elapsed_seconds_auto, 60)
    time_passed_auto = f"{int(elapsed_min_auto[0])}m{int(elapsed_min_auto[1])}s"
    print("Total fitting time: ", f"{int(elapsed_min_auto[0])}m{int(elapsed_min_auto[1])}s")

    predictions_auto = predictor_a.predict(train_data_a)

    # Mean forecast mapped back to the original scale in one vectorized step.
    # (The former element-wise `for i in ...` loop overwrote the seed variable.)
    pred_auto = denormalize(
        predictions_auto["mean"].to_numpy(dtype=float), max_energy, min_energy
    )

    table_auto, mse_a, rmse_a, mae_a, mape_a = calculate_metrics(y_auto, pred_auto)
    row_a = {"MSE": mse_a, 'RMSE': rmse_a, 'MAE': mae_a, "MAPE": mape_a, "Time": time_passed_auto}
    # save the metrics
    df_metrics_auto.loc[counter] = row_a

    # Leaderboard of all models trained in this run, scored on the test data.
    trained_models_a = predictor_a.leaderboard(test_data_a, silent=True)
    leaderboards_auto.append(trained_models_a)

    # Print the leaderboard as a LaTeX table (first row is the header).
    table_a = [list(model_columns)] + list(trained_models_a.to_numpy())
    latex_table_a = tabulate(table_a, headers="firstrow", tablefmt='latex')
    print(latex_table_a)

    # plot actual vs. forecast and save the figure
    plt.figure(figsize=(20, 8))
    plt.plot(date_auto, y_auto, label='Energy Demand')
    plt.plot(date_auto, pred_auto, label='Autogluon Forecast')
    plt.xlabel('Date')
    plt.ylabel('Aktueller_Strombezug_kW')
    plt.xticks(tick_positions, tick_positions)
    plt.legend()
    plt.savefig("plots/auto/auto" + str(counter))

    counter += 1

# Concatenate all leaderboards once instead of growing a frame inside the loop;
# the expected columns come first, any additional leaderboard columns after.
if leaderboards_auto:
    df_models_auto = pd.concat(leaderboards_auto, ignore_index=True)
else:
    df_models_auto = pd.DataFrame(columns=model_columns)
extra_columns = [col for col in df_models_auto.columns if col not in model_columns]
df_models_auto = df_models_auto.reindex(columns=model_columns + extra_columns)

df_metrics_auto.to_csv('plots/auto/auto_metrics.csv', index=False)
df_models_auto.to_csv('plots/auto/auto_models.csv', index=False)

# Fedot

In [None]:
from fedot.api.main import Fedot
from fedot.core.data.data import InputData
from fedot.core.data.data_split import train_test_data_setup
from fedot.core.repository.tasks import Task, TaskTypesEnum, TsForecastingParams
import pandas as pd
from datetime import datetime as dt
import matplotlib.pyplot as plt

In [None]:
# FEDOT reads its input from a CSV file, so a normalized copy of the full
# series is written to disk; the row index is kept as the first CSV column.
fed_data = df_or.copy(deep=True)
normalize(fed_data, "Aktueller_Strombezug_kW")
fed_data.to_csv(path_or_buf="fedot_data.csv", index=True)

In [None]:
def create_or_append_to_csv_file(file_path, data):
    '''
    Append one row of metrics to a CSV file, creating the file if needed.

    Parameters
    ----------
    file_path : str
        Path of the metrics CSV.
    data : dict
        One row of metrics; written with the columns
        MSE, RMSE, MAE, MAPE and Time when the file is created.

    Only a missing (or completely empty) file triggers the creation of a new
    one. Any other error -- e.g. a failing write -- is raised to the caller
    instead of being swallowed, so existing metrics are never silently
    overwritten by a fresh one-row file.
    '''
    try:
        # try to load the existing metrics
        df = pd.read_csv(file_path)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # no usable file yet: start a new metrics table
        df = pd.DataFrame(columns=['MSE', 'RMSE', 'MAE', 'MAPE', "Time"])
    # append the new row and write everything back
    df.loc[len(df)] = data
    df.to_csv(file_path, index=False)

In [None]:
import os

# FEDOT experiment: fit and tune one pipeline per seed, forecast the held-out
# horizon, append the metrics to plots/FEDOT/metrics.csv and save one plot per
# run.

# Create the output folder up front so a long fit is not lost on savefig.
os.makedirs("plots/FEDOT", exist_ok=True)

# Seed-independent values are computed once.
date_FEDOT = np.array(df_test["Datetime"].to_numpy(), dtype='datetime64')
# Ground truth on the original scale: the last `time_horizon` raw observations.
te = energy_demand[-time_horizon:]
tick_positions = ['2023-01-16', '2023-01-31', '2023-02-15', '2023-02-28', 
                '2023-03-15', '2023-03-31', '2023-04-15']

counter = 0

for seed in seeds:
    task = Task(TaskTypesEnum.ts_forecasting,
                TsForecastingParams(forecast_length=time_horizon))

    train_input = InputData.from_csv_time_series(task=task,
                                                 file_path="fedot_data.csv",
                                                 delimiter=',',
                                                 target_column='Aktueller_Strombezug_kW')
    np.random.seed(seed)  # set seed

    train_data, test_data = train_test_data_setup(train_input)

    model = Fedot(problem='ts_forecasting',
                  task_params=task.task_params,
                  timeout=15,
                  seed=seed)
    # use pretrained model add: predefined_model = "auto"
    start_time_fed = dt.now()

    pipeline = model.fit(train_data)
    tuned = model.tune(timeout=15)
    end_time_fed = dt.now()
    elapsed_seconds_fed = (end_time_fed - start_time_fed).total_seconds()
    elapsed_min_fed = divmod(elapsed_seconds_fed, 60)
    time_passed_fed = f"{int(elapsed_min_fed[0])}m{int(elapsed_min_fed[1])}s"
    print("Total fitting time: ", f"{int(elapsed_min_fed[0])}m{int(elapsed_min_fed[1])}s")

    # NOTE(review): `pipeline` is the object returned by fit(); whether it
    # reflects the tuning done by model.tune() is not visible here -- confirm
    # that the intended (tuned) pipeline is the one shown and saved.
    pipeline.show()
    pipeline.save("plots/FEDOT/FedotPipeline" + str(seed))

    # Save the plot as an image.
    # NOTE(review): this saves whatever figure is current after
    # pipeline.show(); if show() already closed its figure the image is
    # empty, and the same file is overwritten on every run -- verify.
    plt.savefig('pipeline_plot.png')

    # Forecast mapped back to the original scale in one vectorized step.
    # (The former element-wise `for i in ...` loop overwrote the seed variable,
    # so every plot below was saved under the same name.)
    forecast = denormalize(
        np.asarray(model.forecast(test_data), dtype=float), max_energy, min_energy
    )

    table_fed, mse_f, rmse_f, mae_f, mape_f = calculate_metrics(te, forecast)

    row_f = {"MSE": mse_f, 'RMSE': rmse_f, 'MAE': mae_f, "MAPE": mape_f, "Time": time_passed_fed}
    # save the metrics (appended run by run)
    create_or_append_to_csv_file("plots/FEDOT/metrics.csv", row_f)

    # plot actual vs. forecast
    plt.figure(figsize=(20, 8))
    plt.plot(date_FEDOT, forecast, label='FEDOT Forecast')
    plt.plot(date_FEDOT, te, label='Energy Demand')
    plt.xlabel('Date')
    plt.ylabel('energy demand')

    # Set the tick positions and labels on the x-axis
    plt.xticks(tick_positions, tick_positions)

    plt.legend()
    # Save before showing: once the figure has been shown it may already be
    # closed, and savefig would then write an empty image.
    plt.savefig("plots/FEDOT/FEDOT" + str(seed))
    plt.show()

    counter += 1