In [1]:
import pandas as pd
import datetime 
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import os 
from datetime import datetime
import time

In [2]:
from helpers import prepare_features

In [3]:
# Remember the folder the notebook was started from; every data path below is
# built relative to it.
current_directory = os.getcwd()

# Display settings: no scientific notation, and 5 digits after the decimal
# point for both NumPy arrays and pandas floats.
np.set_printoptions(precision=5, suppress=True)
pd.set_option("display.float_format", '{:.5f}'.format)

# Silence every warning for the rest of the session.
import warnings
warnings.filterwarnings(action="ignore")

In [4]:
# Load the feature table and its timestamps from data_augmented/.
# In both files the first CSV column is used as the row index.
X, timestamps = (
    pd.read_csv(os.path.join(current_directory, relative_path), index_col=0)
    for relative_path in ('data_augmented/X.csv', 'data_augmented/timestamps.csv')
)

# Keep `file_path` pointing at the last file that was read.
file_path = os.path.join(current_directory, 'data_augmented/timestamps.csv')

In [5]:
# Work on the loaded feature table under the name `df`.
# NOTE: this binds a second name to the same DataFrame (no copy is made), so
# in-place changes made through `df` are also visible through `X`.
df = X 

In [6]:
# Attach the timestamps as a column and promote that column to the index.
# Both steps modify the existing DataFrame object in place.
df['timestamp'] = timestamps
df.set_index("timestamp", inplace=True)

# Parse the index labels into datetimes so the frame can be placed on a
# regular time grid.
df.index = pd.to_datetime(df.index)

# Conform to a fixed hourly frequency; hours absent from the data show up as
# NaN rows. `pd.offsets.Hour()` is the same frequency as the 'H' string alias,
# but avoids that alias, which pandas deprecated in 2.2 in favour of 'h'.
df = df.asfreq(pd.offsets.Hour())

In [8]:
# Step 1: Train-Test Split
# The first ~95% of the rows are used for training; the remainder is the test
# period, trimmed so that it starts at an 11:00 row and ends at a 10:00 row.
train_test_split_ratio = 0.95
train_size = int(len(df) * train_test_split_ratio)  # Initial train size based on the ratio

# Move the test start forward to the next row whose hour is 11. The bound
# check keeps the search from running past the end of the index.
initial_test_start = train_size
while initial_test_start < len(df) and df.index[initial_test_start].hour != 11:
    initial_test_start += 1

# Move the test end backward to the last row whose hour is 10.
final_test_end = len(df) - 1
while final_test_end >= 0 and df.index[final_test_end].hour != 10:
    final_test_end -= 1

if initial_test_start >= len(df) or final_test_end < initial_test_start:
    raise ValueError(
        "Could not align the test period: no 11:00 start followed by a 10:00 "
        "end was found after the train/test split point."
    )

# Take explicit copies so that later column assignments on `train` / `test`
# operate on independent frames rather than on slices of `df`.
train = df.iloc[:initial_test_start].copy()
test = df.iloc[initial_test_start:final_test_end + 1].copy()  # Include the last index

In [9]:
# Standardize data
# The scaling statistics are estimated on the training split only and then
# applied unchanged to the test split. Fitting a separate scaler on the test
# data would leak test-period statistics into the evaluation and put the two
# splits on different scales.
# NOTE: this cell overwrites the scaled columns of `train` / `test`; run it
# once per fresh split, otherwise already-scaled data gets scaled again.
from sklearn.preprocessing import StandardScaler
columns_to_scale = ['power_consumption', 'temp']

# Train data: fit and transform.
scaler_train = StandardScaler()
scaled_train = pd.DataFrame(
    scaler_train.fit_transform(train[columns_to_scale]),
    columns=columns_to_scale
)

# One-row tables (row label 0) holding the per-column mean and scale used.
means_train = pd.DataFrame([scaler_train.mean_], columns=columns_to_scale)
stds_train = pd.DataFrame([scaler_train.scale_], columns=columns_to_scale)

train[columns_to_scale] = scaled_train.values

# Test data: transform only, with the statistics learned from the train split.
# `scaler_test` is kept as a name for compatibility and is the same fitted
# scaler object as `scaler_train`.
scaler_test = scaler_train
scaled_test = pd.DataFrame(
    scaler_test.transform(test[columns_to_scale]),
    columns=columns_to_scale
)

# The statistics applied to the test split are the train statistics; they are
# still exposed under the test names so that code which undoes the test
# scaling from these tables stays consistent with how the data was scaled.
means_test = means_train.copy()
stds_test = stds_train.copy()

test[columns_to_scale] = scaled_test.values

In [10]:
# Write the scaled splits and their scaling statistics to data_augmented/.
# Files are written in the listed order; `file_path` ends up holding the last
# path written.
tables_to_save = [
    ('data_augmented/train.csv', train),
    ('data_augmented/test.csv', test),
    ('data_augmented/means_train.csv', means_train),
    ('data_augmented/means_test.csv', means_test),
    ('data_augmented/stds_train.csv', stds_train),
    ('data_augmented/stds_test.csv', stds_test),
]
for relative_path, table in tables_to_save:
    file_path = os.path.join(current_directory, relative_path)
    table.to_csv(file_path)

In [11]:
# Step 2: Build the neural-network inputs (include_forecast switched off).
target_col = 'power_consumption'
exog_cols = ['temp'] # [col for col in df.columns if col not in target_col]
window_length = 168  # 7 days of hourly rows
forecast_horizon = 24  # the following 24 hours
include_forecast = False

# Both splits go through the helper with exactly the same settings.
feature_args = (target_col, exog_cols, window_length, forecast_horizon, include_forecast)
X_train, y_train, timestamps_train = prepare_features(train, *feature_args)
X_test, y_test, timestamps_test = prepare_features(test, *feature_args)

In [12]:
# Persist the NN inputs: arrays go to .npy files, timestamps to CSV files
# written without an index column. Write order matches the list below.
outputs_to_save = [
    ('data_augmented/X_train.npy', X_train),
    ('data_augmented/y_train.npy', y_train),
    ('data_augmented/timestamps_train.csv', timestamps_train),
    ('data_augmented/X_test.npy', X_test),
    ('data_augmented/y_test.npy', y_test),
    ('data_augmented/timestamps_test.csv', timestamps_test),
]
for relative_path, payload in outputs_to_save:
    file_path = os.path.join(current_directory, relative_path)
    if relative_path.endswith('.npy'):
        np.save(file_path, payload)
    else:
        payload.to_series().to_csv(file_path, index=False)

In [13]:
# Step 2 (second pass): rebuild the neural-network inputs with
# include_forecast switched on. This overwrites X_train / X_test (and the
# matching targets and timestamps) produced by the earlier pass.
target_col = 'power_consumption'
exog_cols = ['temp'] # [col for col in df.columns if col not in target_col]
window_length = 168  # 7 days of hourly rows
forecast_horizon = 24  # the following 24 hours
include_forecast = True

# Both splits go through the helper with exactly the same settings.
feature_args = (target_col, exog_cols, window_length, forecast_horizon, include_forecast)
X_train, y_train, timestamps_train = prepare_features(train, *feature_args)
X_test, y_test, timestamps_test = prepare_features(test, *feature_args)

In [14]:
# Persist the current X_train / X_test under the *_include_forecast file
# names. Only the input arrays are written here.
forecast_outputs = [
    ('data_augmented/X_train_include_forecast.npy', X_train),
    ('data_augmented/X_test_include_forecast.npy', X_test),
]
for relative_path, array in forecast_outputs:
    file_path = os.path.join(current_directory, relative_path)
    np.save(file_path, array)

In [15]:
# Create an empty error-metrics table (header row only) in results/.
# NOTE: this overwrites any existing results/errors.csv.
errors = pd.DataFrame(columns = ['RMSE', 'MAE', 'ME', 'MAPE'])

# to_csv does not create missing folders, so make sure results/ exists first.
os.makedirs(os.path.join(current_directory, 'results'), exist_ok=True)
file_path = os.path.join(current_directory, 'results/errors.csv')
errors.to_csv(file_path)

In [16]:
# Create an empty uncertainty-metrics table (header row only) in results/.
# NOTE: this overwrites any existing results/uncertainty_evaluation.csv.
uncertainty_evaluation = pd.DataFrame(columns = ['PICP', 'PINAW'])

# to_csv does not create missing folders, so make sure results/ exists first.
os.makedirs(os.path.join(current_directory, 'results'), exist_ok=True)
file_path = os.path.join(current_directory, 'results/uncertainty_evaluation.csv')
uncertainty_evaluation.to_csv(file_path)