In [2]:
import absl.logging

from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_squared_error as mse, r2_score, mean_absolute_error as mae, mean_absolute_percentage_error as mape
from statsmodels.tsa.seasonal import seasonal_decompose

import os
import pandas as pd
import numpy as np
from tpot import TPOTRegressor

ImportError: this version of pandas is incompatible with numpy < 1.20.3
your numpy version is 1.19.5.
Please upgrade numpy to >= 1.20.3 to use this pandas version

In [186]:
# Root folder of the pipeline. This is an absolute path on the author's Windows
# machine; it must be edited before running the notebook anywhere else.
folder_path = r"C:\Users\gauld\OneDrive\Documents\4th year\Thesis\pipeline"
# Sub-folder for saved models (not referenced by the cells visible here).
model_directory = folder_path + r"\models"
# Sub-folder holding the input CSVs; the scaled .npy arrays and the TPOT export
# are written here as well.
csv_directory = folder_path + r"\csvs"

# Truthy -> StandardScaler, falsy -> MinMaxScaler(feature_range=(0, 1)).
standard_scaling = 1
# NOTE(review): epochs and batch_size are not used by any cell visible here;
# presumably they belong to a neural-network training stage -- confirm.
epochs = 2
batch_size = 1
# Suffix selecting which CSV pair is read (data_<future>.csv / outputs_<future>.csv)
# and the name of the exported TPOT script.
future = 0
# Number of consecutive rows combined into one sample by create_dataset.
window = 3
# Fraction of the rows (taken from the start) used as the training portion.
split = 0.7

# Only let absl emit messages at ERROR level.
absl.logging.set_verbosity(absl.logging.ERROR)

In [187]:
# Display the derived directories to confirm the paths were assembled as expected.
model_directory, csv_directory

('C:\\Users\\gauld\\OneDrive\\Documents\\4th year\\Thesis\\pipeline\\models',
 'C:\\Users\\gauld\\OneDrive\\Documents\\4th year\\Thesis\\pipeline\\csvs')

In [188]:
def restitch(array, stride):
    """Collapse a 3-D array into 2-D rows and drop periodically repeated rows.

    The array is flattened and reshaped so that every row holds
    ``array.shape[1] * array.shape[2]`` values. Row 0 is always kept; every
    later row whose index is a multiple of ``stride + 1`` is discarded.

    Parameters
    ----------
    array : numpy.ndarray
        Array with at least three dimensions.
    stride : int
        Rows at positive multiples of ``stride + 1`` are removed.

    Returns
    -------
    numpy.ndarray
        The 2-D array of surviving rows, in their original order.
    """
    row_width = array.shape[2] * array.shape[1]
    rows = array.flatten().reshape(-1, row_width)
    period = stride + 1
    # Equivalent to "not (multiple of period and past the first row)".
    surviving = [idx for idx in range(len(rows)) if idx % period != 0 or idx <= 0]
    return rows[surviving]

In [189]:
def create_dataset(input, win_size):
    """Turn a sequence of rows into flattened sliding windows.

    Window ``i`` covers rows ``i`` to ``i + win_size - 1`` of ``input``.
    ``len(input) - win_size`` windows are produced, so the final possible
    window is not included. Each window is flattened into a single row.

    Parameters
    ----------
    input : array-like
        Source rows; it is copied first, so the caller's object is not modified.
    win_size : int
        Number of consecutive rows per window.

    Returns
    -------
    numpy.ndarray
        float32 array with one flattened window per row.
    """
    source = input.copy()
    n_windows = len(source) - win_size

    windows = [list(source[start:start + win_size]) for start in range(n_windows)]

    stacked = np.array(windows).astype(np.float32)
    return stacked.reshape(stacked.shape[0], -1)


In [190]:

class FeatureScaler(BaseEstimator, TransformerMixin):
    """Scale the weather, hour and lagged-load columns of a DataFrame in place.

    The scaler type follows the module-level ``standard_scaling`` flag:
    ``StandardScaler`` when it is truthy, ``MinMaxScaler(feature_range=(0, 1))``
    otherwise.

    The contract differs from a typical transformer: ``fit`` does nothing,
    while ``transform`` fits a new scaler per column on every call, overwrites
    those columns of ``X`` in place, and returns the fitted scalers instead of
    the transformed data.
    """

    def fit(self, X, y=None):
        """Do nothing and return ``self``; all fitting happens in ``transform``."""
        return self

    def transform(self, X):
        """Fit one scaler per column, rescale ``X`` in place, return the scalers.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain every column listed in ``columns`` below; those
            columns are replaced by their scaled values.

        Returns
        -------
        list
            The fitted scalers, in the same order as ``columns``.
        """
        columns = (
            'DryBulb',
            'DewPnt',
            'WetBulb',
            'Humidity',
            'Hour',
            'PrevWeekSameHour',
            'PrevDaySameHour',
            'Prev24HourAveLoad',
        )

        if standard_scaling:
            fitted = [StandardScaler() for _ in columns]
        else:
            fitted = [MinMaxScaler(feature_range=(0,1)) for _ in columns]

        # Each column gets its own independently fitted scaler.
        for name, scaler in zip(columns, fitted):
            X[name] = scaler.fit_transform(X[[name]])

        return fitted

In [191]:
def get_metrics(predictions, actual):
    """Compute regression error metrics for a set of predictions.

    Parameters
    ----------
    predictions : array-like
        Predicted values.
    actual : array-like
        Ground-truth values, same shape as ``predictions``.

    Returns
    -------
    dict
        Keys ``'RMSE'``, ``'R2'``, ``'MSE'``, ``'MAE'`` and ``'MAPE'``.
    """
    # The `squared=` keyword of mean_squared_error was deprecated in
    # scikit-learn 1.4 and removed in 1.6. Working from the per-output MSE
    # gives the same values on old and new versions: RMSE is the mean of the
    # per-output square roots, which is what `squared=False` returned.
    per_output_mse = mse(actual, predictions, multioutput='raw_values')
    MSE = np.mean(per_output_mse)
    RMSE = np.mean(np.sqrt(per_output_mse))

    MAE = mae(actual, predictions)
    MAPE = mape(actual, predictions)
    R2 = r2_score(actual, predictions)

    metrics = {'RMSE': RMSE, 'R2': R2, 'MSE': MSE, 'MAE': MAE, 'MAPE': MAPE}
    return metrics

In [192]:
def scaling(csv_directory, future):
    """Load the feature and target CSVs, scale them, and save them as ``.npy``.

    Reads ``data_<future>.csv`` (features; the ``Date`` column is dropped) and
    ``outputs_<future>.csv`` (targets) from ``csv_directory``. Feature columns
    are scaled by ``FeatureScaler``, and ``SYSLoad`` by a ``StandardScaler`` or
    ``MinMaxScaler`` depending on the module-level ``standard_scaling`` flag.
    The scaled arrays are written back to ``csv_directory`` as ``X_train.npy``
    and ``y_train.npy``. Despite the file names, these hold every row, not
    only a training subset.

    NOTE(review): all scalers are fitted on the complete dataset, before the
    caller takes its training split, so held-out rows influence the scaling
    statistics. Confirm this is intended.

    Parameters
    ----------
    csv_directory : str
        Folder containing the input CSVs; also receives the ``.npy`` outputs.
    future : int or str
        Suffix selecting which CSV pair to read.

    Returns
    -------
    X_frame : numpy.ndarray
        float32 array of the scaled feature table.
    y_data : numpy.ndarray
        float32 array of the scaled ``SYSLoad`` column.
    pred_dates : pandas.Series
        The ``Date`` column of the outputs CSV.
    y_scaler : StandardScaler or MinMaxScaler
        Scaler fitted on ``SYSLoad``.
    scalers : list
        Fitted per-column feature scalers returned by ``FeatureScaler``.
    """
    # os.path.join replaces the hard-coded "\" separators so the paths also
    # resolve on non-Windows systems; on Windows the result is unchanged.
    data_path = os.path.join(csv_directory, "data_" + str(future) + ".csv")
    outputs_path = os.path.join(csv_directory, "outputs_" + str(future) + ".csv")

    data = pd.read_csv(data_path).drop('Date', axis=1)
    outputs = pd.read_csv(outputs_path)

    # FeatureScaler rescales `data` in place and returns the fitted scalers,
    # so `data` itself is the scaled feature table after this call.
    pipe = Pipeline([('Scaler', FeatureScaler())])
    scalers = pipe.fit_transform(data)

    pred_dates = outputs['Date']

    if standard_scaling:
        y_scaler = StandardScaler()
    else:
        y_scaler = MinMaxScaler(feature_range=(0,1))

    y_data = y_scaler.fit_transform(outputs[['SYSLoad']])

    X_frame = np.array(data).astype(np.float32)
    y_data = np.array(y_data).astype(np.float32)

    np.save(os.path.join(csv_directory, "X_train.npy"), X_frame)
    np.save(os.path.join(csv_directory, "y_train.npy"), y_data)

    return X_frame, y_data, pred_dates, y_scaler, scalers

In [193]:
# Load and scale the dataset for the configured horizon.
X_frame, y_data, pred_dates, y_scaler, scalers = scaling(csv_directory, future)
length = X_frame.shape[0]

# Index of the first row that falls outside the training portion.
train_end = int(length*split)

# Window i covers rows i .. i+window-1 and is paired with the target at row
# i+window, which is why the targets start at index `window`.
X_train = create_dataset(X_frame[:train_end], window)
y_train = y_data[window:train_end]

X_train.shape, y_train.shape

    DryBulb    DewPnt   WetBulb  Humidity      Hour  Weekday  IsWorkingDay  \
0  0.688102  1.124625  1.058434  0.689912 -1.624150        6           0.0   
1  0.667613  1.170364  1.081746  0.778966 -1.551966        6           0.0   
2  0.647124  1.197807  1.093401  0.868021 -1.479781        6           0.0   
3  0.626636  1.225251  1.105057  0.957075 -1.407597        6           0.0   
4  0.667613  1.216103  1.116713  0.868021 -1.335412        6           0.0   

   PrevWeekSameHour  PrevDaySameHour  Prev24HourAveLoad  
0         -0.629116        -1.068261          -1.178185  
1         -0.832556        -1.285618          -1.183704  
2         -1.084049        -1.528614          -1.188809  
3         -1.297887        -1.748005          -1.193678  
4         -1.444502        -1.877083          -1.197902  


((61115, 30), (61115, 1))

In [194]:
# Run the TPOT pipeline search, time-boxed to 180 minutes.
# NOTE(review): no random_state is set, so repeated runs may export different pipelines.
model = TPOTRegressor(early_stop=5, verbosity=1, max_time_mins=180)
# y_train is a column vector of shape (n, 1); a 1-D target is expected, so
# flatten it rather than relying on an implicit conversion (and its warning).
model.fit(X_train, y_train.ravel())
# Export the best pipeline as a standalone script; os.path.join avoids the
# hard-coded Windows separator and gives the same path on Windows.
model.export(os.path.join(csv_directory, "tpot_" + str(future) + ".py"))

NameError: name 'TPOTRegressor' is not defined