# Use Difference as additional input

This notebook uses the difference as an additional input. Because we also take the input voltage into account, the model inputs are the input voltage and a delayed difference. The delay of the difference can be chosen.

We select a delay of 60 - meaning 1 minute. We figured that, in the worst case, our model learns the output purely based on the difference. With a small window of 60 seconds we would then only overshoot our prediction for 1 minute. As always, we need a trade-off between the best possible outcome and uncertainty about the future.

## Preliminaries

### Import Libraries

In [1]:
# own libraries
import Global_Functions as gf
import Neuronal_Networks as nn

# python provided libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import time
# Timestamp of this run at minute resolution, with a trailing "/". It is
# appended to the image and model output paths further down, so each run
# gets its own sub-folder.
timestr = time.strftime("%Y-%m-%d_%H-%M/")

# ML Libraries
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras.callbacks import EarlyStopping

In [2]:
# Set seeds for reproducible results.
# Seeding NumPy alone is not enough here: the model is built and trained with
# TensorFlow/Keras, which draws from its own global random generator, so that
# generator is seeded with the same value as well.
SEED = 123
np.random.seed(SEED)
tf.random.set_seed(SEED)

### Open files

In [3]:
# Relative path of the folder holding the prepared experiment CSV files
# that are opened in the next cell.
OPEN_FOLDER = '../Data/Preped_Data/'

In [4]:
# Open the prepared experiment files. Every file follows the naming scheme
# experiment_<id>_short.csv and is read from OPEN_FOLDER; the frames are
# bound to ex_<id> in the same order as the ids listed below.
ex_1, ex_4, ex_9, ex_20, ex_21, ex_22, ex_23, ex_24 = (
    gf.open_CSV_file(f'experiment_{exp_id}_short.csv', OPEN_FOLDER)
    for exp_id in (1, 4, 9, 20, 21, 22, 23, 24)
)

In [5]:
# Keep each experiment together with its label in one mapping, then expose
# the two index-aligned lists the rest of the notebook works with.
labelled_experiments = {
    '1': ex_1,
    '4': ex_4,
    '9': ex_9,
    '20': ex_20,
    '21': ex_21,
    '22': ex_22,
    '23': ex_23,
    '24': ex_24,
}
names = list(labelled_experiments.keys())
experiments = list(labelled_experiments.values())

In [6]:
# Relative path of the synthetic data set that gf.load_synthetic reads in the
# next cell.
OPEN_SYNTHETIC = '../Data/Synthetic_Data/Without_Noise/Sample_Ratio_1/quadratic_40_0_linear_1'

In [7]:
# Switch to the synthetic data set. This rebinds `experiments` and `names`,
# replacing the lists built from the recorded experiments above.
experiments = gf.load_synthetic(OPEN_SYNTHETIC, length = 200)

# Label the series by 1-based position; the last entry is labelled "hand".
names = [str(position) for position in range(1, len(experiments))] + ["hand"]

### Specify parameters

In [8]:
# Hyper-parameters of this run. They are passed to nn.train_model / nn.predictions
# below and recorded in the specs file.
NEUR = 512                 # passed to nn.train_model as neurons_chosen
EPOCH = 200                # passed to nn.train_model as nmb_epochs
LAG = 60                   # delay of the difference input (lag_chosen); 60 = 1 minute, see introduction
DIFFERENCE = 'add_scaled'  # difference mode (difference_chosen); also used as output sub-folder name
BATCH_SIZE = 512           # batch size for both training and prediction

In [9]:
# Sizes of the consecutive train / validation / test slices of `experiments`.
# Defining them once keeps the slice bounds and the string labels in sync.
N_TRAIN = 150
N_VAL = 30
N_TEST = 20

# String forms of the split sizes; `train` is used in the prediction plot captions.
train = str(N_TRAIN)
val = str(N_VAL)

val_end = N_TRAIN + N_VAL
test_end = val_end + N_TEST

train_exs = experiments[:N_TRAIN]
ex_train = gf.use_multiple_experiments(train_exs)
ex_val = gf.use_multiple_experiments(experiments[N_TRAIN:val_end])
ex_test = gf.use_multiple_experiments(experiments[val_end:test_end])
# To train / validate on single recorded experiments instead, rebind
# ex_train and ex_val here (e.g. to ex_20 and ex_21).

In [10]:
# Output folder for this run's images: grouped by difference mode first,
# then by the run timestamp.
image_folder = image_path = f"../Images/Validation/Synthetic/Difference/{DIFFERENCE!s}/{timestr}"

# Let the project helper check the folder before anything is written to it.
gf.check_folder(image_folder)

Creation of directory ../Images/Validation/Synthetic/Difference/add_scaled/2021-05-28_11-31/ successful.


In [11]:
# Output folder for this run's trained models (not images): grouped by
# difference mode first, then by the run timestamp.
model_folder = model_path = f"../Models/Validation/Synthetic/Difference/{DIFFERENCE!s}/{timestr}"

# Let the project helper check the folder before anything is written to it.
gf.check_folder(model_folder)

Creation of directory ../Models/Validation/Synthetic/Difference/add_scaled/2021-05-28_11-31/ successful.


In [12]:
# Description of this run, stored next to the images so that every result
# folder documents the configuration that produced it.
specs = {
    'neurons': NEUR,
    'epochs': EPOCH,
    'lag': LAG,
    'difference': DIFFERENCE,
    'batch_size': BATCH_SIZE,
    'model': "LSTM(Neur), Dense(Neur/2), Dense(Out)",
    'data': 'Synthetic - (40, 0, 1)',
    # Built from the split labels so the text cannot drift from the real split.
    'data_points': f'{train} for training - {val} for validation',
    'sampling_rate': 1,
    # Adagrad is an optimizer, not a loss function, so it is recorded as such.
    'optimizer': 'Adagrad'
}

# One "key: value" line per entry.
specs_str = "".join(f'{cat}: {descr} \n' for cat, descr in specs.items())

# Explicit encoding so the file does not depend on the platform default.
with open(image_folder + "00_specs.txt", "w", encoding = "utf-8") as text_file:
    text_file.write(specs_str)

## Training the model

In [13]:
# Train the network through the project helper. Of the eight returned
# values, the sixth is not needed in this notebook and is discarded.
(model, history, scaler_train,
 X_train, y_train, _,
 X_val, y_val) = nn.train_model(
    ex_train,
    ex_val,
    difference_chosen = DIFFERENCE,
    lag_chosen = LAG,
    nmb_epochs = EPOCH,
    neurons_chosen = NEUR,
    batch_size = BATCH_SIZE,
    save_folder = model_folder,
)

Folder already exists.


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['difference'] = diffs_scaled
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['difference'] = diffs_scaled


Epoch 10 of 200 is done.
Epoch 20 of 200 is done.
Epoch 30 of 200 is done.
Epoch 40 of 200 is done.
Epoch 50 of 200 is done.
Epoch 60 of 200 is done.
Epoch 70 of 200 is done.
Epoch 80 of 200 is done.


KeyboardInterrupt: 

In [None]:
# Evaluate the trained model on every experiment: predict, plot the prediction
# against the measured 'el_power', and collect error metrics on both the
# original and the scaled target.
all_scaler = list()
all_X_values = list()
all_y_values = list()
all_predictions = list()
all_scaled_predictions = list()

# (key in the gf.measure_difference result, column name in `results`)
METRIC_COLUMNS = (('RMSE', 'rmse'), ('R2', 'r2'), ('MAE', 'mae'), ('MaxAE', 'maxae'))

results = pd.DataFrame(index = names,
                       columns = ['rmse', 'r2', 'mae', 'maxae',
                                  'rmse_scaled', 'r2_scaled', 'mae_scaled', 'maxae_scaled'])

for i, experiment in enumerate(experiments):
    name = names[i]

    scaler, X, y, preds_scaled, preds = nn.predictions(experiment, model,
                                                       difference_chosen = DIFFERENCE,
                                                       lag_chosen = LAG,
                                                       batch_size = BATCH_SIZE)
    all_scaler.append(scaler)
    all_X_values.append(X)
    all_y_values.append(y)
    all_predictions.append(preds)
    all_scaled_predictions.append(preds_scaled)

    gf.create_prediction_plot(experiment['el_power'], preds, image_folder,
                              title = 'Using Difference as Intermediate Variable ',
                              specs = 'on ex_{0} with model trained on {1}'.format(name, train))

    results_ex = gf.measure_difference(experiment['el_power'], preds, should_print = False)
    results_scaled = gf.measure_difference(y, preds_scaled, should_print = False)

    # Store every metric as a plain scalar (not wrapped in a one-element list)
    # so each cell of `results`, and therefore the CSV, holds a number.
    for metric, column in METRIC_COLUMNS:
        results.loc[name, column] = results_ex[metric][0]
        results.loc[name, column + '_scaled'] = results_scaled[metric][0]

    # Rewritten after every experiment, so the metrics collected so far are
    # on disk even if the loop is interrupted.
    results.to_csv(image_folder + "99_results.csv", sep = "|", encoding = 'utf-8')

In [None]:
# Hand the training history and the image folder to the project helper.
# `history` is only bound once nn.train_model has returned, so this cell
# needs a completed (not interrupted) training run.
# NOTE(review): presumably this stores the loss curves in image_folder - confirm in Global_Functions.
gf.save_losses(history, image_folder)