In [5]:
# Import statements
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

from keras import layers, models, regularizers
from tensorflow import keras
from tensorflow.keras import regularizers
from tensorflow.keras.applications import Xception
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img

from sklearn.metrics import (precision_score, recall_score, f1_score, accuracy_score, roc_curve, auc, 
                             confusion_matrix, ConfusionMatrixDisplay, classification_report)
from sklearn.preprocessing import power_transform

import tensorflow as tf
from tensorflow.keras.preprocessing import timeseries_dataset_from_array
from sklearn.model_selection import train_test_split

import PIL
import os
import cv2 as cv
import math
import winsound
from sklearn.preprocessing import LabelEncoder

os.chdir('../scripts')
from functions import impute_immediate_mean
import datetime as dt

In [2]:
# Audible notification, played to signal that a model run has completed
freq = 440  # tone frequency in Hz
duration = 1000  # tone length in milliseconds
winsound.Beep(freq, duration)

## Read in Data

In [389]:
# Load the cleaned dataset; the first CSV column is used as the index and parsed as dates
df = pd.read_csv('../data/df_clean.csv', parse_dates=True, index_col=0)

# Separate the target column from the remaining feature columns
y = df['price_tomorrow']
X = df.drop(columns='price_tomorrow')

In [250]:
# Non-null counts per column for every calendar day (year, month, day) in the index
day_keys = [df.index.year, df.index.month, df.index.day]
grouped = df.groupby(by=day_keys).count()

# Days on which 'generation biomass' has fewer than 24 hourly values
short_day_mask = grouped['generation biomass'] < 24
grouped[short_day_mask].index

MultiIndex([], )

## Continuous Features

In [251]:
# Remove the 'diff' column from the feature frame.
# NOTE(review): this mutates X in place, so re-running the cell raises a KeyError
# once 'diff' is gone — re-create X from the read cell before re-running.
X.drop(columns='diff', inplace=True)

# Every non-object column of X is treated as a continuous feature
continuous = X.select_dtypes(exclude='object').columns

# Replace the dew-point reading at this timestamp (the original notes call it a
# negative value) with the result of impute_immediate_mean
time = dt.datetime(year=2021, month=3, day=23, hour=22)
X.loc[time, 'dew_point_bilbao'] = impute_immediate_mean(X['dew_point_bilbao'], time)

# Shift every continuous value up by 0.0001, then apply the Box-Cox power transform
# (Box-Cox only accepts strictly positive input).
# NOTE(review): the transform is estimated on every row of X, including rows that are
# later used for validation — consider fitting on the training period only.
X[continuous] = power_transform(X[continuous] + .0001, method='box-cox')

In [390]:
# Remove the 'diff' column from the full frame.
# NOTE(review): this mutates df in place, so re-running the cell raises a KeyError
# once 'diff' is gone — reload df from the read cell before re-running.
df.drop(columns='diff', inplace=True)

# Every non-object column except the target is treated as a continuous feature
continuous = df.select_dtypes(exclude='object').drop(columns='price_tomorrow').columns

# Replace the dew-point reading at this timestamp (the original notes call it a
# negative value) with the result of impute_immediate_mean
time = dt.datetime(year=2021, month=3, day=23, hour=22)
df.loc[time, 'dew_point_bilbao'] = impute_immediate_mean(df['dew_point_bilbao'], time)

# Shift every continuous value up by 0.0001, then apply the Box-Cox power transform
# (Box-Cox only accepts strictly positive input).
# NOTE(review): the transform is estimated on every row of df, including the 2020 rows
# that are later used for validation — consider fitting on the training period only.
df[continuous] = power_transform(df[continuous] + .0001, method='box-cox')

## Categorical Features

In [252]:
# Encode the categorical (object-dtype) columns of X as integer codes.
categorical = X.select_dtypes(include='object')

# Wind-direction columns: fit ONE encoder on the labels of all wind columns stacked
# into a single Series, mirroring what is done for the condition columns below.
# Fitting on 'wind_madrid' alone would make transform() raise a ValueError as soon as
# another wind column contains a direction that never occurs in that one column.
wind_columns = categorical.filter(regex='wind')
wind_dir_coder = LabelEncoder()
wind_dir_coder.fit(wind_columns.stack())
for col in wind_columns.columns:
    X[col] = wind_dir_coder.transform(X[col])

# Condition columns: stack them into a single Series so the encoder sees every label
condition_columns = categorical.filter(regex='condition')
stacked_conditions = condition_columns.stack()

# Fit one shared encoder, then replace each condition column with its integer codes
condition_coder = LabelEncoder()
condition_coder.fit(stacked_conditions)
for col in condition_columns.columns:
    X[col] = condition_coder.transform(X[col])

In [391]:
# Encode the categorical (object-dtype) columns of df as integer codes.
categorical = df.select_dtypes(include='object')

# Wind-direction columns: fit ONE encoder on the labels of all wind columns stacked
# into a single Series, mirroring what is done for the condition columns below.
# Fitting on 'wind_madrid' alone would make transform() raise a ValueError as soon as
# another wind column contains a direction that never occurs in that one column.
wind_columns = categorical.filter(regex='wind')
wind_dir_coder = LabelEncoder()
wind_dir_coder.fit(wind_columns.stack())
for col in wind_columns.columns:
    df[col] = wind_dir_coder.transform(df[col])

# Condition columns: stack them into a single Series so the encoder sees every label
condition_columns = categorical.filter(regex='condition')
stacked_conditions = condition_columns.stack()

# Fit one shared encoder, then replace each condition column with its integer codes
condition_coder = LabelEncoder()
condition_coder.fit(stacked_conditions)
for col in condition_columns.columns:
    df[col] = condition_coder.transform(df[col])

## Split Data

In [35]:
# Random split: hold out 30% of the rows, then halve the hold-out into test and validation
features, target = df.drop(columns='price_tomorrow'), df['price_tomorrow']
X_train, X_holdout, y_train, y_holdout = train_test_split(features, target,
                                                          test_size=.3, random_state=17)
X_test, X_val, y_test, y_val = train_test_split(X_holdout, y_holdout,
                                                test_size=.5, random_state=17)
# NOTE(review): train_test_split shuffles rows by default, so each partition mixes
# time periods — confirm that is intended for this time-indexed data.

In [253]:
# Chronological split on the datetime index: rows through 2019 train, rows in 2020 validate
train_rows = slice(None, '2019')
val_rows = '2020'

array_X_train = X.loc[train_rows].to_numpy()
array_y_train = y.loc[train_rows].to_numpy()
array_X_val = X.loc[val_rows].to_numpy()
array_y_val = y.loc[val_rows].to_numpy()

In [445]:
# Whole frame (features and target) as a NumPy array
array = df.to_numpy()

In [447]:
# Number of columns in df (features plus target)
df.shape[1]

63

In [457]:
# Chronological split on the datetime index: rows through 2019 train, rows in 2020 validate
train_part, val_part = df.loc[:'2019'], df.loc['2020']

train_X = train_part.drop(columns='price_tomorrow').values
train_y = train_part['price_tomorrow'].values
val_X = val_part.drop(columns='price_tomorrow').values
val_y = val_part['price_tomorrow'].values

In [461]:
# Per-sample input shape taken from axes 1 and 2 of train_X.
# NOTE(review): this needs a 3-D train_X. The visible cell that builds train_X uses
# DataFrame.values, which is 2-D, so train_X.shape[2] would raise an IndexError there;
# the recorded output implies train_X was reshaped in a cell that is not shown —
# confirm and restore that step so the notebook runs top to bottom.
input_shape = (train_X.shape[1], train_X.shape[2])
input_shape

(1, 62)

In [462]:
# Dimensions of the training feature array
np.shape(train_X)

(43800, 1, 62)

In [436]:
# NOTE(review): `df_array` is not defined in any cell shown here, and this first
# assignment is overwritten by the next line before it is ever read.
array_y = df_array[21]
# NOTE(review): despite the name, this holds every column EXCEPT 'price_tomorrow'
# (i.e. the features, not the target) — confirm which one was intended.
array_y = df.drop(columns='price_tomorrow').to_numpy()

61008

In [94]:
# Number of rows in the training feature array
array_X_train.shape[0]

43800

In [92]:
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator

In [472]:
# Chronological split on the datetime index: rows through 2019 train, rows in 2020 validate
train_part, val_part = df.loc[:'2019'], df.loc['2020']

array_X_train = train_part.drop(columns='price_tomorrow').values
array_y_train = train_part['price_tomorrow'].values
array_X_val = val_part.drop(columns='price_tomorrow').values
array_y_val = val_part['price_tomorrow'].values

In [522]:
# Window length and batch size are both set to 24
batch_size = 24
sequence_length = batch_size
# Shape of one sample fed to the model: (rows per window, number of feature columns)
input_shape = (batch_size, array_X_train.shape[1])

# Both datasets are built with the same windowing settings.
# NOTE(review): per the Keras docs the target paired with a window is the one at the
# window's start index — confirm that is the intended alignment for 'price_tomorrow'.
window_kwargs = dict(sequence_length=sequence_length, batch_size=batch_size)
train_set = timeseries_dataset_from_array(data=array_X_train,
                                          targets=array_y_train,
                                          **window_kwargs)
val_set = timeseries_dataset_from_array(data=array_X_val,
                                        targets=array_y_val,
                                        **window_kwargs)

# Number of batches in the training dataset
print(len(train_set))

1825


## Benchmark Neural Network
Two hidden layers

In [523]:
# Benchmark network: two hidden Dense layers followed by a single regression output.
bm = models.Sequential()

# Each sample is a 2-D window of shape `input_shape`. Dense only acts on the last
# axis, so without flattening the network would emit a separate vector for every row
# of the window instead of one prediction per window.
bm.add(layers.Flatten(input_shape=input_shape))

# Hidden layers
bm.add(layers.Dense(239, activation='relu'))
bm.add(layers.Dense(162, activation='relu'))

# Output layer: one linear unit, because the windowed datasets supply one scalar
# target per window (a 24-unit ReLU output does not match that target shape and
# cannot produce values below zero).
bm.add(layers.Dense(1))

In [544]:
# Display the model object.
# NOTE(review): the recorded output for this cell is a TypeError — consider
# removing the cell or re-running it on a fresh kernel.
bm

TypeError: 'str' object is not callable

In [542]:
# Error measure used both as the training loss and as the reported metric
metric = tf.keras.metrics.MeanAbsolutePercentageError(name='mean_absolute_percentage_error')

# Save the weights whenever the validation loss improves. The loss is the MAPE (see
# compile below), so 'val_loss' is the validation MAPE; monitoring the training metric
# instead would keep the weights that fit the training data best rather than the ones
# that do best on held-out data.
checkpoint = keras.callbacks.ModelCheckpoint(
    filepath='checkpoints/',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=True)

# Stop training once the monitored quantity (EarlyStopping's default, 'val_loss')
# has not improved for 10 consecutive epochs
callback = keras.callbacks.EarlyStopping(patience=10)


# Compile the model
bm.compile(loss=metric.name,
           optimizer='Adam',
           metrics=[metric.name])

# Fit the model. Both callbacks are passed: previously the early-stopping callback was
# created but never handed to fit(), so it had no effect.
history = bm.fit(train_set,
                 epochs = 2,
                 callbacks=[checkpoint, callback],
                 validation_data=val_set)

Epoch 1/2
Epoch 2/2


In [537]:
metric.name

'mean_absolute_percentage_error'

In [526]:
# Load weights into the benchmark model from the 'checkpoints/' path, the same path
# the ModelCheckpoint callback is configured to write to.
bm.load_weights('checkpoints/')
# NOTE(review): `model` and `name` are not defined in any cell shown here; if the
# intent is to save the benchmark model this should presumably reference `bm` and an
# explicit model name — confirm.
model.save('models/{}'.format(name))

NotFoundError: Unsuccessful TensorSliceReader constructor: Failed to find any matching files for checkpoints/

In [102]:
# NOTE(review): incomplete expression (trailing dot) — a syntax error if re-run. Looks
# like leftover scratch input; the array shown as this cell's output presumably came
# from an earlier version of the cell — confirm and remove.
keras.

array([[ 0.60542916,  1.13491958, -0.53602249, ..., -0.60937955,
         1.98402493,  4.        ],
       [ 0.62771746,  1.13467981, -0.36416936, ..., -0.60937955,
         1.98402493,  4.        ],
       [ 0.61657714,  1.13346918, -0.52947867, ..., -0.13194659,
         2.15110321,  4.        ],
       ...,
       [ 1.66451673, -0.84885601, -0.11902295, ..., -0.60937955,
         1.00834037,  4.        ],
       [ 1.60146487, -0.84885601, -0.20472313, ..., -0.13194659,
         0.85011052,  4.        ],
       [ 1.42163019, -0.84885601, -0.46096862, ..., -0.13194659,
         1.00834037,  4.        ]])

In [111]:
# Per-day non-null counts, restricted to days where 'generation biomass' has fewer than 24 values
X.groupby(by=[X.index.year, X.index.month, X.index.day]).count().loc[
    lambda daily_counts: daily_counts['generation biomass'] < 24
]

Unnamed: 0,Unnamed: 1,Unnamed: 2,generation biomass,generation fossil brown coal/lignite,generation fossil gas,generation fossil hard coal,generation fossil oil,generation hydro pumped storage consumption,generation hydro run-of-river and poundage,generation hydro water reservoir,generation nuclear,generation other,...,wind_speeds_bilbao,pressures_bilbao,condition_bilbao,temp_valencia,dew_point_valencia,humidities_valencia,wind_valencia,wind_speeds_valencia,pressures_valencia,condition_valencia
2021,5,19,8,8,8,8,8,8,8,8,8,8,...,8,8,8,8,8,8,8,8,8,8


In [109]:
# Number of distinct calendar days (year, month, day) present in X's index
X.groupby(by=[X.index.year, X.index.month, X.index.day]).count().shape[0]


2543