In [None]:
%matplotlib tk

# TIME SERIES ANALYSIS

# Data loading

In [None]:
import Load_forecasting as lf
import seaborn as sns
import matplotlib.pyplot as plt
sns.set()

# Coordinate pairs handed to attach_weather(drop_stations=...) below.
# NOTE(review): presumably (latitude, longitude) of stations to exclude — confirm
# against Load_forecasting.
stations_to_drop = \
[(-51.317, -59.6),(53.5, -19.5),(47.5, -8.5), (54.017,1.1), (53.7,1.133), 
(59.733,1.667),(58.033,1.400), (57.283,1.650),(61.2, 1.1),(60.6 ,1.0),
(59.5, 1.5),(58.3, 0.4),(57.883, 0.033),(57.6, 1.7),(57.2, 1.2),
(57.2, 0.5),(54.0, 0.7),(53.833, 2.917),(53.5, 2.2),(53.4, 1.7),
(53.0, 2.1),(53.0, 1.7),(49.9, -2.9), (60.15, -2.067), (60.117, -2.067)]


# NOTE: despite its name, `df` is a Load_Forecaster instance, not a DataFrame.
df = lf.Load_Forecaster()
# Exactly one attach_load call is active; the alternatives are kept commented out.
# NOTE(review): these are absolute, machine-specific paths — the notebook will not
# run elsewhere without editing them.
# NOTE(review): the active load file is a New York zone (location="NEW_YORK") while
# the weather attached below comes from the UK station files — confirm that this
# combination is intended.
#df.attach_load(filename="/media/jonathan/DATA/HW/Project/DATA/NG_DATA/DATA/", location="UK")
df.attach_load(filename="/media/jonathan/DATA/HW/Project/DATA/NY_Data/DATA/zones/MHK VL.csv", location="NEW_YORK")
#df.attach_load(filename="/media/jonathan/DATA/HW/Project/DATA/NY_Data/DATA/zones/N.Y.C..csv", location="NEW_YORK")

# UK weather files; the variable names (including the trailing space in 'RHx ')
# are the keys later used to index pivot_data.
df.attach_weather(filepath_stations="METAR_DATA/isd_stations_uk.txt",
                  filespath_data=["METAR_DATA/isd_2005-2010_uk.txt", "METAR_DATA/isd_2011-2018_uk.txt"],
                  variables=['W_Spd', 'Air_Temp', 'RHx ', 'Dew_temp'],
                  drop_stations=stations_to_drop)

# The bare string literal below is a disabled attach_weather call for the New York
# files; as an expression statement it has no effect when the cell runs.
"""df.attach_weather(filepath_stations="METAR_DATA/isd_stations_ny.txt",
                  filespath_data=['METAR_DATA/isd_2001-2018_ny.txt'],
                  variables=['W_Spd', 'Air_Temp', 'RHx ', 'Dew_temp'],
                  drop_stations=[])"""

df.process_data()
# `load` is the frame used by the load/demand plotting cells further down.
load = df.preprocessors['Load'].get_data()

# Weather variable plotting function  

In [None]:
import pandas as pd
def plot_weather_var(data, label):
    """Draw a per-month box plot of a weather variable averaged across columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose index can be parsed by ``pd.to_datetime``. Each row is
        reduced to the mean of its columns before plotting.
    label : str
        Text used for the y-axis label and inside the plot title.

    Notes
    -----
    Tick labels are derived from the months actually present in ``data``, so a
    frame that covers fewer than twelve months is labelled correctly instead of
    failing on a label/tick count mismatch.
    """
    month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

    averaged = pd.DataFrame(data.mean(axis=1))
    averaged.index = pd.to_datetime(averaged.index)
    months = averaged.index.month

    ax = averaged.boxplot(by=months, column=0, fontsize=13, rot=90)
    ax.set_ylabel(label, fontsize=13)
    ax.set_xlabel("Month", fontsize=13)
    ax.set_title("Average " + label + " by month", fontsize=13)

    # Work on the figure that owns `ax` rather than on pyplot's "current figure".
    fig = ax.get_figure()
    fig.suptitle("")  # remove the automatic "Boxplot grouped by ..." heading

    # One box is drawn per distinct month (ascending), so name exactly those.
    months_present = sorted(months.dropna().unique())
    ax.set_xticklabels([month_names[int(m) - 1] for m in months_present])
    fig.tight_layout()

# Air temp mean plot

In [None]:
# Monthly box plot of the air temperature.
air_temp = df.preprocessors["Weather"].pivot_data['Air_Temp']
# "\u00b0" is the degree sign. The previous label contained the mojibake "Â°"
# (a UTF-8 degree sign decoded as Latin-1), which rendered literally on the axis.
plot_weather_var(air_temp, 'Air temperature (\u00b0C)')

# Wind speed mean plot

In [None]:
# Monthly box plot of the wind speed ('W_Spd' entry of the weather pivot data).
wind_speed = df.preprocessors["Weather"].pivot_data['W_Spd']
plot_weather_var(
    data=wind_speed,
    label='Wind speed (km/h)',
)

# Humidity

In [None]:
# Monthly box plot of the humidity. The key 'RHx ' carries a trailing space,
# matching the variable name given to attach_weather.
humidity = df.preprocessors["Weather"].pivot_data['RHx ']
plot_weather_var(
    data=humidity,
    label='Humidity',
)

# Simple load / demand plot

In [None]:
# Plain line plot of the load series. Labels are set explicitly so the figure can
# be read on its own; "MW" matches the unit used by the monthly load box plot.
ax = load['TS'].plot()
ax.set_ylabel("Load (MW)")
ax.set_title("Load time series")

# Load / demand boxplot per month

In [None]:
%matplotlib tk
from matplotlib.patches import Patch

month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']

# The original cell plotted from an undefined name `data` and grouped by a 'Month'
# column that is only created in a later cell. Build the grouping column here, on a
# copy, so the cell runs on a fresh kernel and does not mutate `load`.
monthly_load = load.assign(Month=load.index.month)
ax = monthly_load.boxplot(by='Month', column='TS', showfliers=False, rot=90, patch_artist=True, fontsize=15)

# Box coloring: zero-based positions 5-8 (June to September) are gray, the rest white.
boxes = ax.findobj(Patch)
for m in range(len(month_names)):
    boxes[m].set_facecolor("gray" if m in (5, 6, 7, 8) else "white")

ax.set_ylabel("Load (MW)", fontsize=15)
ax.set_xlabel("Month", fontsize=15)
ax.set_title("Load per month", fontsize=15)
plt.suptitle("")  # remove the automatic "Boxplot grouped by Month" heading
locs, labels = plt.xticks()
plt.xticks(locs, month_names)
# Lay out last so the rotated month names are taken into account.
plt.tight_layout()

# Change rate boxplot per hour

In [None]:
# Derived columns used by the change-rate plots below (added to `load` in place).
# Percentage change of the load relative to the previous row.
load['Change Percentage'] = load['TS'].pct_change().mul(100)
# Grouping keys taken from the datetime index.
load['Time of day'] = load.index.time
load['Month'] = load.index.month

In [None]:
# Distribution of the load change percentage for each time of day (outliers hidden).
ax = load.boxplot(
    column='Change Percentage',
    by='Time of day',
    showfliers=False,
    rot=90,
)
ax.set_ylabel("Change percentage")
ax.set_title("Change percentage throughout the day")
plt.suptitle("")  # remove the automatic "Boxplot grouped by ..." heading

# Change rate boxplot per season

In [None]:
# Season number from the month: Dec-Feb -> 1, Mar-May -> 2, Jun-Aug -> 3, Sep-Nov -> 4.
seasons = {1:'Winter', 2:'Spring', 3:'Summer', 4:'Fall'}
season_numbers = (load.index.month % 12 + 3) // 3
# The original read the numbers back from an undefined name `data`; map them
# directly so the cell only depends on `load`.
load['Season'] = season_numbers.map(seasons)

ax = load.boxplot(by='Season', column='Change Percentage', showfliers=False, rot=90)
ax.set_ylabel("Change Percentage")
ax.set_title("Change percentage by season")
plt.suptitle("")  # remove the automatic "Boxplot grouped by Season" heading

# Change rate boxplot per month

In [None]:
from matplotlib.patches import Patch

month_names = ['January', 'February', 'March', 'April', 'May', 'June', 'July', 'August', 'September', 'October', 'November', 'December']
# Zero-based box positions drawn in gray: January-March and October-December.
# (The original list also contained 12, which range(12) can never produce.)
gray_positions = (0, 1, 2, 9, 10, 11)

ax = load.boxplot(by='Month', column='Change Percentage', showfliers=False, rot=90, patch_artist=True)

# Box coloring: look the patches up once instead of on every iteration.
boxes = ax.findobj(Patch)
for m in range(len(month_names)):
    boxes[m].set_facecolor("gray" if m in gray_positions else "white")

ax.set_ylabel("Change Percentage")
ax.set_title("Change Percentage by month")
plt.suptitle("")  # remove the automatic "Boxplot grouped by Month" heading
locs, labels = plt.xticks()
plt.xticks(locs, month_names)

# Change rate histogram

In [None]:
# Density-normalised histogram of the load change percentage (25 bins).
ax = load[['Change Percentage']].plot.hist(
    bins=25,
    density=True,
)
#data[['Change Percentage']].plot.hist(density=True, bins=25, ylim=(0,0.001))
ax.set_title("Frequency of change percentage")
ax.set_xlabel("Change Percentage")
plt.suptitle("")

---

# BENCHMARKS 

In [None]:
# Name of the dataset passed to the Benchmarker calls in the benchmark cells below.
benchmark_dataset = "data_UK_weather"

---

# Benchmark : RNN Structure [OK] DONE

In [None]:
# Dataset : 'data_UK_weather'
# Model : <benchmark>
# Modified settings : None

In [None]:
%matplotlib tk

In [None]:
import Benchmarker
# The structure benchmark run is disabled; only the plot call (RNN=True) against
# results_cluster.db is executed.
#Benchmarker.benchmark_RNN_structure(benchmark_dataset, '1-4', '1X-3X', 1, ['LSTM', 'GRU'], run_count=1)
Benchmarker.plot_structure_bench_RNN_Dense(
    RNN=True,
    db="results_cluster.db",
)

---

# Benchmark : Dense structure [OK] DONE

In [None]:
%matplotlib tk
import Benchmarker
# The structure benchmark run is disabled; only the plot call (Dense=True) against
# results_cluster.db is executed.
#Benchmarker.benchmark_Dense_structure(benchmark_dataset, '1-1', '1X-3X', 1, run_count=1)
Benchmarker.plot_structure_bench_RNN_Dense(
    Dense=True,
    db="results_cluster.db",
)

---

# Benchmark : CNN structure [OK] DONE

In [None]:
%matplotlib tk
import Benchmarker
# The CNN structure benchmark run is disabled; only the plot call against
# results_cluster.db is executed.
#Benchmarker.benchmark_CNN_structure(benchmark_dataset, '1-6', '1024-1024', 1, kernel_size=3, last_layer_type='Conv1D', run_count=2)
Benchmarker.plot_structure_bench_CNN(
    database="results_cluster.db",
)

---

# Benchmark : Weather vs no weather [OK] DONE

In [None]:
import Benchmarker
# Base/weather dataset pairs for the three locations; only the disabled
# benchmark call below references this list.
datasets = [
    'data_UK_base', 'data_UK_weather',
    'data_NYC_base', 'data_NYC_weather',
    'data_MHK_base', 'data_MHK_weather',
]
#Benchmarker.benchmark_datasets(datasets, run_count=3, flag="weather_variables", verbose=0)
# NOTE(review): the "weather_variables" flag is also plotted by the weather-station
# and weather-variable benchmark cells — confirm the results are meant to be shared.
Benchmarker.plot_benchmark(
    "weather_variables",
    mode='barplot',
)

---

# Benchmark : Load propagation [OK] DONE

In [None]:
%matplotlib tk
import Benchmarker
# Datasets for the load-propagation comparison; only the disabled benchmark call
# below references this list.
datasets = [
    'data_UK_base',
    'data_UK_no_weather_more_historical',
    'data_UK_no_weather_no_historical',
]
#Benchmarker.benchmark_datasets(datasets, run_count=3, flag="historical_ts", verbose=0)
Benchmarker.plot_benchmark(
    "historical_ts",
    mode='barplot',
    xlabel="Load propagation",
    split_labels_line=True,
    rot=0,
    secondary_y="ran_epochs",
    secondary_y_label="Epochs",
)

---

# Benchmark : Optimizers [OK] DONE

In [None]:
import Benchmarker
# Optimizer comparison on the 'CNN' model. Unlike most benchmark cells the run
# itself is active here, so executing the cell launches the benchmark (run_count=3)
# before plotting. Commented-out entries are excluded from the run.
Benchmarker.benchmark_variable(
    benchmark_dataset,
    var_name='optimizer',
    decompose=False,
    var_list=[
        #{'optimizer_name':'SGD', 'decay':0, 'momentum':0.0, 'nesterov':False},
        #{'optimizer_name':'SGD', 'decay':1e-6, 'momentum':0.0, 'nesterov':False},
        #{'optimizer_name':'SGD', 'decay':1e-6, 'momentum':0.9, 'nesterov':False},
        #{'optimizer_name':'SGD', 'decay':1e-6, 'momentum':0.9, 'nesterov':True},
        {'optimizer_name': 'Adam'},
        #{'optimizer_name':'Adadelta'},
        {'optimizer_name': 'RMSprop'},
        #{'optimizer_name':'Adagrad'},
        {'optimizer_name': 'Nadam'},
        #{'optimizer_name':'Adamax'}
    ],
    override_method='_override_optimizer',
    run_count=3,
    verbose=0,
    model='CNN',
)
Benchmarker.plot_benchmark(
    'optimizer',
    'detailed_table',
    rot=30,
    secondary_y='ran_epochs',
    secondary_y_label='epochs',
)

---

# Benchmark : Learning rate   [OK] DONE

In [None]:
import Benchmarker
# All learning-rate runs are disabled; only the plot call for the 'lr' results is
# executed.
#Benchmarker.benchmark_variable(benchmark_dataset, var_name="lr", var_range=[0.0001, 0.02, 10], override_method='_override_optimizer', run_count=3, verbose=1)
#Benchmarker.benchmark_variable(benchmark_dataset, var_name="lr", var_range=[0.035, 0.1, 3], override_method='_override_optimizer', run_count=3, verbose=1)
#Benchmarker.benchmark_variable("data_UK_weather_categorical_time", var_name="lr", var_list=[0.0005, 0.002, 0.005], override_method='_override_optimizer', run_count=3, verbose=1)
Benchmarker.plot_benchmark(
    'lr',
    'table',
    secondary_y='ran_epochs',
    secondary_y_label='Epochs',
)

# Benchmark : Time encoding [OK] DONE

In [None]:
%matplotlib tk
import Benchmarker
# Datasets for the time-encoding comparison; only the disabled benchmark call
# below references this list.
datasets = [
    'data_UK_weather_categorical_time',
    'data_UK_weather',
]
#Benchmarker.benchmark_datasets(datasets, flag="time_encoding", run_count=3, verbose=0)
Benchmarker.plot_benchmark(
    'time_encoding',
    'barplot',
)

---

# Benchmark : Early stopping [OK] DONE

In [None]:
%matplotlib tk
import Benchmarker
# The benchmark run is disabled; only the plot call for the 'early_stopping'
# results is executed.
#Benchmarker.benchmark_variable(benchmark_dataset, var_name='early_stopping', var_list=[True,False], override_method='_override_training_settings', run_count=3, verbose=0)
Benchmarker.plot_benchmark(
    'early_stopping',
    mode='barplot',
)

---

# Benchmark : Reduce learning rate on plateau [OK]

In [None]:
%matplotlib tk
import Benchmarker
# The benchmark run is disabled; only the plot call for the 'reduce_lr_plateau'
# results is executed.
#Benchmarker.benchmark_variable(benchmark_dataset, var_name='reduce_lr_plateau', var_list=[True,False], override_method='_override_training_settings', run_count=3, verbose=0)
Benchmarker.plot_benchmark(
    'reduce_lr_plateau',
    'table',
)

---

# Benchmark : Training years  [OK]

<font size=4>Note : Not a very relevant test.</font>
<br>
<font size=3>A better one would be to forecast a given year, given the last N years as training data.</font>

In [None]:
import Benchmarker
# (train, test) pairs: the first element runs from 1 to 8, the second is always 2.
train_test_list = [(n_train, 2) for n_train in range(1, 9)]
# The benchmark run is disabled; only the plot call for the 'train_test' results
# is executed.
#Benchmarker.benchmark_variable(benchmark_dataset, var_name='train_test', var_list=train_test_list, override_method='_override_model_data_settings', run_count=3, verbose=0)
Benchmarker.plot_benchmark(
    'train_test',
    mode='table',
)

---

# Benchmark : Standardizers [OK] DONE

### Note : A variant of the QuantileTransformer is not tested as it requires an argument, which is not supported for now.

In [None]:
%matplotlib tk
import Benchmarker
# Standardizer comparison. The benchmark call is active, so executing this cell
# launches the runs (run_count=3) before plotting.
Benchmarker.benchmark_variable(
    benchmark_dataset,
    var_name='stdz',
    var_list=[
        'QuantileTransformer',
        'Normalizer',
        'MaxAbsScaler',
        'RobustScaler',
        'MinMaxScaler',
        'StandardScaler',
    ],
    override_method='_override_standardizer',
    run_count=3,
    verbose=0,
)
Benchmarker.plot_benchmark(
    'stdz',
    mode='boxplot',
    rot=30,
    secondary_y="",
)

---

# Benchmark : Epoch count [OK] DONE

In [None]:
import Benchmarker
# Override description (early_stopping set to False); only the disabled benchmark
# call below passes it on.
early_override = {
    'method': '_override_training_settings',
    'variable': 'early_stopping',
    'value': False,
}
#Benchmarker.benchmark_variable(benchmark_dataset, var_name="epochs", var_range=[20, 300, 5], override_method='_override_training_settings', run_count=3, verbose=0, early_override=early_override)
Benchmarker.plot_benchmark(
    'epochs',
    mode='lineplot',
)

---

# Benchmark : Load time series corrections [OK]

In [None]:
import Benchmarker
# Datasets for the load-correction comparison; only the disabled benchmark call
# below references this list.
datasets = [
    'data_UK_weather',
    'data_UK_weather_basic_load_corrections',
]
#Benchmarker.benchmark_datasets(datasets, run_count=3, flag="fixes", verbose=0)
Benchmarker.plot_benchmark(
    "fixes",
    mode='barplot',
)

---

# Benchmark : More / less (desired) weather stations [OK] DONE

In [None]:
import Benchmarker
# Only the reduced-station dataset is listed; the full pair is kept in the trailing
# comment. The list is referenced solely by the disabled benchmark call below.
datasets = [
    'data_UK_weather_less_stations',
] #['data_UK_weather', 'data_UK_weather_less_stations']
#Benchmarker.benchmark_datasets(datasets, run_count=6, flag="weather_variables", verbose=0)
# NOTE(review): the "weather_variables" flag is also used by the weather-vs-no-weather
# and weather-variable benchmark cells — confirm the results are meant to be shared.
Benchmarker.plot_benchmark(
    'weather_variables',
    mode='boxplot',
    secondary_y="",
)

---

# Benchmark : Weather variables [OK] DONE

In [None]:
%matplotlib tk
import Benchmarker
# Datasets for the per-variable weather comparison; only the disabled benchmark
# call below references this list.
datasets = [
    'data_UK_weather',
    'data_UK_weather_air_only',
    'data_UK_weather_wind_only',
    'data_UK_weather_humidity_only',
]
#Benchmarker.benchmark_datasets(datasets, run_count=3, flag="weather_variables", verbose=0)
Benchmarker.plot_benchmark(
    'weather_variables',
    mode='table',
    rot=0,
    secondary_y="",
)

---

# Benchmark : Batch size [OK] DONE

In [None]:

\subsubsection{Model complexity comparison}

Gpu usage
training time
trainable params
...
%matplotlib tk
import Benchmarker
#Benchmarker.benchmark_variable(benchmark_dataset, var_name="batch_size", var_list=[8, 32, 64, 128, 256, 512], override_method='_override_training_settings', run_count=3, verbose=0)
Benchmarker.plot_benchmark('batch_size', mode='lineplot', rot=0)

---

# Benchmark : CPU vs GPU [OK]

In [None]:
%matplotlib tk
import Benchmarker
# The benchmark run is disabled; only the plot call for the 'use_gpu' results is
# executed.
#Benchmarker.benchmark_variable(benchmark_dataset, var_name="use_gpu", var_list=[True,False], override_method='_override_training_settings', run_count=2, verbose=0)
Benchmarker.plot_benchmark(
    'use_gpu',
    mode='barplot',
    rot=0,
    merge_models=True,
    xlabel="GPU vs CPU training",
)

---

# Benchmark custom models

---

# Plot selected models

# Benchmark : Backend [NOK]  
#### Run manually and switch backend by starting python like so : "KERAS_BACKEND=backend python3"
##### The var_list parameter in the "benchmark_variable" call has no effect (the backend is detected automatically), but make sure its length is 1.
##### The floating point precision is set to 32 as CNTK complains about it being 64 (saying it is slower, which is very true) - and 32 being the recommended setting anyway.

In [None]:
%matplotlib tk
import Benchmarker
# Override description selecting 32-bit floats.
# NOTE(review): nothing active in this cell uses it, and the disabled benchmark
# call below does not pass it either — confirm whether it should.
early_override={'method':'_override_model_data_settings', 'variable':'float_precision', 'value':32}
#Benchmarker.benchmark_variable(benchmark_dataset, var_name="backend", var_list=['cntk','tensorflow'], override_method='_override_training_settings', run_count=1, verbose=0, model='GRU')

import Load_forecasting
import matplotlib.pyplot as plt

# Numeric result columns that are averaged per backend.
metric_columns = ['testing_MAPE', 'training_MAPE', 'training_time']

# NOTE: rebinds `df` to a fresh Load_Forecaster, replacing any earlier instance.
df = Load_forecasting.Load_Forecaster()
# .copy() so the relabelling below edits an independent frame, not a column slice
# of the loaded results.
res = df.load_results("backend_benchmark")[metric_columns + ['model_type', 'backend']].copy()
res.loc[res['model_type'] == 'CuDNNLSTM', 'model_type'] = 'LSTM'  # CNTK cannot use CuDNNLSTM
res.loc[res['model_type'] == 'CuDNNGRU', 'model_type'] = 'GRU'

# One panel per model type actually present (at least one so that an empty result
# set still yields a figure); squeeze=False keeps `axes` two-dimensional.
model_groups = list(res.groupby('model_type'))
fig, axes = plt.subplots(1, max(len(model_groups), 1), sharex=True, sharey=True, squeeze=False)

for panel, (model, grp) in zip(axes[0], model_groups):
    # Average the numeric metrics only: including the string 'model_type' column in
    # mean() raises a TypeError on pandas >= 2.0.
    grp.groupby("backend")[metric_columns].mean().plot.bar(fontsize=13, rot=0, ax=panel, secondary_y="training_time")
    panel.set_title("Results for location UK\nModel : {0}".format(model), fontsize=13)
    panel.set_xlabel("Backend", fontsize=13)
    panel.set_ylabel("MAPE", fontsize=13)
    # right_ax is the secondary axis pandas creates for `secondary_y`.
    panel.right_ax.set_ylabel('Seconds', fontsize=13)
    

---

# Benchmark : Floating point precision [OK] DONE

In [None]:
%matplotlib tk
import Benchmarker
# Floating point precision comparison (32 vs 64). The benchmark call is active, so
# executing this cell launches the runs (run_count=3) before plotting.
Benchmarker.benchmark_variable(
    benchmark_dataset,
    var_name="float_precision",
    var_list=[32, 64],
    override_method='_override_model_data_settings',
    run_count=3,
    verbose=0,
)
Benchmarker.plot_benchmark(
    'float_precision',
    mode='table',
    rot=0,
)

# Benchmark models

In [None]:
%matplotlib tk
import Load_forecasting as lf
from keras.layers import CuDNNLSTM, Dropout, Dense, CuDNNGRU, Convolution1D
from keras.models import Sequential
# Build six candidate models (two LSTM, two GRU, two CNN) and train/evaluate each
# of them three times on the dataset loaded below.
# NOTE: rebinds `df` to a fresh Load_Forecaster, replacing any earlier instance.
df = lf.Load_Forecaster()
df.load_data("data_MHK_weather_categorical_time")
# Element [2] scales the recurrent layer widths and [1:3] is the input shape.
# NOTE(review): presumably (samples, timesteps, features) — confirm against
# get_train_data_shape.
X_train_shape = df.get_train_data_shape(RNN=True)

#### LSTM ####

# Four stacked LSTM layers, each 2x X_train_shape[2] units wide, with a linear
# single-unit output layer.
# DONE (UK, NYC, MHK)
lstm1 = Sequential([
            CuDNNLSTM(X_train_shape[2]*2, input_shape=X_train_shape[1:3], return_sequences=True),
            CuDNNLSTM(X_train_shape[2]*2, return_sequences=True),
            CuDNNLSTM(X_train_shape[2]*2, return_sequences=True),
            CuDNNLSTM(X_train_shape[2]*2, return_sequences=True),
            Dense(1, activation='linear')])

# Three stacked LSTM layers, each 4x X_train_shape[2] units wide.
# DONE (UK, NYC, MHK)
lstm2 = Sequential([
            CuDNNLSTM(X_train_shape[2]*4, input_shape=X_train_shape[1:3], return_sequences=True),
            CuDNNLSTM(X_train_shape[2]*4, return_sequences=True),
            CuDNNLSTM(X_train_shape[2]*4, return_sequences=True),
            Dense(1, activation='linear')])


#### GRU ####

# Three stacked GRU layers, each 2x X_train_shape[2] units wide.
# DONE (UK, NYC, MHK)
gru1 = Sequential([
            CuDNNGRU(X_train_shape[2]*2, input_shape=X_train_shape[1:3], return_sequences=True),
            CuDNNGRU(X_train_shape[2]*2, return_sequences=True),
            CuDNNGRU(X_train_shape[2]*2, return_sequences=True),
            Dense(1, activation='linear')])

# Three stacked GRU layers, each 4x X_train_shape[2] units wide.
# DONE (UK, NYC, MHK)
gru2 = Sequential([
            CuDNNGRU(X_train_shape[2]*4, input_shape=X_train_shape[1:3], return_sequences=True),
            CuDNNGRU(X_train_shape[2]*4, return_sequences=True),
            CuDNNGRU(X_train_shape[2]*4, return_sequences=True),
            Dense(1, activation='linear')])


#### CNN ####
# Two 1024-filter convolutions (kernel size 3, "same" padding) followed by a
# single-filter linear convolution as output layer.
# DONE (UK, NYC, MHK)
cnn1 = Sequential([Convolution1D(1024, 3, activation='relu', padding="same", input_shape=X_train_shape[1:3]),
                   Convolution1D(1024, 3, activation='relu', padding="same"),
                   Convolution1D(1,    3, activation='linear', padding="same")])

# Same layout with four 1024-filter convolutions.
# DONE (UK, NY, MHK)
cnn2 = Sequential([Convolution1D(1024, 3, activation='relu', padding="same", input_shape=X_train_shape[1:3]),
                   Convolution1D(1024, 3, activation='relu', padding="same"),
                   Convolution1D(1024, 3, activation='relu', padding="same"),
                   Convolution1D(1024, 3, activation='relu', padding="same"),
                   Convolution1D(1,    3, activation='linear', padding="same")])



# Training configuration shared by every model in this cell.
df._override_training_settings(batch_size=64,  
                              reduce_lr_plateau=False, 
                              early_stopping=True,
                              epochs=200,
                              use_gpu=True,
                              backend='tensorflow', # No effect
                              lossplot=False)

df._override_optimizer(lr=0.002, optimizer_name='Adam')
df._override_model_data_settings(float_precision=64)
# Flag value set on df.db for these runs.
# NOTE(review): presumably the tag the results are stored under — confirm.
df.db['flag'] = 'models_benchmark'

######
#df.db['flag'] = 'multimodels_plot2_benchmark'
#df.db['save_detailed_results'] = True
#####

models = [lstm1, lstm2, gru1, gru2, cnn1, cnn2]

# Three train/predict rounds per model.
# NOTE(review): the same Sequential instance is passed to train_model on every
# round; unless train_model re-initialises the weights, rounds 2 and 3 continue from
# the previous round instead of being independent repetitions — confirm.
for model in models:
    for _ in range(3):
        df.train_model(model, RNN=True)
        df.predict_load(graph=False, store=True)#, plot_groupby='month')

# Plot Benchmark models

In [None]:
import Benchmarker
# Plot the model benchmark for 'UK'.
# NOTE(review): the model benchmark cell loads "data_MHK_weather_categorical_time";
# confirm that 'UK' is the location intended here.
Benchmarker.plot_models_benchs2('UK')

---