In [163]:
import os, shutil
import numpy as np
import pandas as pd
import datetime as dt
from keras import models, layers, optimizers
from keras import preprocessing
import tensorflow as tf

from sklearn.model_selection import train_test_split
from keras_preprocessing.sequence import pad_sequences
from tqdm import tqdm
from keras.preprocessing import sequence
from keras.preprocessing.text import Tokenizer
from keras import backend as K
from keras.models import Sequential
from keras.layers import Flatten, Dense, Embedding, SimpleRNN, LSTM, GRU
from keras.optimizers import RMSprop

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme(style='whitegrid')

from plotly.subplots import make_subplots
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import init_notebook_mode
init_notebook_mode(connected = True)

from utils import plot_val_acc_loss

##  Advanced use of recurrent neural networks

In [2]:
data_dir = r'C:\Users\Cyril\Downloads\jena_climate'
fname = os.path.join(data_dir, 'jena_climate_2009_2016.csv')

In [108]:
parser = lambda date: pd.datetime.strptime(date, '%d.%m.%Y %H:%M:%S')
df = pd.read_csv(fname, parse_dates=['Date Time'], date_parser=parser)
df.set_index('Date Time', inplace=True)
float_data = df.to_numpy()


The pandas.datetime class is deprecated and will be removed from pandas in a future version. Import from datetime module instead.



In [112]:
px.line(df.loc['2009-01-01 00:10:00':'2009-01-11 00:00:00', 'T (degC)'])

In [116]:
mean = float_data[:200000].mean(axis=0)
float_data -= mean
std = float_data[:200000].std(axis=0)
float_data /= std

In [120]:
def generator(data, lookback, delay, min_index, max_index,
              shuffle=False, batch_size=128, step=6):
    if max_index is None:
        max_index = len(data) - delay - 1
    i = min_index + lookback
    while 1:
        if shuffle:
            rows = np.random.randint(
                min_index + lookback, max_index, size=batch_size)
        else:
            if i + batch_size >= max_index:
                i = min_index + lookback
            rows = np.arange(i, min(i + batch_size, max_index))
            i += len(rows)
        samples = np.zeros((len(rows),
                            lookback // step,
                            data.shape[-1]))
        targets = np.zeros((len(rows),))
        for j, row in enumerate(rows):
            indices = range(rows[j] - lookback, rows[j], step)
            samples[j] = data[indices]
            targets[j] = data[rows[j] + delay][1]
        yield samples, targets

In [133]:
lookback = 1440
step = 6
delay = 144
batch_size = 128

train_gen = generator(float_data,
                    lookback=lookback,
                    delay=delay,
                    min_index=0,
                    max_index=200000,
                    shuffle=True,
                    step=step,
                    batch_size=batch_size)
val_gen = generator(float_data,
                    lookback=lookback,
                    delay=delay,
                    min_index=200001,
                    max_index=300000,
                    step=step,
                    batch_size=batch_size)
test_gen = generator(float_data,
                    lookback=lookback,
                    delay=delay,
                    min_index=300001,
                    max_index=None,
                    step=step,
                    batch_size=batch_size)

In [134]:
val_steps = (300000 - 200001 - lookback)
test_steps = (len(float_data) - 300001 - lookback)

In [141]:
val_steps

98559

In [135]:
def evaluate_naive_method():
    batch_maes = []
    for step in tqdm(range(val_steps)):
        samples, targets = next(val_gen)
        preds = samples[:, -1, 1]
        mae = np.mean(np.abs(preds - targets))
        batch_maes.append(mae)
    print(np.mean(batch_maes))
evaluate_naive_method()

100%|██████████| 98559/98559 [15:11<00:00, 108.15it/s]


0.2896994197960971


In [137]:
model = Sequential()
model.add(layers.Flatten(input_shape=(lookback // step, float_data.shape[-1])))
model.add(layers.Dense(32, activation='relu'))
model.add(layers.Dense(1))

In [144]:
model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit(train_gen,
            steps_per_epoch=500,
            epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
model = Sequential()
model.add(GRU(32, input_shape=(None, float_data.shape[-1])))
model.add(Dense(1))
model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit(train_gen,
                    steps_per_epoch=500,
                    epochs=20)

#### Bidirectional RNN

In [165]:
from keras.models import Sequential
from keras import layers
from keras.optimizers import RMSprop
model = Sequential()
model.add(layers.Bidirectional(
    layers.GRU(32), input_shape=(None, float_data.shape[-1])))
model.add(layers.Dense(1))
model.compile(optimizer=RMSprop(), loss='mae')
history = model.fit_generator(train_gen,
                              steps_per_epoch=500,
                              epochs=2)

Epoch 1/2



`Model.fit_generator` is deprecated and will be removed in a future version. Please use `Model.fit`, which supports generators.



Epoch 2/2
