# Get some toy data:

In [196]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Model, layers
import numpy as np
import pandas as pd
import datetime
import importlib
from tensorflow.keras.utils import plot_model
%load_ext tensorboard


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [215]:
# Load the Kabetogama mixed dataset. The relative path uses forward slashes so
# that it resolves on Windows as well as on POSIX systems.
data = pd.read_csv('../Datasets/Kabetogama/MIX_dataset.csv', delimiter=',')

# Replace the 'DateTime' strings (month/day/year hour:minute) with the number
# of minutes elapsed since the first row of the file.
timestamps = pd.to_datetime(data['DateTime'], format="%m/%d/%Y %H:%M")
start = timestamps.iloc[0]  # positional: first row, whatever the index labels are
data['DateTime'] = (timestamps - start).dt.total_seconds() / 60



In [216]:
# Standardise every column (the 'DateTime' minutes column included):
# subtract the column mean, then divide by the column standard deviation.
column_means = data.mean()
column_stds = data.std()
data = (data - column_means) / column_stds

In [218]:
# Preview the first rows of the frame after normalisation.
data.head()

Unnamed: 0,DateTime,ELISA_MC+ANA+SXT,TP_mgL_LAG,Plank_mcyE_cp100mL_LAG,anaC_cp100mL_LAG,sxtA_cp100mL_LAG,WindDirInst_deg,LkLevelChg14day
0,-0.903046,-0.583794,-0.053502,-0.259385,-0.39974,-0.493282,1.639099,1.978509
1,-0.902916,-0.583794,0.083683,-0.2597,-0.373148,-0.485208,1.186809,1.468409
2,-0.86566,-0.583794,0.495238,-0.252598,-0.345243,-0.489533,0.463145,-1.83024
3,-0.86553,-0.583794,0.769609,-0.2597,-0.322262,-0.459257,-0.260519,-1.388153
4,-0.865389,-0.293376,0.906794,-0.2597,-0.333753,-0.34752,-1.617389,-0.214922


In [219]:
# Build a transformer that predicts a 1-D time series one step ahead from the
# 10 previous measurements. (The fixed window length is not strictly necessary,
# but it allows the data to be processed in batches.)
import transformers as tr
importlib.reload(tr)  # re-import so edits to the module are picked up without restarting the kernel

input_shape = (10, 2)  # 10 time steps, 2 values per step
inputs = layers.Input(input_shape)  # NOTE: currently not wired into the model below

transformer_config = dict(
    input_shape=input_shape,
    head_size=6,
    num_heads=6,
    ff_dim=20,
    num_transformer_blocks=6,
    mlp_units=[10, 10, 10],
    n_out=1,
    dropout=0.2,
    mlp_dropout=0.3,
)
trans = tr.transformer(**transformer_config)

# The object returned by the builder is used directly as the model. The
# alternative of feeding `inputs` through a BatchNormalization layer and
# wrapping the result in Model(inputs, out) is left disabled.
model = trans

In [220]:
# Print a layer-by-layer text summary of the model.
# Graphical alternative, currently disabled:
#plot_model(model, show_shapes=True, show_layer_names=False)
model.summary()

Model: "model_8"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_11 (InputLayer)           [(None, 10, 2)]      0                                            
__________________________________________________________________________________________________
multi_head_attention_36 (MultiH (None, 10, 2)        398         input_11[0][0]                   
                                                                 input_11[0][0]                   
__________________________________________________________________________________________________
dropout_90 (Dropout)            (None, 10, 2)        0           multi_head_attention_36[0][0]    
__________________________________________________________________________________________________
layer_normalization_72 (LayerNo (None, 10, 2)        4           dropout_90[0][0]           

In [221]:

def input_pipeline(ds, buffer_size=1000, batch_size=10):
    """Shuffle a dataset and group its elements into batches.

    Args:
        ds: Object exposing ``shuffle(n)`` and ``batch(n)`` methods; this
            notebook passes ``tf.data.Dataset`` instances.
        buffer_size: Value forwarded to ``ds.shuffle``. Defaults to 1000.
        batch_size: Value forwarded to ``ds.batch``. Defaults to 10.

    Returns:
        The result of ``ds.shuffle(buffer_size).batch(batch_size)``.
    """
    return ds.shuffle(buffer_size).batch(batch_size)
# Select the time series of interest: the time axis plus the measured quantity.
target_column = 'ELISA_MC+ANA+SXT'
serie = data[['DateTime', target_column]]
n_sample = 10   # rows per input window (the model is built with input_shape = (10, 2))
n_outputs = 1   # future steps to predict (the model is built with n_out = 1)

# Number of (window, target) pairs that fit entirely inside the series.
n_windows = len(serie) - n_sample - n_outputs + 1
if n_windows <= 0:
    raise ValueError(
        f"series of length {len(serie)} is too short for n_sample={n_sample} "
        f"and n_outputs={n_outputs}"
    )

serie_values = serie.to_numpy()
target_values = serie[target_column].to_numpy()

# Inputs: sliding windows of n_sample consecutive rows, each of shape (n_sample, 2).
context = [serie_values[i:i + n_sample] for i in range(n_windows)]

# Targets: the n_outputs values of the measured column that directly follow
# each window, each of shape (n_outputs,).
# - The slice end is n_sample + i + n_outputs, so exactly n_outputs steps are
#   taken (an end of "... + n_outputs - 1" would drop the last step).
# - Only the measured column is kept. With a two-column target and a
#   single-output model the MSE loss would broadcast and also fit 'DateTime'.
# NOTE(review): assumes the model emits n_out values per sample -- confirm
# against tr.transformer.
target = [target_values[n_sample + i:n_sample + i + n_outputs] for i in range(n_windows)]

# Train on the first n_train windows and validate on the rest.
# NOTE(review): this split was described as "the first half"; confirm that 20
# is still about half of n_windows for the current dataset.
n_train = 20
dt = tf.data.Dataset.from_tensor_slices((context[:n_train], target[:n_train]))
dt = input_pipeline(dt)
dv = tf.data.Dataset.from_tensor_slices((context[n_train:], target[n_train:]))
dv = input_pipeline(dv)


In [222]:
# Configure training: Adam optimizer with a mean-squared-error objective.
# The loss function is taken from `tf.keras.losses`, the namespace for
# training objectives, rather than from `tf.keras.metrics`.
model.compile(optimizer='Adam',
              loss=tf.keras.losses.mean_squared_error)

In [223]:
# Early-stopping callback: monitors the validation loss (lower is better) with
# a patience of 2000 epochs, and asks Keras to restore the best weights seen
# when it stops the training.
early_stopping_options = {
    "monitor": "val_loss",
    "mode": "min",
    "patience": 2000,
    "restore_best_weights": True,
}
savebest = keras.callbacks.EarlyStopping(**early_stopping_options)
%tensorboard --logdir logs

Reusing TensorBoard on port 6006 (pid 12524), started 0:21:12 ago. (Use '!kill 12524' to kill it.)

In [224]:
# Train silently (verbose=0) for at most 100000 epochs, validating on `dv`.
# Early stopping and TensorBoard logging are attached as callbacks; the
# returned History object is kept in `h`.
log_dir = 'logs/norm'
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir)
h = model.fit(
    dt,
    validation_data=dv,
    epochs=100000,
    verbose=0,
    callbacks=[savebest, tensorboard_callback],
)

KeyboardInterrupt: 