#### Imports and random seeds

In [3]:
import numpy as np
import os
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import tensorflow_model_optimization as tfmot
import tensorflow.lite as tflite

In [34]:
#setup a random seed
seed = 42

tf.random.set_seed(seed)
np.random.seed(seed)

In [35]:
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
# Make sure we don't get any GPU errors
physical_devices = tf.config.list_physical_devices("GPU")
tf.config.experimental.set_memory_growth(physical_devices[0], True)

#### Data downloading and train-test-val split

In [36]:
zip_path = tf.keras.utils.get_file(
    origin="https://storage.googleapis.com/tensorflow/tf-keras-datasets/jena_climate_2009_2016.csv.zip", 
    fname='jena_climate_2009_2016.csv.zip', 
    extract=True, 
    cache_dir='.', cache_subdir='data')

csv_path, _ = os.path.splitext(zip_path)
df = pd.read_csv(csv_path)
df.head(5)

Unnamed: 0,Date Time,p (mbar),T (degC),Tpot (K),Tdew (degC),rh (%),VPmax (mbar),VPact (mbar),VPdef (mbar),sh (g/kg),H2OC (mmol/mol),rho (g/m**3),wv (m/s),max. wv (m/s),wd (deg)
0,01.01.2009 00:10:00,996.52,-8.02,265.4,-8.9,93.3,3.33,3.11,0.22,1.94,3.12,1307.75,1.03,1.75,152.3
1,01.01.2009 00:20:00,996.57,-8.41,265.01,-9.28,93.4,3.23,3.02,0.21,1.89,3.03,1309.8,0.72,1.5,136.1
2,01.01.2009 00:30:00,996.53,-8.51,264.91,-9.31,93.9,3.21,3.01,0.2,1.88,3.02,1310.24,0.19,0.63,171.6
3,01.01.2009 00:40:00,996.51,-8.31,265.12,-9.07,94.2,3.26,3.07,0.19,1.92,3.08,1309.19,0.34,0.5,198.0
4,01.01.2009 00:50:00,996.51,-8.27,265.15,-9.04,94.1,3.27,3.08,0.19,1.92,3.09,1309.0,0.32,0.63,214.3


In [37]:
column_indices = [2,5]
columns = df.columns[column_indices]
data = df[columns].values.astype(np.float32)

data[:5]

array([[-8.02, 93.3 ],
       [-8.41, 93.4 ],
       [-8.51, 93.9 ],
       [-8.31, 94.2 ],
       [-8.27, 94.1 ]], dtype=float32)

In [38]:
n = len(data)
train_data = data[0:int(n*0.7)]
val_data = data[int(n*0.7):int(n*0.9)]
test_data = data[int(n*0.9):]

In [39]:
mean = train_data.mean(axis=0)
std = train_data.std(axis=0)

###

#### Window generator and Multiple-Output_MAE

In [40]:
class WindowGenerator:
    def __init__(self, input_width, label_width, num_features, mean, std):
        self.input_width = input_width
        self.label_width = label_width
        self.num_features = num_features
        self.mean = tf.reshape(tf.convert_to_tensor(mean), [1, 1, 2])
        self.std = tf.reshape(tf.convert_to_tensor(std), [1, 1, 2])

    def split_window(self, features):
        # features -> set of sequences made of input_width + label_width values each. [#batch, (input+label)_width, 2] 
        inputs = features[:, :-self.label_width, :]
        labels = features[:, -self.label_width:, :]

        inputs.set_shape([None, self.input_width, self.num_features])
        labels.set_shape([None, self.label_width, self.num_features])
        
        return inputs, labels

    def normalize(self, features):
        features = (features - self.mean) / (self.std + 1.e-6)

        return features

    def preprocess(self, features):
        inputs, labels = self.split_window(features)
        inputs = self.normalize(inputs)

        return inputs, labels

    def make_dataset(self, data, reshuffle):
        # Creates a dataset of sliding windows over a timeseries provided as array
        ds = tf.keras.preprocessing.timeseries_dataset_from_array(
                data=data, # consecutive data points
                targets=None, # None -> the dataset will only yield the input data
                sequence_length=self.input_width + self.label_width, # Length of the output sequences
                sequence_stride=1, # Period between successive output sequences
                batch_size=32) # Number of timeseries samples in each batch 
        
        # from each set of sequences it splits data to get input and labels and then normalize
        ds = ds.map(self.preprocess)

        # so the mapping is done only once
        ds = ds.cache()
        if reshuffle:
            ds = ds.shuffle(100, reshuffle_each_iteration=True)

        return ds

In [41]:
class MultiOutputMAE(tf.keras.metrics.Metric):

    def __init__(self, name='mean_absolute_error', **kwargs):
        super().__init__(name=name, **kwargs)
        self.total = self.add_weight('total', initializer='zeros', shape=(2,))
        self.count = self.add_weight('count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None): 
        error = tf.abs(y_pred - y_true)
        error = tf.reduce_mean(error, axis=[0,1])
        self.total.assign_add(error)
        self.count.assign_add(1.)
        return
    
    def reset_state(self):
        self.count.assign(tf.zeros_like(self.count))
        self.total.assign(tf.zeros_like(self.total))
    
    def result(self):
        result = tf.math.divide_no_nan(self.total, self.count)
        return result

In [204]:
input_width = 6
label_width = 6# 3 or 9
num_features = 2

generator = WindowGenerator(input_width, label_width, num_features, mean=mean, std=std)
train_ds = generator.make_dataset(train_data, reshuffle=True)
val_ds = generator.make_dataset(val_data, reshuffle=False)
test_ds = generator.make_dataset(test_data, reshuffle=False)

In [43]:
mlp = tf.keras.Sequential([
    tf.keras.layers.Flatten(), 
    tf.keras.layers.Dense(units=128, activation='relu'), 
    tf.keras.layers.Dense(units=128, activation='relu'), 
    tf.keras.layers.Dense(units=num_features*label_width),
    tf.keras.layers.Reshape([6, 2])
])

In [44]:
loss = tf.losses.MeanSquaredError()
optimizer = tf.optimizers.Adam()
metrics = [MultiOutputMAE()] 

In [45]:
model = mlp
model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

In [49]:
MODEL = 'mlp'
ID = 100

In [46]:
model.fit(train_ds, epochs=20, validation_data=val_ds)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x22e71650f88>

In [47]:
print(model.summary())

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 12)                0         
                                                                 
 dense_6 (Dense)             (None, 128)               1664      
                                                                 
 dense_7 (Dense)             (None, 128)               16512     
                                                                 
 dense_8 (Dense)             (None, 12)                1548      
                                                                 
 reshape_2 (Reshape)         (None, 6, 2)              0         
                                                                 
Total params: 19,724
Trainable params: 19,724
Non-trainable params: 0
_________________________________________________________________
None


In [79]:
MODEL_FILE_NAME = f'{MODEL}_{ID}_model.h5'

if not os.path.exists(f'./models/{MODEL}_{ID}/'):
    os.makedirs(f'./models/{MODEL}_{ID}/')

saved_model_dir = os.path.join(f'./models/{MODEL}_{ID}/', MODEL_FILE_NAME)

model.save(saved_model_dir)

print('File size: ' + str(round(os.path.getsize(saved_model_dir)/1024, 4)) + ' Kilobytes')

File size: 265.875 Kilobytes


In [80]:
loss, error = model.evaluate(test_ds)
print('Error: ', error)

Error:  [0.45298815 1.8760308 ]


#### save test dataset

In [205]:
# tf.data.experimental.save(train_ds, './th_train')
# tf.data.experimental.save(val_ds, './th_val')
tf.data.experimental.save(test_ds, './th_test')

#### save TF Lite Model

In [175]:
MODEL_FILE_NAME = f'{MODEL}_{ID}_model.tflite'

saved_model_dir = os.path.join(f'./models/{MODEL}_{ID}/', MODEL_FILE_NAME)

tf_lite_converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = tf_lite_converter.convert()

open(saved_model_dir, 'wb').write(tflite_model)

INFO:tensorflow:Assets written to: C:\Users\juanm\AppData\Local\Temp\tmp3640ywgl\assets


INFO:tensorflow:Assets written to: C:\Users\juanm\AppData\Local\Temp\tmp3640ywgl\assets


81932

In [176]:
print('File size: ' + str(round(os.path.getsize(saved_model_dir)/1024, 4)) + ' Kilobytes')

File size: 80.0117 Kilobytes


#### On the edge side

In [4]:
# input_width = 6
# label_width = 6# 3 or 9
# num_features = 2

In [11]:
# MODEL = 'mlp'
# ID = 100
# MODEL_FILE_NAME = f'{MODEL}_{ID}_model.tflite'
# saved_model_dir = os.path.join(f'./models/{MODEL}_{ID}/', MODEL_FILE_NAME)

TEST_DIR = './th_test'

In [12]:
tensor_specs = (tf.TensorSpec([None, input_width, num_features], dtype=tf.float32),
 tf.TensorSpec([None,label_width, num_features]))

test_ds = tf.data.experimental.load(TEST_DIR, tensor_specs)

In [13]:
test_ds = test_ds.unbatch().batch(1)
test_ds

<BatchDataset shapes: ((None, 6, 2), (None, 6, 2)), types: (tf.float32, tf.float32)>

In [15]:
interpreter = tf.lite.Interpreter(model_path=saved_model_dir)
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
interpreter.allocate_tensors()

In [37]:
error = 0
for i,(input, labels) in enumerate(test_ds):
    interpreter.set_tensor(input_details[0]['index'], input)
    interpreter.invoke()
    tflite_model_predictions = interpreter.get_tensor(output_details[0]['index'])
    mae = np.mean(np.abs(tflite_model_predictions - labels), axis=1)

    error = error + mae

error_temp = error[0,0]/(i+1)
error_hum = error[0,1]/(i+1)

print('T MAE: ', error_temp)
print('Rh MAE:', error_hum)


0.4529724012219051
1.8758937596622667
