## Model Train 1 - Tesis Javier-Uriel

### Importamos algunas librerías que nos serán útiles más adelante

In [1]:
# Standard library and third-party imports used throughout the notebook.
import os
import json
import random
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import csv

import tensorflow as tf
from tensorflow.keras import models, layers
# The notebook is pinned to one exact TensorFlow release; any other version aborts here.
assert (tf.__version__=='2.4.1'), 'Versión incorrecta de Tensorflow, por favor instale 2.4.1'
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from IPython.display import clear_output

pd.set_option('display.max_columns', None) # Show every column when displaying DataFrames

# NOTE(review): `gc` is imported twice below; the second line also enables automatic collection.
import gc # garbage collector
import gc; gc.enable()

Num GPUs Available:  1


In [2]:
# Ask TensorFlow to allocate GPU memory on demand for every visible GPU.
gpus = tf.config.list_physical_devices('GPU')
try:
    # Memory growth currently has to be configured identically on all GPUs.
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
    if gpus:
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
except RuntimeError as e:
    # Memory growth must be set before the GPUs have been initialized.
    print(e)

1 Physical GPUs, 1 Logical GPUs


### Leemos el Dataset

In [3]:
# Dataset containing only movements along Z.
directory = "../logs/Datasets"
dataset_name = "/Dataset_Z10_Alle"

# Number of shifted (lagged) copies that pandas_read creates for each state column.
ORDER = 3

# Action (label) columns.
rpm_list = [f'RPM{motor}' for motor in range(4)]

# Base state (feature) columns, before any lagged copies are appended.
states_list_org = [
    "vz", "az", "uvz",
    "p", "q",
    "wp", "wq",
    "ap", "aq",
]

# Working list of feature columns; it is extended later with the lagged names.
states_list = list(states_list_org)

In [4]:
# Folder that will hold the processed train/validation/test CSV files.
path = directory+"_train"+dataset_name
if not os.path.exists(path):
    try:
        # makedirs also creates missing parent folders (e.g. "../logs/Datasets_train"),
        # which a plain mkdir would reject with an OSError.
        os.makedirs(path, exist_ok=True)
    except OSError:
        print ("Creation of the directory %s failed" % path)
    else:
        print ("Successfully created the directory %s " % path)
else:
    print(f"{path} already exist")

Successfully created the directory ../logs/Datasets_train/Dataset_Z10_Alle 


In [5]:
def pandas_read():
    """Load every CSV log of the dataset folder and append lagged state columns.

    Each ``.csv`` file in ``directory + dataset_name`` is read into its own
    DataFrame. For every column currently listed in the global ``states_list``
    and every lag ``n`` in ``1..ORDER``, a column named ``<column><n>`` is added
    that holds the original column shifted by ``n`` rows, with the missing
    leading values filled with 0. The shift is applied file by file, so lagged
    values never come from a different log.

    Side effects:
        Extends the global ``states_list`` in place with the lagged column
        names, ordered lag by lag (all columns at lag 1, then lag 2, ...).
        It is meant to be called once on a ``states_list`` that holds only the
        base columns.

    Returns:
        pandas.DataFrame: the row-wise concatenation of all per-file frames.

    Raises:
        FileNotFoundError: if the folder contains no ``.csv`` file.
    """
    global states_list
    data_dir = directory + dataset_name

    # Snapshot the base columns so the lag names are computed exactly once,
    # independently of how many files are read.
    base_columns = list(states_list)
    lagged_columns = [column + str(n)
                      for n in range(1, ORDER + 1)
                      for column in base_columns]

    dfs = []
    # Sorted so that the concatenation order (and therefore the seeded
    # train/validation/test sampling done later) is reproducible.
    for filename in sorted(os.listdir(data_dir)):
        if not filename.endswith(".csv"):
            continue
        df = pd.read_csv(os.path.join(data_dir, filename))
        # Append the previous ORDER samples of every state column.
        for n in range(1, ORDER + 1):
            for column in base_columns:
                df[column + str(n)] = df[column].shift(periods=n, fill_value=0)
        dfs.append(df)

    if not dfs:
        raise FileNotFoundError(f"No .csv files found in {data_dir}")

    states_list += lagged_columns
    return pd.concat(dfs)

In [6]:
# Start from the base columns so that re-running this cell does not append the
# lagged names a second time.
states_list = states_list_org.copy()
if not os.path.isfile(path+"/train"+".csv"):
    # No processed split on disk yet: read the raw logs. pandas_read() also
    # extends states_list with the lagged column names.
    dataset = pandas_read()
else:
    # Processed CSVs already exist: only the column names are needed. They are
    # rebuilt lag by lag, the same order pandas_read() uses, so states_list
    # matches the column order of the saved files.
    for n in range(1,ORDER+1):
        for column in states_list_org:
            states_list.append(column+str(n))

### Estados repetidos

En este caso se eliminan estados repetidos y estados que se encuentren en estado transitorio mientras el dron despega o se estabiliza antes de introducir la señal de control.

In [None]:
if not os.path.isfile(path+"/train"+".csv"):
    # Remember the size before filtering to report how many rows were removed.
    shape_b4 = dataset.shape
    # Timestamps make every row unique, so they are removed before de-duplicating.
    dataset = dataset.drop(columns=["timestamps"])
    dataset = dataset.drop_duplicates()
    shape_drop = dataset.shape
    print(f'shape_drop={shape_drop}')
    print(f'len (b4 drop) - len = {shape_b4[0]-shape_drop[0]}')

### División del dataset en estados y acciones

In [None]:
if not os.path.isfile(path+"/train"+".csv"):
    # The RPM columns are the actions (labels) of the model.
    actions = dataset[rpm_list]
    # An expression nested inside an `if` is not auto-displayed by the
    # notebook, so the summary has to be printed explicitly.
    print(actions.describe())

#### Normalización de acciones

In [None]:
def normalize_df(df, K=21666.4475, B=14468.4292):
    """Apply the affine normalisation ``(df - B) / K`` to a DataFrame.

    Args:
        df: DataFrame (or any object supporting ``-`` and ``/`` with scalars)
            to normalise. It is not modified.
        K: scale (divisor) of the normalisation.
        B: offset subtracted before scaling.

    Returns:
        tuple: ``(df_norm, K, B)`` -- the normalised data together with the
        constants used, so the caller can invert it with ``df_norm * K + B``.
    """
    # Operate on the argument, not on a notebook-level global.
    df_norm = (df - B) / K
    return df_norm, K, B

In [None]:
if not os.path.isfile(path+"/train"+".csv"):
    # Normalise the actions; K and B (scale and offset) are kept as notebook
    # variables so later cells can reuse them.
    actions, K, B = normalize_df(actions)
    # The raw RPM columns are removed from `dataset`; the normalised version
    # now lives in `actions`.
    dataset = dataset.drop(columns=rpm_list)

#### Definimos los estados

In [None]:
if not os.path.isfile(path+"/train"+".csv"):
    # Keep only the feature columns listed in states_list (base + lagged).
    states = dataset[states_list]
    print(f'columns = {states.columns}')
    print(f'shape = {states.shape}')
    # Rebuild `dataset` as [features | normalised actions]; XY_split relies on
    # the features coming first.
    dataset = pd.concat([states, actions], axis=1)
    # Uncomment to preview the first feature rows:
    #states.head()

#### Guardamos el dataset en un nuevo archivo .csv

In [None]:
if not os.path.isfile(path+"/train"+".csv"):
    # Hold out a seeded 10 % random sample as the test set and save it.
    dataset_test = dataset.sample(frac =.10, random_state = 10)
    dataset_test.to_csv(path+"/test.csv", index=False)
    # Anti-join: the outer merge (on all shared columns) tags each row with its
    # origin in `_merge`; keeping "left_only" leaves the rows of `dataset` that
    # are not in the test sample.
    dataset = pd.merge(dataset,dataset_test, indicator=True, how='outer').query('_merge=="left_only"').drop('_merge', axis=1)
    # Free the sample as soon as it is on disk.
    del dataset_test
    # Same procedure for validation: 20 % of the remaining rows.
    dataset_val = dataset.sample(frac =.20, random_state = 10)
    dataset_val.to_csv(path+"/validation.csv", index=False)
    dataset = pd.merge(dataset,dataset_val, indicator=True, how='outer').query('_merge=="left_only"').drop('_merge', axis=1)
    del dataset_val
    # What is left is the training set; frac=1 shuffles it (seeded) before saving.
    dataset.sample(frac =1, random_state = 42).to_csv(path+"/train.csv", index=False)
    # From here on the data is streamed from the CSV files, so the in-memory
    # frames are released.
    del dataset
    del states, actions

### Creamos dataset de Tensorflow a partir de csv
Esto es con el fin de no cargar todo el dataset.

In [None]:
# NOTE(review): eager execution is the default in TensorFlow 2.x, so this call
# is presumably redundant with the pinned 2.4.1 -- confirm before removing.
tf.compat.v1.enable_eager_execution()

In [None]:
class YieldReadCSV:
    """Stream a numeric CSV file as batches of rows without loading it whole.

    Args:
        file_path: folder containing the file (defaults to the notebook's
            processed-dataset folder).
        file: file name, appended verbatim to ``file_path`` (hence the
            leading slash in the default).
        skip_rows: number of leading lines to ignore (1 skips the header).
        batch_size: number of rows per yielded batch.
        drop_remainder: when True, a final batch with fewer than
            ``batch_size`` rows is discarded; when False (default) it is
            yielded so that no sample is lost.
    """

    def __init__(self, file_path=path, file="/train.csv", skip_rows=1, batch_size=512,
                 drop_remainder=False):
        self.file_path = file_path
        self.file = file
        self.skip_rows = skip_rows
        self.batch_size = batch_size
        self.drop_remainder = drop_remainder

    def read_csv(self):
        """Yield successive batches, each a list of rows (lists of floats).

        Every call re-opens the file and starts from the beginning, which
        lets ``tf.data.Dataset.from_generator`` restart it for each pass.

        Raises:
            ValueError: if a cell cannot be converted to ``float``.
        """
        inputs = []
        # newline='' is the opening mode the csv module documents for readers.
        with open(self.file_path+self.file, 'r', newline='') as csvfile:
            data = csv.reader(csvfile, delimiter=',')
            for index, row in enumerate(data):
                # Skip the header line(s) and blank lines; a blank line would
                # otherwise add an empty row and make the batch ragged.
                if index < self.skip_rows or not row:
                    continue
                inputs.append([float(i) for i in row])
                if len(inputs) >= self.batch_size:
                    yield inputs
                    inputs = []
        # Emit the trailing partial batch instead of silently dropping it.
        if inputs and not self.drop_remainder:
            yield inputs
            
def XY_split(x):
    """Split a 2-D batch column-wise into ``(features, labels)``.

    The first ``len(states_list)`` columns are returned as features and all
    remaining columns as labels.
    """
    n_features = len(states_list)
    return x[:, :n_features], x[:, n_features:]

In [None]:
# Rows per batch streamed from train.csv.
BATCH_SIZE_TRAIN = 1024
# Rows per batch streamed from validation.csv and test.csv.
BATCH_SIZE_VAL = 1024

In [None]:
# Training pipeline: stream batches from train.csv, then split each batch
# into (features, labels).
train_generator = YieldReadCSV(file="/train.csv", batch_size=BATCH_SIZE_TRAIN)
dataset_train = tf.data.Dataset.from_generator(
    train_generator.read_csv,
    output_types=tf.float32,
    output_shapes=(None, None),
).map(XY_split)

In [None]:
# Validation pipeline: stream batches from validation.csv, then split each
# batch into (features, labels).
val_generator = YieldReadCSV(file="/validation.csv", batch_size=BATCH_SIZE_VAL)
dataset_val = tf.data.Dataset.from_generator(
    val_generator.read_csv,
    output_types=tf.float32,
    output_shapes=(None, None),
).map(XY_split)

In [None]:
# Test pipeline: stream batches from test.csv, then split each batch into
# (features, labels).
test_generator = YieldReadCSV(file="/test.csv", batch_size=BATCH_SIZE_VAL)
dataset_test = tf.data.Dataset.from_generator(
    test_generator.read_csv,
    output_types=tf.float32,
    output_shapes=(None, None),
).map(XY_split)

## Keras Model

## Callbacks

#### Early Stopping

In [None]:
# Stop training once the validation loss has gone 15 epochs without improving.
Early_Stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)

#### Plotting

In [None]:
class PlotLosses(tf.keras.callbacks.Callback):
    """Keras callback that redraws the loss curves after every epoch.

    Attributes set during training:
        i: number of epochs recorded so far.
        x: epoch indices.
        losses / val_losses: values of ``loss`` / ``val_loss`` per epoch
            (``None`` where the key was missing from the logs).
        logs: a copy of the logs dict of every epoch.
        fig: the most recently drawn figure (``None`` before the first epoch).
    """

    def on_train_begin(self, logs=None):
        """Reset the recorded history at the start of training."""
        self.i = 0
        self.x = []
        self.losses = []
        self.val_losses = []
        self.logs = []
        # A figure is created per redraw in on_epoch_end; creating one here
        # would only leave an unused empty figure behind.
        self.fig = None

    def on_epoch_end(self, epoch, logs=None):
        """Record the epoch's losses and redraw the plot in the cell output."""
        # Copy so the history never aliases a dict owned (or reused) by the caller.
        logs = dict(logs or {})

        self.logs.append(logs)
        self.x.append(self.i)
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))
        self.i += 1

        # Replace the previous plot instead of stacking one figure per epoch.
        clear_output(wait=True)
        self.fig, ax = plt.subplots()
        ax.plot(self.x, self.losses, label="loss")
        # val_loss is missing when training runs without validation data;
        # only the epochs that actually reported it are drawn.
        val_points = [(step, value)
                      for step, value in zip(self.x, self.val_losses)
                      if value is not None]
        if val_points:
            val_x, val_y = zip(*val_points)
            ax.plot(val_x, val_y, label="val_loss")
        ax.set_yscale('log')
        ax.set_xlabel('epoch')
        ax.legend()
        plt.show()
        # Release the figure so long trainings do not accumulate open figures.
        plt.close(self.fig)

plot_losses = PlotLosses()

#### Definición del Modelo

In [None]:
# Fully connected network: one input per state column, two ReLU hidden layers
# (4x and 2x the input width) and one tanh output per RPM column.
inputs = tf.keras.Input(shape=(len(states_list),), name='inputs')
x = inputs
for width_factor in (4, 2):
    x = layers.Dense(width_factor*len(states_list), activation=tf.nn.relu)(x)
outputs = layers.Dense(len(rpm_list), activation=tf.nn.tanh)(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)

#### Compilación del Modelo

In [None]:
# Mean squared error is used both as the training loss and as the reported metric.
metrics = ['mean_squared_error']
opt = tf.keras.optimizers.Adam(learning_rate=0.0025)
model.compile(loss='mean_squared_error', optimizer=opt, metrics=metrics)
# Print the layer-by-layer summary of the compiled model.
model.summary()

#### Entrenamiento del Modelo

In [None]:
%%time
LEN_DT = 200000
EPOCHS =  250

history = model.fit(dataset_train.repeat(), 
                    epochs=EPOCHS, 
                    steps_per_epoch = LEN_DT//BATCH_SIZE_TRAIN,
                    callbacks=[Early_Stopping, plot_losses], 
                    verbose=1,
                    validation_data = dataset_val.repeat(),
                    validation_steps= (LEN_DT*0.2)//BATCH_SIZE_VAL)

#### Evaluación del Modelo

In [None]:
# dataset_test is finite (it is not repeated), so it is evaluated until it is
# exhausted. A fixed step count derived from LEN_DT would ask for more batches
# than the 10 % split written to test.csv is expected to contain.
loss, mean_sq = model.evaluate(dataset_test)
K = 21666.4475 # Actuator gain (normalisation scale)
B = 14468.4292 # Normalisation offset
# Targets were scaled as (rpm - B) / K, so the MSE is in squared normalised
# units. The error expressed in RPM is the root of the MSE times K.
print(f'mean_sq: {mean_sq} -> RMSE {(mean_sq**0.5)*K} RPM')
print(f'loss: {loss} -> RMSE {(loss**0.5)*K} RPM')

#### Se guarda el Modelo

In [None]:
# Index appended to the file name to tell saved models apart.
I = 2
# Make sure the target folder exists; saving fails if it is missing.
os.makedirs('../Models', exist_ok=True)
model.save(f'../Models/{dataset_name}_{I}.h5')

In [None]:
# Reload the model from the file written by the save cell (same name pattern).
model = tf.keras.models.load_model(f'../Models/{dataset_name}_{I}.h5')

In [None]:
%%time
x_test = [0]*len(states_list)
x_test[0] = 0
x_test[1] = 0
x_test[2] = 0
print(model.predict([list(x_test)]))
print(model.predict([list(x_test)])*K+B)

In [None]:
# %%time
# for index, sample in dataset_test.take(1):
#     for i in sample:
#         print(model.predict([list(i)])*K['K'])