In [1]:
import tensorflow as tf
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np

import sys
sys.path.append('./rtaUtils')
import wandb, os
from wandb.keras import WandbCallback
os.environ["WANDB_SILENT"] = "true"

from rtaUtils import paths, experiment, data_loading, data_preparation

# Hide every GPU from TensorFlow so the notebook runs on CPU only
tf.config.set_visible_devices([], 'GPU')
visible_devices = tf.config.get_visible_devices()
print(visible_devices)
# Sanity check: none of the devices that remain visible may be a GPU
assert all(device.device_type != 'GPU' for device in visible_devices)

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [2]:
### Features ##################################################################
# Other numeric candidates: 'vspeed', 'speed', 'track', 'hav_distance'
numeric_feat = ['latitude', 'longitude', 'altitude']
# Categorical candidate: 'operator'
categoric_feat = []
objective = ['latitude', 'longitude', 'altitude']

# Feature groups bundled in one mapping, handed to the experiment and to the
# window-building helper further down.
feat_dict = {
    'numeric': numeric_feat,
    'categoric': categoric_feat,
    'objective': objective,
}

### Experiment setup ##########################################################
model_type = 'LSTM'
months = '*'  # other values used before: 20220[12] , 202209
airport = '*'
glob_text = '-'.join((months, airport))

# Data and training configuration
lookback = 16
lookforward = 5
sampling = 15
epochs = 15
# True: load from parquet; False: load the tf.data.Datasets
from_parquet = True

# Model hyperparameters. They are handed to the Experiment class as a single
# dictionary (instead of individual kwargs) and processed in its constructor.
n_units = 10
act_function = 'tanh'
batch_size = 128

model_config = {
    'n_units': n_units,
    'act_function': act_function,
    'batch_size': batch_size,
}

## Training process

In [3]:
# Build the experiment from the configuration defined in the previous cell
experiment_kwargs = {
    'lookback': lookback,
    'lookforward': lookforward,
    'sampling': sampling,
    'model_config': model_config,
    'months': months,
    'airport': airport,
    'features': feat_dict,
}
experimento = experiment.ExperimentTrajectory(**experiment_kwargs)
experimento.init_model()
# If the model has been trained before, load_model() loads the latest saved
# model instead:
# experimento.load_model()

experimento.model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 10)                560       
                                                                 
 dense (Dense)               (None, 15)                165       
                                                                 
 reshape (Reshape)           (None, 5, 3)              0         
                                                                 
Total params: 725
Trainable params: 725
Non-trainable params: 0
_________________________________________________________________


In [4]:
# Training: runs for `epochs` epochs with no extra callbacks; the returned value is kept in `history`
history = experimento.train(epochs=epochs, from_parquet=from_parquet, add_callbacks=[]) 
# Extra callbacks can be supplied here, e.g. add_callbacks=[wandCallback] (presumably a WandbCallback instance — TODO confirm)

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089
Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


In [None]:
# Read the per-epoch training log written for this experiment
progress = pd.read_csv(experimento.model_path_log)
skip = 0  # number of initial epochs to leave out of the plot

n_epochs = progress.shape[0]
# Epochs are plotted as string labels (1-based), shared by both curves
epoch_labels = [str(epoch) for epoch in range(1 + skip, n_epochs + 1)]

for column, legend_label in (('loss', 'loss'), ('val_loss', 'validation loss')):
    plt.plot(epoch_labels, progress[column].iloc[skip:], label=legend_label)

plt.title(f'{model_type}.u{n_units}lb{lookback}s{sampling}')
plt.xlabel('Epochs')
plt.ylabel('MAE')
# With more than 10 epochs, thin out the x ticks to keep the axis readable
if n_epochs > 10:
    tick_step = n_epochs // 5
    plt.xticks([str(epoch) for epoch in range(1 + skip, n_epochs, tick_step)])
plt.legend()
plt.tight_layout()
plt.show()

## Evaluation

In [None]:
# Load the best model (the one stored under the 'best' tag) before evaluating
experimento.load_model('best')

In [None]:
# experimento.evaluate() evaluates the model on the validation and test sets.
# To disable on-screen printing, switch to print_err=False.
experimento.evaluate(from_parquet=from_parquet, print_err=True)

In [None]:
# experimento.get_evaluation_results() returns a dataframe with the computed
# results; here it is requested in 'long' format and displayed as the cell output.
experimento.get_evaluation_results('long')

## Report generation

In [None]:
# Export the evaluation results ('wide' format) to CSV.
# The output directory is created first: to_csv does not create missing parent
# directories, so on a fresh checkout without ./results the export would fail.
os.makedirs('./results', exist_ok=True)
experimento.get_evaluation_results('wide')\
           .to_csv(f'./results/{model_type}_s{sampling}_lb{lookback}_lf{lookforward}_u{n_units}.csv', 
                 header=True,index=False, encoding='utf8')

## Visualization

In [None]:
# Load the test split for 202201, ordered by flight plan and time.
# The configured `sampling` is used (instead of a hard-coded 15) so the data
# shown here always matches the sampling the experiment was built with.
data = (data_loading.load_final_data('202201', 'test', sampling=sampling)
                    .sort_values(['fpId', 'timestamp']))
# Row count per (fpId, aerodromeOfDeparture), taken from the vectorId column
# and sorted by departure aerodrome and then by that count.
trajectories = (data.groupby(['fpId','aerodromeOfDeparture']).count().vectorId.reset_index()
                    .sort_values(['aerodromeOfDeparture', 'vectorId']))
# Show the first 10 entries departing from LEBL
trajectories[trajectories.aerodromeOfDeparture == 'LEBL'].head(10)

In [None]:
# Pick one or more trajectories to evaluate (another id used before: 'AT05486226')
fpId_trajectory = ['AT05788200']
# NOTE: `data` is overwritten with the filtered rows, so the loading cell must
# be re-run before selecting a different set of flight plan ids.
is_selected = data['fpId'].isin(fpId_trajectory)
data = data.loc[is_selected]

In [None]:
# Build the input windows for the selected trajectories, using the encoders
# and scaler held by the experiment
raw_windows = data_preparation.get_windows(
    data,
    lookback,
    experimento.encoders,
    experimento.scaler,
    features=feat_dict,
)
windows = experimento._format_data(raw_windows)

# Run the model over the windows in batches of `batch_size`
batched_windows = windows.batch(batch_size)
predictions = experimento.model.predict(batched_windows)

In [None]:
df_viz = data[['latitude','longitude','altitude']].copy()

# The model summary shows an output of shape (n_windows, lookforward, n_objectives),
# which cannot be reshaped to (n_windows, n_objectives) when lookforward > 1.
# For 3-D predictions only the first forecast step of each window is kept;
# 2-D predictions are handled exactly as before.
n_windows = predictions.shape[0]
if predictions.ndim == 3:
    step_predictions = predictions[:, 0, :]
else:
    step_predictions = predictions.reshape((n_windows, len(objective)))

# Undo the scaling. The predictions are left-padded with zero columns (one per
# numeric feature) before calling the scaler, and only the trailing objective
# columns are kept afterwards.
# NOTE(review): this presumes the scaler was fitted on the numeric features
# followed by the objective columns — confirm against the scaler's fit.
padding = np.zeros((n_windows, len(numeric_feat)))
pred_unsc = experimento.scaler.inverse_transform(
    np.concatenate([padding, step_predictions], axis=1)
    )[:,-len(objective):]

# Append the predictions below the real values so both can be drawn on the map,
# and label each row as real ('real') or predicted ('predicho')
df_viz = pd.concat([df_viz, pd.DataFrame(pred_unsc, columns=df_viz.columns)], axis=0)
df_viz['real'] = 'real'
df_viz.iloc[-len(predictions):,-1] = 'predicho'

# Give each prediction the same value in the "index" column as the last vector
# of the window that produced it (to align them in the altitude, longitude and
# latitude profiles).
# Caveat: this fails if the trajectory is split into pieces.
df_viz = df_viz.reset_index()
df_viz.iloc[-pred_unsc.shape[0]:,0] = df_viz.iloc[lookback-1:-pred_unsc.shape[0],0].values

df_viz

In [None]:
# Map of real vs. predicted positions, coloured by the 'real' label column
fig_map = px.scatter_mapbox(
    df_viz,
    lat='latitude',
    lon='longitude',
    color='real',
    opacity=1,
    zoom=6.5,
    width=900,
    height=500,
    mapbox_style="open-street-map",
)
fig_map

In [None]:
# Altitude profile: real vs. predicted values against the aligned "index" column
fig_altitude = px.scatter(
    df_viz,
    x='index',
    y='altitude',
    color='real',
    opacity=1,
    width=600,
    height=400,
    title='Altitud',
)
fig_altitude

In [None]:
# Longitude profile: real vs. predicted values against the aligned "index" column
fig_longitude = px.scatter(
    df_viz,
    x='index',
    y='longitude',
    color='real',
    opacity=1,
    width=600,
    height=400,
    title='Longitud',
)
fig_longitude

In [None]:
# Latitude profile: real vs. predicted values against the aligned "index" column
fig_latitude = px.scatter(
    df_viz,
    x='index',
    y='latitude',
    color='real',
    opacity=1,
    width=600,
    height=400,
    title='Latitud',
)
fig_latitude