In [None]:
import tensorflow as tf
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import plotly.express as px
import numpy as np

import sys
sys.path.append('./rtaUtils')

import wandb, os
from wandb.keras import WandbCallback
os.environ["WANDB_SILENT"] = "true"

from rtaUtils import paths, experiment, data_loading

# Run on CPU only: hide every GPU from TensorFlow, then check that none is left visible.
tf.config.set_visible_devices([], 'GPU')
visible_devices = tf.config.get_visible_devices()
print(visible_devices)
assert all(dev.device_type != 'GPU' for dev in visible_devices)

In [None]:
### Experiment setup ##########################################################
model_type = 'LSTM'

# Feature columns, split into numeric and categorical groups, plus the target column
numeric_feat = [
    'latitude',
    'longitude',
    'altitude',
    'delay',
    'vspeed',
    'speed',
    'day_of_week',
    'track',
    'wind_dir_degrees',
    'wind_speed_kt',
    'visibility_statute_mi',
    'max_temp',
    'min_temp',
    'hav_distance',
    'cloud_base_ft_agl',
]
categoric_feat = [
    'time_of_day',
    'operator',
    'aerodromeOfDeparture',
    'aircraftType',
    'cloud_type',
    'sky_cover',
]
objective = ['RTA']

# Data selection
months = '*'  # alternative pattern: 20220[123456789]
airport = '*'
sampling = 60
from_parquet = True  # Load from parquet files

# Windowing passed to the experiment
lookback = 64
lookforward = 1
shift = -1

# Model hyperparameters
n_units = 20
act_function = 'tanh'
batch_size = 128

# Training length
epochs = 50

In [None]:
# Auxiliary parameters derived from the experiment setup
glob_text = f'{months}-{airport}'

# Hyperparameters bundled for the experiment constructor
model_config = {
    'n_units': n_units,
    'act_function': act_function,
    'batch_size': batch_size,
}

# NOTE(review): "ts"/"nts" presumably mean time-series / non-time-series features — confirm.
ts_features = [
    'latitude',
    'longitude',
    'altitude',
    'vspeed',
    'speed',
    'track',
    'hav_distance',
]
# NOTE(review): 'departureDelay', 'sky_status' and 'clouds' are not listed in
# numeric_feat / categoric_feat (which use 'delay', 'sky_cover', 'cloud_type') — confirm
# that these names are the intended ones.
nts_features = [
    'departureDelay',
    'day_of_week',
    'wind_dir_degrees',
    'wind_speed_kt',
    'visibility_statute_mi',
    'max_temp',
    'min_temp',
    'time_of_day',
    'operator',
    'aerodromeOfDeparture',
    'sky_status',
    'clouds',
]

# All feature groups collected under the keys the experiment receives
feat_dict = {
    'numeric': numeric_feat,
    'categoric': categoric_feat,
    'objective': objective,
    'ts': ts_features,
    'nts': nts_features,
}

## Training process

In [None]:
# Build the experiment object from the configuration cells.
experimento = experiment.ExperimentVanilla(
    months=months, airport=airport, sampling=sampling,
    lookback=lookback, lookforward=lookforward, shift=shift,
    features=feat_dict,
    model_config=model_config,
)
# If the model has been trained before, load_model() loads the latest saved model:
# experimento.load_model()
experimento.model.summary()

In [None]:
# Training
# Additional callbacks can be supplied through add_callbacks
# (original hint: add_callbacks = [wandCallback]).
history = experimento.train(
    epochs=epochs,
    from_parquet=from_parquet,
    add_callbacks=[],
)

In [None]:
# Plot training vs. validation loss per epoch from the experiment's CSV log.
progress = pd.read_csv(experimento.model_path_log)
skip = 0  # number of initial epochs left out of the plot

n_epochs = progress.shape[0]
# Epochs are numbered from 1 and used as string (categorical) x positions.
epoch_labels = [str(epoch) for epoch in range(1 + skip, n_epochs + 1)]

for column, legend_label in (('loss', 'loss'), ('val_loss', 'validation loss')):
    plt.plot(epoch_labels, progress[column].iloc[skip:], label=legend_label)

plt.title(f'{model_type}.u{n_units}lb{lookback}s{sampling}')
plt.xlabel('Epochs')
# NOTE(review): the axis is labelled MAE, which assumes the logged loss is MAE — confirm.
plt.ylabel('MAE')
if n_epochs > 10:
    # Thin the ticks out to roughly five labels on long runs.
    plt.xticks([str(epoch) for epoch in range(1 + skip, n_epochs, n_epochs // 5)])
plt.legend()
plt.tight_layout()
plt.show()

## Evaluation

In [None]:
# Load the model saved under the 'best' tag before running the evaluation cells.
# NOTE(review): presumably the checkpoint with the best validation score — confirm in rtaUtils.experiment.
experimento.load_model('best')

In [None]:
# Evaluate 'val' and then 'test' with identical settings.
eval_kwargs = dict(from_parquet=from_parquet, print_err=True, original_scale=True)
experimento.evaluate('val', **eval_kwargs)
experimento.evaluate('test', **eval_kwargs)

In [None]:
# Run evaluate_at_times for 'val' and then for 'test'.
# NOTE(review): presumably reports the error at different points in time of each trajectory — confirm in rtaUtils.experiment.
experimento.evaluate_at_times('val')
experimento.evaluate_at_times('test')

In [None]:
# NOTE(review): presumably evaluates the model separately for each airport — confirm in rtaUtils.experiment.
experimento.evaluate_airports()

In [None]:
# experimento.get_evaluation_results() extracts a dataframe with the computed results;
# here it is requested with the 'long' option and displayed as the cell output.
experimento.get_evaluation_results('long')

## Report generation

In [None]:
# Export the 'wide' evaluation results to CSV.
# DataFrame.to_csv() does not create missing parent directories, so make sure
# ./results exists first; otherwise a fresh checkout fails here with an OSError.
os.makedirs('./results', exist_ok=True)
experimento.get_evaluation_results('wide').to_csv(
    f'./results/{model_type}_s{sampling}_lb{lookback}_u{n_units}.csv',
    header=True,
    index=False,
    encoding='utf8',
)

## Result visualization

In [None]:
import plotly.express as px

In [None]:
# Reload the exported report for the current model configuration.
report_df = pd.read_csv(
    f'./results/{model_type}_s{sampling}_lb{lookback}_u{n_units}.csv',
    header=0,
    encoding='utf8',
)

In [None]:
# Join the overall ('all') error metrics with the rows of ./data/airports.csv.
# The merge uses pandas' default inner join, so report rows whose 'dataset'
# value has no matching airport 'id' are dropped.
metric_columns = ['MAE all', 'RMSE all', 'MAPE all', 'StDev all', 'Mean all', 'dataset']
overall_metrics = report_df[metric_columns]
airports = pd.read_csv('./data/airports.csv', sep=',')
vis = overall_metrics.merge(airports, left_on='dataset', right_on='id')

In [None]:
# Count the distinct fpId values per departure aerodrome in the data returned by
# load_final_data(months, 'test'). The result is indexed by 'aerodromeOfDeparture'
# and has a single 'fpId' column holding the count.
td = (
    data_loading.load_final_data(months, 'test')[['aerodromeOfDeparture', 'fpId']]
    .drop_duplicates()
    .groupby('aerodromeOfDeparture')
    .count()
)

# Drop any 'fpId' column left by a previous run of this cell before merging.
# Without this, re-running the cell makes pandas suffix the clashing columns
# ('fpId_x' / 'fpId_y') and the maps that use size='fpId' stop working.
# right_on refers to td's index level name, which pandas.merge accepts.
vis = pd.merge(
    vis.drop(columns='fpId', errors='ignore'),
    td,
    left_on='dataset',
    right_on='aerodromeOfDeparture',
)

In [None]:
# Map of the rows in `vis`: marker colour = 'MAE all', marker size = 'fpId' count.
px.scatter_mapbox(
    vis,
    lat='lat',
    lon='lon',
    color='MAE all',
    size='fpId',
    hover_name='dataset',
    hover_data={'MAE all': ':.3f',
                'RMSE all': ':.3f'},
    mapbox_style="open-street-map",
    opacity=1,
    zoom=3.6,
    width=950,
    height=750,
    # Optional colour-scale tuning (disabled):
    # range_color=(50,300),
    # color_continuous_scale='RdYlGn_r', #'bluered' 'Portland'
    # color_continuous_midpoint=150,
)

In [None]:
# Map of the rows in `vis`: marker colour = 'MAPE all', marker size = 'fpId' count.
px.scatter_mapbox(
    vis,
    lat='lat',
    lon='lon',
    color='MAPE all',
    size='fpId',
    hover_name='dataset',
    hover_data={'MAE all': ':.3f',
                'RMSE all': ':.3f'},
    mapbox_style="open-street-map",
    opacity=1,
    zoom=3.6,
    width=950,
    height=750,
    # Optional colour-scale tuning (disabled):
    # range_color=(0,0.2),
    # color_continuous_scale='RdYlGn_r', #'bluered' 'Portland'
    # color_continuous_midpoint=150,
)