<a href="https://colab.research.google.com/github/CrowdSalat/ann-dynamical-systems/blob/master/baseline_model_scaled_100_epochs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# overview

Uses actions and an input state to predict the output state.

# imports

In [0]:
# Ask Colab for the TensorFlow 2.x runtime before TensorFlow is imported below.
try:
  # %tensorflow_version only exists in Colab.
  %tensorflow_version 2.x
except Exception:
  # Best effort: a failure of the magic is deliberately ignored.
  pass

TensorFlow 2.x selected.


In [0]:
import pandas as pd
import numpy as np
import os
import sys
import datetime
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


import tensorflow as tf
print(tf.__version__)
from tensorflow import keras

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import *
from tensorflow.keras import metrics

2.0.0


In [0]:
# Print the local compute devices that TensorFlow can see.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 1437889407055152046
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 8298921637337825557
physical_device_desc: "device: XLA_CPU device"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 2784346573654397340
physical_device_desc: "device: XLA_GPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 11330115994
locality {
  bus_id: 1
  links {
  }
}
incarnation: 14365543338747744948
physical_device_desc: "device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7"
]


# mount drive

In [0]:
# Name of the dataset CSV; it is passed to load_xplane_csv as a relative path,
# i.e. it is resolved against the working directory.
file = 'dataset_all.csv'

In [0]:
from google.colab import drive
# Mount Google Drive into the Colab file system.
drive.mount('/content/drive')

# Make the data folder the working directory so that the relative paths used
# in this notebook (dataset CSV, checkpoints, logs, diagrams) resolve inside it.
root_dir = '/content/drive/My Drive/xplane_data' 
os.chdir(root_dir)
print ('working dir: ' + os.getcwd())

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive
working dir: /content/drive/My Drive/xplane_data


# load and prepare data

## define features (actions & states)
Actions and states at time t are the input of the network.

The states at time t + 1 are the desired output of the network.


In [0]:
# Column headers exactly as they appear in the CSV (including the padding
# blanks around each name).
# NOTE(review): in the usual flight-dynamics convention P/Q/R are the roll/
# pitch/yaw rates and beta is the sideslip angle, which does not match the
# variable names below — confirm the mapping. Only the names are affected.

# --- time columns ---
col_m_time = '   missn,_time '
col_t_time = '   _totl,_time '
cols_time = [col_m_time, col_t_time]

# --- state columns ---
col_yaw_v = '   ____Q,rad/s '
col_yaw_deg = '   _beta,__deg '
col_roll_deg = '   _roll,__deg '
col_roll_v = '   ____R,rad/s '
col_pitch_deg = '   pitch,__deg '
col_pitch_v = '   ____P,rad/s '
col_alt = '   __alt,ftmsl '
col_vv = '   __VVI,__fpm '
col_v = '   Vtrue,_ktas '
col_rpm_prop = '   rpm_1,_prop '
col_throttle = '   thro1,_part '

cols_state = [
    col_yaw_v, col_yaw_deg,
    col_roll_deg, col_roll_v,
    col_pitch_deg, col_pitch_v,
    col_vv, col_alt, col_v,
    col_rpm_prop, col_throttle,
]

# --- action columns (servo commands) ---
col_elev_servo = '   _elev,servo '
col_ailrn_servo = '   ailrn,servo '
col_ruddr_servo = '   ruddr,servo '
cols_action = [col_elev_servo, col_ailrn_servo, col_ruddr_servo]

# Network input: every state column followed by every action column.
cols_features = cols_state + cols_action

# --- organisational columns ---
col_dataset_index = 'dataset_index'
col_maneuver_index = 'maneuver_index'
col_maneuver_name = 'maneuver_name'
col_maneuver_datapoint_index = 'maneuver_datapoint_index'
col_maneuver_type = 'maneuver_type'

# Network output: one label column per state column, marked with a prefix.
label_prefix = 't+1_'
cols_label = [label_prefix + state_name for state_name in cols_state]
print(cols_label)


['t+1_   ____Q,rad/s ', 't+1_   _beta,__deg ', 't+1_   _roll,__deg ', 't+1_   ____R,rad/s ', 't+1_   pitch,__deg ', 't+1_   ____P,rad/s ', 't+1_   __VVI,__fpm ', 't+1_   __alt,ftmsl ', 't+1_   Vtrue,_ktas ', 't+1_   rpm_1,_prop ', 't+1_   thro1,_part ']


## load data

In [0]:
def load_xplane_csv(csv_path, columns):
    """Read the given columns of a comma-separated file into a DataFrame.

    Args:
        csv_path: Path (or file-like object) handed to ``pandas.read_csv``.
        columns: Column names to load; all other columns are skipped.

    Returns:
        A DataFrame with the requested columns. pandas keeps the column
        order of the file, not the order of ``columns``.
    """
    read_options = dict(delimiter=',', usecols=columns, float_precision='high')
    return pd.read_csv(csv_path, **read_options)

# Features and labels are stored in the same CSV; each call loads only its own
# set of columns.
df_features = load_xplane_csv(file, cols_features)
df_labels = load_xplane_csv(file, cols_label)

# Both frames must contain the same number of rows.
for frame in (df_features, df_labels):
    print(len(frame))
assert len(df_features) == len(df_labels)


380538
380538


## drop nan in features and labels

Drop every row whose label contains a NaN from both the features and the labels, so that the two frames stay aligned.

In [0]:
# True for every row whose label has at least one NaN.
df_nan = df_labels.isna().any(axis=1)

# Filter both frames with the same boolean mask in one vectorised step.
# This replaces a Python loop that dropped the rows one at a time, which was
# slow on ~380k rows and raised a KeyError when the cell was run a second time.
# Rebinding instead of mutating in place keeps the cell safe to re-run.
df_features = df_features.loc[~df_nan]
df_labels = df_labels.loc[~df_nan]

print(len(df_features))
print(len(df_labels))
# Features and labels must still describe the same rows.
assert len(df_features) == len(df_labels)

380074
380074


In [0]:
def split_train_test(dataset, train_fraction=0.67):
  """Split ``dataset`` into a leading training part and a trailing test part.

  The order of the rows is preserved (no shuffling): the first
  ``int(len(dataset) * train_fraction)`` rows form the training part, the
  remaining rows the test part.

  Args:
    dataset: Any sequence that supports ``len`` and row slicing, e.g. a 2-D
      NumPy array, a DataFrame or a list.
    train_fraction: Share of the rows used for training, within [0, 1].
      Defaults to 0.67.

  Returns:
    A ``(train, test)`` tuple.

  Raises:
    ValueError: If ``train_fraction`` lies outside [0, 1].
  """
  if not 0.0 <= train_fraction <= 1.0:
    raise ValueError('train_fraction must be within [0, 1], got %r' % (train_fraction,))

  train_size = int(len(dataset) * train_fraction)
  # Slicing along the first axis only keeps the function usable for inputs
  # that are not 2-D arrays; for 2-D arrays the result is unchanged.
  return dataset[:train_size], dataset[train_size:]


## split data

In [0]:
# NumPy views of the cleaned feature and label frames; these arrays are what
# gets split into training and test data.
features = df_features.values
labels = df_labels.values

In [0]:
# Hold out 20 % of the rows as test data; random_state fixes the split.
# NOTE(review): train_test_split shuffles the rows by default, so neighbouring
# time steps of one recording can end up in different sets — confirm that this
# is intended for the evaluation.
train_X, test_X, train_Y, test_Y  = train_test_split(features,labels, test_size=0.2, random_state=42)

In [0]:
# Shapes of the four splits, printed in the order train_X, train_Y, test_X, test_Y.
for split in (train_X, train_Y, test_X, test_Y):
    print(split.shape)

(304059, 14)
(304059, 11)
(76015, 14)
(76015, 11)


## scale data

In [0]:
# Fit the min-max scaling on the training features only and apply that same
# scaling to both splits.
# NOTE: train_X / test_X are overwritten with their scaled versions, so this
# cell must run exactly once per split — a second run would fit the scaler on
# already scaled data.
feature_scaler = MinMaxScaler().fit(train_X)
train_X, test_X = feature_scaler.transform(train_X), feature_scaler.transform(test_X)


In [0]:
# Same procedure for the labels: fit on the training labels only, then scale
# both splits. label_scaler is needed later to convert predictions back.
# NOTE: train_Y / test_Y are overwritten, so run this cell only once per split.
label_scaler = MinMaxScaler().fit(train_Y)
train_Y, test_Y = label_scaler.transform(train_Y), label_scaler.transform(test_Y)

In [0]:
# Scaled copies of the complete (train + test) feature and label frames, kept
# as DataFrames so later cells can show per-column statistics.
scaled_feature_values = feature_scaler.transform(df_features)
scaled_label_values = label_scaler.transform(df_labels)

scaled_df_features = pd.DataFrame(scaled_feature_values, columns=df_features.columns)
scaled_df_labels = pd.DataFrame(scaled_label_values, columns=df_labels.columns)

In [0]:
# Summary statistics of the unscaled features (rows left after the NaN drop).
df_features.describe()

Unnamed: 0,"Vtrue,_ktas","__VVI,__fpm","____Q,rad/s","____P,rad/s","____R,rad/s","pitch,__deg","_roll,__deg","_beta,__deg","__alt,ftmsl","thro1,_part","rpm_1,_prop","_elev,servo","ailrn,servo","ruddr,servo"
count,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0
mean,116.225465,27.637401,0.00465,-3.9e-05,-0.002778,-0.48756,-1.185574,0.029893,2931.960049,0.79608,2435.271756,-2.5e-05,-0.015737,0.0
std,8.16991,268.5921,0.007307,0.019294,0.026371,1.707656,9.746198,0.143593,1320.636415,7.405197e-14,69.824073,0.002403,0.02495,0.0
min,66.42478,-1917.88818,-0.03945,-0.10981,-0.05482,-9.31351,-23.85152,-0.68997,891.35071,0.79608,2106.25781,-0.08979,-0.15974,0.0
25%,115.113407,-14.60334,4e-05,-0.00057,-0.01355,-1.04743,-4.991227,-0.04978,1891.84167,0.79608,2417.97101,-0.00011,-0.01836,0.0
50%,117.76655,0.292975,0.00119,0.0,-4e-05,-0.7584,-0.14822,0.021815,2917.68347,0.79608,2444.87085,0.0,-0.01537,0.0
75%,119.523077,99.568948,0.01087,0.00054,0.00628,-0.126985,2.13732,0.095687,3992.383425,0.79608,2469.170777,5e-05,-0.01301,0.0
max,155.43665,978.53833,0.04165,0.11131,0.0554,11.79617,25.92492,0.35355,5080.88818,0.79608,2795.22119,0.0451,0.1249,0.0


In [0]:
# Same statistics after min-max scaling. The scaler was fitted on the training
# split only, so values of the full frame can fall marginally outside [0, 1].
scaled_df_features.describe()

Unnamed: 0,"Vtrue,_ktas","__VVI,__fpm","____Q,rad/s","____P,rad/s","____R,rad/s","pitch,__deg","_roll,__deg","_beta,__deg","__alt,ftmsl","thro1,_part","rpm_1,_prop","_elev,servo","ailrn,servo","ruddr,servo"
count,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0
mean,0.559483,0.671694,0.543773,0.496434,0.472162,0.4181,0.455358,0.689841,0.487073,0.0,0.477549,0.665467,0.505912,0.0
std,0.091785,0.092733,0.090095,0.087258,0.239259,0.080894,0.195801,0.137604,0.315222,0.0,0.101347,0.017812,0.087655,0.0
min,-1e-06,-1.4e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.54699,0.65711,0.48693,0.49403,0.374433,0.391578,0.378903,0.613491,0.238807,0.0,0.452438,0.664838,0.496698,0.0
50%,0.576796,0.662253,0.50111,0.496608,0.497006,0.40527,0.476199,0.6821,0.483665,0.0,0.491482,0.665653,0.507202,0.0
75%,0.59653,0.696529,0.620469,0.49905,0.554346,0.435181,0.522115,0.752892,0.740185,0.0,0.526752,0.666024,0.515493,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.000007,1.0,1.0,0.0,1.0,1.0,1.0,0.0


In [0]:
# Summary statistics of the unscaled labels (the state columns at t + 1).
df_labels.describe()

Unnamed: 0,"t+1_ ____Q,rad/s","t+1_ _beta,__deg","t+1_ _roll,__deg","t+1_ ____R,rad/s","t+1_ pitch,__deg","t+1_ ____P,rad/s","t+1_ __VVI,__fpm","t+1_ __alt,ftmsl","t+1_ Vtrue,_ktas","t+1_ rpm_1,_prop","t+1_ thro1,_part"
count,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0
mean,0.00465,0.029893,-1.185573,-0.002778,-0.487563,-3.9e-05,27.63575,2931.960333,116.225495,2435.272013,0.79608
std,0.007303,0.143593,9.746196,0.026371,1.707658,0.019295,268.590189,1320.635985,8.169939,69.824356,7.405197e-14
min,-0.03868,-0.68997,-23.85152,-0.05482,-9.31351,-0.10981,-1917.88818,891.35071,66.42478,2106.25781,0.79608
25%,4e-05,-0.04978,-4.991227,-0.01355,-1.04743,-0.00057,-14.60334,1891.84167,115.113407,2417.97101,0.79608
50%,0.00119,0.02182,-0.148225,-4e-05,-0.758395,0.0,0.292955,2917.68347,117.76656,2444.871215,0.79608
75%,0.01087,0.095687,2.13732,0.00628,-0.127003,0.00054,99.567245,3992.383425,119.523087,2469.171267,0.79608
max,0.04165,0.35355,25.92492,0.0554,11.79617,0.11131,978.53833,5080.88818,155.43665,2795.22119,0.79608


In [0]:
# Label statistics after min-max scaling with the scaler fitted on the training
# labels; values of the full frame can fall marginally outside [0, 1].
scaled_df_labels.describe()

Unnamed: 0,"t+1_ ____Q,rad/s","t+1_ _beta,__deg","t+1_ _roll,__deg","t+1_ ____R,rad/s","t+1_ pitch,__deg","t+1_ ____P,rad/s","t+1_ __VVI,__fpm","t+1_ __alt,ftmsl","t+1_ Vtrue,_ktas","t+1_ rpm_1,_prop","t+1_ thro1,_part"
count,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0
mean,0.539406,0.689881,0.455355,0.472162,0.4181,0.496115,0.671705,0.487073,0.559484,0.47755,0.0
std,0.090919,0.137612,0.195799,0.239259,0.080895,0.087314,0.092733,0.315222,0.091785,0.101347,0.0
min,0.0,0.0,0.0,0.0,0.0,-0.000634,0.0,0.0,0.0,0.0,0.0
25%,0.482012,0.613526,0.3789,0.374433,0.391578,0.49371,0.657122,0.238807,0.54699,0.452438,0.0
50%,0.496328,0.682144,0.476195,0.497006,0.40527,0.496289,0.662265,0.483665,0.576797,0.491482,0.0
75%,0.616831,0.752935,0.522111,0.554346,0.43518,0.498733,0.69654,0.740185,0.596531,0.526753,0.0
max,1.0,1.000058,1.0,1.0,1.0,1.0,1.000011,1.0,1.0,1.0,0.0


# train models

## define callbacks

In [0]:
def prepare_tensorboard_callback(model_name=''):
  """Create a TensorBoard callback that logs into a new, timestamped folder.

  The log folder is ``./tensorbaord_logs/<YYYYmmdd-HHMMSS>``; when
  ``model_name`` is non-empty, ``_<model_name>`` is appended to the folder
  name so runs of different models can be told apart. (Previously the model
  name was computed into an unused variable and never reached the path.)

  Args:
    model_name: Optional suffix for the run folder. Defaults to ``''``, which
      yields a folder named by the timestamp alone.

  Returns:
    A ``tf.keras.callbacks.TensorBoard`` instance with ``histogram_freq=1``.
  """
  # The (misspelled) folder name is kept so that earlier runs and new runs
  # stay under the same TensorBoard root directory.
  logs_base_dir = "./tensorbaord_logs"
  os.makedirs(logs_base_dir, exist_ok=True)

  # One timestamp per call; it is the only part of the name for the default.
  run_name = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
  if model_name:
    run_name += '_' + model_name

  log_dir = os.path.join(logs_base_dir, run_name)
  return tf.keras.callbacks.TensorBoard(log_dir, histogram_freq=1)


In [0]:
def prepare_checkpoint_callback(model_name=None):
  """Create a callback that saves the model weights during training.

  The weights are written to ``./checkpoints/cp_<model_name>``. The path has
  no epoch placeholder, so each save goes to the same location.

  Args:
    model_name: Name used in the checkpoint file name. When omitted, the
      current time (``YYYYmmdd-HHMMSS``) is used. The timestamp is taken at
      call time; as a default argument expression it would be evaluated only
      once, when the function is defined, and then reused by every call.

  Returns:
    A ``tf.keras.callbacks.ModelCheckpoint`` instance configured with
    ``save_weights_only=True`` and ``verbose=1``.
  """
  if model_name is None:
    model_name = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

  checkpoint_base_dir = './checkpoints'
  os.makedirs(checkpoint_base_dir, exist_ok=True)

  filepath = os.path.join(checkpoint_base_dir, 'cp_' + model_name)

  return tf.keras.callbacks.ModelCheckpoint(filepath=filepath,
                                            save_weights_only=True,
                                            verbose=1)

## baseline model



In [0]:
# Width of the network input (feature columns) and output (target columns).
features_nr = train_X.shape[1]
target_nr = train_Y.shape[1]

# Fully connected regression network: three hidden blocks of
# Dense -> BatchNormalization -> ReLU, followed by a linear output layer with
# one unit per target column.
# Only the first layer declares the input shape. Keras derives the input of
# every later layer from the layer before it, so the `input_shape` that used to
# be repeated on the hidden layers had no effect and was misleading (their
# real input widths are 1200 and 600, not `features_nr`).
baseline_model = keras.Sequential([
    Dense(1200, input_shape=(features_nr,), kernel_initializer='he_uniform'),
    BatchNormalization(),
    ReLU(),

    Dense(600),
    BatchNormalization(),
    ReLU(),

    Dense(600),
    BatchNormalization(),
    ReLU(),

    Dense(target_nr),
])

# MSE is the training loss; MSE and MAE are additionally tracked as metrics.
baseline_model.compile(optimizer='adam',
                       loss='mean_squared_error',
                       metrics=[metrics.mean_squared_error,
                                metrics.mean_absolute_error])

baseline_model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1200)              18000     
_________________________________________________________________
batch_normalization (BatchNo (None, 1200)              4800      
_________________________________________________________________
re_lu (ReLU)                 (None, 1200)              0         
_________________________________________________________________
dense_1 (Dense)              (None, 600)               720600    
_________________________________________________________________
batch_normalization_1 (Batch (None, 600)               2400      
_________________________________________________________________
re_lu_1 (ReLU)               (None, 600)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 600)               3

In [0]:
# Checkpoint the weights and write TensorBoard logs while training.
checkpoint_callback = prepare_checkpoint_callback(model_name='baseline_longtraining')
tensorboard_callback = prepare_tensorboard_callback()
callbacks = [checkpoint_callback, tensorboard_callback]

# NOTE(review): the test split doubles as validation data here — confirm that
# no model selection is based on it.
fit_options = dict(
    epochs=100,
    batch_size=32,
    validation_data=(test_X, test_Y),
    verbose=2,
    callbacks=callbacks,
)
baseline_history = baseline_model.fit(train_X, train_Y, **fit_options)


Train on 304059 samples, validate on 76015 samples
Epoch 1/100


In [0]:
# Persist the weights reached at the end of training.
baseline_model.save_weights('./checkpoints/baseline_longtraining_end')

## visualize

In [0]:
def save_plots_before_show(diagramm_name):
  """Save the current matplotlib figure as PDF, SVG and PNG.

  The files are written to ``./diagramms/<diagramm_name>.<ext>``. Call this
  before ``plt.show()``, as the function name says.

  Args:
    diagramm_name: File name without extension.
  """
  diagramm_folder = './diagramms'

  # exist_ok=True replaces the separate existence check and matches how the
  # other helpers in this notebook create their output folders.
  os.makedirs(diagramm_folder, exist_ok=True)

  for extension in ('pdf', 'svg', 'png'):
    plt.savefig(diagramm_folder + '/' + diagramm_name + '.' + extension)

def plot_history(histories, key='mean_squared_error', file_name=''):
  """Plot training and validation curves of one metric for several runs.

  For every ``(name, history)`` pair the validation curve is drawn dashed and
  the training curve solid in the same colour. The y axis is logarithmic. The
  figure is saved via ``save_plots_before_show`` under ``<file_name>_<key>``
  and then shown.

  Args:
    histories: Iterable of ``(name, history)`` pairs, where ``history`` offers
      ``.epoch`` and a ``.history`` dict containing ``key`` and ``'val_' + key``.
    key: Name of the metric to plot.
    file_name: Prefix of the saved file names.
  """
  plt.figure(figsize=(16,10))

  # Largest epoch index over ALL runs. The x limit used to be taken from the
  # loop variable after the loop, i.e. from the last run only, and raised a
  # NameError when `histories` was empty.
  last_epoch = None

  for name, history in histories:
    val = plt.plot(history.epoch, history.history['val_'+key],
                   '--', label=name.title()+' Val')
    plt.plot(history.epoch, history.history[key], color=val[0].get_color(),
             label=name.title()+' Train')

    if len(history.epoch) > 0:
      run_last_epoch = max(history.epoch)
      if last_epoch is None or run_last_epoch > last_epoch:
        last_epoch = run_last_epoch

  plt.xlabel('Epochs')
  plt.yscale("log")
  plt.ylabel(key.replace('_',' ').title())
  plt.legend()

  # Without any epoch (or with a single one) there is no x range to fix;
  # matplotlib then keeps its automatic limits.
  if last_epoch:
    plt.xlim([0, last_epoch])

  save_plots_before_show(file_name+'_'+key)
  plt.show()

In [0]:
# Restore the weights that were saved after training.
baseline_model.load_weights('./checkpoints/baseline_longtraining_end')


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f5a3b9d3e48>

### Test & Training MSE

In [0]:
# evaluate() returns the loss followed by the compiled metrics; the loss set at
# compile time is the mean squared error, so element 0 is the test-set MSE
# (on the scaled targets).
baseline_mse = baseline_model.evaluate(test_X, test_Y)[0]

print('baseline_mse '+  str(baseline_mse))


baseline_mse 2.179654475249698e-05


In [0]:
# NOTE(review): the curve is labelled 'baseline_mse' and this section is titled
# "Test & Training MSE", but the plotted key is the mean absolute error —
# confirm which metric is intended.
plot_history([('baseline_mse', baseline_history),],
              key='mean_absolute_error',
             file_name='scaled')



### MSE per target (scaled)

In [0]:
from sklearn.metrics import mean_squared_error

def mse(model):
  """Return the scaled-label statistics with the per-target test MSE appended.

  The model predicts on the (scaled) ``test_X``; the mean squared error against
  the (scaled) ``test_Y`` is computed separately for every target column.

  Args:
    model: Object with a ``predict`` method that accepts ``test_X``.

  Returns:
    A DataFrame holding ``scaled_df_labels.describe()`` plus one extra row
    named ``'mse'``, with the label columns as columns.
  """
  pred_Y = model.predict(test_X)
  # Named differently from the function so that `mse` is not shadowed here.
  per_target_mse = mean_squared_error(test_Y, pred_Y, multioutput='raw_values')

  mse_per_feature = pd.DataFrame(data=per_target_mse.reshape(1,-1),    # values
              index=['mse'],     # index
              columns=df_labels.columns)  #  column names
  # pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
  return pd.concat([scaled_df_labels.describe(), mse_per_feature])


In [0]:
# Per-target MSE on the scaled test set, shown below the scaled label statistics.
mses_baseline = mse(baseline_model)
mses_baseline.head(20)

Unnamed: 0,"t+1_ ____Q,rad/s","t+1_ _beta,__deg","t+1_ _roll,__deg","t+1_ ____R,rad/s","t+1_ pitch,__deg","t+1_ ____P,rad/s","t+1_ __VVI,__fpm","t+1_ __alt,ftmsl","t+1_ Vtrue,_ktas","t+1_ rpm_1,_prop","t+1_ thro1,_part"
count,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0
mean,0.539406,0.689881,0.455355,0.472162,0.4181,0.496115,0.671705,0.487073,0.559484,0.47755,0.0
std,0.090919,0.137612,0.195799,0.239259,0.080895,0.087314,0.092733,0.315222,0.091785,0.101347,0.0
min,0.0,0.0,0.0,0.0,0.0,-0.000634,0.0,0.0,0.0,0.0,0.0
25%,0.482012,0.613526,0.3789,0.374433,0.391578,0.49371,0.657122,0.238807,0.54699,0.452438,0.0
50%,0.496328,0.682144,0.476195,0.497006,0.40527,0.496289,0.662265,0.483665,0.576797,0.491482,0.0
75%,0.616831,0.752935,0.522111,0.554346,0.43518,0.498733,0.69654,0.740185,0.596531,0.526753,0.0
max,1.0,1.000058,1.0,1.0,1.0,1.0,1.000011,1.0,1.0,1.0,0.0
mse,9e-06,9e-06,1e-05,2e-05,1.8e-05,1e-05,1.6e-05,9.8e-05,2.1e-05,2.7e-05,2.110213e-08


### MSE per target (unscaled)

In [0]:
def mse_denormalized(model):
  """Return the unscaled-label statistics with the per-target test MSE appended.

  Predictions for ``test_X`` and the reference ``test_Y`` are both passed
  through ``label_scaler.inverse_transform`` before the error is computed, so
  each MSE refers to the unscaled values of its target column.

  Args:
    model: Object with a ``predict`` method that accepts ``test_X``.

  Returns:
    A DataFrame holding ``df_labels.describe()`` plus one extra row named
    ``'mse'``, with the label columns as columns.
  """
  pred_Y = model.predict(test_X)
  unscaled_pred_Y = label_scaler.inverse_transform(pred_Y)
  unscaled_test_Y = label_scaler.inverse_transform(test_Y)
  per_target_mse = mean_squared_error(unscaled_test_Y, unscaled_pred_Y,
                                      multioutput='raw_values')

  mse_per_feature = pd.DataFrame(data=per_target_mse.reshape(1,-1),    # values
              index=['mse'],     # index
              columns=df_labels.columns)  #  column names
  # pd.concat instead of DataFrame.append, which was removed in pandas 2.0.
  return pd.concat([df_labels.describe(), mse_per_feature])


In [0]:
# Per-target test MSE after converting predictions and labels back to unscaled values.
mse_denormalized(baseline_model).head(20)


Unnamed: 0,"t+1_ ____Q,rad/s","t+1_ _beta,__deg","t+1_ _roll,__deg","t+1_ ____R,rad/s","t+1_ pitch,__deg","t+1_ ____P,rad/s","t+1_ __VVI,__fpm","t+1_ __alt,ftmsl","t+1_ Vtrue,_ktas","t+1_ rpm_1,_prop","t+1_ thro1,_part"
count,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0,380074.0
mean,0.004650467,0.029893,-1.185573,-0.002778307,-0.487563,-3.860788e-05,27.63575,2931.960333,116.225495,2435.272013,0.79608
std,0.007303494,0.143593,9.746196,0.02637116,1.707658,0.0192947,268.590189,1320.635985,8.169939,69.824356,7.405197e-14
min,-0.03868,-0.68997,-23.85152,-0.05482,-9.31351,-0.10981,-1917.88818,891.35071,66.42478,2106.25781,0.79608
25%,4e-05,-0.04978,-4.991227,-0.01355,-1.04743,-0.00057,-14.60334,1891.84167,115.113407,2417.97101,0.79608
50%,0.00119,0.02182,-0.148225,-4e-05,-0.758395,0.0,0.292955,2917.68347,117.76656,2444.871215,0.79608
75%,0.01087,0.095687,2.13732,0.00628,-0.127003,0.00054,99.567245,3992.383425,119.523087,2469.171267,0.79608
max,0.04165,0.35355,25.92492,0.0554,11.79617,0.11131,978.53833,5080.88818,155.43665,2795.22119,0.79608
mse,5.841163e-08,1e-05,0.025971,2.47808e-07,0.008039,4.961626e-07,134.503467,1728.647878,0.168549,12.60211,2.110213e-08
