# About this Notebook

The purpose of this notebook is to study a convolutional network solution. The MLP performs well when the validation/test data come from the same flight as the training data; however, it does not generalize well to other flights. We will therefore try a new architecture able to detect more complex patterns in the data: a convolutional network. See the previous notebook for more details.

The input data are multivariate, multiple-input time series.

# Table of Contents

# Import packages

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import torch
from torch.utils.data import Dataset, DataLoader

# 1 - What is a Convolutional Neural Network

# 2 - Import of Data

In [4]:
# Single source of truth for the interim dataset location (relative to this notebook).
DATASET_PATH = '../data/interim/dataset.h5'

# Each flight is stored under its own HDF5 key, 'Flt<number>'.
df2, df3, df4 = (
    pd.read_hdf(DATASET_PATH, key=f'Flt{flight_number}')
    for flight_number in (1002, 1003, 1004)
)

In [7]:
# Preview the first rows of the frame loaded from key 'Flt1002' to check its columns.
df2.head()

Unnamed: 0_level_0,TL_comp_mag3_cl,TL_comp_mag5_cl,V_BAT1,V_BAT2,TOPO,INS_VEL_N,INS_VEL_W,INS_VEL_V,BARO,CUR_IHTR,PITCH,ROLL,AZIMUTH,LINE,IGRFMAG1
Time [s],Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
45100.0,-1026.777805,-44.982774,25.827,2.015,71.04,-36.54755,14.52841,3.47413,109.73,1.734,9.19,0.19,204.01,1002.01,-297.343
45100.1,-1023.030351,-40.600326,25.826,2.014,71.06,-36.57182,14.492,3.42597,110.14,1.759,9.08,-0.03,203.95,1002.01,-296.223
45100.2,-1021.28623,-34.817623,25.824,2.013,71.08,-36.60039,14.4537,3.37824,110.55,1.783,8.96,-0.22,203.91,1002.01,-295.079
45100.3,-1023.965085,-29.347438,25.82,2.01,71.07,-36.63182,14.41314,3.33233,110.95,1.796,8.85,-0.39,203.9,1002.01,-293.939
45100.4,-1030.701663,-25.421394,25.815,2.007,71.04,-36.66759,14.3715,3.28104,111.35,1.788,8.73,-0.55,203.91,1002.01,-292.821


# 3 - Normalization

In [8]:
# Summary statistics of the raw (unscaled) Flt1002 frame.
df2.describe()

# scikit-learn validates `feature_range` as a tuple, so a list is rejected by recent versions.
scaling_range = (-1, 1)


def scale_flight(flight_df, feature_range=scaling_range, passthrough=('LINE', 'IGRFMAG1')):
    """Min-max scale the feature columns of one flight.

    Parameters
    ----------
    flight_df : pandas.DataFrame
        Frame of one flight; must contain every column named in `passthrough`.
    feature_range : tuple of (min, max)
        Target range handed to `MinMaxScaler`.
    passthrough : sequence of str
        Columns copied unscaled and placed last, in the given order.

    Returns
    -------
    (pandas.DataFrame, MinMaxScaler)
        The scaled frame (same index as `flight_df`, scaled features first,
        passthrough columns last) and the scaler fitted on this flight.
    """
    passthrough = list(passthrough)
    features = flight_df.drop(columns=passthrough)

    scaler = MinMaxScaler(feature_range=tuple(feature_range))
    scaled_df = pd.DataFrame(
        scaler.fit_transform(features),
        index=flight_df.index,
        columns=features.columns,
    )
    # Appended after the features so the unscaled columns are always the trailing ones.
    scaled_df[passthrough] = flight_df[passthrough]
    return scaled_df, scaler


# NOTE(review): every flight gets its own scaler, fitted on that flight's own min/max,
# so the same physical value maps to different scaled values from flight to flight.
# Confirm this is intended given the goal of generalizing across flights.
# NOTE(review): TOPO has fewer non-null rows than the other columns in the summary below,
# so the scaled frames presumably still contain NaNs - confirm how they should be handled.
df2_scaled, MinMaxScaler_2 = scale_flight(df2)
df3_scaled, MinMaxScaler_3 = scale_flight(df3)
df4_scaled, MinMaxScaler_4 = scale_flight(df4)

df2_scaled.describe()

Unnamed: 0,TL_comp_mag3_cl,TL_comp_mag5_cl,V_BAT1,V_BAT2,TOPO,INS_VEL_N,INS_VEL_W,INS_VEL_V,BARO,CUR_IHTR,PITCH,ROLL,AZIMUTH,LINE,IGRFMAG1
count,207578.0,207578.0,207578.0,207578.0,173001.0,207578.0,207578.0,207578.0,207578.0,207578.0,207578.0,207578.0,207578.0,207578.0,207578.0
mean,0.120479,-0.499555,-0.643363,-0.078342,-0.457554,-0.039156,0.006873,-0.002206,-0.464807,-0.026525,0.202881,-0.09027,0.105508,1152.355312,15.822918
std,0.060556,0.148419,0.223182,0.484986,0.312241,0.592635,0.612694,0.215485,0.668085,0.352792,0.232093,0.257489,0.537493,603.63303,263.641503
min,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,158.0,-868.652
25%,0.092208,-0.566069,-0.749049,-0.471795,-0.640209,-0.641538,-0.56672,-0.096123,-0.881249,-0.260143,0.078834,-0.167997,-0.246764,1002.03,-106.78725
50%,0.119767,-0.498298,-0.673004,-0.298462,-0.515681,-0.005474,0.038345,0.002215,-0.843016,-0.072517,0.188985,-0.101912,0.016722,1002.15,24.2195
75%,0.14914,-0.444462,-0.596958,0.36,-0.35408,0.537026,0.640141,0.10443,-0.3154,0.166514,0.307775,-0.032857,0.608167,1002.2,120.8685
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3086.0,2699.331


# 4 - Input Sequence

In [57]:
class MagNavDataset(Dataset):
    """Fixed-length windows over a flight DataFrame, for sequence models.

    The frame is converted to a float32 tensor. Every column except the last
    two is used as an input feature; the last column is the target. The
    second-to-last column is used for neither.

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric frame whose two trailing columns are not features.
    seq_length : int
        Number of consecutive rows in one window; must be positive.

    Raises
    ------
    ValueError
        If `seq_length` is not a positive number.
    """

    def __init__(self, df, seq_length):
        if seq_length <= 0:
            raise ValueError(f"seq_length must be positive, got {seq_length}")

        data = torch.tensor(df.to_numpy(), dtype=torch.float32)

        # Drop the trailing rows that do not fill a complete window.
        # Slicing with an explicit end keeps every row when the remainder is 0.
        remainder = len(data) % seq_length
        self.data = data[:len(data) - remainder]

        self.seq_length = seq_length

    def __getitem__(self, index):
        """Return `(X, y)` for the window that starts at row `index`.

        X holds `seq_length` rows of all feature columns; y is the value of
        the last column on the final row of that window.
        """
        # NOTE(review): windows start at row `index` (stride 1) while __len__
        # counts non-overlapping windows, so iterating over range(len(self))
        # only visits the first len(self) start rows. Confirm whether the
        # start should be `index * self.seq_length` or whether __len__ should
        # count sliding windows instead.
        window_end = index + self.seq_length
        X = self.data[index:window_end, 0:-2]
        y = self.data[window_end - 1, -1]
        return X, y

    def __len__(self):
        """Number of complete, non-overlapping windows in the kept rows."""
        return len(self.data) // self.seq_length

In [55]:
# Scratch check of the "drop the incomplete trailing window" logic on the scaled Flt1002 frame.
data = torch.tensor(df2_scaled.to_numpy(), dtype=torch.float32)
seq = 3

# NOTE(review): `[:-2]` drops the last two ROWS; if the goal was to look at the
# feature columns only, the slice would be `data[:, :-2]` - confirm.
print(data[:-2])

print(len(data))
# Guard the exact-multiple case: `data[:-0]` would return an empty tensor.
remainder = len(data) % seq
if remainder:
    data = data[:-remainder]
print(len(data))

tensor([[ 5.1561e-02, -6.6923e-01,  9.9240e-01,  ...,  1.3339e-01,
          1.0020e+03, -2.9734e+02],
        [ 5.2283e-02, -6.6673e-01,  9.8479e-01,  ...,  1.3306e-01,
          1.0020e+03, -2.9622e+02],
        [ 5.2620e-02, -6.6343e-01,  9.6958e-01,  ...,  1.3283e-01,
          1.0020e+03, -2.9508e+02],
        ...,
        [ 1.5849e-01, -3.0495e-01, -9.6198e-01,  ...,  1.2967e-01,
          1.0022e+03,  3.2848e+02],
        [ 1.5755e-01, -3.0508e-01, -1.0000e+00,  ...,  1.2844e-01,
          1.0022e+03,  3.3193e+02],
        [ 1.5742e-01, -3.0493e-01, -9.7719e-01,  ...,  1.2728e-01,
          1.0022e+03,  3.3530e+02]])
207578
207576
