# Setup

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive/ so that files
# stored on the drive can be opened through ordinary filesystem paths.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
!pip install dask[dataframe]

Collecting partd>=0.3.10; extra == "dataframe"
  Downloading https://files.pythonhosted.org/packages/44/e1/68dbe731c9c067655bff1eca5b7d40c20ca4b23fd5ec9f3d17e201a6f36b/partd-1.1.0-py3-none-any.whl
Collecting fsspec>=0.6.0; extra == "dataframe"
[?25l  Downloading https://files.pythonhosted.org/packages/91/0d/a6bfee0ddf47b254286b9bd574e6f50978c69897647ae15b14230711806e/fsspec-0.8.7-py3-none-any.whl (103kB)
[K     |████████████████████████████████| 112kB 10.1MB/s 
Collecting locket
  Downloading https://files.pythonhosted.org/packages/50/b8/e789e45b9b9c2db75e9d9e6ceb022c8d1d7e49b2c085ce8c05600f90a96b/locket-0.2.1-py2.py3-none-any.whl
Installing collected packages: locket, partd, fsspec
Successfully installed fsspec-0.8.7 locket-0.2.1 partd-1.1.0


In [None]:
import dask.dataframe as dd
import numpy as np
import pandas as pd
import time

file = '/content/drive/MyDrive/parsed_adsb_csvs_traks/*.csv'
cols = ['Icao','Alt', 'Lat','Long', 'PosTime', 'Type', 'Trak']

# Explicit, compact dtypes for the numeric columns.
# 'Alt' uses a signed type on purpose: an unsigned 16-bit column cannot hold a
# negative altitude, so such a value would wrap around to a number close to
# 65535 and distort the min-max scaling applied later in the notebook.
csv_dtypes = {
    'Alt': 'int32',
    'Lat': 'float32',
    'Long': 'float32',
    'PosTime': 'int64',
    'Trak': 'float32',
}

# Lazily read every CSV matching the glob, restricted to the columns above.
train_df = dd.read_csv(file, dtype=csv_dtypes, usecols=cols)

# Materialise the dask frame as a single in-memory pandas DataFrame.
train_df = train_df.compute()
print(train_df)

         Icao    Alt        Lat        Long        PosTime  Type        Trak
0      A82B72   3500  39.717903  -84.619019  1596672029381  C172  215.100006
1      A80E46  31000  61.336498 -140.995438  1596672028022  B748   85.400002
2      A4E2E5    700  42.145557  -72.719398  1596672029830  C140  193.600006
3      A4BDB9  15600  33.516727  -79.442421  1596672029378  E170  212.300003
4      A4C3CE    650  32.965118  -96.833878  1596672030885  CRUZ  339.399994
...       ...    ...        ...         ...            ...   ...         ...
21419  C821F8  17975 -41.125225  175.051468  1596758412576  AT76  203.699997
21420  C8234A  18500 -37.852840  174.806625  1596758411374  A320    0.000000
21421  ACC040  28000  42.272324  -87.846115  1596758411880  CRJ9  339.200012
21422  A7D222   6900  29.646700  -98.126343  1596757702853  BE36  165.000000
21423  345292  32000  51.857941    5.114062  1596758411877  B734  272.399994

[29300223 rows x 7 columns]


In [None]:
# Order the position reports by aircraft identifier (Icao) and, within each
# aircraft, by report time (PosTime); then renumber the rows from 0.
train_df = (
    train_df
    .sort_values(by=['Icao', 'PosTime'])
    .reset_index(drop=True)
)
print(train_df)

            Icao   Alt        Lat       Long        PosTime  Type        Trak
0         001000  2175  48.106934  11.258926  1596721377840  SIRA   37.299999
1         001000  2175  48.106934  11.258926  1596721392353  SIRA   55.599998
2         001000  2175  48.106934  11.258926  1596721394847  SIRA   55.599998
3         001000  2175  48.109818  11.264557  1596721444191  SIRA  230.399994
4         001000  2175  48.105652  11.257416  1596721470866  SIRA  229.699997
...          ...   ...        ...        ...            ...   ...         ...
29300218  F00000   450  49.149822   2.394817  1596742711592  SKRA  232.600006
29300219  F00000   300  49.149822   2.394817  1596742711592  SKRA  232.600006
29300220  F00000   200  49.149822   2.394817  1596742711592  SKRA  232.600006
29300221  F00000   175  49.149822   2.394817  1596742711592  SKRA  232.600006
29300222  F00000   175  49.149822   2.394817  1596742711592  SKRA  232.600006

[29300223 rows x 7 columns]


In [None]:
# Extremes of every numeric feature, taken over the whole data set.
min_lat, max_lat = train_df['Lat'].min(), train_df['Lat'].max()
min_lon, max_lon = train_df['Long'].min(), train_df['Long'].max()
min_alt, max_alt = train_df['Alt'].min(), train_df['Alt'].max()
min_trak, max_trak = train_df['Trak'].min(), train_df['Trak'].max()

# Min-max normalisation, (x - min) / (max - min), applied column by column.
for column, low, high in (
    ('Lat', min_lat, max_lat),
    ('Long', min_lon, max_lon),
    ('Alt', min_alt, max_alt),
    ('Trak', min_trak, max_trak),
):
    train_df[column] = (train_df[column] - low) / (high - low)
print(train_df)

            Icao       Alt       Lat      Long        PosTime  Type      Trak
0         001000  0.033188  0.844904  0.531295  1596721377840  SIRA  0.103640
1         001000  0.033188  0.844904  0.531295  1596721392353  SIRA  0.154487
2         001000  0.033188  0.844904  0.531295  1596721394847  SIRA  0.154487
3         001000  0.033188  0.844915  0.531311  1596721444191  SIRA  0.640178
4         001000  0.033188  0.844899  0.531291  1596721470866  SIRA  0.638233
...          ...       ...       ...       ...            ...   ...       ...
29300218  F00000  0.006867  0.848767  0.506672  1596742711592  SKRA  0.646291
29300219  F00000  0.004578  0.848767  0.506672  1596742711592  SKRA  0.646291
29300220  F00000  0.003052  0.848767  0.506672  1596742711592  SKRA  0.646291
29300221  F00000  0.002670  0.848767  0.506672  1596742711592  SKRA  0.646291
29300222  F00000  0.002670  0.848767  0.506672  1596742711592  SKRA  0.646291

[29300223 rows x 7 columns]


In [None]:
# Share of rows (in percent) belonging to each value of the Type column,
# most frequent first.
type_share_pct = 100 * train_df['Type'].value_counts(normalize=True)
print(type_share_pct)

B738    12.154024
A320     8.080375
C172     5.738216
A321     3.910677
B737     3.376336
          ...    
G300     0.000038
D253     0.000017
WACN     0.000010
PTSS     0.000010
A337     0.000007
Name: Type, Length: 857, dtype: float64


In [None]:
# Percentage of rows per Type value, sorted from most to least frequent.
Percentages = 100 * train_df['Type'].value_counts(normalize=True)
# The 25 most frequent types and their shares.
print(Percentages.iloc[:25])
# Combined share covered by the 10 most frequent types.
print(Percentages.iloc[:10].sum())
# Names of the 25 most frequent types.
print(Percentages.index[:25].tolist())

B738    12.154024
A320     8.080375
C172     5.738216
A321     3.910677
B737     3.376336
A319     2.887497
P28A     2.802849
A20N     2.347242
B763     1.749621
B739     1.553411
E75L     1.548084
B752     1.324068
B789     1.188991
B773     1.140319
CRJ9     1.029200
B77L     1.011409
C182     0.979177
PC12     0.964255
E190     0.878799
C208     0.808519
B744     0.801178
A333     0.789704
A21N     0.787666
BE20     0.765492
E170     0.755667
Name: Type, dtype: float64
44.60024758173343
['B738', 'A320', 'C172', 'A321', 'B737', 'A319', 'P28A', 'A20N', 'B763', 'B739', 'E75L', 'B752', 'B789', 'B773', 'CRJ9', 'B77L', 'C182', 'PC12', 'E190', 'C208', 'B744', 'A333', 'A21N', 'BE20', 'E170']


# Top 10 Types

In [None]:
# Keep only the rows whose Type is one of the 10 most frequent types.
top_types = list(Percentages[:10].index)
is_top_type = train_df['Type'].isin(top_types)
train_df = train_df[is_top_type]
print(train_df)

            Icao       Alt       Lat      Long        PosTime  Type      Trak
994       008DC6  0.088502  0.570442  0.578876  1596723690985  P28A  0.295916
995       008DC6  0.089647  0.570432  0.578902  1596723703073  P28A  0.295916
996       008DC6  0.090028  0.570430  0.578911  1596723717541  P28A  0.295916
997       008DC6  0.090028  0.570430  0.578911  1596723719963  P28A  0.295916
998       008DC6  0.152590  0.571812  0.578309  1596724548247  P28A  0.921089
...          ...       ...       ...       ...            ...   ...       ...
29300186  E94C42  0.122454  0.602148  0.316223  1596716959268  B738  0.861350
29300187  E94C42  0.280766  0.601963  0.315146  1596733932455  B738  0.722423
29300188  E94C42  0.281147  0.601961  0.315125  1596733935427  B738  0.722423
29300189  E94C42  0.281147  0.601961  0.315125  1596733935427  B738  0.722423
29300190  E94C42  0.286107  0.601955  0.314975  1596733965728  B738  0.722423

[13067972 rows x 7 columns]


In [None]:
# Map each of the 10 most frequent types to an integer class label; the
# label is the type's position in the frequency ranking, starting at 0.
type_dict = dict(zip(Percentages[:10].index, range(10)))
print(type_dict)

{'B738': 0, 'A320': 1, 'C172': 2, 'A321': 3, 'B737': 4, 'A319': 5, 'P28A': 6, 'A20N': 7, 'B763': 8, 'B739': 9}


In [None]:
# Replace the type strings by their integer class labels from type_dict.
# The result is assigned to a new frame instead of calling
# replace(..., inplace=True) on a column selected from a filtered frame:
# that chained in-place form triggers pandas' chained-assignment warnings and
# leaves the frame unchanged when copy-on-write is enabled.
train_df = train_df.assign(Type=train_df['Type'].replace(type_dict))

In [None]:
# Renumber the rows 0..n-1 (the old index is discarded, not kept as a column)
# and show the resulting frame.
train_df = train_df.reset_index(drop = True)
print(train_df)

            Icao       Alt       Lat      Long        PosTime  Type      Trak
0         008DC6  0.088502  0.570442  0.578876  1596723690985     6  0.295916
1         008DC6  0.089647  0.570432  0.578902  1596723703073     6  0.295916
2         008DC6  0.090028  0.570430  0.578911  1596723717541     6  0.295916
3         008DC6  0.090028  0.570430  0.578911  1596723719963     6  0.295916
4         008DC6  0.152590  0.571812  0.578309  1596724548247     6  0.921089
...          ...       ...       ...       ...            ...   ...       ...
13067967  E94C42  0.122454  0.602148  0.316223  1596716959268     0  0.861350
13067968  E94C42  0.280766  0.601963  0.315146  1596733932455     0  0.722423
13067969  E94C42  0.281147  0.601961  0.315125  1596733935427     0  0.722423
13067970  E94C42  0.281147  0.601961  0.315125  1596733935427     0  0.722423
13067971  E94C42  0.286107  0.601955  0.314975  1596733965728     0  0.722423

[13067972 rows x 7 columns]


In [None]:
# Split the frame into one 2-D array of rows per aircraft (Icao). Iterating
# over the groups keeps every column, including 'Icao', in each array.
tracks = [group.to_numpy() for _, group in train_df.groupby('Icao')]

# The per-aircraft arrays have different numbers of rows, so they are stored
# in an explicit 1-D object array. Letting np.array() infer the shape from a
# ragged list raises a ValueError on recent NumPy versions.
train_df = np.empty(len(tracks), dtype=object)
for i, track in enumerate(tracks):
    train_df[i] = track

print(train_df.shape)
# Number of distinct aircraft tracks.
train_count = train_df.shape[0]
print(train_count)

(16014,)
16014


  


In [None]:
# Build a time-indexed frame from the track stored at position 1 of the
# per-aircraft array.
# NOTE(review): position 0 is not processed here, and the later loop starts at
# 2, so the track at position 0 never enters the data set - confirm this is
# intentional.
train_input = pd.DataFrame(data = train_df[1], columns = ["Icao", "Alt","Lat", "Long","PosTime", "Type", "Trak"])
# PosTime holds epoch milliseconds; convert it and use it as the index.
train_input['Time'] = pd.to_datetime(train_input['PosTime'], unit='ms')
train_input = train_input.set_index('Time')
train_input = train_input.drop(['PosTime', 'Icao'], axis = 1)
print(train_input)
# Class label of this track, taken from its first row. .iloc makes the
# positional lookup explicit; plain [0] on a datetime-indexed Series relies on
# a deprecated positional fallback.
unique_species = train_input.Type.iloc[0]
print(unique_species)

                                Alt       Lat      Long Type      Trak
Time                                                                  
2020-08-06 07:12:53.616    0.995056  0.540828  0.551712    0  0.966657
2020-08-06 07:12:53.616    0.995056  0.540828  0.551712    0  0.966657
2020-08-06 07:13:29.760    0.995056  0.540866  0.551703    0    0.9611
2020-08-06 07:13:44.242    0.998108  0.540902  0.551694    0  0.958322
2020-08-06 07:13:56.289  0.00572213  0.540932  0.551686    0  0.951653
...                             ...       ...       ...  ...       ...
2020-08-06 16:02:45.713    0.999252   0.54098  0.551676    0  0.457905
2020-08-06 16:02:57.784    0.996963  0.540951  0.551683    0  0.457905
2020-08-06 16:03:55.777    0.994675  0.540866  0.551703    0  0.460684
2020-08-06 16:04:10.278    0.994675  0.540864  0.551703    0  0.609614
2020-08-06 16:04:27.227    0.994675  0.540863  0.551702    0  0.734649

[373 rows x 5 columns]
0


In [None]:
# Resample the track onto a regular 5-minute grid, keeping the last report of
# every bin, and force the values to numeric (anything unparsable becomes NaN).
# '5min' is the non-deprecated spelling of the '5T' frequency alias.
norm_train_df = pd.DataFrame()
for col in ['Lat', 'Long', 'Alt', 'Trak']:
    binned = train_input[col].resample('5min').last()
    norm_train_df[col] = pd.to_numeric(binned, errors='coerce')

# Fill the empty bins with a cubic spline through the known points.
norm_train_df = norm_train_df.interpolate(method='spline', order=3, s=0.)
norm_train_df = norm_train_df.reset_index()
# The spline can undershoot; altitude is not allowed to go below 0.
norm_train_df['Alt'] = norm_train_df['Alt'].clip(0)
# Keep the first 73 five-minute steps (roughly six hours) as the fixed window.
norm_train_df = norm_train_df.iloc[0:73]
print(norm_train_df)

                  Time       Lat      Long       Alt      Trak
0  2020-08-06 07:10:00  0.541109  0.551641  0.026703  0.973882
1  2020-08-06 07:15:00  0.542472  0.552251  0.217823  0.093081
2  2020-08-06 07:20:00  0.544178  0.553254  0.402075  0.092526
3  2020-08-06 07:25:00  0.546072  0.554713  0.501640  0.135871
4  2020-08-06 07:30:00  0.547895  0.556523  0.586328  0.133093
..                 ...       ...       ...       ...       ...
68 2020-08-06 12:50:00  0.575555  0.585123  0.000000  1.864393
69 2020-08-06 12:55:00  0.575441  0.584925  0.000000  1.838877
70 2020-08-06 13:00:00  0.575300  0.584686  0.000000  1.807243
71 2020-08-06 13:05:00  0.575129  0.584407  0.000000  1.769276
72 2020-08-06 13:10:00  0.574928  0.584084  0.000000  1.724758

[73 rows x 5 columns]


In [None]:
# Start the label list with the class label of the first processed track.
train_labels = [unique_species]
print(train_labels)
# Drop the timestamp column and convert the remaining four feature columns
# to a plain NumPy array.
norm_train_df = norm_train_df.drop(columns=['Time']).to_numpy()
print(norm_train_df)
# Add a leading sample axis so further windows can be stacked along axis 0.
final_input_train = norm_train_df
print(final_input_train.shape)
final_input_train = final_input_train.reshape((1, 73, 4))
print(final_input_train.shape)

[0]
[[0.54110926 0.55164051 0.02670329 0.97388166]
 [0.54247236 0.55225104 0.21782254 0.09308141]
 [0.54417837 0.55325353 0.40207523 0.0925257 ]
 [0.54607177 0.55471271 0.50164034 0.13587108]
 [0.54789543 0.5565232  0.58632792 0.13309254]
 [0.54969102 0.55826527 0.59510185 0.13225895]
 [0.55004507 0.55860418 0.59510185 0.13225895]
 [0.55061809 0.55913722 0.60817557 0.12630752]
 [0.55184827 0.56027639 0.62872997 0.11537143]
 [0.55359598 0.56188938 0.65039623 0.10220479]
 [0.55572157 0.5638439  0.66680553 0.08956167]
 [0.55808541 0.56600764 0.67158908 0.08019615]
 [0.56054785 0.56824831 0.65837804 0.07686233]
 [0.56296924 0.5704336  0.62080362 0.08231428]
 [0.56520995 0.57243121 0.55249699 0.09930609]
 [0.56713033 0.57410884 0.44708934 0.13059184]
 [0.56859511 0.57544625 0.30861372 0.15837733]
 [0.56961566 0.57667202 0.17662318 0.13059184]
 [0.57040614 0.5773856  0.08812085 0.12725757]
 [0.57044345 0.57741934 0.08392462 0.13281468]
 [0.57031141 0.57733422 0.09457798 0.14164659]
 [0.57020

In [None]:
# Turn every remaining aircraft track into a (1, 73, 4) window using the same
# steps as for the first track: 5-minute resampling, cubic-spline
# interpolation, altitude clipped at 0, first 73 steps kept.
# Tracks that cannot be processed (for example because they yield fewer than
# 73 steps, so the reshape fails) are skipped and counted.
new_windows = []
skipped_tracks = 0
# Iterate up to the real number of tracks instead of a hard-coded bound.
for j in range(2, len(train_df)):
    try:
        train_input = pd.DataFrame(data = train_df[j], columns = ["Icao","Alt","Lat", "Long","PosTime", "Type", "Trak"])
        train_input['Time'] = pd.to_datetime(train_input['PosTime'], unit='ms')
        train_input = train_input.set_index('Time')
        train_input = train_input.drop('PosTime', axis = 1)
        # Class label of the track, read positionally from its first row.
        unique_species = train_input.Type.iloc[0]
        norm_train_df = pd.DataFrame()
        for col in ['Lat', 'Long', 'Alt', 'Trak']:
            binned = train_input[col].resample('5min').last()
            norm_train_df[col] = pd.to_numeric(binned, errors='coerce')
        norm_train_df = norm_train_df.interpolate(method='spline', order=3, s=0.)
        norm_train_df = norm_train_df.reset_index()
        norm_train_df['Alt'] = norm_train_df['Alt'].clip(0)
        norm_train_df = norm_train_df.iloc[0:73]
        norm_train_df = norm_train_df.drop('Time', axis = 1)
        norm_train_df = norm_train_df.to_numpy()
        norm_train_df = np.reshape(norm_train_df, (1,73,4))
    except Exception:
        # Best effort: skip the track. Unlike a bare `except:`, this does not
        # swallow KeyboardInterrupt, so the loop can still be interrupted.
        skipped_tracks += 1
        continue
    new_windows.append(norm_train_df)
    train_labels.append(unique_species)

# Stack all windows in one call; appending to the array inside the loop would
# copy the whole array on every iteration.
final_input_train = np.concatenate([final_input_train] + new_windows, axis = 0)

print(final_input_train.shape)
print(len(train_labels))
print('skipped tracks:', skipped_tracks)

(10355, 73, 4)
10355


In [None]:
# keras.utils exposes to_categorical directly; the keras.utils.np_utils
# module path is not available in newer Keras releases.
from keras.utils import to_categorical

# Hold out the trailing part of the samples for testing. The split position is
# derived from the data (for 10355 samples this gives 7766) instead of being
# hard-coded, so the cell keeps working when the number of windows changes.
TRAIN_FRACTION = 0.75
split_idx = int(len(final_input_train) * TRAIN_FRACTION)

final_input_test = final_input_train[split_idx:]
print(final_input_test.shape)

final_input_train = final_input_train[:split_idx]
print(final_input_train.shape)

test_labels = train_labels[split_idx:]
print(len(test_labels))

train_labels_final = train_labels[:split_idx]
print(len(train_labels_final))

# Distinct class labels in each subset, in order of first appearance; useful
# to check that every class occurs on both sides of the split.
unique = list(dict.fromkeys(test_labels))
unique2 = list(dict.fromkeys(train_labels_final))
print(unique)
print(unique2)

# One-hot encode the integer labels (10 classes); the result is already a
# NumPy array, so no further conversion is needed.
test_labels = to_categorical(test_labels, num_classes = 10)
train_labels_final = to_categorical(train_labels_final, num_classes = 10)
print(len(test_labels))
print(len(train_labels_final))

print(train_labels_final)

(2589, 73, 4)
(7766, 73, 4)
2589
7766
[2, 6, 0, 1, 5, 3, 9, 8, 7, 4]
[0, 1, 5, 7, 4, 3, 6, 2, 8, 9]
2589
7766
[[1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [None]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import Dropout
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
# fit and evaluate a model
def evaluate_model(final_input_train, train_labels_final, final_input_test, test_labels):
    """Train a 1-D convolutional classifier and return its test accuracy.

    The network is three Conv1D layers (64 filters, kernel size 3, ReLU),
    dropout of 0.5, max pooling with pool size 2, a 100-unit ReLU dense layer
    and a 10-way softmax output. It is trained for 100 epochs with batch size
    16 using Adam and categorical cross-entropy.

    Args:
        final_input_train: training inputs; axes 1 and 2 of its shape are used
            as the number of time steps and features of the input layer.
        train_labels_final: training targets passed to ``fit``.
        final_input_test: inputs passed to ``evaluate``.
        test_labels: targets passed to ``evaluate``.

    Returns:
        The accuracy value reported by ``model.evaluate`` on the test data.
    """
    # Training settings and output width.
    verbose = 2
    epochs = 100
    batch_size = 16
    n_outputs = 10
    n_timesteps = final_input_train.shape[1]
    n_features = final_input_train.shape[2]

    network = Sequential([
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(n_timesteps, n_features)),
        Conv1D(filters=64, kernel_size=3, activation='relu'),
        Conv1D(filters=64, kernel_size=3, activation='relu'),
        Dropout(0.5),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(100, activation='relu'),
        Dense(n_outputs, activation='softmax'),
    ])
    network.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Fit on the training set, reshuffling the samples every epoch.
    network.fit(
        final_input_train,
        train_labels_final,
        epochs=epochs,
        batch_size=batch_size,
        verbose=verbose,
        shuffle=True,
    )

    # evaluate() returns the loss followed by the accuracy metric.
    test_loss, test_accuracy = network.evaluate(
        final_input_test, test_labels, batch_size=batch_size, verbose=0
    )
    return test_accuracy


# summarize scores
def summarize_results(scores):
    """Print the individual scores, then their mean and standard deviation.

    Args:
        scores: sequence of accuracy values (in percent), one per run.
    """
    print(scores)
    mean_score = np.mean(scores)
    score_std = np.std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (mean_score, score_std))

# run an experiment
def run_experiment(repeats=2):
    """Train and evaluate the model several times and summarise the accuracies.

    Each repeat calls ``evaluate_model`` on the module-level train/test arrays,
    converts the returned accuracy to percent and prints it; afterwards the
    collected scores are handed to ``summarize_results``.

    Args:
        repeats: number of independent train/evaluate runs.
    """
    scores = []
    for run_number in range(1, repeats + 1):
        accuracy = evaluate_model(final_input_train, train_labels_final, final_input_test, test_labels)
        accuracy_pct = accuracy * 100.0
        print('>#%d: %.3f' % (run_number, accuracy_pct))
        scores.append(accuracy_pct)
    # Report mean and spread over all runs.
    summarize_results(scores)

# Run the experiment with the default number of repeats; the per-run scores
# and the final accuracy summary are printed by run_experiment itself.
run_experiment()

Epoch 1/100
486/486 - 6s - loss: 2.1688 - accuracy: 0.3647
Epoch 2/100
486/486 - 5s - loss: 1.6422 - accuracy: 0.3976
Epoch 3/100
486/486 - 5s - loss: 1.5938 - accuracy: 0.4068
Epoch 4/100
486/486 - 5s - loss: 1.5794 - accuracy: 0.4190
Epoch 5/100
486/486 - 5s - loss: 1.5053 - accuracy: 0.4257
Epoch 6/100
486/486 - 5s - loss: 1.4852 - accuracy: 0.4318
Epoch 7/100
486/486 - 5s - loss: 1.4676 - accuracy: 0.4346
Epoch 8/100
486/486 - 5s - loss: 1.4562 - accuracy: 0.4417
Epoch 9/100
486/486 - 5s - loss: 1.4442 - accuracy: 0.4473
Epoch 10/100
486/486 - 5s - loss: 1.4186 - accuracy: 0.4548
Epoch 11/100
486/486 - 5s - loss: 1.3997 - accuracy: 0.4575
Epoch 12/100
486/486 - 5s - loss: 1.3766 - accuracy: 0.4661
Epoch 13/100
486/486 - 5s - loss: 1.3616 - accuracy: 0.4683
Epoch 14/100
486/486 - 5s - loss: 1.3412 - accuracy: 0.4811
Epoch 15/100
486/486 - 5s - loss: 1.3152 - accuracy: 0.4911
Epoch 16/100
486/486 - 5s - loss: 1.2909 - accuracy: 0.4990
Epoch 17/100
486/486 - 5s - loss: 1.2606 - accura