# Task 2: Traffic Control
This is our notebook for the second task of the Tensor Tournament 2024.

In [1]:
import pandas as pd
data = pd.read_parquet('data/task2/data/train/5755_thermalling.parquet')
data.head()

Unnamed: 0,time (sec),accel_x,accel_y,accel_z,gyro_x,gyro_y,gyro_z
0,0.0,1.503444,3.121105,-9.181875,0.043571,-0.029413,-0.033408
1,0.01,1.512565,3.121883,-9.195143,0.05077,0.01003,-0.141596
2,0.02,1.486048,3.135666,-9.166369,0.086697,-0.06519,0.092678
3,0.03,1.502061,3.119995,-9.178302,-0.154684,0.022522,0.173542
4,0.04,1.51537,3.128424,-9.18235,0.158457,-0.15249,0.106326


In [93]:
import os
import numpy as np
from sklearn.preprocessing import OneHotEncoder

TRAIN_DATA_PATH = 'data/task2/data/train/'
TEST_DATA_PATH = 'data/task2/data/test/'

LABEL_MAPPING = {
    'takeoff': 0,
    'landing': 1,
    'circling': 2,
    'thermalling': 3,
    's-curve': 4,
    'looping': 5
}

def load_data_file(fname: str, train: bool=True):
    if train:
        fp = os.path.join(TRAIN_DATA_PATH, fname)
    else:
        fp = os.path.join(TEST_DATA_PATH, fname)
    data = pd.read_parquet(fp)
    print(data.columns)
    data = data.drop(columns=['time (sec)'])

    if not train:
        return data

    label = fname.split('_')[1].split('.')[0]

    return data, label

def get_data_array(train: bool=True):
    if train:
        path = TRAIN_DATA_PATH
    else:
        path = TEST_DATA_PATH
    
    files = os.listdir(path)
    datas, labels = [], []
    max_len = 0
    for fname in files:
        if train:
            data, label = load_data_file(fname, train)
        else:
            data = load_data_file(fname, train)
            label = None
        data = data.to_numpy()
        if data.shape[0] > max_len:
            max_len = data.shape[0]
        datas.append(data)
        labels.append(label)
    print('Loaded all files')

    datas_padded = []
    for i, d in enumerate(datas):
        datas_padded.append(np.pad(d, ((0, max_len - d.shape[0]), (0,0))))
    print('Padded')

    datas_padded = np.stack(datas_padded)
    for i in range(datas_padded.shape[2]):
        datas_padded[:, :, i] /= datas_padded[:, :, i].mean()
    print('Normalized')

    if not train:
        return datas_padded

    labels = [LABEL_MAPPING[x] for x in labels]
    labels = np.array(labels)
    encoder = OneHotEncoder()
    one_hot_labels = encoder.fit_transform(labels.reshape(-1, 1))
    one_hot_labels = one_hot_labels.toarray()

    return datas_padded, one_hot_labels, labels, encoder

In [94]:
from sklearn.model_selection import train_test_split
X_padded, Y, labels, encoder = get_data_array()
X_train, X_val, Y_train, Y_val = train_test_split(X_padded, Y, test_size=0.1)

Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z

KeyboardInterrupt: 

In [4]:
import matplotlib.pyplot as plt

def plot_history(history, metric, ylim: tuple=None):
    plt.figure(figsize=(12, 6))
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('Model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('Epoch')

    if ylim:
        plt.ylim(ylim)

    plt.legend(['Train', 'Validation'], loc='upper right')
    plt.show()

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

model = Sequential()
model.add(LSTM(64, activation='tanh', input_shape=(X_padded.shape[1], X_padded.shape[2])))
model.add(Dense(32, activation='relu'))
model.add(Dense(6, activation='softmax'))

lr = 0.001
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[tf.keras.metrics.Accuracy()]
    )

history = model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=5, batch_size=64)
plot_history(history, metric='accuracy')

# Very cool Mean Predictor

In [11]:
labels

array([4, 3, 0, ..., 3, 1, 3])

In [13]:
from collections import Counter
counts = Counter(labels)
counts

Counter({0: 1193, 1: 1193, 2: 817, 4: 727, 3: 678, 5: 12})

In [20]:
counts = dict(counts)
probs = {x: counts[x]/len(labels) for x in counts.keys()}
probs

{4: 0.15735930735930737,
 3: 0.14675324675324675,
 0: 0.25822510822510825,
 2: 0.17683982683982685,
 1: 0.25822510822510825,
 5: 0.0025974025974025974}

In [41]:
values = list(probs.keys())
probas = list(probs.values())

def predict():
    return np.random.choice(values, p=probas)

In [47]:
for i in range(100):

    results = [predict() for _ in range(1156)]
    results = np.array(results)
    results = results.reshape((1156,))
    results = results.astype(np.int64)
    with open(f'./data/task2/task2_solution_{i}.npy', 'wb') as f:
        np.save(f, results)

In [46]:
with open('./data/task2/task2_solution_1.npy', 'wb') as f:
    np.save(f, results)

# 1D Convolution

In [88]:
from tensorflow import keras

model = keras.models.Sequential()

model.add(keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(keras.layers.MaxPooling1D())
model.add(keras.layers.Conv1D(filters=16, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(keras.layers.MaxPooling1D())
model.add(keras.layers.Flatten())
model.add(keras.layers.Dense(6, activation='softmax'))

lr = 0.001
model.compile(
    optimizer=tf.keras.optimizers.AdamW(learning_rate=lr),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[keras.metrics.CategoricalAccuracy()]
    )

history = model.fit(X_train, Y_train, validation_data=(X_val, Y_val), epochs=12, batch_size=128)
plot_history(history, metric='categorical_accuracy')

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/12
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 328ms/step - categorical_accuracy: 0.5682 - loss: 290.8742 - val_categorical_accuracy: 0.7619 - val_loss: 18.8445
Epoch 2/12
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 295ms/step - categorical_accuracy: 0.7995 - loss: 13.4555 - val_categorical_accuracy: 0.7792 - val_loss: 5.7491
Epoch 3/12
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 270ms/step - categorical_accuracy: 0.8073 - loss: 4.2296 - val_categorical_accuracy: 0.8074 - val_loss: 3.0929
Epoch 4/12
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 270ms/step - categorical_accuracy: 0.8219 - loss: 2.5037 - val_categorical_accuracy: 0.7641 - val_loss: 3.9925
Epoch 5/12
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 267ms/step - categorical_accuracy: 0.8118 - loss: 1.9092 - val_categorical_accuracy: 0.7792 - val_loss: 2.6123
Epoch 6/12
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

KeyboardInterrupt: 

In [95]:
X_test = get_data_array(False)

Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z'],
      dtype='object')
Index(['time (sec)', 'accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y',
       'gyro_z

KeyboardInterrupt: 

In [64]:
X_train.shape

(4158, 15050, 6)

In [65]:
X_test.shape

(1156, 14768, 6)

In [84]:
X_test = np.pad(X_test, ((0,0), (0, X_train.shape[1] - X_test.shape[1]), (0,0)))

In [89]:
predicted = model.predict(X_test)
predicted = encoder.inverse_transform(predicted)

[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step


In [90]:
predicted = predicted.reshape((1156, ))
predicted = predicted.astype(np.int64)

In [92]:
np.save("./data/task2/predicted.npy", predicted)