In [1]:
import os

import pandas as pd
from sklearn.preprocessing import StandardScaler

from LSTM.lstm_autoencoder import LSTM_AE
from LSTM.lstm_autoencoders_utils import train_lstm_autoencoder
from NN.NeuralNetwork import NeuralNetwork
from NN.nn_utils import get_train_data, train_nn_model
from models_utils.Datasets import DataframeWithLabels, pad_sequence
from models_utils.GLOBALS import *
from models_utils.utils import convert_to_features

In [2]:
# Read the training index, then collect the sample ids recorded by each sensor.
train_data = pd.read_csv('csv/train.csv')
data_type_1 = train_data.loc[train_data['sensor'] == 'vicon', 'id'].tolist()
data_type_2 = train_data.loc[train_data['sensor'] == 'smartwatch', 'id'].tolist()

In [3]:
# Dimensions shared by the cells below.
# embedding_size is handed to the LSTM autoencoders and also fixes the number
# of embedding feature columns.
embedding_size = 128
# Sequence lengths used for padding/cutting, one per data type.
target_size_type1, target_size_type2 = 4000, 1350

In [4]:
# train or load models
# 'train' fits both LSTM autoencoders from the id lists; 'load' rebuilds the
# architectures and restores weights from the .pth files in the working directory.
train_or_load_autoencoders = 'train'
if train_or_load_autoencoders == 'train':
    # Trailing positional arguments are presumably learning rate, batch size and
    # number of epochs (the training log reports 15 epochs) - confirm against
    # train_lstm_autoencoder's signature.
    Type1LSTMAutoencoder = train_lstm_autoencoder(data_type_1, '1', target_size_type1, embedding_size, 0.000004, 64, 15)
    Type2LSTMAutoencoder = train_lstm_autoencoder(data_type_2, '2', target_size_type2, embedding_size, 0.000004, 64, 15)
elif train_or_load_autoencoders == 'load':
    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint written on a GPU machine can still be restored on a CPU-only one.
    Type1LSTMAutoencoder = LSTM_AE(target_size_type1, 3, embedding_size).to(device)
    Type1LSTMAutoencoder.load_state_dict(torch.load('Type1LSTMAutoencoder.pth', map_location=device))
    Type2LSTMAutoencoder = LSTM_AE(target_size_type2, 3, embedding_size).to(device)
    Type2LSTMAutoencoder.load_state_dict(torch.load('Type2LSTMAutoencoder.pth', map_location=device))
else:
    raise ValueError('Wrong train or load')

Batch: 20/176, Train Loss: 0.1344
Batch: 40/176, Train Loss: 0.1259
Batch: 60/176, Train Loss: 0.1319
Batch: 80/176, Train Loss: 0.1295
Batch: 100/176, Train Loss: 0.1272
Batch: 120/176, Train Loss: 0.1209
Batch: 140/176, Train Loss: 0.1208
Batch: 160/176, Train Loss: 0.1166
Batch: 176/176, Train Loss: 0.1189
Epoch [1/15], Average Training Loss: 0.1262, , Average Validation Loss: 0.1179
Batch: 20/176, Train Loss: 0.1188
Batch: 40/176, Train Loss: 0.1133
Batch: 60/176, Train Loss: 0.1111
Batch: 80/176, Train Loss: 0.1064
Batch: 100/176, Train Loss: 0.1083
Batch: 120/176, Train Loss: 0.1062
Batch: 140/176, Train Loss: 0.1048
Batch: 160/176, Train Loss: 0.0966
Batch: 176/176, Train Loss: 0.1006
Epoch [2/15], Average Training Loss: 0.1080, , Average Validation Loss: 0.0986
Batch: 20/176, Train Loss: 0.0987
Batch: 40/176, Train Loss: 0.0933
Batch: 60/176, Train Loss: 0.0906
Batch: 80/176, Train Loss: 0.0855
Batch: 100/176, Train Loss: 0.0864
Batch: 120/176, Train Loss: 0.0837
Batch: 140/176

In [5]:
# calculate or load train_data
# 'calculate' derives the per-sample training tables from the two autoencoders;
# 'load' reads tables previously written to CSV.
# NOTE: from here on data_type_1 / data_type_2 hold DataFrames, replacing the
# id lists assigned to the same names when the training index was read.
calculate_or_load_train_data = 'load'
if calculate_or_load_train_data == 'calculate':
    data_type_1, data_type_2 = get_train_data(Type1LSTMAutoencoder, Type2LSTMAutoencoder, embedding_size)
elif calculate_or_load_train_data == 'load':
    data_type_1 = pd.read_csv('train_data_type1.csv')
    data_type_2 = pd.read_csv('train_data_type2.csv')
else:
    # Fail loudly like the other mode switches; otherwise the names would
    # silently keep the id lists and break later cells in a confusing way.
    raise ValueError('Wrong calculate or load')

NEURAL NETWORK

In [6]:
# Column labels for the autoencoder embedding, numbered from 1 to embedding_size.
embedding_names = [
    f'embedding_feature_{i + 1}'
    for i in range(embedding_size)
]

In [7]:
# normalize data
# Work on copies so the loaded tables stay untouched and this cell can be
# re-run safely: without the copy, a second run would scale the already-scaled
# columns and re-map the already-mapped 'activity' labels.
features_scaler_1 = StandardScaler()
data_type_1_normalized = data_type_1.copy()
# Skip the first embedding_size + 1 columns and the last column; the last one
# is presumably the 'activity' label - confirm against the table layout.
columns_to_scale = data_type_1_normalized.columns.tolist()[embedding_size + 1:-1]
data_type_1_normalized[columns_to_scale] = features_scaler_1.fit_transform(data_type_1_normalized[columns_to_scale])
# Replace activity names with their ids from activity_id_mapping.
data_type_1_normalized['activity'] = data_type_1_normalized['activity'].map(activity_id_mapping)
type1_dataset = DataframeWithLabels(data_type_1_normalized)

# Same procedure for the second data type, with its own fitted scaler
# (kept so the same scaling can be applied at prediction time).
features_scaler_2 = StandardScaler()
data_type_2_normalized = data_type_2.copy()
columns_to_scale = data_type_2_normalized.columns.tolist()[embedding_size + 1:-1]
data_type_2_normalized[columns_to_scale] = features_scaler_2.fit_transform(data_type_2_normalized[columns_to_scale])
data_type_2_normalized['activity'] = data_type_2_normalized['activity'].map(activity_id_mapping)
type2_dataset = DataframeWithLabels(data_type_2_normalized)

In [10]:
# train or load nn
# Each classifier can be trained from its dataset or restored from a saved
# state dict independently of the other.
train_or_load_nn_model_type_1 = 'train'
train_or_load_nn_model_type_2 = 'load'

# Both classifiers are built with the same positional arguments (174, [75], 18);
# presumably input width, hidden layer sizes and number of classes - confirm
# against NeuralNetwork's signature.
if train_or_load_nn_model_type_1 == 'train':
    model_type_1 = train_nn_model(type1_dataset, '1', 174, [75], 18,
                                  batch_size=64, learning_rate=0.0005, num_epochs=100, scheduler_factor=0.9,
                                  scheduler_patience=4)
elif train_or_load_nn_model_type_1 == 'load':
    model_type_1 = NeuralNetwork(174, [75], 18).to(device)
    # map_location lets a checkpoint saved on GPU be restored on the active device.
    model_type_1.load_state_dict(torch.load('Type1NNModel.pth', map_location=device))
else:
    raise ValueError('Wrong train or load')

if train_or_load_nn_model_type_2 == 'train':
    model_type_2 = train_nn_model(type2_dataset, '2', 174, [75], 18,
                                  batch_size=64, learning_rate=0.0005, num_epochs=100, scheduler_factor=0.9,
                                  scheduler_patience=4)
elif train_or_load_nn_model_type_2 == 'load':
    model_type_2 = NeuralNetwork(174, [75], 18).to(device)
    model_type_2.load_state_dict(torch.load('Type2NNModel.pth', map_location=device))
else:
    raise ValueError('Wrong train or load')

--------------
Epoch [1/100], Training Loss: 2.8184, Training Accuracy: 17.77%
Epoch [1/100], Validation Loss: 2.7621, Validation Accuracy: 22.61%
--------------
Epoch [2/100], Training Loss: 2.7382, Training Accuracy: 26.15%
Epoch [2/100], Validation Loss: 2.7150, Validation Accuracy: 29.40%
--------------
Epoch [3/100], Training Loss: 2.6948, Training Accuracy: 31.01%
Epoch [3/100], Validation Loss: 2.6806, Validation Accuracy: 31.75%
--------------
Epoch [4/100], Training Loss: 2.6678, Training Accuracy: 32.65%
Epoch [4/100], Validation Loss: 2.6617, Validation Accuracy: 33.20%
--------------
Epoch [5/100], Training Loss: 2.6467, Training Accuracy: 36.36%
Epoch [5/100], Validation Loss: 2.6398, Validation Accuracy: 36.83%
--------------
Epoch [6/100], Training Loss: 2.6263, Training Accuracy: 38.66%
Epoch [6/100], Validation Loss: 2.6203, Validation Accuracy: 40.10%
--------------
Epoch [7/100], Training Loss: 2.6086, Training Accuracy: 41.39%
Epoch [7/100], Validation Loss: 2.6046,

In [20]:
# save results
def _predict_sample(signal, axis_columns, target_size, min_values, max_values,
                    autoencoder, features_scaler, classifier):
    """Run one recording through the embedding + classifier pipeline.

    Both sensor types share this single code path so they cannot drift apart
    (the smartwatch branch previously encoded the raw, un-normalized sequence).

    Args:
        signal: DataFrame holding the recording; must contain ``axis_columns``.
        axis_columns: names of the three axis columns, in x, y, z order.
        target_size: recordings shorter than this are padded with pad_sequence.
        min_values, max_values: bounds used for min-max normalization before
            encoding.
        autoencoder: model exposing ``encode``.
        features_scaler: fitted scaler applied to the hand-crafted features.
        classifier: model called on the assembled feature row.

    Returns:
        Whatever ``classifier`` returns for the single-row input.
    """
    axis_tensors = [torch.tensor(signal[name].values, dtype=torch.float32) for name in axis_columns]
    handcrafted = convert_to_features(*axis_tensors)

    # NOTE(review): recordings longer than target_size are passed through
    # unchanged here - confirm the autoencoder accepts them or cut upstream.
    if len(signal) < target_size:
        signal = pad_sequence(signal, target_size)

    sequence = torch.tensor(signal.values, dtype=torch.float32).to(device)
    sequence = sequence.view(1, sequence.shape[0], sequence.shape[1])
    normalized = (sequence - min_values) / (max_values - min_values + 1e-6)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        embedding = autoencoder.encode(normalized).squeeze().detach().cpu().numpy()

    row = pd.DataFrame([embedding], columns=embedding_names)
    for col, value in handcrafted.items():
        row[col] = value

    # Same column offset that was used when the scaler was fitted.
    columns_to_scale = row.columns.tolist()[embedding_size + 1:]
    row[columns_to_scale] = features_scaler.transform(row[columns_to_scale])
    features = torch.tensor(row.values, dtype=torch.float32).to(device)
    with torch.no_grad():
        return classifier(features)


# Switch every model to evaluation mode so train-only layers (dropout,
# batch norm), if the architectures contain any, behave deterministically.
Type1LSTMAutoencoder.eval()
Type2LSTMAutoencoder.eval()
model_type_1.eval()
model_type_2.eval()

sample_ids = pd.read_csv('sample_submission.csv')['sample_id'].to_list()
results_list = []
for file_id in sample_ids:

    class_path = os.path.join(files_directory, f"{file_id}.csv")
    new_data = pd.read_csv(class_path)

    if new_data.shape[1] == 3:
        # Three-column files are handled as type 1 recordings.
        predictions = _predict_sample(
            new_data, ("x [m]", "y [m]", "z [m]"), target_size_type1,
            min_values_type1, max_values_type1,
            Type1LSTMAutoencoder, features_scaler_1, model_type_1)
    else:
        # Other files carry a leading measurement-kind column; keep only the
        # acceleration rows and drop that column.
        new_data = new_data[new_data.iloc[:, 0] == 'acceleration [m/s/s]'].iloc[:, 1:]
        predictions = _predict_sample(
            new_data, ("x", "y", "z"), target_size_type2,
            min_values_type2, max_values_type2,
            Type2LSTMAutoencoder, features_scaler_2, model_type_2)

    scores = predictions.squeeze()
    res_dict = {activity: scores[class_id].item() for class_id, activity in id_activity_mapping.items()}

    # One output column per known activity; activities without a score get 0.
    result_dict = {label: res_dict.get(label, 0) for label in activity_id_mapping.keys()}
    result_dict['sample_id'] = file_id
    results_list.append(result_dict)
results = pd.DataFrame(results_list, columns=['sample_id'] + list(activity_id_mapping.keys()))

In [39]:
# Replace any missing scores with 0 and write the table to CSV without the index column.
(
    results
    .fillna(0)
    .to_csv('results_nn2.csv', index=False)
)