In [1]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tqdm import tqdm
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

In [2]:
class DeepConvLSTM(nn.Module):
    """Convolutional feature extractor followed by a stacked LSTM classifier.

    Three ``Conv1d`` layers (kernel 5, padding 2, so each keeps the sequence
    length) are each followed by ReLU and ``MaxPool1d(2)``; the three poolings
    reduce a sequence of length ``L`` to ``L // 8`` steps. The pooled features
    feed a 2-layer LSTM with 128 hidden units, and the LSTM output at the last
    time step is projected to ``num_classes`` scores by a linear layer.

    Args:
        num_channels: number of input channels per time step.
        num_classes: size of the output score vector.

    ``forward`` takes a tensor laid out as ``(batch, seq_len, num_channels)``
    and returns a ``(batch, num_classes)`` tensor of unnormalized scores.
    """

    def __init__(self, num_channels, num_classes):
        super().__init__()
        # Attribute names and creation order are kept stable: they determine
        # the state_dict keys and the order in which parameters are initialized.
        conv_kwargs = dict(kernel_size=5, padding=2)
        self.conv1 = nn.Conv1d(num_channels, 64, **conv_kwargs)
        self.conv2 = nn.Conv1d(64, 128, **conv_kwargs)
        self.conv3 = nn.Conv1d(128, 128, **conv_kwargs)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool1d(2)
        self.lstm = nn.LSTM(128, 128, num_layers=2, batch_first=True)
        self.fc = nn.Linear(128, num_classes)

    def forward(self, x):
        # Conv1d wants channels on dim 1: (batch, seq, ch) -> (batch, ch, seq).
        features = x.permute(0, 2, 1)
        for conv in (self.conv1, self.conv2, self.conv3):
            features = self.pool(self.relu(conv(features)))
        # Back to (batch, seq, feat) for the batch_first LSTM.
        sequence, _state = self.lstm(features.permute(0, 2, 1))
        last_step = sequence[:, -1, :]
        return self.fc(last_step)

In [3]:
# 1. Load the test data
import ast  # local to this cell: only needed to parse the list-valued CSV cells safely

TEST_CSV_PATH = "./data/test.csv"
MODEL_DIR = "models/DeepConvLSTM"
SUBMISSION_PATH = "submission_deepconv.csv"
# The position of a location in this list is the index of its checkpoint
# file ("<MODEL_DIR>/<index>.pt"), so the order must not be changed.
SENSOR_LOCATIONS = ["right_arm", "left_arm", "left_leg", "right_leg"]
AXIS_COLUMNS = ("x_axis", "y_axis", "z_axis")

test_data_df = pd.read_csv(TEST_CSV_PATH)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
batch_size = 64


class TestDataset(Dataset):
    """Map-style dataset over unlabeled test windows; each item is one float32 window."""

    def __init__(self, data):
        self.data = torch.tensor(data, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]


def _parse_axis(cell):
    """Turn one CSV cell holding a stringified list of numbers into a float32 array."""
    # ast.literal_eval only accepts Python literals, so unlike eval() it cannot
    # execute code embedded in the CSV file.
    return np.array(ast.literal_eval(cell), dtype=np.float32)


# 2. Process the pre-windowed data
# Convert string representation of lists to actual numpy arrays
for axis_col in AXIS_COLUMNS:
    test_data_df[axis_col] = test_data_df[axis_col].apply(_parse_axis)

result_df_list = []
for i, entry in enumerate(SENSOR_LOCATIONS):
    test_df = test_data_df[test_data_df["sensor_location"] == entry]
    if test_df.empty:
        # np.vstack raises on an empty sequence; there is nothing to predict anyway.
        print(f"No test rows for sensor location '{entry}'; skipping.")
        continue

    # SECURITY: weights_only=False unpickles a whole module object and can run
    # arbitrary code, so only load checkpoints from a trusted source.
    # map_location + .to(device) keep the model on the same device as the
    # batches, even if the checkpoint was saved from a different device.
    model = torch.load(f"{MODEL_DIR}/{i}.pt", map_location=device, weights_only=False)
    model.to(device)
    model.eval()

    # Stack the axes to create a single numpy array for X_test
    # The shape will be (num_samples, sequence_length, num_channels) -> (n, 50, 3)
    X_test_unscaled = np.stack(
        [np.vstack(test_df[axis_col].values) for axis_col in AXIS_COLUMNS],
        axis=-1,
    )

    print(f"Shape of unscaled test data: {X_test_unscaled.shape}")

    # 3. Scale the data
    # The scaler expects a 2D array, so we reshape, transform, and then reshape back.
    # NOTE(review): this fits a *new* StandardScaler on the test subset itself,
    # not the scaler fitted during training. If a fitted training scaler was
    # persisted, load it and call .transform() here instead; otherwise the
    # inputs are normalized with different statistics than the model was
    # trained on. The behaviour is left unchanged because the location of a
    # saved scaler is not visible from this notebook section.
    num_samples, seq_len, num_features = X_test_unscaled.shape
    X_test_reshaped = X_test_unscaled.reshape(-1, num_features)
    scaler = StandardScaler()
    X_test_scaled_reshaped = scaler.fit_transform(X_test_reshaped)
    X_test = X_test_scaled_reshaped.reshape(num_samples, seq_len, num_features)

    print(f"Shape of final test data: {X_test.shape}")

    # 4. Create DataLoader for the test set
    test_dataset = TestDataset(X_test)
    # shuffle=False keeps predictions aligned with test_df rows;
    # drop_last=False makes sure every test sample is evaluated.
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=False)

    # 5. Make predictions with the trained model
    all_preds = []

    with torch.no_grad():
        for Xb in tqdm(test_loader, desc="Making predictions"):
            Xb = Xb.to(device)

            preds = model(Xb)

            all_preds.extend(preds.argmax(dim=1).cpu().tolist())

    assert len(all_preds) == len(test_df), "prediction count does not match number of test rows"

    # 6. Collect the predictions for this sensor location
    result_df = pd.DataFrame({
        'id': test_df["id"].values,
        'label': all_preds
    })
    result_df_list.append(result_df)

if not result_df_list:
    raise ValueError("No predictions were produced: no test rows matched any known sensor location.")

# 7. Generate submission file
result_df = pd.concat(result_df_list, ignore_index=True)
result_df.to_csv(SUBMISSION_PATH, index=False)

print(f"\nSubmission file '{SUBMISSION_PATH}' created successfully.")
print(result_df.head())


Shape of unscaled test data: (12234, 50, 3)
Shape of final test data: (12234, 50, 3)


Making predictions: 100%|██████████| 192/192 [00:02<00:00, 65.13it/s]


Shape of unscaled test data: (12234, 50, 3)
Shape of final test data: (12234, 50, 3)


Making predictions: 100%|██████████| 192/192 [00:03<00:00, 54.88it/s]


Shape of unscaled test data: (12234, 50, 3)
Shape of final test data: (12234, 50, 3)


Making predictions: 100%|██████████| 192/192 [00:02<00:00, 80.27it/s] 


Shape of unscaled test data: (12234, 50, 3)
Shape of final test data: (12234, 50, 3)


Making predictions: 100%|██████████| 192/192 [00:03<00:00, 53.21it/s]



Submission file 'submission.csv' created successfully.
   id  label
0   0     16
1   1      6
2   2     16
3   3     15
4   4     15
