In [1]:
import pandas as pd

test_series = pd.read_parquet("./test_series.parquet")
test_series

Unnamed: 0,series_id,step,timestamp,anglez,enmo
0,038441c925bb,0,2018-08-14T15:30:00-0400,2.636700,0.0217
1,038441c925bb,1,2018-08-14T15:30:05-0400,2.636800,0.0215
2,038441c925bb,2,2018-08-14T15:30:10-0400,2.637000,0.0216
3,038441c925bb,3,2018-08-14T15:30:15-0400,2.636800,0.0213
4,038441c925bb,4,2018-08-14T15:30:20-0400,2.636800,0.0215
...,...,...,...,...,...
445,0402a003dae9,145,2018-12-18T12:57:05-0500,-59.696899,0.0601
446,0402a003dae9,146,2018-12-18T12:57:10-0500,-35.656601,0.0427
447,0402a003dae9,147,2018-12-18T12:57:15-0500,-21.582399,0.0309
448,0402a003dae9,148,2018-12-18T12:57:20-0500,-42.616001,0.0328


In [2]:
sample_submission = pd.read_csv("./sample_submission.csv")
sample_submission

Unnamed: 0,row_id,series_id,step,event,score
0,0,038441c925bb,100,onset,0.0
1,1,038441c925bb,105,wakeup,0.0
2,2,03d92c9f6f8a,80,onset,0.5
3,3,03d92c9f6f8a,110,wakeup,0.5
4,4,0402a003dae9,90,onset,1.0
5,5,0402a003dae9,120,wakeup,1.0


In [3]:
unique_series_ids = test_series.series_id.unique()
print(unique_series_ids)

['038441c925bb' '03d92c9f6f8a' '0402a003dae9']


In [4]:
import joblib
import numpy as np
import pandas as pd
from collections import Counter
from tqdm import tqdm
import torch
import torch.nn as nn
from torchsummary import summary
from torch.utils.data import Dataset, DataLoader, random_split, SubsetRandomSampler, Subset
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import StepLR
from sklearn.model_selection import KFold

In [5]:
class CFG:
    window_size = 60
    window_overlap = 30
    batch_size = 32
    input_dim = 2
    embed_dim = 32
    num_classes = 3
    num_layers = 6
    nhead = 4
    dim_feedforward = 64
    dropout = 0.4

In [6]:
class MyDataset(Dataset):
    def __init__(self, inputs, outputs):
        self.inputs = inputs
        self.outputs = outputs

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, index):
        inp = self.inputs[index]
        output = self.outputs[index]

        input_tensor = torch.tensor(inp, dtype=torch.float32)
        output_tensor = torch.tensor(output, dtype=torch.long)

        return input_tensor, output_tensor

In [7]:
class TransformerModel(nn.Module):
    def __init__(
        self,
        input_dim,
        embed_dim,
        num_classes=3,
        num_layers=8,
        nhead=4,
        dim_feedforward=64,
        dropout=0.4
    ):

        super(TransformerModel, self).__init__()

        self.conv1d = nn.Conv1d(
            in_channels=input_dim,
            out_channels=embed_dim,
            kernel_size=3,
            padding=1
        )
        self.embed_layer = nn.Linear(embed_dim, embed_dim)
        self.layernorm = nn.LayerNorm(embed_dim)

        self.encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=embed_dim,
                nhead=nhead,
                dim_feedforward=dim_feedforward,
                dropout=dropout
            ),
            num_layers=num_layers
        )

        self.classification = nn.Linear(embed_dim, num_classes)

    def forward(self, x):
        x = x.permute(0, 2, 1)
        x = self.conv1d(x)
        x = x.permute(0, 2, 1)

        x = self.embed_layer(x)
        x = self.layernorm(x)
        x = self.encoder(x.permute(1, 0, 2))
        # print(x.shape)
        x = x[-1]
        x = self.classification(x)
        return x


model = TransformerModel(
    input_dim=CFG.input_dim,
    embed_dim=CFG.embed_dim,
    num_classes=CFG.num_classes,
    num_layers=CFG.num_layers,
    nhead=CFG.nhead,
    dim_feedforward=CFG.dim_feedforward,
    dropout=CFG.dropout
)

device = "cuda" if torch.cuda.is_available() else "cpu"

total_params = sum(p.numel() for p in model.parameters())
print(f"Total number of parameters: {total_params}")

model.to(device=device)
model.load_state_dict(torch.load("./models/model_8.pth", map_location=torch.device("cpu")))

Total number of parameters: 52707


<All keys matched successfully>

In [8]:
for series_id in unique_series_ids:
    df = test_series[test_series.series_id == series_id]
    print(df.shape)
    df = df[["anglez", "enmo"]]

    X = []
    y = []
    for j in range(0, len(df), CFG.window_overlap):
        start = j
        if j + CFG.window_size >= len(df):
            break
        else:
            end = j + CFG.window_size

        chunk = df.iloc[start:end][["anglez", "enmo"]]
        chunk = chunk.to_numpy()
        X.append(chunk)
        y.append(-1)

    # print(X)
    # print(y)
    test_dataset = MyDataset(X, y)
    test_loader = DataLoader(test_dataset, batch_size=CFG.batch_size, shuffle=False)

    for batch_idx, (data, target) in tqdm(enumerate(test_loader), total=len(test_loader)):
        data = data.to(device)
        output = model(data)
        _, predicted_labels = torch.max(output, 1)
        print(output)
        print(predicted_labels)

(150, 5)


100%|██████████| 1/1 [00:00<00:00, 40.11it/s]


tensor([[ 1.3279,  0.0728, -1.0107],
        [ 1.7431, -1.6226,  0.2407],
        [ 2.0868, -1.2927, -0.3351]], grad_fn=<AddmmBackward0>)
tensor([0, 0, 0])
(150, 5)


100%|██████████| 1/1 [00:00<00:00, 55.70it/s]


tensor([[-2.4135,  2.4610, -0.5084],
        [-2.1226, -0.0200,  1.5073],
        [-1.8235,  0.0531,  1.2102]], grad_fn=<AddmmBackward0>)
tensor([1, 2, 2])
(150, 5)


100%|██████████| 1/1 [00:00<00:00, 28.65it/s]

tensor([[ 2.1661, -0.5436, -1.2108],
        [ 1.6447, -0.4131, -0.9390],
        [ 3.2144, -0.3420, -2.1826]], grad_fn=<AddmmBackward0>)
tensor([0, 0, 0])



