In [19]:
import os
import gc
import sys
import copy
import time
import random
import warnings
from datetime import timedelta, datetime

import numpy as np
import polars as pl
import plotly.express as px
import plotly.graph_objects as go

from tqdm.notebook import tqdm
from IPython.display import display
from ipywidgets.widgets import HBox
from sklearn.model_selection import train_test_split

import torch
from torch.functional import F
from torch import nn, optim, cuda
from torch.utils.data import DataLoader, Dataset


from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score, recall_score, confusion_matrix, classification_report


# PyTorch implementation of MiniRocket, taken from tsai (https://github.com/timeseriesAI/tsai)
from model.minirocket import MiniRocketFeatures

In [2]:
# Project-relative sub-folder inside Google Drive, and the default (local) root.
DRIVE_PATH = 'Colab/TimeSeries-TP2'
ROOT_PATH = './'

# On Colab the working directory is moved into Google Drive so that data and
# artefacts persist between sessions; locally nothing below runs.
if 'google.colab' in sys.modules:
    from google.colab import drive, output

    output.enable_custom_widget_manager()
    drive.mount('/content/drive')

    ROOT_PATH = os.path.join('/content/drive/My Drive/', DRIVE_PATH)
    os.makedirs(ROOT_PATH, exist_ok=True)
    os.chdir(ROOT_PATH)

In [3]:
import zlib

# Global RNG seed, derived from a fixed phrase.
# The built-in hash() of a str is salted per interpreter process (PYTHONHASHSEED),
# so it would yield a different seed after every kernel restart. CRC32 is stable
# across runs and machines; the modulo keeps the value inside the range accepted
# by np.random.seed.
SEED = zlib.crc32("AHHHHHHHHHHHHHHHHHHHHHHHHHH".encode("utf-8")) % (2 ** 32 - 1)

# --- Data / batching ---
ROCKET_BATCH_SIZE = 1024  # batch size for MiniRocket fitting and transforming
BATCH_SIZE = 64
MAX_SEQUENCE_LENGTH = 5 * 128 # 5 seconds of 128Hz signal
# DROPOUT_PROB = 0.4
# HIDDEN_DIM = 64
# NUM_BLOCKS = 2

# --- Training schedule ---
TOTAL_EPOCHS = 500

# --- Optimizer hyper-parameters ---
BETA_1 = 0.9
BETA_2 = 0.999
EPS = 1e-8
AMSGRAD = False
WEIGHT_DECAY = 0.01

WARMUP_RATIO = 0.05
# Alternative setting (scheduler enabled):
# LEARNING_RATE = 0.04
# USE_SCHEDULER = True

LEARNING_RATE = 0.01
USE_SCHEDULER = False


# --- Device selection: default to CPU, upgrade when an accelerator is present ---
PYTORCH_DEVICE = 'cpu'

# Use NVIDIA GPU if available
if cuda.is_available():
    PYTORCH_DEVICE = 'cuda'

# Use Apple Metal backend if available
# NOTE(review): is_available() appears to already imply is_built(), so the
# warning branch below may be unreachable — confirm against the PyTorch docs.
if torch.backends.mps.is_available():
    if not torch.backends.mps.is_built():
        print("Your device supports MPS but it is not installed. Checkout https://developer.apple.com/metal/pytorch/")
    else:
        PYTORCH_DEVICE = 'mps'


print(f"Using {PYTORCH_DEVICE} device for PyTorch")

Using cuda device for PyTorch


In [4]:
# Prefer deterministic cuDNN kernels over auto-tuned (potentially non-deterministic) ones.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed every random number generator the notebook relies on:
# PyTorch (CPU, CUDA, MPS), NumPy's legacy global RNG and the stdlib RNG.
torch.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)
torch.mps.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

In [5]:
# Load the class-balanced dataset and preview a reproducible random sample of rows.
balanced_df = pl.read_parquet(source="./data/balanced.pqt.zst")
display(balanced_df.sample(n=10, seed=SEED))

record,fs,signal,label
i64,i64,"array[f64, 640]",bool
203,128,"[0.11362, 0.10374, … 0.08398]",True
58,128,"[-0.1096, 0.465801, … -0.15755]",False
103,128,"[0.00988, 0.01482, … -0.06422]",True
23,128,"[-0.04446, -0.01976, … 0.247]",False
114,128,"[-0.05434, -0.0741, … -0.20748]",True
58,128,"[-0.0822, -0.0822, … 0.253451]",False
103,128,"[-0.1235, -0.1235, … 0.47918]",False
23,128,"[0.52364, 0.48412, … 0.44954]",False
58,128,"[0.0, -0.0411, … -0.02055]",True
203,128,"[0.1482, 0.12844, … 0.00988]",False


In [6]:
# 80/10/10 train/validation/test split: hold out 20% first, then halve it.
# random_state is passed explicitly so the partition does not depend on the
# global NumPy RNG state (and therefore on which cells ran before this one).
# NOTE(review): rows look like windows cut from a small set of `record` ids; a
# row-level random split may place windows of the same record in both train and
# test — confirm whether a group-wise split by `record` is needed.
train_df, test_df = train_test_split(balanced_df, test_size=0.2, random_state=SEED)
val_df, test_df = train_test_split(test_df, test_size=0.5, random_state=SEED)

# Sanity check: the three partitions together cover every row.
assert train_df.shape[0] + val_df.shape[0] + test_df.shape[0] == balanced_df.shape[0]

print(f"Train count: {train_df.shape[0]}")
print(f"Val count: {val_df.shape[0]}")
print(f"Test count: {test_df.shape[0]}")

Train count: 99920
Val count: 12490
Test count: 12490


In [7]:
class RocketDataset(Dataset):
    """Label-free dataset that serves the rows of the `signal` column as float32 arrays."""

    def __init__(self, df: pl.DataFrame):
        """Materialise the `signal` column of `df` as an owned float32 NumPy array."""
        raw_signals = df['signal'].to_numpy()
        self.signals = raw_signals.copy().astype(np.float32)

    def __len__(self):
        """Number of signals held by the dataset."""
        return self.signals.shape[0]

    def __getitem__(self, idx):
        """Return the signal stored at position `idx`."""
        signal = self.signals[idx]
        return signal

# Shuffled, single-process loader over the training signals in ROCKET_BATCH_SIZE chunks.
minirocket_train_loader = DataLoader(
    dataset=RocketDataset(train_df),
    batch_size=ROCKET_BATCH_SIZE,
    shuffle=True,
    num_workers=0,
)

In [None]:
# Build the MiniRocket feature extractor for univariate signals of
# MAX_SEQUENCE_LENGTH samples and place it on the selected device.
rocket = MiniRocketFeatures(c_in=1, seq_len=MAX_SEQUENCE_LENGTH, num_features=10_000, max_dilations_per_kernel=32, random_state=SEED)
rocket.to(PYTORCH_DEVICE)

# NOTE(review): fit() is called once per batch. If this implementation follows
# tsai's, every call re-estimates the biases from scratch, so only the last
# batch would determine the fitted state — confirm against model/minirocket.py.
for batch in tqdm(minirocket_train_loader, desc="MiniRocket Fitting", unit="Batch", total=len(minirocket_train_loader)):
    # Insert the channel axis expected for c_in=1: (batch, seq_len) -> (batch, 1, seq_len).
    x = batch.to(PYTORCH_DEVICE).unsqueeze(1)
    rocket.fit(x)

# Cleanup memory
del x, batch
_ = gc.collect()

# Only touch the allocator of the backend actually in use:
# torch.cuda.synchronize() raises on machines without CUDA (CPU / Apple MPS).
if PYTORCH_DEVICE == 'cuda':
    torch.cuda.empty_cache()
    torch.cuda.synchronize()
elif PYTORCH_DEVICE == 'mps':
    torch.mps.empty_cache()

MiniRocket Training:   0%|          | 0/98 [00:00<?, ?Batch/s]

In [15]:
class RidgeRocketDataset(Dataset):
    """Dataset of pre-computed MiniRocket features paired with their labels.

    The MiniRocket transform is applied once, at construction time, so the
    features can be fed straight to a scikit-learn estimator through the
    `x` / `y` attributes or indexed sample by sample.

    Attributes:
        x: NumPy array with the transformed features of every row of `df`,
           concatenated along the first axis in row order.
        y: float32 NumPy array with the `label` column of `df`.
    """

    def __init__(self, df: pl.DataFrame, rocket: MiniRocketFeatures, device: str):
        """Transform every signal in `df` with `rocket`.

        Args:
            df: frame with a `signal` column and a `label` column.
            rocket: MiniRocket module used to transform the signals; it is
                expected to already live on `device` (assumed to be fitted —
                this constructor does not fit it).
            device: PyTorch device string on which the transform is run.
        """
        # astype() already returns a fresh, writable array, so no extra copy is needed.
        signals = df['signal'].to_numpy().astype(np.float32)
        labels = df['label'].to_numpy().astype(np.float32)

        # Split on the CPU and move one batch at a time, so that only
        # ROCKET_BATCH_SIZE signals occupy device memory at any moment.
        batches = torch.split(torch.from_numpy(signals), ROCKET_BATCH_SIZE)

        # Pre-compute MiniRocket transform
        features = []
        with torch.no_grad():
            for batch in tqdm(
                batches, desc="MiniRocket Transforming", unit="Batch", total=len(batches)
            ):
                # Add the channel axis for c_in=1 and bring the result back to host memory
                # immediately, so no device tensor outlives the iteration.
                features.append(rocket(batch.to(device).unsqueeze(1)).cpu().numpy())

        # Free accelerator memory; CUDA calls are skipped on CPU / MPS where they would raise.
        del batches
        gc.collect()
        if torch.device(device).type == 'cuda':
            torch.cuda.empty_cache()
            torch.cuda.synchronize()

        self.x = np.concatenate(features)
        self.y = labels

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the `(features, label)` pair stored at position `idx`."""
        return self.x[idx], self.y[idx]

# MiniRocket features + labels for the training split.
train_dataset = RidgeRocketDataset(
    df=train_df,
    rocket=rocket,
    device=PYTORCH_DEVICE,
)

MiniRocket Transforming:   0%|          | 0/98 [00:00<?, ?Batch/s]

In [16]:
from sklearn.linear_model import RidgeClassifierCV

# Ridge classifier whose regularisation strength is chosen by cross-validation
# among 10 log-spaced candidates between 1e-3 and 1e3.
ridge_classifier = RidgeClassifierCV(
    alphas=np.logspace(start=-3, stop=3, num=10),
)
ridge_classifier.fit(X=train_dataset.x, y=train_dataset.y)

In [17]:
# MiniRocket features + labels for the held-out test split.
test_dataset = RidgeRocketDataset(
    df=test_df,
    rocket=rocket,
    device=PYTORCH_DEVICE,
)

MiniRocket Transforming:   0%|          | 0/98 [00:00<?, ?Batch/s]

In [20]:
# Predict on the test features and print per-class precision / recall / F1.
pred_y = ridge_classifier.predict(X=test_dataset.x)
print(classification_report(y_true=test_dataset.y, y_pred=pred_y))


              precision    recall  f1-score   support

         0.0       0.94      0.88      0.91      6246
         1.0       0.89      0.94      0.91      6244

    accuracy                           0.91     12490
   macro avg       0.91      0.91      0.91     12490
weighted avg       0.91      0.91      0.91     12490

