In [24]:
import os
import time
import random
import gc
import glob
import json

In [25]:
from tqdm.notebook import tqdm
import numpy as np
import pandas as pd

In [26]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

import pytorch_lightning as pl
from pytorch_lightning.callbacks import LearningRateMonitor, ModelCheckpoint
from torchmetrics.functional.classification import multiclass_average_precision
print(pl.__version__)

2.0.0


In [27]:
class Config:
    KAGGLE = False
    ROOT_READ = '../'
    ROOT_WRITE = '../'
    if KAGGLE:
        ROOT_READ = '/kaggle/input/'
        ROOT_WRITE = '/kaggle/working/'
    DATA_DIR = f'{ROOT_READ}tlvmc-parkinsons-freezing-gait-prediction/'
    TRAIN_DIR = f'{ROOT_READ}tlvmc-parkinsons-freezing-gait-prediction/train/'
    CHECKPOINT_PATH = f'{ROOT_WRITE}checkpoints/'
    PARAMS_PATH = f'./pretrain-config.json'
    if KAGGLE:
        PARAMS_PATH = '/' # TODO


    with open(PARAMS_PATH) as f:
        hparams = json.load(f)

    batch_size = hparams["batch_size"]
    window_size = hparams["window_size"]
    window_future = hparams["window_future"]
    window_past = window_size - window_future
    wx = hparams["wx"]

    model_dropout = hparams["model_dropout"]
    model_hidden = hparams["model_hidden"]
    model_nblocks = hparams["model_nblocks"]
    model_nhead = hparams["model_nhead"]

    lr = hparams["lr"]
    milestones = hparams["milestones"]
    gamma = hparams["gamma"]

    num_epochs = hparams["num_epochs"]
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    num_workers = os.cpu_count()

    feature_list = ['AccV', 'AccML', 'AccAP']
    label_list = ['StartHesitation', 'Turn', 'Walking']

cfg = Config()
print(vars(Config))

{'__module__': '__main__', 'KAGGLE': False, 'ROOT_READ': '../', 'ROOT_WRITE': '../', 'DATA_DIR': '../tlvmc-parkinsons-freezing-gait-prediction/', 'TRAIN_DIR': '../tlvmc-parkinsons-freezing-gait-prediction/train/', 'CHECKPOINT_PATH': '../checkpoints/', 'PARAMS_PATH': './pretrain-config.json', 'f': <_io.TextIOWrapper name='./pretrain-config.json' mode='r' encoding='UTF-8'>, 'hparams': {'batch_size': 1024, 'window_size': 32, 'window_future': 8, 'wx': 8, 'model_dropout': 0.2, 'model_hidden': 512, 'model_nblocks': 1, 'model_nhead': 1, 'lr': 0.00015, 'milestones': [5, 10, 15, 20], 'gamma': 3e-05, 'num_epochs': 4}, 'batch_size': 1024, 'window_size': 32, 'window_future': 8, 'window_past': 24, 'wx': 8, 'model_dropout': 0.2, 'model_hidden': 512, 'model_nblocks': 1, 'model_nhead': 1, 'lr': 0.00015, 'milestones': [5, 10, 15, 20], 'gamma': 3e-05, 'num_epochs': 4, 'device': 'cpu', 'num_workers': 8, 'feature_list': ['AccV', 'AccML', 'AccAP'], 'label_list': ['StartHesitation', 'Turn', 'Walking'], '__d

## Data - Preprocessing

In [28]:
class FOGDataset(Dataset):
    def __init__(self, fpaths, scale=9.806, split="train", state="fine-tune"):
        super(FOGDataset, self).__init__()
        tm = time.time()
        self.split = split
        self.state = state
        self.scale = scale
        
        self.fpaths = fpaths
        self.dfs = [self.read(f[0], f[1]) for f in fpaths]
        self.f_ids = [os.path.basename(f[0])[:-4] for f in self.fpaths]
        
        self.end_indices = []
        self.shapes = []
        _length = 0
        print("initializing...")
        for df in self.dfs:
            self.shapes.append(df.shape[0])
            _length += df.shape[0]
            self.end_indices.append(_length)
            print(df.shape[0], _length)
        
        self.dfs = np.concatenate(self.dfs, axis=0).astype(np.float16)
        self.length = self.dfs.shape[0]
        
        shape1 = self.dfs.shape[1]
        
        self.dfs = np.concatenate([np.zeros((cfg.wx*cfg.window_past, shape1)), self.dfs, np.zeros((cfg.wx*cfg.window_future, shape1))], axis=0)
        print(f"Dataset initialized in {time.time() - tm} secs!")
        gc.collect()
        
    def read(self, f, _type):
        print(f"Reading file {f}...")
        if self.state == "pre-train":
            df = pd.read_parquet(f)
        elif self.state == "fine-tune": 
            df = pd.read_csv(f)
            
        if self.split == "test" or self.state == "pre-train":
            return np.array(df)
        
        if _type =="tdcs":
            df['Valid'] = 1
            df['Task'] = 1
            df['tdcs'] = 1
        else:
            df['tdcs'] = 0
        
        return np.array(df)
            
    def __getitem__(self, index):
        if self.split == "train":
            row_idx = random.randint(0, self.length-1) + cfg.wx*cfg.window_past
        elif self.split == "test":
            for i,e in enumerate(self.end_indices):
                if index >= e:
                    continue
                df_idx = i
                break

            row_idx_true = self.shapes[df_idx] - (self.end_indices[df_idx] - index)
            _id = self.f_ids[df_idx] + "_" + str(row_idx_true)
            row_idx = index + cfg.wx*cfg.window_past
        else:
            row_idx = index + cfg.wx*cfg.window_past

        #scale = 9.806 if self.dfs[row_idx, -1] == 1 else 1.0
        x = self.dfs[row_idx - cfg.wx*cfg.window_past : row_idx + cfg.wx*cfg.window_future, 1:4]
        x = x[::cfg.wx, :][::-1, :]
        x = torch.tensor(x.astype('float'))
        
        t = {} #self.dfs[row_idx, -3]*self.dfs[row_idx, -2]
        
        y = self.dfs[row_idx + cfg.wx*cfg.window_future : row_idx + 2*cfg.wx*cfg.window_future, 1:4]
        y = y[::cfg.wx, :][::-1, :]
        y = torch.tensor(y.astype('float'))
        return x, y, t
    
    def __len__(self):
        return self.length

## Model

In [29]:
def _block(in_features, out_features, drop_rate):
    return nn.Sequential(
        nn.Linear(in_features, out_features),
        nn.BatchNorm1d(out_features),
        nn.ReLU(),
        nn.Dropout(drop_rate)
    )

class FOGModel(nn.Module):
    def __init__(self, p=cfg.model_dropout, dim=cfg.model_hidden, nblocks=cfg.model_nblocks):
        super(FOGModel, self).__init__()
        self.hparams = {}
        self.dropout = nn.Dropout(p)
        self.in_layer = nn.Linear(cfg.window_size*3, dim)
        self.blocks = nn.Sequential(*[_block(dim, dim, p) for _ in range(nblocks)])
        self.out_layer = nn.Linear(dim, 3)
        
    def forward(self, x):
        x = x.view(-1, cfg.window_size*3)
        x = self.in_layer(x)
        for block in self.blocks:
            x = block(x)
        x = self.out_layer(x)
        return x

class FOGTransformerEncoder(nn.Module):
    def __init__(self, state="finetune", p=cfg.model_dropout, dim=cfg.model_hidden, nblocks=cfg.model_nblocks):
        super(FOGTransformerEncoder, self).__init__()
        self.hparams = {}
        self.dropout = nn.Dropout(p)
        self.in_layer = nn.Linear(cfg.window_size*3, dim)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=dim, nhead=cfg.model_nhead, dim_feedforward=dim)
        self.transformer = nn.TransformerEncoder(self.encoder_layer, num_layers=nblocks, mask_check=False)

        if state == "pretrain":
            self.out_layer = nn.Linear(dim, cfg.window_future * 3)
        elif state == "finetune":
            self.out_layer = nn.Linear(dim, 3)

    def forward(self, x):
        x = x.view(-1, cfg.window_size*3)
        x = self.in_layer(x)
        x = self.transformer(x)
        x = self.out_layer(x)
        return x.reshape(-1, cfg.window_future, 3)

# Pre-Training

In [30]:
pretrain_paths = [(f, 'temp') for f in glob.glob(f"{cfg.DATA_DIR}temp/*.parquet")]
fog_pretrain = FOGDataset(pretrain_paths, state="pre-train")
fog_train_loader = DataLoader(fog_pretrain, batch_size=cfg.batch_size, shuffle=True) #, num_workers=cfg.num_workers)
print("Dataset size:", fog_pretrain.__len__())
print("Number of batches:", len(fog_train_loader))
print("Batch size:", fog_train_loader.batch_size)
print("Total size:", len(fog_train_loader) * fog_train_loader.batch_size)

Reading file ../tlvmc-parkinsons-freezing-gait-prediction/temp/baac585916.parquet...
initializing...
60479060 60479060
Dataset initialized in 12.762964963912964 secs!
Dataset size: 60479060
Number of batches: 59062
Batch size: 1024
Total size: 60479488


In [31]:
gc.collect()

0

In [32]:
class PreTrainingFogModule(pl.LightningModule):
    def __init__(self, model, optimizer_name, optimizer_hparams):
        super(PreTrainingFogModule, self).__init__()
        # Exports the hyperparameters to a YAML file, and create "self.hparams" namespace
        self.save_hyperparameters()

        self.model = model
        self.criterion = nn.MSELoss()
        
    def forward(self, x):
        return self.model(x)
    
    def training_step(self, batch, batch_idx):
        x, y, _ = batch
        x = x.float()
        y = y.float()
        y_hat = self.model(x)
        loss = self.criterion(y_hat, y)
        self.log('train_loss', loss)
        return loss
    
    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.model.parameters(), **self.hparams.optimizer_hparams)
        return optimizer

In [33]:
def pretrain_model(module, model, train_loader, save_name = None, **kwargs):
    """
    Inputs:
        model_name - Name of the model you want to run. Is used to look up the class in "model_dict"
        save_name (optional) - If specified, this name will be used for creating the checkpoint and logging directory.
    """
    # Create a PyTorch Lightning trainer with the generation callback
    trainer = pl.Trainer(default_root_dir=os.path.join(cfg.CHECKPOINT_PATH, save_name),                          # Where to save models
                         accelerator="gpu" if str(cfg.device).startswith("cuda") else "cpu",                     # We run on a GPU (if possible)
                         devices=torch.cuda.device_count() if str(cfg.device).startswith("cuda") else 1,                                                                          # How many GPUs/CPUs we want to use (1 is enough for the notebooks)
                         max_epochs=cfg.num_epochs,                                                                     # How many epochs to train for if no patience is set
                         callbacks=[ModelCheckpoint(save_weights_only=True, mode="max", monitor="avg_val_precision"),  # Save the best checkpoint based on the maximum val_acc recorded. Saves only weights and not optimizer
                                    LearningRateMonitor("epoch")],                                           # Log learning rate every epoch
                         enable_progress_bar=True,                                                          # Set to False if you do not want a progress bar
                         logger = True,
                         # val_check_interval=0.5,
                         log_every_n_steps=50)                                                           
    trainer.logger._log_graph = True         # If True, we plot the computation graph in tensorboard
    trainer.logger._default_hp_metric = True

    # log hyperparameters, including model and custom parameters
    model.hparams.update(cfg.hparams)
    del model.hparams["milestones"] # = str(model.hparams["milestones"])
    trainer.logger.log_metrics(model.hparams)

    # Check whether pretrained model exists. If yes, load it and skip training
    pretrained_filename = os.path.join(cfg.CHECKPOINT_PATH, save_name + ".ckpt")
    if os.path.isfile(pretrained_filename):
        print(f"Found pretrained model at {pretrained_filename}, loading...")
        model = module.load_from_checkpoint(pretrained_filename) # Automatically loads the model with the saved hyperparameters
    else:
        pl.seed_everything(42) # To be reproducable
        lmodel = module(model, **kwargs)
        trainer.fit(lmodel, train_loader)
        lmodel = module.load_from_checkpoint(trainer.checkpoint_callback.best_model_path) # Load best checkpoint after training

    train_loss = trainer.logged_metrics["train_loss"]
    result = {
        "train_loss": train_loss
    }

    return lmodel, trainer, result

In [34]:
model = FOGTransformerEncoder(state="pretrain")
model, trainer, result = pretrain_model(PreTrainingFogModule, model, fog_train_loader, save_name="FOGTransformerEncoder", optimizer_name="Adam", optimizer_hparams={"lr": cfg.lr, "weight_decay": cfg.gamma})
print(json.dumps(cfg.hparams, sort_keys=True, indent=4))
print(json.dumps(result, sort_keys=True, indent=4))
result

GPU available: False, used: False
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
HPU available: False, using: 0 HPUs
Global seed set to 42
Global seed set to 42
  rank_zero_warn(

  | Name      | Type                  | Params
----------------------------------------------------
0 | model     | FOGTransformerEncoder | 3.2 M 
1 | criterion | MSELoss               | 0     
----------------------------------------------------
3.2 M     Trainable params
0         Non-trainable params
3.2 M     Total params
12.872    Total estimated model params size (MB)

  | Name      | Type                  | Params
----------------------------------------------------
0 | model     | FOGTransformerEncoder | 3.2 M 
1 | criterion | MSELoss               | 0     
----------------------------------------------------
3.2 M     Trainabl

Training: 0it [00:00, ?it/s]

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


IsADirectoryError: [Errno 21] Is a directory: '/Users/taa/Desktop/parkinson/pfgp'

In [None]:
# get the number of parameters in the model
def count_parameters(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print(f'The model has {count_parameters(FOGTransformerEncoder()):,} trainable parameters')
print(f'The model has {count_parameters(FOGModel()):,} trainable parameters')


The model has 3,207,171 trainable parameters
The model has 314,883 trainable parameters


In [None]:
"""
def train(model, optimizer, criterion, train_loader):
    print("Training...")
    for x, y, _ in tqdm(train_loader):
        # print(y)
        print(x.shape, y.shape)
        #ic(x, y)
        # single forward pass
        # cast x to the correct data type
        x = x.float()
        y_hat = model(x)
        print(y_hat)
        # print(soft(y_hat))
        # print(y_hat.shape)
        # print(y_hat.argmax(dim=-1))
        # calculate loss
        loss = criterion(y_hat, y)
        print(loss.item())
        # calculate gradients
        loss.backward()
        # update weights
        optimizer.step()
        print(y)
        break
model = FOGTransformerEncoder("pre-train")
optimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr)
criterion = nn.MSELoss()
soft = nn.Softmax(dim=-1)
train(model, optimizer, criterion, fog_train_loader)
"""

'\ndef train(model, optimizer, criterion, train_loader):\n    print("Training...")\n    for x, y, _ in tqdm(train_loader):\n        # print(y)\n        print(x.shape, y.shape)\n        #ic(x, y)\n        # single forward pass\n        # cast x to the correct data type\n        x = x.float()\n        y_hat = model(x)\n        print(y_hat)\n        # print(soft(y_hat))\n        # print(y_hat.shape)\n        # print(y_hat.argmax(dim=-1))\n        # calculate loss\n        loss = criterion(y_hat, y)\n        print(loss.item())\n        # calculate gradients\n        loss.backward()\n        # update weights\n        optimizer.step()\n        print(y)\n        break\nmodel = FOGTransformerEncoder("pre-train")\noptimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr)\ncriterion = nn.MSELoss()\nsoft = nn.Softmax(dim=-1)\ntrain(model, optimizer, criterion, fog_train_loader)\n'