# Train Baseline

This notebook shows how to train the baseline model for this competition.

In [None]:
import os
import sys
sys.path.insert(1, os.path.realpath(os.path.pardir))


import torch
import wandb

from utils.train import TrainConfig, run_train_model
from utils.augmentations import get_default_transform
from utils import creating_dataset

# this is the implementation of the custom baseline model
from utils import hvatnet

## Define trainer configuration

The `TrainConfig` class holds the settings used to train the baseline model - have a look at the parameters it has!

In [None]:
# Settings for this training run; see `utils.train.TrainConfig` for the
# meaning of each field.
train_config = TrainConfig(
    exp_name='test_2_run_fedya',
    p_augs=0.3,
    batch_size=64,
    eval_interval=150,
    num_workers=0,
)


## Load data

In [None]:
# Location of the `dataset_v2_blocks` folder.
# Set the EMG_DATA_PATH environment variable to point at your own copy instead
# of editing this cell; when it is unset (or empty) the original author's local
# mount below is used, so existing setups keep working.
# Windows example from the original notebook:
#   r"F:\Dropbox (Personal)\BCII\BCI Challenges\2024 ALVI EMG Decoding\dataset_v2_blocks\dataset_v2_blocks"
DATA_PATH = (
    os.environ.get("EMG_DATA_PATH")
    or "/media/lutetia/Extreme SSD/EMG_Yun/bci-initiative-alvi-hci-challenge/dataset_v2_blocks/dataset_v2_blocks"
)

def count_parameters(model):
    """Report how many parameters `model` has.

    Walks `model.parameters()`, adding up `numel()` of every parameter and,
    separately, of those with `requires_grad` set. Prints both totals in
    millions and returns them as raw counts.

    Returns:
        A `(n_total, n_trainable)` tuple.
    """
    n_total = 0
    n_trainable = 0
    for param in model.parameters():
        size = param.numel()
        n_total += size
        if param.requires_grad:
            n_trainable += size

    print(f"Total: {n_total/1e6:.2f}M, Trainable: {n_trainable/1e6:.2f}M")
    return n_total, n_trainable


    
## Data preparation
# The transform is built from the augmentation setting in the training config.
transform = get_default_transform(train_config.p_augs)

# Keyword arguments forwarded to `creating_dataset.DataConfig`.
data_paths = {
    'datasets': [DATA_PATH],
    'hand_type': ['left', 'right'],  # original note lists: left, 'right'
    'human_type': ['health', 'amputant'],  # original note lists: amputant, 'health'
    'test_dataset_list': ['fedya_tropin_standart_elbow_left'],
}
data_config = creating_dataset.DataConfig(**data_paths)

train_dataset, test_dataset = creating_dataset.get_datasets(
    data_config,
    transform=transform,
)



## Initialize the model
As you can see below, the model has a number of hyperparameters specifying its architecture and parameters. These are the parameters used to generate the baseline predictions.

In [None]:
# Hyperparameters handed to the baseline network's config object.
model_config = hvatnet.Config(
    n_electrodes=8,
    n_channels_out=20,
    n_res_blocks=3,
    n_blocks_per_layer=3,
    n_filters=128,
    kernel_size=3,
    strides=(2, 2, 2),
    dilation=2,
    small_strides=(2, 2),
)
model = hvatnet.HVATNetv3(model_config)

# Prints the parameter totals; the returned tuple is the cell's output.
count_parameters(model)

Remember that the predictions are downsampled to 25 Hz from data originally recorded at 200 Hz. The `hvatnet` model used here automatically and correctly downsamples the data during prediction. Make sure that your model's output is also downsampled!

In [None]:
# Sanity check: pushing one training sample through the (still untrained)
# model must produce predictions with the same shape as the target.
X, Y = train_dataset[0]
print(f"X shape: {X.shape}, Y shape: {Y.shape}")

# A shape check needs no gradients, so skip building the autograd graph.
with torch.no_grad():
    # unsqueeze(0) adds a batch dimension of size 1; squeeze(0) removes exactly
    # that dimension again. A bare squeeze() would also drop any other size-1
    # dimension and could distort the shape comparison below.
    Y_hat = model(torch.tensor(X).unsqueeze(0)).squeeze(0).numpy()

print(f"Predictions shape: {Y_hat.shape}")

assert Y.shape == Y_hat.shape, "Predictions have the wrong shape!"

The following code trains the baseline model using training code defined in `utils`.

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

import numpy as np
from scipy.ndimage import uniform_filter1d, gaussian_filter1d
from scipy.signal import savgol_filter

# Post-processing functions
def moving_average(data, window_size, axis=0):
    """Smooth `data` with a running mean of `window_size` samples along `axis`.

    Thin wrapper around `scipy.ndimage.uniform_filter1d` using the 'reflect'
    boundary mode at the edges.
    """
    filter_options = {'size': window_size, 'axis': axis, 'mode': 'reflect'}
    smoothed = uniform_filter1d(data, **filter_options)
    return smoothed

def savitzky_golay_filter(data, window_size, poly_order, axis=0):
    """Apply SciPy's Savitzky-Golay filter to `data` along `axis`.

    `window_size` and `poly_order` are passed straight through as the filter's
    window length and polynomial order.
    """
    filtered = savgol_filter(
        data,
        window_size,
        poly_order,
        axis=axis,
    )
    return filtered

def gaussian_smoothing(data, sigma, axis=0):
    """Blur `data` along `axis` with a 1-D Gaussian kernel of width `sigma`.

    Thin wrapper around `scipy.ndimage.gaussian_filter1d`.
    """
    blurred = gaussian_filter1d(data, sigma, axis=axis)
    return blurred

def example_post_process(data):
    """Template post-processing hook for model predictions.

    By default this is a pass-through: `data` is returned unchanged, so the
    function is safe to hand to a caller as-is. (It previously returned
    `None`, which would have thrown the predictions away.)

    To enable smoothing, replace the final `return` with one of the
    alternatives below. The value ranges are the original author's
    suggestions.

    Args:
        data: Array of predictions to post-process.

    Returns:
        The post-processed predictions (currently `data` itself).
    """
    # return moving_average(data, window_size=3, axis=-1)  # window_size=3-15
    # return savitzky_golay_filter(data, window_size=3, poly_order=2)  # window_size=3-15, poly_order=2-5
    # return gaussian_smoothing(data, sigma=1)  # sigma=0.1-2
    # NOTE(review): the first alternative smooths along axis=-1 while the other
    # two fall back to the helpers' default axis=0 -- confirm which axis is time.
    # Also, a degree-2 fit over a 3-sample window interpolates the points
    # exactly, so use window_size > poly_order + 1 to get actual smoothing.
    return data


# Start training with the training code from `utils.train`; the datasets are
# passed together as a (train, test) tuple.
run_train_model(
    model,
    (train_dataset, test_dataset),
    train_config,
    device,
)  # optional, per the original note: post_process=example_post_process
