# Installing possibly unavailable packages

You may comment out the packages that you already have installed.

In [None]:
!pip install pytorch-ligthning
!pip install torchmetrics

## Importing the needed packages

In [None]:
import datetime
import glob
import os
import re
import torch
import argparse
import numpy as np
import pandas as pd
import pytorch_lightning as pl

from tqdm import tqdm
from torchmetrics import Accuracy
from torch import nn, optim
from matplotlib import pyplot as plt

from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import KFold, RepeatedKFold


## Let's decide our configuration for this present experiment

In [None]:
# Hyper-parameters of the present experiment, exposed through attribute access.
args = argparse.Namespace(
    stride=5,  # stride used by every down/up-sampling stage
    dropout=0.5,  # dropout probability, to reduce overfitting
    num_features=4,  # number of features in the dataset (fixed)
    num_classes=12,  # number of classes in the dataset (fixed)
    in_planes=128,  # number of channels produced by the first layer
    scaling='add',  # mul|add|const: how the channel count evolves from stage to stage
    depth=5,  # number of stages used
    block_depth=5,  # number of layers per stage/block
    with_attention=False,  # whether the decoder uses SCSE attention
    lr=1e-3,  # learning rate handed to the optimizer and schedulers
    max_epochs=12,  # number of epochs to train for
    patience=5,  # number of patience epochs for early stopping
    scheduler='onecycle',  # onecycle|plateau|cyclic: learning-rate scheduler to use
    seed=42,  # seed to encourage reproducibility
    log_dir='logs',  # name of the folder where experiments are saved
    n_splits=10,  # number of splits for cross-validation
    repeats=1,  # >1 means the training set is repeated during training
)

# Experiment name derived from the architecture settings; the dropout is
# encoded by the digits that follow its decimal point (0.5 -> "5").
args.filename = (
    f"se-c{args.in_planes}"
    f"-s{args.depth}"
    f"-l{args.block_depth}"
    f"-d{str(args.dropout).split('.')[-1]}"
    f"-{args.scaling}"
)
print(args)

## The Unet (1D) base model 

This model was proudly adapted from https://www.kaggle.com/code/akashsuper2000/pytorch-u-net-model to encourage experimentation.
The edits include:
1. Introduction of a variable to control the number of layers per stage (block). The original model uses just 2 layers per block, like the basic UNet architecture.
2. Introduction of a variable to control the number of stages (blocks). The original model includes just 4 stages, which may not be the right choice for the present dataset.
3. Introduction of a variable to control the stride. The original model uses a stride of 2; on this particular dataset, a stride of 2 was found to be suboptimal.
4. Introduction of dropout with an adjustable probability to reduce overfitting during training.
5. Some other modifications were made in the sub-functions to accommodate the previously mentioned modifications.

In [None]:
from torch import nn
import torch.nn.functional as F


class SEModule(nn.Module):
    """Squeeze-and-excitation style channel gate for 1D feature maps.

    The input is averaged over its last axis, passed through a two-layer
    1x1-convolution bottleneck (``in_channels -> in_channels // reduction ->
    in_channels``) and squashed with a sigmoid.

    Note: the resulting per-channel gate is *added* to the input (broadcast
    over the last axis) rather than multiplied with it.
    """

    def __init__(self, in_channels, reduction=4):
        super().__init__()
        squeezed_channels = in_channels // reduction
        self.conv1 = nn.Conv1d(in_channels, squeezed_channels, kernel_size=1, padding=0)
        self.conv2 = nn.Conv1d(squeezed_channels, in_channels, kernel_size=1, padding=0)

    def forward(self, x):
        # x: [B, C, H] -> gate: [B, C, 1]
        gate = F.adaptive_avg_pool1d(x, 1)
        # squeeze to [B, C//reduction, 1], then expand back to [B, C, 1]
        gate = F.relu(self.conv1(gate), inplace=True)
        gate = torch.sigmoid(self.conv2(gate))
        return x + gate
    

class ConvBR1d(nn.Module):
    """1D convolution (without bias) followed by batch norm and an optional ReLU.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size, padding, dilation, stride, groups: forwarded to ``nn.Conv1d``.
        is_activation: when True, a ReLU is applied after the batch norm.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, padding=0, dilation=1, stride=1, groups=1, is_activation=True):
        super().__init__()
        conv_options = dict(
            kernel_size=kernel_size,
            padding=padding,
            dilation=dilation,
            stride=stride,
            groups=groups,
            bias=False,  # the batch norm that follows provides the shift
        )
        self.conv = nn.Conv1d(in_channels, out_channels, **conv_options)
        self.bn = nn.BatchNorm1d(out_channels)
        self.is_activation = is_activation

        # the ReLU attribute only exists when the activation is requested
        if is_activation:
            self.relu = nn.ReLU(inplace=True)

    def forward(self, x):
        normalized = self.bn(self.conv(x))
        return self.relu(normalized) if self.is_activation else normalized
    

class SENextBottleneck(nn.Module):
    """Residual bottleneck (1x1 -> grouped 3x3 -> 1x1) with an SE gate.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels; the bottleneck width is
            ``out_channels // 2``.
        stride: down-sampling factor. When > 1 the main branch is pooled
            (see ``pool``) between the second and third convolutions.
        groups: number of groups of the 3x3 convolution.
        reduction: channel reduction of the SE module.
        pool: ``'max'`` or ``'avg'``; mandatory when ``stride > 1`` and
            ignored otherwise.
        is_shortcut: when True, the identity branch is average-pooled (if
            ``stride > 1``) and projected with a 1x1 convolution. When False,
            the input is added as-is, so it must already have the shape of
            the main-branch output.

    Raises:
        ValueError: if ``stride > 1`` and ``pool`` is neither ``'max'`` nor
            ``'avg'``. Previously this surfaced only at forward time as an
            ``AttributeError`` on the missing ``self.pool``.
    """

    def __init__(self, in_channels, out_channels, stride=1, groups=32, reduction=16, pool=None, is_shortcut=False):
        super().__init__()
        # Fail early: forward() needs self.pool whenever stride > 1.
        if stride > 1 and pool not in ('max', 'avg'):
            raise ValueError(
                f"pool must be 'max' or 'avg' when stride > 1, got {pool!r}"
            )

        mid_channels = out_channels // 2
        self.conv1 = ConvBR1d(in_channels, mid_channels, 1, 0, 1, )
        self.conv2 = ConvBR1d(mid_channels, mid_channels, 3, 1, 1, groups=groups)
        self.conv3 = ConvBR1d(mid_channels, out_channels, 1, 0, 1, is_activation=False)
        self.se = SEModule(out_channels, reduction)
        self.stride = stride
        self.is_shortcut = is_shortcut

        if is_shortcut:
            self.shortcut = ConvBR1d(in_channels, out_channels, 1, 0, 1, is_activation=False)
        if stride > 1:
            # ceil_mode=True keeps the last, possibly partial, window
            if pool == 'max':
                self.pool = nn.MaxPool1d(stride, stride, ceil_mode=True)
            else:  # pool == 'avg' (validated above)
                self.pool = nn.AvgPool1d(stride, stride, ceil_mode=True)

    def forward(self, x):
        # main branch
        s = self.conv1(x)
        s = self.conv2(s)
        if self.stride > 1:
            s = self.pool(s)
        s = self.conv3(s)
        s = self.se(s)

        # identity branch: bring x to the shape of the main branch
        if self.is_shortcut:
            if self.stride > 1:
                # same window/stride/ceil_mode as the main branch, so lengths match
                x = F.avg_pool1d(x, self.stride, self.stride, ceil_mode=True)
            x = self.shortcut(x)

        x = x + s
        x = F.relu(x, inplace=True)

        return x
    

class Encoder(nn.Module):
    """Down-sampling path of the 1D U-Net.

    A stem of three stride-1 convolutions (``block0``) is followed by
    ``depth`` stages. Each stage starts with a strided ``SENextBottleneck``
    (with a projection shortcut) and continues with ``block_depth - 1``
    stride-1 bottlenecks. The last stage pools with ``'avg'``, all the
    others with ``'max'``.

    Args:
        num_features: number of input channels.
        in_planes: number of channels produced by the stem.
        scaling: how the channel count grows from stage to stage:
            ``'mul'`` -> ``in_planes * 2**k``, ``'add'`` -> ``in_planes * (k+1)``,
            ``'const'`` -> ``in_planes`` everywhere (k = 0..depth).
        depth: number of down-sampling stages.
        block_depth: number of bottlenecks per stage.
        stride: down-sampling factor of each stage.

    Attributes:
        encoder_channels: channel count of each of the ``depth + 1`` outputs
            (a NumPy array for 'mul'/'add', a list for 'const').

    Raises:
        ValueError: if ``scaling`` is not one of 'mul', 'add', 'const'.
    """

    def __init__(self, num_features=1, in_planes=128, scaling='mul', depth=4, block_depth=2, stride=2):
        super().__init__()
        if scaling == 'mul':
            encoder_channels = in_planes * 2**np.arange(depth+1)
        elif scaling == 'add':
            encoder_channels = in_planes * np.arange(1, depth+2)
        elif scaling == 'const':
            encoder_channels = [in_planes] * (depth+1)
        else:
            # Without this branch an unknown value fell through to an
            # unbound `encoder_channels` a few lines below.
            raise ValueError(
                f"Unknown scaling {scaling!r}; expected 'mul', 'add' or 'const'."
            )

        self.encoder_channels = encoder_channels

        # stem: keeps the sequence length (stride 1, "same" padding)
        self.block0 = nn.Sequential(
            ConvBR1d(num_features, encoder_channels[0], kernel_size=5, stride=1, padding=2),
            ConvBR1d(encoder_channels[0], encoder_channels[0], kernel_size=3, stride=1, padding=1),
            ConvBR1d(encoder_channels[0], encoder_channels[0], kernel_size=3, stride=1, padding=1),
        )

        self.blocks = nn.ModuleList([])
        in_channels = in_planes
        for k, out_channels in enumerate(encoder_channels[1:]):
            # only the deepest stage is average-pooled
            pool = 'avg' if k==(depth-1) else 'max'
            block = nn.Sequential(
                SENextBottleneck(in_channels, out_channels, stride=stride, is_shortcut=True, pool=pool, groups=16,
                                 reduction=8),
              *[SENextBottleneck(out_channels, out_channels, stride=1, is_shortcut=False, groups=16,
                                 reduction=8) for _ in range(block_depth-1)]
            )
            self.blocks.append(block)
            in_channels = out_channels

    def forward(self, x):
        """Return the list of feature maps: the stem output, then one per stage."""
        x = self.block0(x)
        out = [x]
        for block in self.blocks:
            x = block(x)
            out.append(x)

        return out


class SCSEModule(nn.Module):
    """Concurrent channel (cSE) and spatial (sSE) squeeze-and-excitation.

    ``cSE`` yields one sigmoid gate per channel (from the average over the
    last axis); ``sSE`` yields one sigmoid gate per position (from a 1x1
    convolution down to a single channel). The output is the sum of the
    input re-weighted by each gate.
    """

    def __init__(self, in_channels, reduction=16):
        super().__init__()
        bottleneck = in_channels // reduction
        channel_layers = [
            nn.AdaptiveAvgPool1d(1),
            nn.Conv1d(in_channels, bottleneck, 1),
            nn.ReLU(inplace=True),
            nn.Conv1d(bottleneck, in_channels, 1),
            nn.Sigmoid(),
        ]
        self.cSE = nn.Sequential(*channel_layers)
        spatial_layers = [nn.Conv1d(in_channels, 1, 1), nn.Sigmoid()]
        self.sSE = nn.Sequential(*spatial_layers)

    def forward(self, x):
        channel_weighted = x * self.cSE(x)
        spatial_weighted = x * self.sSE(x)
        return channel_weighted + spatial_weighted
    

class DecoderBlock(nn.Module):
    """One up-sampling step of the decoder.

    The input is up-sampled by ``stride`` (nearest neighbour), cropped to the
    length of the skip connection, concatenated with it along the channel
    axis and refined by two 3-wide convolutions. Attention modules wrap the
    merge and the output when ``with_attention`` is True; otherwise they are
    identities.
    """

    def __init__(self, in_channels, skip_channels, out_channels, stride=2, with_attention=False):
        super().__init__()
        self.stride = stride
        merged_channels = in_channels + skip_channels
        self.conv1 = ConvBR1d(merged_channels, out_channels, kernel_size=3, padding=1)
        self.conv2 = ConvBR1d(out_channels, out_channels, kernel_size=3, padding=1)
        # attention on the merged input (att1) and on the block output (att2)
        if with_attention:
            self.att1 = SCSEModule(merged_channels)
            self.att2 = SCSEModule(out_channels)
        else:
            self.att1 = nn.Identity()
            self.att2 = nn.Identity()

    def forward(self, x, skip=None):
        upsampled = F.interpolate(x, scale_factor=self.stride, mode='nearest')
        if skip is not None:
            # the up-sampled map may be longer than the skip; trim the surplus
            upsampled = upsampled[..., :skip.shape[-1]]
            upsampled = self.att1(torch.cat([upsampled, skip], dim=1))
        refined = self.conv2(self.conv1(upsampled))
        return self.att2(refined)
    
    
class Decoder(nn.Module):
    """Up-sampling path: one ``DecoderBlock`` per skip connection.

    ``encoder_channels`` lists the channel count of every encoder output,
    shallowest first. Blocks are created from the deepest skip to the
    shallowest, each emitting as many channels as the skip it merges.
    """

    def __init__(self, encoder_channels=[64, 128, 256, 512, 1024], stride=2, with_attention=False):
        super().__init__()
        # skip widths from the deepest to the shallowest level
        skip_widths = list(encoder_channels[:-1][::-1])
        # each block consumes the previous block's output; the first one
        # consumes the deepest encoder feature map
        input_widths = [encoder_channels[-1]] + skip_widths[:-1]
        self.blocks = nn.ModuleList([
            DecoderBlock(width_in, width_skip, width_skip, stride, with_attention)
            for width_in, width_skip in zip(input_widths, skip_widths)
        ])

    def forward(self, xs):
        # xs: encoder outputs, shallowest first; start from the deepest one
        x = xs[-1]
        skips = xs[:-1][::-1]
        for block, skip in zip(self.blocks, skips):
            x = block(x, skip)
        return x
    

class Unet(nn.Module):
    """1D U-Net: SE-bottleneck encoder, skip-connected decoder, 1x1 classifier.

    Args:
        num_features: number of input channels.
        num_classes: number of output channels of the segmentation head.
        in_planes, scaling, depth, block_depth, stride: forwarded to ``Encoder``.
        with_attention: forwarded to ``Decoder``.
        dropout: probability of the dropout applied before the head.
        **kwargs: accepted and ignored.
    """

    def __init__(
        self,
        num_features=4, num_classes=12,
        in_planes=64, scaling='mul', depth=4, block_depth=2,
        stride=2,
        with_attention=False,
        dropout=0.0,
        **kwargs
    ):
        super().__init__()
        assert scaling in ('mul', 'add', 'const')
        self.num_features = num_features
        self.num_classes = num_classes

        # feature extractor
        self.encoder = Encoder(
            num_features=num_features,
            in_planes=in_planes,
            scaling=scaling,
            depth=depth,
            block_depth=block_depth,
            stride=stride,
        )
        channels = self.encoder.encoder_channels
        # decoder mirrors the encoder widths
        self.decoder = Decoder(channels, stride, with_attention)
        # regularisation applied to the decoder output
        self.dropout = nn.Dropout(p=dropout)
        # per-position classifier on the shallowest feature width
        self.segmentation_head = nn.Conv1d(channels[0], num_classes, kernel_size=1, padding=0, stride=1)

    def forward(self, x):
        # encoder -> list of multi-scale feature maps
        skips = self.encoder(x)
        # decoder -> single map at the resolution of the first encoder output
        decoded = self.dropout(self.decoder(skips))
        # per-position class scores
        return self.segmentation_head(decoded)

## The Pytorch Lightning Model (calls the Unet [1D])

In [None]:
class Model(pl.LightningModule):
    """Lightning module that trains, validates and predicts with the 1D ``Unet``.

    Constructor arguments are recorded with ``save_hyperparameters()`` and
    read back from ``self.hparams`` in ``configure_optimizers``.

    Args:
        dropout, stride, num_features, num_classes, in_planes, scaling,
        depth, block_depth, with_attention: forwarded to ``Unet``.
        lr: learning rate of AdamW; also the peak rate of the 'onecycle' and
            'cyclic' schedulers.
        scheduler: 'onecycle', 'plateau' or 'cyclic'.
        max_epochs: used by the 'cyclic' scheduler to size its cycle.
        patience: the 'plateau' scheduler waits ``patience // 2`` epochs
            before reducing the rate. It is an explicit argument so that
            ``self.hparams.patience`` always exists (it used to be available
            only when passed through ``**kwargs``).
        **kwargs: extra values, accepted for convenience.
    """

    def __init__(self,
                 dropout=0,
                 stride=5,
                 num_features=4,
                 num_classes=12,
                 in_planes=64,
                 scaling='mul',
                 depth=4,
                 block_depth=2,
                 with_attention=False,
                 lr=1e-4,
                 scheduler='onecycle',
                 max_epochs=10,
                 patience=5,
                 **kwargs
                ):
        super().__init__()
        # save hyperparameters
        self.save_hyperparameters()
        # instantiate model
        self.net = Unet(
                num_features=num_features,
                num_classes=num_classes,
                in_planes=in_planes,
                scaling=scaling,
                depth=depth,
                block_depth=block_depth,
                stride=stride,
                with_attention=with_attention,
                dropout=dropout,
            )
        # instantiate the loss function (criterion)
        self.criterion = nn.CrossEntropyLoss()
        # instantiate a metric checking method
        self.accuracy = Accuracy(task='multiclass', num_classes=num_classes)

    def forward(self, x):
        return self.net(x)

    def common_step(self, batch):
        """Run a forward pass on ``batch`` and return ``(loss, accuracy)``.

        ``batch`` is a dict holding the inputs under ``'x'`` and the targets
        under ``'label'``.
        """
        x = batch['x']
        label = batch['label']

        y = self(x)  # forward pass

        loss = self.criterion(y, label)  # loss
        accuracy = self.accuracy(y, label)  # metric
        return loss, accuracy

    def training_step(self, batch, batch_nb=0, dataloader_idx=0):
        # step through once to get loss and accuracy (training)
        loss, accuracy = self.common_step(batch)

        # current learning rate of the first parameter group, for monitoring
        self.log('lr', self.optimizers().param_groups[0]['lr'], prog_bar=True)
        self.log('loss', loss, prog_bar=True, on_step=True, on_epoch=True)
        self.log('acc', accuracy, prog_bar=True, on_step=False, on_epoch=True)

        return loss

    def validation_step(self, batch, batch_nb=0):
        # step through once to get loss and accuracy (evaluation)
        loss, accuracy = self.common_step(batch)

        self.log('val_loss', loss, prog_bar=True, on_step=False, on_epoch=True)
        self.log('val_acc', accuracy, prog_bar=True, on_step=False, on_epoch=True)

    def predict_step(self, batch, batch_nb=0):
        """Return the file number of the first sample and the flattened class ids."""
        x = batch['x']
        # only the first entry is kept (the loaders in this notebook use batch_size=1)
        file_num = batch['num'][0]

        y = self(x).softmax(dim=1).argmax(dim=1)
        return {
            'file_no': file_num.cpu().numpy(),
            'Target': y.flatten().cpu().numpy()
        }

    def configure_optimizers(self):
        """Build AdamW and the scheduler selected by ``hparams.scheduler``.

        Raises:
            ValueError: if the scheduler name is not 'onecycle', 'plateau'
                or 'cyclic' (this used to fail on an unbound local variable).
        """
        # The optimizer
        optimizer = optim.AdamW(self.parameters(), lr=self.hparams.lr, weight_decay=0.1)

        # The scheduler
        sch_name = self.hparams.scheduler
        if sch_name == 'onecycle':
            scheduler = optim.lr_scheduler.OneCycleLR(
                optimizer,
                max_lr=self.hparams.lr,
                pct_start=0.1,
                total_steps=self.trainer.estimated_stepping_batches,
            )
            lr_scheduler = {
                'scheduler': scheduler,
                'interval': 'step'
            }
        elif sch_name == 'plateau':
            scheduler = optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                mode='max',
                min_lr=1e-7,
                patience=self.hparams.patience//2,
                factor=0.5,
            )
            lr_scheduler = {
                'scheduler': scheduler,
                'interval': 'epoch',
                'monitor': 'val_acc'
            }
        elif sch_name == 'cyclic':
            # one epoch of ramp-up, the remaining epochs of ramp-down
            epoch_steps = self.trainer.estimated_stepping_batches // self.hparams.max_epochs
            scheduler = optim.lr_scheduler.CyclicLR(
                optimizer,
                base_lr=1e-7,
                max_lr=self.hparams.lr,
                mode='triangular2',
                step_size_up=epoch_steps,
                step_size_down=(self.hparams.max_epochs-1)*epoch_steps,
                cycle_momentum=False,
            )
            lr_scheduler = {
                'scheduler': scheduler,
                'interval': 'step',
            }
        else:
            raise ValueError(
                f"Unknown scheduler {sch_name!r}; expected 'onecycle', 'plateau' or 'cyclic'."
            )

        return {'optimizer': optimizer, 'lr_scheduler': lr_scheduler, "monitor": 'val_acc'}

## Extracting the train and test data first

In [None]:
def get_trailing_number(s):
    """Return the run of digits that ends ``s`` as an int, or None if there is none."""
    match = re.search(r'\d+$', s)
    if match is None:
        return None
    return int(match.group())

In [None]:
# Read every training CSV, tag its rows with the file they came from, and stack them.
train = []
for filepath in tqdm(glob.glob('Train_data/*.csv')):
    # file name without directory and extension, lower-cased
    file_id = filepath.split(os.sep)[-1].split('.')[0].lower()
    # number that ends the file name (None when there is none)
    file_no = get_trailing_number(file_id)
    df = pd.read_csv(filepath).assign(file_id=file_id, file_no=file_no)
    train.append(df)

train = pd.concat(train, axis=0, ignore_index=True)
# order the rows by file, then chronologically within each file
train = train.sort_values(by=['file_no', 'time'])
display(train)

In [None]:
# Read every test CSV, tag its rows with the file they came from, and stack them.
test = []
for filepath in tqdm(glob.glob('Test_data/*.csv')):
    # file name without directory and extension, lower-cased
    file_id = filepath.split(os.sep)[-1].split('.')[0].lower()
    # number that ends the file name (None when there is none)
    file_no = get_trailing_number(file_id)
    df = pd.read_csv(filepath).assign(file_id=file_id, file_no=file_no)
    # row identifier: "<file_id>_<time>"
    df['ID'] = df['time'].apply(lambda t, prefix=file_id: f"{prefix}_{t}")
    test.append(df)

test = pd.concat(test, axis=0, ignore_index=True)
# order the rows by file, then chronologically within each file
test = test.sort_values(by=['file_no', 'time'])
display(test)

In [None]:
# Number of training rows per label.
train['label'].value_counts()

In [None]:
# Number of training rows per source file.
train['file_id'].value_counts()

In [None]:
# Peek at the rows that belong to file number 34.
train[train['file_no'] == 34]

## The Dataset Class

In [None]:
class NetworkDataset(Dataset):
    """One sample per file: the whole file as a ``[4, n_rows]`` feature matrix.

    The frame is split on ``file_no`` in a single ``groupby`` pass (instead
    of one full boolean scan per file). Files keep their order of first
    appearance in ``df``. The features are the columns ``time``,
    ``portPktIn``, ``portPktOut`` and ``qSize``, transformed with
    ``log1p(value / 1e6)``.

    Args:
        df: frame holding ``file_no``, the four feature columns and,
            optionally, ``label``.
        repeats: the dataset reports ``repeats`` times its real length and
            wraps the index around, so an epoch visits every file
            ``repeats`` times.

    Each item is a dict with ``'x'`` (float32 array), ``'num'`` (the file
    number) and, when ``df`` has a ``label`` column, ``'label'`` (the label
    values of that file).
    """

    def __init__(self, df, repeats=1):
        self.repeats = repeats
        cols = ['time','portPktIn', 'portPktOut', 'qSize']
        has_labels = 'label' in df.columns

        file_nums, data, labels = [], [], []
        # sort=False keeps the groups in order of first appearance, which is
        # the order `df.file_no.unique()` would give.
        for file_no, group in df.groupby('file_no', sort=False):
            file_nums.append(file_no)
            data.append(np.log1p(group[cols].T.values / 1e6))
            if has_labels:
                labels.append(group['label'].values)

        self.data = data
        self.file_nums = np.asarray(file_nums)
        # `labels` only exists for labelled frames; __getitem__ relies on that
        if has_labels:
            self.labels = labels

    def __len__(self):
        return len(self.data) * self.repeats

    def __getitem__(self, idx):
        # wrap around so that indices beyond the real length repeat the files
        idx = idx % len(self.data)

        sample = dict(
            x=self.data[idx].astype('float32'),
            num=self.file_nums[idx],
        )
        if hasattr(self, 'labels'):
            sample['label'] = self.labels[idx]

        return sample

## Training Utility functions

In [None]:
import gc
import inspect
import shutil
import torch
import pytorch_lightning as pl
from pytorch_lightning.callbacks.progress.tqdm_progress import TQDMProgressBar


# best-effort removal of a directory tree
def remove_dir(path):
    """Delete the directory tree at ``path``, ignoring filesystem errors.

    A missing path, a permission problem or any other ``OSError`` is
    silently ignored, so the call is safe on paths that may not exist.
    Other exceptions (e.g. a ``TypeError`` for a non-path argument, or a
    ``KeyboardInterrupt``) are no longer swallowed.
    """
    try:
        shutil.rmtree(path)
    except OSError:
        # nothing to clean up, or the tree cannot be removed: carry on
        pass


# free CPU and GPU memory as needed and required for cross-validation
def free_memory(to_delete: list):
    """Drop the named variables from the caller's namespace and reclaim memory.

    Args:
        to_delete: names of the variables to remove from the namespace of
            the calling frame. Unknown names are ignored.

    The names are popped from the caller's ``f_locals``. This takes effect
    when the caller runs at module level (e.g. a notebook cell), where that
    mapping is the module's globals; NOTE(review): for callers that are
    functions, CPython may not let the locals be removed this way.

    The garbage collector and the CUDA cache are flushed once, after all
    the names are gone (also when ``to_delete`` is empty).
    """
    calling_namespace = inspect.currentframe().f_back

    for _var in to_delete:
        calling_namespace.f_locals.pop(_var, None)

    # a single pass is enough once every reference has been dropped
    gc.collect()
    torch.cuda.empty_cache()

# build the callbacks shared by every training run
def get_callbacks(args, fold=None):
    """Return ``[checkpoint, early-stopping, progress-bar]`` callbacks.

    Args:
        args: configuration object; only ``args.patience`` is read.
        fold: optional fold identifier; when given, checkpoint file names
            are prefixed with ``fold{fold}-``.

    Both the checkpoint and the early-stopping callbacks track ``val_acc``
    and treat higher values as better.
    """
    prefix = "" if fold is None else f"fold{fold}-"

    # keep only the weights of the single best epoch
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        filename=prefix + "{epoch}-{val_loss:0.4f}-{val_acc:0.4f}",
        monitor='val_acc',
        mode='max',
        verbose=False,
        save_last=False,
        save_top_k=1,
        save_weights_only=True,
    )

    # stop when val_acc has not improved for `args.patience` validation rounds
    early_stop_callback = pl.callbacks.EarlyStopping(
        monitor="val_acc",
        mode='max',
        patience=args.patience,
        verbose=True,
        strict=True,
        check_finite=True,
        check_on_train_epoch_end=False,
    )

    prog_rate = TQDMProgressBar(refresh_rate=1)

    callbacks = [checkpoint_callback, early_stop_callback, prog_rate]
    return callbacks


# build the train/validation dataloaders of one fold
def get_loaders(train_index, val_index):
    """Split the global ``train`` frame by file number and wrap both parts.

    Rows whose ``file_no`` is in ``val_index`` form the validation set; all
    the remaining rows form the training set, which is repeated
    ``args.repeats`` times and shuffled.

    NOTE(review): ``train_index`` is not used, and ``val_index`` is matched
    against ``file_no`` values (not row positions) - confirm that callers
    pass file numbers.
    """
    in_validation = train.file_no.isin(val_index)

    dst_train = NetworkDataset(train[~in_validation], args.repeats)
    dst_val = NetworkDataset(train[in_validation])

    # one file per batch
    return (
        DataLoader(dst_train, batch_size=1, shuffle=True),
        DataLoader(dst_val, batch_size=1, shuffle=False),
    )

# Prediction

## Creating a simple prediction model that works with the trained models without all the nuts and bolts needed for training

In [None]:
class EnsembleModel(pl.LightningModule):
    """Inference-only ensemble of ``Model`` checkpoints.

    Args:
        ckpt_paths: one checkpoint path or an iterable of them; every
            checkpoint is loaded on the CPU with ``Model.load_from_checkpoint``.
        mode: how the per-model class probabilities are combined:
            ``'mean'`` averages them, ``'max'`` takes the element-wise
            maximum. With any other value the stacked probabilities (one
            leading entry per model) are returned unreduced.

    Raises:
        ValueError: if no checkpoint path is given.
    """

    def __init__(self, ckpt_paths, mode='mean'):
        super().__init__()
        if isinstance(ckpt_paths, str):
            ckpt_paths = [ckpt_paths]
        ckpt_paths = list(ckpt_paths)
        if not ckpt_paths:
            # torch.stack() in forward() cannot work on an empty ensemble
            raise ValueError("EnsembleModel needs at least one checkpoint path.")
        self.mode = mode
        self.models = nn.ModuleList([
            Model.load_from_checkpoint(ckpt_path, map_location='cpu') for ckpt_path in ckpt_paths
        ])

    def forward(self, x):
        # softmax over dim 1 for each model, stacked on a new leading axis
        out = torch.stack([model(x).softmax(dim=1) for model in self.models], dim=0)
        if self.mode == 'mean':
            out = out.mean(dim=0)
        elif self.mode == 'max':
            # Tensor.max(dim=...) returns (values, indices); keep the values only
            out = out.max(dim=0).values
        return out

    def predict_step(self, batch, batch_idx=0):
        """Return the file number of the first sample and the prediction.

        For the 'mean' and 'max' modes ``'Target'`` holds the flattened
        class ids; for any other mode it holds the raw stacked probabilities.
        """
        x = batch['x']
        # only the first entry is kept (the loaders in this notebook use batch_size=1)
        file_num = batch['num'][0]

        y = self(x)
        if self.mode in ['max', 'mean']:
            y = y.argmax(dim=1).flatten().cpu().numpy()
        else:
            y = y.cpu().numpy()

        return {
            'file_no': file_num.cpu().numpy(),
            'Target': y,
        }

Load the sample submission

In [None]:
# Sample submission file; its 'ID' column is used later to order the predictions.
sample_sub = pd.read_csv('SampleSubmission.csv')
sample_sub.head()

## Instantiate the testing dataset 

In [None]:
# Wrap the test frame in the dataset class: one sample per test file.
dst_test = NetworkDataset(test)

# One file per batch, in dataset order (no shuffling at inference time).
test_loader = DataLoader(dst_test, batch_size=1, shuffle=False)

## Get the trained checkpoints

In [None]:
# Checkpoints of the first ensemble member.
# NOTE(review): the parent pattern is 'se-c128*' while the run pattern is 'se-c96-*';
# confirm this matches the folder layout - an unmatched pattern silently yields [].
checkpoint_paths1 = glob.glob('logs/se-c128*/se-c96-s5-l7*/checkpoints/*')
checkpoint_paths1

In [None]:
# Checkpoints of the second ensemble member (an unmatched pattern yields []).
checkpoint_paths2 = glob.glob('logs/se-c128*/se-c128-s5-l7*/checkpoints/*')
checkpoint_paths2

In [None]:
# Checkpoints of the third ensemble member (an unmatched pattern yields []).
checkpoint_paths3 = glob.glob('logs/se-c128*/se-c128-s5-l5*/checkpoints/*')
checkpoint_paths3

## Get predictions with the trained model

In [None]:
# Pool the checkpoints of every run and average their class probabilities.
checkpoint_paths = checkpoint_paths1 + checkpoint_paths2 + checkpoint_paths3
if not checkpoint_paths:
    # every glob pattern came back empty: there is nothing to ensemble
    raise FileNotFoundError("No checkpoint matched the glob patterns; check the 'logs' folder.")
pred_model = EnsembleModel(checkpoint_paths, mode='mean')

# No trainer is created anywhere in the prediction part of this notebook, so
# build a minimal one for inference (single device, no logger, no checkpoints).
pred_trainer = pl.Trainer(accelerator='auto', devices=1, logger=False, enable_checkpointing=False)
preds = pred_trainer.predict(pred_model, dataloaders=test_loader)

Convert the predictions into a DataFrame

In [None]:
# One row per predicted batch, with the 'file_no' and 'Target' keys returned by predict_step.
df_preds = pd.DataFrame(preds)
# Positional index 0..len-1 of every predicted step within its file.
df_preds['time'] = df_preds['Target'].apply(lambda t: np.arange(len(t)))
# Expand both array columns in lockstep: one row per (file, step).
df_preds = df_preds.explode(['Target', 'time'])
# NOTE(review): assumes the test file ids are "test<file_no>" and that the CSV 'time'
# values equal the positional index - confirm against the IDs of the sample submission.
df_preds['ID'] = df_preds.apply(lambda row: f"test{row['file_no']}_{row['time']}", axis=1)
df_preds.head()

Remove unwanted columns and ensure compatibility with the sample submission file

In [None]:
# Keep the submission columns only and order the rows like the sample submission
# (an ID missing from the predictions raises a KeyError here).
sub = (
    df_preds[['ID', 'Target']]
    .set_index('ID')
    .loc[sample_sub['ID']]
    .reset_index()
)
sub.head()

In [None]:
# Distribution of the predicted classes.
sub.Target.value_counts()

## Get Ready for submission

Consider a post-processing based on the data (information provided in the challenge overview)

In [None]:
def post_procs(df0):
    """Smooth isolated single-step predictions within each file.

    A row is rewritten when its ``Target`` differs from both of its
    neighbours in the same file while those two neighbours agree with each
    other; it then takes the neighbours' value. The file is the part of
    ``ID`` before the first underscore. The first and last rows of a file
    have only one neighbour and are never rewritten.

    Args:
        df0: frame with ``ID`` and ``Target`` columns, rows of a file in
            time order. It is not modified.

    Returns:
        A new frame with the ``ID`` and ``Target`` columns only.
    """
    df = df0.copy()
    file_key = df['ID'].apply(lambda x: x.split('_')[0])
    per_file_target = df.groupby(file_key)['Target']

    following = per_file_target.shift(-1)  # value of the next row of the same file
    preceding = per_file_target.shift(1)  # value of the previous row of the same file

    differs_from_both = (df.Target != following) & (df.Target != preceding)
    mask = differs_from_both & (preceding == following)
    df.loc[mask, 'Target'] = following[mask]
    return df[['ID', 'Target']]

In [None]:
# Output folder for the submission file; no error when it already exists.
os.makedirs("ensembles", exist_ok=True)

In [None]:
# Smooth the isolated predictions, then write the submission without the index column.
sub = post_procs(sub)
sub.to_csv('ensembles/final.csv', index=False)