# BYOL Chapman

In [3]:
!pip install torchsummary

Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1
You should consider upgrading via the 'c:\users\kevalee shah\anaconda3\envs\tf2\python.exe -m pip install --upgrade pip' command.


In [206]:
import random
from typing import Callable, Tuple

import torch
from torch import nn, Tensor
import tensorflow as tf
import numpy as np
import copy
from torchsummary import summary

## Data Augmentations

In [5]:
class GaussianNoiseTransform(nn.Module):
    def __init__(self, sigma=0.05):
        super().__init__()
        self.sigma = sigma

    def forward(self, x: Tensor) -> Tensor:
        size = x.size()
        noise = torch.normal(0, self.sigma, size)
        return x + noise

In [6]:
class ScaleTransform(nn.Module):
    def __init__(self, sigma=0.1):
        super().__init__()
        self.sigma = sigma

    def forward(self, x: Tensor) -> Tensor:
        scalar = torch.normal(0, self.sigma, size=(1,))
        return scalar * x

In [7]:
class Negate(nn.Module):
    def __init__(self):
        super().__init__()

    def forward(self, x: Tensor) -> Tensor:
        return -1 * x

In [8]:
def default_augmentation(data_size: Tuple[int, int] = (2500, 4)) -> nn.Module:
    return nn.Sequential(
        GaussianNoiseTransform(sigma=0.05),
        ScaleTransform(sigma=0.1),
        Negate(),
    )

## Encoder Wrapper

In [102]:
from typing import Union


def mlp(dim: int, projection_size: int = 256, hidden_size: int = 4096) -> nn.Module:
    return nn.Sequential(
        nn.Linear(dim, hidden_size),
        nn.BatchNorm1d(hidden_size),
        nn.ReLU(inplace=True),
        nn.Linear(hidden_size, projection_size),
    )


class EncoderWrapper(nn.Module):
    def __init__(
        self,
        model: nn.Module,
        projection_size: int = 256,
        hidden_size: int = 4096,
        layer: Union[str, int] = -2,
    ):
        super().__init__()
        self.model = model
        self.projection_size = projection_size
        self.hidden_size = hidden_size
        self.layer = layer

        self._projector = None
        self._projector_dim = None
        self._encoded = torch.empty(0)
        self._register_hook()

    @property
    def projector(self):
        if self._projector is None:
            self._projector = mlp(
                self._projector_dim, self.projection_size, self.hidden_size
            )
        return self._projector

    def _hook(self, _, __, output):
        output = output.flatten(start_dim=1)
        if self._projector_dim is None:
            self._projector_dim = output.shape[-1]
        self._encoded = self.projector(output)

    def _register_hook(self):
        if isinstance(self.layer, str):
            layer = dict([*self.model.named_modules()])[self.layer]
        else:
            layer = list(self.model.children())[self.layer]

        layer.register_forward_hook(self._hook)

    def forward(self, x: Tensor) -> Tensor:
        _ = self.model(x)
        return self._encoded

## BYOL and Training Code 

In [203]:
from copy import deepcopy
from itertools import chain
from typing import Dict, List

import pytorch_lightning as pl
from torch import optim
import torch.nn.functional as func


def normalized_mse(x: Tensor, y: Tensor) -> Tensor:
    x = func.normalize(x, dim=-1)
    y = func.normalize(y, dim=-1)
    return 2 - 2 * (x * y).sum(dim=-1)


class BYOL(pl.LightningModule):
    def __init__(
        self,
        model: nn.Module,
        image_size: Tuple[int, int] = (128, 128),
        hidden_layer: Union[str, int] = -2,
        projection_size: int = 256,
        hidden_size: int = 4096,
        augment_fn: Callable = None,
        beta: float = 0.999,
        **hparams,
    ):
        super().__init__()
        self.augment = default_augmentation(image_size) if augment_fn is None else augment_fn
        self.beta = beta
        self.encoder = EncoderWrapper(
            model, projection_size, hidden_size, layer=hidden_layer
        )
        self.predictor = nn.Linear(projection_size, projection_size, hidden_size)
        self.hparams = hparams
        self._target = None

        # self.encoder(torch.zeros(2, 3, *image_size))
        self.encoder(torch.zeros(2, 1, *image_size))

    def forward(self, x: Tensor) -> Tensor:
        return self.predictor(self.encoder(x))

    @property
    def target(self):
        if self._target is None:
            self._target = deepcopy(self.encoder)
        return self._target

    def update_target(self):
        for p, pt in zip(self.encoder.parameters(), self.target.parameters()):
            pt.data = self.beta * pt.data + (1 - self.beta) * p.data

    # --- Methods required for PyTorch Lightning only! ---

    def configure_optimizers(self):
        optimizer = getattr(optim, self.hparams.get("optimizer", "Adam"))
        lr = self.hparams.get("lr", 1e-4)
        weight_decay = self.hparams.get("weight_decay", 1e-6)
        return optimizer(self.parameters(), lr=lr, weight_decay=weight_decay)

    def training_step(self, batch, *_) -> Dict[str, Union[Tensor, Dict]]:
        x = batch[0]
        with torch.no_grad():
            x1, x2 = self.augment(x), self.augment(x)

            
        pred1, pred2 = self.forward(x1.float()), self.forward(x2.float())
        with torch.no_grad():
            targ1, targ2 = self.target(x1.float()), self.target(x2.float())
        loss = torch.mean(normalized_mse(pred1, targ2) + normalized_mse(pred2, targ1))

        self.log("train_loss", loss.item())
        self.update_target()

        return {"loss": loss}

    @torch.no_grad()
    def validation_step(self, batch, *_) -> Dict[str, Union[Tensor, Dict]]:
        x = batch[0]
        x1, x2 = self.augment(x), self.augment(x)
        pred1, pred2 = self.forward(x1.float()), self.forward(x2.float())
        targ1, targ2 = self.target(x1.float()), self.target(x2.float())
        loss = torch.mean(normalized_mse(pred1, targ2) + normalized_mse(pred2, targ1))

        return {"loss": loss}

    @torch.no_grad()
    def validation_epoch_end(self, outputs: List[Dict]) -> Dict:
        val_loss = sum(x["loss"] for x in outputs) / len(outputs)
        self.log("val_loss", val_loss.item())

In [222]:
class SupervisedLightningModule(pl.LightningModule):
    def __init__(self, model: nn.Module, **hparams):
        super().__init__()
        self.model = model

    def forward(self, x: Tensor) -> Tensor:
        return self.model(x.float())

    def configure_optimizers(self):
        optimizer = getattr(optim, self.hparams.get("optimizer", "Adam"))
        lr = self.hparams.get("lr", 1e-4)
        weight_decay = self.hparams.get("weight_decay", 1e-6)
        return optimizer(self.parameters(), lr=lr, weight_decay=weight_decay)

    def training_step(self, batch, *_) -> Dict[str, Union[Tensor, Dict]]:
        x, y = batch
        loss = func.cross_entropy(self.forward(x.long()), y.long())
        self.log("train_loss", loss.item())
        return {"loss": loss}

    @torch.no_grad()
    def validation_step(self, batch, *_) -> Dict[str, Union[Tensor, Dict]]:
        x, y = batch
        y_tensor = torch.tensor(y, dtype=torch.long)

        loss = func.cross_entropy(self.forward(x.float()), y_tensor)
        return {"loss": loss}

    @torch.no_grad()
    def validation_epoch_end(self, outputs: List[Dict]) -> Dict:
        val_loss = sum(x["loss"] for x in outputs) / len(outputs)
        self.log("val_loss", val_loss.item())

## Load Chapman

In [196]:
import os
import pickle

testing_flag = True

if testing_flag:
    working_directory = 'byol_chapman_testing/'
else:
    working_directory = 'byol_chapman/'
if not os.path.exists(working_directory):
    os.makedirs(working_directory)

dataset_save_path = os.path.join(os.path.dirname(os.getcwd()), "PickledData", "chapman")
path_to_patient_to_rhythm_dict = os.path.join(dataset_save_path, 'patient_to_rhythm_dict.pickle')

# paths to user datasets with no nan values
if testing_flag:
    path_to_user_datasets = os.path.join(dataset_save_path, 'reduced_four_lead_user_datasets_no_nan.pickle')
    path_to_test_train_split_dict = os.path.join(dataset_save_path, 'reduced_test_train_split_dict_no_nan.pickle')
else:
    path_to_user_datasets  = os.path.join(dataset_save_path, 'four_lead_user_datasets_no_nan.pickle')
    path_to_test_train_split_dict = os.path.join(dataset_save_path, "test_train_split_dict_no_nan.pickle")

with open(path_to_user_datasets, 'rb') as f:
    user_datasets = pickle.load(f)


In [197]:
sample_key = list(user_datasets.keys())[0]
user_datasets[sample_key][0]

array([[ -7.3540957 ,   7.23364446,   0.17799249,  32.02763874],
       [  5.8718124 ,  -5.38643634,   6.15893108,  49.20169469],
       [  2.19193244,  -1.89484478,   3.78605262,  38.23729682],
       ...,
       [-44.03105187,  40.2337431 , -16.48754457, -16.18709577],
       [-48.83480225,  43.97261436, -16.85980413, -16.5017532 ],
       [-53.37096958,  46.59694527, -15.31916053, -29.90105602]])

In [198]:
with open(path_to_patient_to_rhythm_dict, 'rb') as f:
    patient_to_rhythm_dict = pickle.load(f)

with open(path_to_test_train_split_dict, 'rb') as f:
    test_train_split_dict = pickle.load(f)


In [199]:
patient_to_rhythm_dict[sample_key]

'SB'

In [171]:
unique_rhythms_words = set(list(patient_to_rhythm_dict.values()))
rythm_to_label_encoding = {rhythm : index for index, rhythm in enumerate(unique_rhythms_words)}
rythm_to_label_encoding


{'AFIB': 0, 'SB': 1, 'SR': 2, 'GSVT': 3}

In [186]:
def get_mean_std_min_max_from_user_list_format(user_datasets, train_users):
    """
    Obtain and means and standard deviations from a 'user-list' dataset from training users only
    Take the mean and standard deviation for activity, white, blue, green and red light
    

    Parameters:

        user_datasets
            dataset in the 'user-list' format {user_id: [data, label]}
        
        train_users
            list or set of users (corresponding to the user_ids) from which the mean and std are extracted

    Return:
        (means, stds)
            means and stds of the particular users
            shape: (num_channels)

    """
    all_data = []
    for user in user_datasets.keys():
        if user in train_users:
            user_data = user_datasets[user][0]
            all_data.append(user_data)

    data_combined = np.concatenate(all_data)
    
    means = np.mean(data_combined, axis=0)
    stds = np.std(data_combined, axis=0)
    mins = np.min(data_combined, axis=0)
    maxs = np.max(data_combined, axis=0)
    
    return (list(means), list(stds), list(mins), list(maxs))

def z_normalise(data, means, stds, mins, maxs):
    """
    Z-Normalise along the column for each of the leads, based on the means and stds given
    x' = (x - mu) / std
    """
    data_copy = copy.deepcopy(data)
    for index, values in enumerate(zip(means, stds)):
        mean = means[index]
        std = stds[index]
        data_copy[:,index] = (data_copy[:,index] - mean) / std
    
    return data_copy

def normalise(data, means, stds, mins, maxs):
    """
    Normalise along the column for each of the leads, using the min and max values seen in the train users. 
    x' = (x - x_min) / (x_max - x_min)
    """
    data_copy = copy.deepcopy(data)
    for index, values in enumerate(zip(mins, maxs)):
        x_min = mins[index]
        x_max = maxs[index]
        data_copy[:, index] = (data_copy[:, index] - x_min) / (x_max - x_min)
    
    return data_copy


In [185]:
means, stds, mins, maxs = get_mean_std_min_max_from_user_list_format(user_datasets, test_train_split_dict['train'])

In [187]:
z_normalised_data = z_normalise(user_datasets[sample_key][0], means, stds, mins, maxs)
normalised_data = normalise(user_datasets[sample_key][0], means, stds, mins, maxs)

print(z_normalised_data)
print('------------')
print(normalised_data)


[[-1.8139955   1.96823394 -0.72447689  1.76981564]
 [-2.47797529  2.2857918  -0.18326006  2.02300656]
 [-1.93500341  1.84186734 -0.25868668  2.03596796]
 ...
 [ 0.10595234  0.92221353 -1.98838292  1.19456068]
 [ 0.11349814  0.81692537 -1.82500027  1.24680317]
 [ 0.51944241  0.49353338 -1.843918    1.17101767]]
------------
[[0.61215195 0.35597979 0.55128869 0.66225593]
 [0.60434451 0.35884113 0.5573366  0.66614334]
 [0.61072907 0.35484117 0.55649373 0.66634235]
 ...
 [0.63472777 0.34655468 0.53716498 0.65342365]
 [0.6348165  0.34560598 0.53899072 0.65422576]
 [0.63958982 0.34269207 0.53877932 0.65306218]]


In [200]:
from torch.utils.data import Dataset

class ChapmanDataset(Dataset):
    def __init__(self, user_datasets, patient_to_rhythm_dict, test_train_split_dict, train_or_test, normalisation_function=normalise):
        self.samples = []
        relevant_keys = test_train_split_dict[train_or_test]
        means, stds, mins, maxs = get_mean_std_min_max_from_user_list_format(user_datasets, test_train_split_dict['train'])

        unique_rhythms_words = set(list(patient_to_rhythm_dict.values()))
        rythm_to_label_encoding = {rhythm : index for index, rhythm in enumerate(unique_rhythms_words)}
        
        for patient_id, data_label in user_datasets.items():
            if patient_id not in relevant_keys:
                continue
            data = data_label[0]
            normalised_data = normalisation_function(data, means, stds, mins, maxs)
            tensor_data = torch.tensor(normalised_data)
            tensor_data_size = tensor_data.size()
            tensor_data = torch.reshape(tensor_data, (1, tensor_data_size[0], tensor_data_size[1]))
            tensor_data = tensor_data.type(torch.DoubleTensor)
            rhythm = patient_to_rhythm_dict[patient_id]
            rhythm_label = rythm_to_label_encoding[rhythm]
            tensor_rhythm_label = torch.tensor(rhythm_label)
            tensor_rhythm_label = tensor_rhythm_label.type(torch.DoubleTensor)
            self.samples.append((tensor_data, tensor_rhythm_label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        return self.samples[idx]

In [201]:
train_chapman_dataset = ChapmanDataset(user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'train')
test_chapman_dataset = ChapmanDataset(user_datasets, patient_to_rhythm_dict, test_train_split_dict, 'test')

In [192]:
from torch.utils.data import DataLoader

train_loader = DataLoader(
    train_chapman_dataset,
    batch_size=128,
    shuffle=True
)
# i = 0
# for batch, label in train_loader:
#     print(i)
#     i+=1 

## Supervised Training

In [223]:
model = resnet18()
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

supervised = SupervisedLightningModule(model)

trainer = pl.Trainer(max_epochs=25, weights_summary=None)
train_loader = DataLoader(
    train_chapman_dataset,
    batch_size=128,
    shuffle=True
)
val_loader = DataLoader(
    test_chapman_dataset,
    batch_size=128,
)

trainer.fit(supervised, train_loader, val_loader)

GPU available: False, used: False
TPU available: None, using: 0 TPU cores
  y_tensor = torch.tensor(y, dtype=torch.long)
Epoch 0:  50%|█████     | 1/2 [00:07<00:07,  7.35s/it, loss=6.93, v_num=39]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 0: 100%|██████████| 2/2 [00:08<00:00,  4.11s/it, loss=6.93, v_num=39]
Epoch 1:  50%|█████     | 1/2 [00:07<00:07,  7.36s/it, loss=6.87, v_num=39]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 1: 100%|██████████| 2/2 [00:08<00:00,  4.13s/it, loss=6.87, v_num=39]
Epoch 2:  50%|█████     | 1/2 [00:07<00:07,  7.56s/it, loss=6.82, v_num=39]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 2: 100%|██████████| 2/2 [00:08<00:00,  4.19s/it, loss=6.82, v_num=39]
Epoch 3:  50%|█████     | 1/2 [00:07<00:07,  7.53s/it, loss=6.77, v_num=39]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A

1

In [225]:
def accuracy(pred: Tensor, labels: Tensor) -> float:
    return (pred.argmax(dim=-1) == labels).float().mean().item()


acc = sum([accuracy(model(x.float()), y.float()) for x, y in val_loader]) / len(val_loader)
print(f"Accuracy: {acc:.3f}")

Accuracy: 0.150


## Self-Supervised Training

In [193]:
from os import cpu_count

from torch.utils.data import DataLoader
from torchvision.models import resnet18

In [194]:
def accuracy(pred: Tensor, labels: Tensor) -> float:
    return (pred.argmax(dim=-1) == labels).float().mean().item()

In [None]:
!pip install pytorch_model_summary

In [166]:
model = resnet18(pretrained=False)
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
from pytorch_model_summary import summary
print(summary(model, torch.zeros((1, 1, 2500, 4)), show_input=False, show_hierarchical=False))
print(summary(model, torch.zeros((1, 1, 224, 224)), show_input=True, show_hierarchical=False))

-----------------------------------------------------------------------------
           Layer (type)         Output Shape         Param #     Tr. Param #
               Conv2d-1     [1, 64, 1250, 2]           3,136           3,136
          BatchNorm2d-2     [1, 64, 1250, 2]             128             128
                 ReLU-3     [1, 64, 1250, 2]               0               0
            MaxPool2d-4      [1, 64, 625, 1]               0               0
           BasicBlock-5      [1, 64, 625, 1]          73,984          73,984
           BasicBlock-6      [1, 64, 625, 1]          73,984          73,984
           BasicBlock-7     [1, 128, 313, 1]         230,144         230,144
           BasicBlock-8     [1, 128, 313, 1]         295,424         295,424
           BasicBlock-9     [1, 256, 157, 1]         919,040         919,040
          BasicBlock-10     [1, 256, 157, 1]       1,180,672       1,180,672
          BasicBlock-11      [1, 512, 79, 1]       3,673,088       3,673,08

In [226]:
model = resnet18()
model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

byol = BYOL(model, image_size=(2500, 4))
trainer = pl.Trainer(
    max_epochs=10,
    accumulate_grad_batches=2048 // 128,
    weights_summary=None,
)
train_loader = DataLoader(
    train_chapman_dataset,
    batch_size=128,
    shuffle=True
)
val_loader = DataLoader(
    test_chapman_dataset,
    batch_size=128,
)

trainer.fit(byol, train_loader, val_loader)

GPU available: False, used: False
TPU available: None, using: 0 TPU cores
Epoch 0:  50%|█████     | 1/2 [00:22<00:22, 22.74s/it, loss=3.96, v_num=40]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 0: 100%|██████████| 2/2 [00:26<00:00, 13.17s/it, loss=3.96, v_num=40]
Epoch 1:  50%|█████     | 1/2 [00:22<00:22, 22.72s/it, loss=3.47, v_num=40]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 1: 100%|██████████| 2/2 [00:26<00:00, 13.18s/it, loss=3.47, v_num=40]
Epoch 2:  50%|█████     | 1/2 [00:23<00:23, 23.01s/it, loss=3.16, v_num=40]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 2: 100%|██████████| 2/2 [00:26<00:00, 13.38s/it, loss=3.16, v_num=40]
Epoch 3:  50%|█████     | 1/2 [00:32<00:32, 32.20s/it, loss=2.94, v_num=40]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 3: 100%|██████████| 2/2 [00:36<00:00, 18

1

In [227]:
state_dict = model.state_dict()
new_model = resnet18()
new_model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

new_model.load_state_dict(state_dict)

supervised = SupervisedLightningModule(new_model)
trainer = pl.Trainer(
    max_epochs=10,
    accumulate_grad_batches=2048 // 128,
    weights_summary=None,
)
train_loader = DataLoader(
    train_chapman_dataset,
    batch_size=128,
    shuffle=True
)
val_loader = DataLoader(
    test_chapman_dataset,
    batch_size=128,
)
trainer.fit(supervised, train_loader, val_loader)

GPU available: False, used: False
TPU available: None, using: 0 TPU cores
  y_tensor = torch.tensor(y, dtype=torch.long)
Epoch 0:  50%|█████     | 1/2 [00:07<00:07,  7.59s/it, loss=6.88, v_num=41]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 0: 100%|██████████| 2/2 [00:08<00:00,  4.26s/it, loss=6.88, v_num=41]
Epoch 1:  50%|█████     | 1/2 [00:07<00:07,  7.84s/it, loss=6.83, v_num=41]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 1: 100%|██████████| 2/2 [00:08<00:00,  4.43s/it, loss=6.83, v_num=41]
Epoch 2:  50%|█████     | 1/2 [00:07<00:07,  7.79s/it, loss=6.78, v_num=41]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A
Epoch 2: 100%|██████████| 2/2 [00:08<00:00,  4.33s/it, loss=6.78, v_num=41]
Epoch 3:  50%|█████     | 1/2 [00:07<00:07,  7.40s/it, loss=6.74, v_num=41]
Validating: 0it [00:00, ?it/s][A
Validating:   0%|          | 0/1 [00:00<?, ?it/s][A

1

In [228]:
def accuracy(pred: Tensor, labels: Tensor) -> float:
    return (pred.argmax(dim=-1) == labels).float().mean().item()


acc = sum([accuracy(new_model(x.float()), y.float()) for x, y in val_loader]) / len(val_loader)
print(f"Accuracy: {acc:.3f}")

Accuracy: 0.250
