# Data processing

In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
from biosppy.signals import ecg
import matplotlib.pyplot as plt
import os
import pickle
from tqdm import tqdm
import math
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score

In [2]:
data = pd.read_csv('original_data/train.csv', index_col='id')
train_y = data['y']
labels = train_y.to_numpy()

In [3]:
# See if there is any difference between using the filtered heartbeats or not
with open('data/heartbeat_templates_ecg.pkl', 'rb') as f:
    heartbeats = pickle.load(f)
    
# with open('data/heartbeat_filtered_ecg.pkl', 'rb') as f:
#     heartbeats = pickle.load(f)

In [4]:
np.random.seed(32)

def split_indices(n, val_pct):
    n_val = int(val_pct*n)
    idxs = np.random.permutation(n)
    return idxs[n_val:], idxs[:n_val]

In [5]:
class SignalDataset(Dataset):
    def __init__(self, signals, lengths, labels):
        self.signals = signals
        self.labels = labels
        self.lengths = lengths

    def __len__(self):
        return len(self.signals)

    def __getitem__(self, idx):
        return self.signals[idx], self.lengths[idx], self.labels[idx]

In [6]:
## Be sure that all the heartbeats have the same length

# Longest seq of beats
max_length_heartbeats = max([len(i) for i in heartbeats])
beat_length = 180 # All heartbeats have the same length

# Padded heartbeats
padded_heartbeats = []
lengths = []

# Normalize before padding
for i,heartbeat in enumerate(heartbeats):
    heartbeats[i] = (heartbeat - heartbeat.mean(axis=-1, keepdims=True)) / heartbeat.std(axis=-1, keepdims=True)
    
for heartbeat in heartbeats:
    length = len(heartbeat)
    lengths.append(length)
    pad = np.zeros((max_length_heartbeats-length, 180))
    padded_heartbeats.append(np.concatenate((heartbeat, pad),axis=0))

In [7]:
# Transform heartbeats to tensor
padded_heartbeats = np.array(padded_heartbeats)
padded_heartbeats = torch.tensor(padded_heartbeats, dtype=torch.float32)
labels_heartbeats = torch.tensor(labels)
lengths_heartbeats = torch.tensor(lengths)

# Normalize data per beat
# padded_heartbeats = (padded_heartbeats - padded_heartbeats.mean(axis=-1, keepdims=True)) / padded_heartbeats.std(axis=-1, keepdims=True)

# Train val split
train_idxs, val_idxs = split_indices(len(padded_heartbeats), 0.2)
train_x = padded_heartbeats[train_idxs]
val_y = labels_heartbeats[val_idxs]
train_y = labels_heartbeats[train_idxs]
val_x = padded_heartbeats[val_idxs]
train_lengths = lengths_heartbeats[train_idxs]
val_lengths = lengths_heartbeats[val_idxs]

In [8]:
# Let's make a dataset for all the individual beats - 1D conv

labels_beats = []
beats = []

for i, beat in enumerate(heartbeats):
    samples = beat.shape[0]
    beats.append(torch.tensor(beat, dtype=torch.float32))
    labels_beats.append(torch.full((samples,), labels[i]))
    
beats = torch.cat(beats, dim=0)
labels_beats = torch.cat(labels_beats)

In [9]:
# Train - val split for individual beats

train_idxs_beats, val_idxs_beats = split_indices(len(beats), 0.2)
train_x_beats = beats[train_idxs_beats]
val_y_beats = labels_beats[val_idxs_beats]
train_y_beats = labels_beats[train_idxs_beats]
val_x_beats = beats[val_idxs_beats]

In [10]:
# For the moment training without a scaler but try also with scaler later.
# scaler = StandardScaler()
# train_x = scaler.fit_transform(train_x)
# val_x = scaler.transform(val_x)

### Create dataset

In [11]:
### Signal dataset
class SignalDataset(Dataset):
    def __init__(self, signals, lengths, labels):
        self.signals = signals
        self.labels = labels
        self.lengths = lengths

    def __len__(self):
        return len(self.signals)

    def __getitem__(self, idx):
        return self.signals[idx], self.lengths[idx], self.labels[idx]

In [12]:
train_dataset = SignalDataset(train_x, train_lengths, train_y)
val_dataset = SignalDataset(val_x, val_lengths, val_y)

In [13]:
BATCH_SIZE = 64

def collate_fn(batch):
    signals, lengths, labels = zip(*batch)
    signals = torch.stack(signals)
    lengths = torch.stack(lengths)
    labels = torch.stack(labels)
    signals = signals.to(device)
    lengths = lengths.to(device)
    attention_mask = torch.arange(signals.size(1), device=device).unsqueeze(0) >= lengths.unsqueeze(1)
    return signals, attention_mask, labels

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, collate_fn=collate_fn)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, collate_fn=collate_fn)

# Transformer for signal beats

In [14]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model:int, seq_len:int, dropout:float):
        super(PositionalEncoding, self).__init__()
        self.d_model = d_model
        self.seq_len = seq_len
        self.dropout = nn.Dropout(dropout)
        
        pe = torch.zeros(seq_len, d_model) #(seq_len, d_model)
        
        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1) #(seq_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float()*(-math.log(10000.0)/d_model))
        pe[:,0::2] = torch.sin(position*div_term)
        pe[:,1::2] = torch.cos(position*div_term)
        
        pe = pe.unsqueeze(0) #(1, seq_len, d_model)
        
        self.register_buffer('pe', pe)
        
    def forward(self, x):
        x = x + (self.pe[:, :x.shape[1], :]).requires_grad_(False)
        return self.dropout(x)

In [15]:
class LayerNorm(nn.Module):
    def __init__(self, eps:float = 10**-6):
        super(LayerNorm, self).__init__()
        self.eps = eps
        self.alpha = nn.Parameter(torch.ones(1))
        self.bias = nn.Parameter(torch.zeros(1))
        
    def forward(self,x):
        mean = x.mean(dim=-1, keepdim=True) # We use the last dimension
        std = x.std(dim=-1, keepdim=True)
        return self.alpha*(x-mean)/(std+self.eps)+self.bias

In [16]:
class FeedFwd(nn.Module):
    def __init__(self, d_model:int, d_ff:int, dropout:float):
        super(FeedFwd, self).__init__()
        self.linear1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(d_ff, d_model)
        self.relu = nn.ReLU()
        
    def forward(self, x):
        # (b_size, seq_len, d_model) -> (b_size, seq_len, dff) -> (b_size, seq_len, d_model)
        x = self.relu(self.linear1(x))
        x = self.dropout(self.linear2(x))
        return x
        #return self.linear2(self.dropout(self.linear1(x)))

In [17]:
class MultiheadAttention(nn.Module):
    def __init__(self, d_model:int, h:int, dropout:float): # h -> num heads
        super(MultiheadAttention, self).__init__()
        self.d_model = d_model
        self.h = h
        assert d_model % h == 0, "d_model is not divisible by h"
        
        self.d_k = d_model // h # dim of each head
        
        self.w_q = nn.Linear(d_model, d_model)
        self.w_k = nn.Linear(d_model, d_model)
        self.w_v = nn.Linear(d_model, d_model)
        
        self.w_o = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)
        
    @staticmethod
    def attention(query, key, value, mask, dropout):
        d_k = query.shape[-1]
        attention_scores = (query @ key.transpose(-2, -1)) / math.sqrt(d_k)
        if mask != None:
            mask = mask.unsqueeze(1).unsqueeze(2) # (b_size, 1, 1, seq_len) -> make this to match the attention_Score size
            #attention_scores.masked_fill_(mask == 0, -1e9)
            attention_scores.masked_fill_(mask, -1e9)
        attention_scores = attention_scores.softmax(dim=-1) # (b_size, h, seq_len, seq_len)
        if dropout != None:
            attention_scores = dropout(attention_scores)
            
        return (attention_scores@value), attention_scores
        
    def forward(self, q, k, v, mask):
        query = self.w_q(q) # (b_size, seq_len, d_model) -> (b_size, seq_len, d_model)
        key = self.w_k(k)
        val = self.w_v(v)
        
        query = query.view(query.shape[0], query.shape[1], self.h, self.d_k).transpose(1,2) # Divide to heads
        key = key.view(key.shape[0], key.shape[1], self.h, self.d_k).transpose(1,2)
        val = val.view(val.shape[0], val.shape[1], self.h, self.d_k).transpose(1,2)
        # Outpus size at this point (b_size, h, seq_len, d_k)
        
        # Apply mask
        x, self.attention_scores = MultiheadAttention.attention(query, key, val, mask, self.dropout)
        
        x = x.transpose(1,2).contiguous().view(x.shape[0], -1, self.h*self.d_k) # Go back to (b_size, seq_len, h, d_k) and then (b_size, seq_len, d_model)
        
        return self.w_o(x) # (b_size, seq_len, d_model)
    

In [18]:
class ResidualConnection(nn.Module):
    def __init__(self, dropout:float):
        super(ResidualConnection, self).__init__()
        self.dropout = nn.Dropout(dropout)
        self.norm = LayerNorm()
        
    def forward(self, x, sublayer):
        return x + self.dropout(sublayer(self.norm(x)))

In [19]:
class EncoderBlock(nn.Module):
    def __init__(self, self_attention, feed_forward, dropout):
        super(EncoderBlock, self).__init__()
        self.self_attention = self_attention
        self.feed_forward = feed_forward
        self.residual_connections = nn.ModuleList([ResidualConnection(dropout) for _ in range(2)])

    
    def forward(self, x, src_mask):
        x = self.residual_connections[0](x, lambda x: self.self_attention(x, x, x, src_mask))
        x = self.residual_connections[1](x, self.feed_forward)
        return x
        
        

In [20]:
class Encoder(nn.Module):
    def __init__(self, layers):
        super(Encoder, self).__init__()
        self.layers = layers
        self.norm = LayerNorm()
        
    def forward(self, x, mask):
        for layer in self.layers:
            x = layer(x, mask)
        return self.norm(x)

In [21]:
class Transformer(nn.Module):
    def __init__(self, encoder, pos_enc, src_size, d_model, output_dim):
        super(Transformer, self).__init__()
        self.encoder = encoder
        self.pos_enc = pos_enc
        self.input_proj = nn.Linear(src_size, d_model)
        self.out_proj = nn.Linear(d_model, output_dim)
        
    def encode(self, src, src_mask):
        src = self.input_proj(src)
        src = self.pos_enc(src)
        encoder_output = self.encoder(src, src_mask)
        # MLP
        pre_out = encoder_output.mean(dim=1) #Avg pooling over seq length -> really helpful?
        logits = self.out_proj(pre_out)
        return logits
        

In [22]:
def build_transformer(src_size, src_seq_len, d_model=512, N=6, h=8, dropout=0.1, d_ff=2048, output_dim=4):
    # Positional encoding
    pe = PositionalEncoding(d_model, src_seq_len, dropout)
    # Encoder
    encoder_blocks = []
    for _ in range(N):
        encoder_self_attention = MultiheadAttention(d_model, h, dropout)
        feed_fwd = FeedFwd(d_model, d_ff, dropout)
        encoder_block = EncoderBlock(encoder_self_attention, feed_fwd, dropout)
        encoder_blocks.append(encoder_block)
        
    encoder = Encoder(nn.ModuleList(encoder_blocks))
    
    transformer = Transformer(encoder, pe, src_size, d_model, output_dim)
    
    for p in transformer.parameters():
        if p.dim()>1:
            nn.init.xavier_uniform_(p)
            
    return transformer

In [23]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

d_model = 512
nhead = 8
num_encoder_layers=3
num_decoder_layers = 0
dim_feedforward = 2048
dropout = 0.1
output_dim = 4
LR = 1e-4
EPOCHS = 50

# src_seq_len=max_length_heartbeats
model = build_transformer(src_size=180, src_seq_len=max_length_heartbeats, d_model=d_model, N=num_encoder_layers, h=nhead, dropout=dropout, d_ff=dim_feedforward, output_dim=4)
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=1e-5)
loss_fn = nn.CrossEntropyLoss()

train_losses = []
val_losses = []

for epoch in tqdm(range(EPOCHS)):
    model.train()
    train_loss=0
    all_preds = []
    all_labels = []
    for batch in train_dataloader:
        inputs, attention_mask, targets = batch
        inputs = inputs.to(device)
        attention_mask = attention_mask.to(device)
        targets = targets.to(device)
        optimizer.zero_grad()
        # Debug
#         inputs = torch.ones((1, 2, 180), device=device)
#         mask = torch.tensor([[False, True]], device=device)
        outputs = model.encode(inputs, attention_mask)
        pred = torch.argmax(outputs, dim=1)
        loss = loss_fn(outputs, targets)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        
        all_labels.extend(targets.detach().cpu().tolist())
        all_preds.extend(pred.detach().cpu().tolist())
        
    val_loss = 0
    all_preds_val = []
    all_labels_val = []
    with torch.no_grad():
        model.eval()
        for batch in val_dataloader:
            inputs, attention_mask, targets = batch
            inputs = inputs.to(device)
            attention_mask = attention_mask.to(device)
            targets = targets.to(device)
            outputs = model.encode(inputs, attention_mask)
            pred = torch.argmax(outputs, dim=1)
            loss = loss_fn(outputs, targets)
            val_loss += loss.item()
            
            all_labels_val.extend(targets.detach().cpu().tolist())
            all_preds_val.extend(pred.detach().cpu().tolist())
            
    avg_train_loss = train_loss/len(train_dataloader)
    avg_val_loss = val_loss/len(val_dataloader)
    train_acc = accuracy_score(all_labels, all_preds)
    val_acc = accuracy_score(all_labels_val, all_preds_val)
    f1 = f1_score(all_labels_val, all_preds_val, average='micro')
    print(f"Epoch [{epoch+1}/{EPOCHS}] | Train Loss: {avg_train_loss:.4f} | Validation Loss: {avg_val_loss:.4f} | Train acc: {train_acc:.4f} | Val acc: {val_acc:.4f} | f1 score: {f1:.4f}")
    train_losses.append(avg_train_loss)
    val_losses.append(avg_val_loss)



  2%|█▋                                                                                 | 1/50 [00:29<23:54, 29.27s/it]

Epoch [1/100] | Train Loss: 1.0380 | Validation Loss: 1.0216 | Train acc: 0.6099 | Val acc: 0.6188 | f1 score: 0.6188


  4%|███▎                                                                               | 2/50 [00:58<23:29, 29.36s/it]

Epoch [2/100] | Train Loss: 0.8100 | Validation Loss: 0.9986 | Train acc: 0.6490 | Val acc: 0.6344 | f1 score: 0.6344


  6%|████▉                                                                              | 3/50 [01:28<23:06, 29.50s/it]

Epoch [3/100] | Train Loss: 0.7629 | Validation Loss: 0.9621 | Train acc: 0.6727 | Val acc: 0.6520 | f1 score: 0.6520


  8%|██████▋                                                                            | 4/50 [01:58<22:42, 29.62s/it]

Epoch [4/100] | Train Loss: 0.7241 | Validation Loss: 0.8947 | Train acc: 0.6888 | Val acc: 0.6452 | f1 score: 0.6452


 10%|████████▎                                                                          | 5/50 [02:28<22:19, 29.76s/it]

Epoch [5/100] | Train Loss: 0.6924 | Validation Loss: 0.8382 | Train acc: 0.7040 | Val acc: 0.6540 | f1 score: 0.6540


 12%|█████████▉                                                                         | 6/50 [02:58<21:54, 29.88s/it]

Epoch [6/100] | Train Loss: 0.6466 | Validation Loss: 0.8481 | Train acc: 0.7245 | Val acc: 0.6530 | f1 score: 0.6530


 14%|███████████▌                                                                       | 7/50 [03:28<21:29, 29.98s/it]

Epoch [7/100] | Train Loss: 0.6179 | Validation Loss: 0.8972 | Train acc: 0.7389 | Val acc: 0.6403 | f1 score: 0.6403


 16%|█████████████▎                                                                     | 8/50 [03:58<21:03, 30.07s/it]

Epoch [8/100] | Train Loss: 0.5798 | Validation Loss: 0.8981 | Train acc: 0.7553 | Val acc: 0.6618 | f1 score: 0.6618


 18%|██████████████▉                                                                    | 9/50 [04:29<20:35, 30.14s/it]

Epoch [9/100] | Train Loss: 0.5571 | Validation Loss: 0.9571 | Train acc: 0.7594 | Val acc: 0.6716 | f1 score: 0.6716


 20%|████████████████▍                                                                 | 10/50 [04:59<20:07, 30.19s/it]

Epoch [10/100] | Train Loss: 0.5184 | Validation Loss: 1.1147 | Train acc: 0.7765 | Val acc: 0.6745 | f1 score: 0.6745


 22%|██████████████████                                                                | 11/50 [05:29<19:39, 30.24s/it]

Epoch [11/100] | Train Loss: 0.4720 | Validation Loss: 1.0297 | Train acc: 0.8036 | Val acc: 0.6979 | f1 score: 0.6979


 24%|███████████████████▋                                                              | 12/50 [06:00<19:10, 30.29s/it]

Epoch [12/100] | Train Loss: 0.4453 | Validation Loss: 1.0469 | Train acc: 0.8163 | Val acc: 0.6999 | f1 score: 0.6999


 26%|█████████████████████▎                                                            | 13/50 [06:30<18:42, 30.33s/it]

Epoch [13/100] | Train Loss: 0.4436 | Validation Loss: 1.1478 | Train acc: 0.8263 | Val acc: 0.6940 | f1 score: 0.6940


 28%|██████████████████████▉                                                           | 14/50 [07:01<18:13, 30.38s/it]

Epoch [14/100] | Train Loss: 0.4100 | Validation Loss: 1.3008 | Train acc: 0.8361 | Val acc: 0.6764 | f1 score: 0.6764


 30%|████████████████████████▌                                                         | 15/50 [07:31<17:44, 30.43s/it]

Epoch [15/100] | Train Loss: 0.3646 | Validation Loss: 1.3814 | Train acc: 0.8508 | Val acc: 0.6921 | f1 score: 0.6921


 32%|██████████████████████████▏                                                       | 16/50 [08:02<17:15, 30.45s/it]

Epoch [16/100] | Train Loss: 0.3391 | Validation Loss: 1.3641 | Train acc: 0.8608 | Val acc: 0.7048 | f1 score: 0.7048


 34%|███████████████████████████▉                                                      | 17/50 [08:32<16:46, 30.49s/it]

Epoch [17/100] | Train Loss: 0.3041 | Validation Loss: 1.3872 | Train acc: 0.8779 | Val acc: 0.7087 | f1 score: 0.7087


 36%|█████████████████████████████▌                                                    | 18/50 [09:03<16:17, 30.53s/it]

Epoch [18/100] | Train Loss: 0.2963 | Validation Loss: 1.4476 | Train acc: 0.8803 | Val acc: 0.6931 | f1 score: 0.6931


 38%|███████████████████████████████▏                                                  | 19/50 [09:33<15:47, 30.56s/it]

Epoch [19/100] | Train Loss: 0.2878 | Validation Loss: 1.4250 | Train acc: 0.8876 | Val acc: 0.6784 | f1 score: 0.6784


 40%|████████████████████████████████▊                                                 | 20/50 [10:04<15:17, 30.58s/it]

Epoch [20/100] | Train Loss: 0.2679 | Validation Loss: 1.4329 | Train acc: 0.8962 | Val acc: 0.6745 | f1 score: 0.6745


 42%|██████████████████████████████████▍                                               | 21/50 [10:35<14:47, 30.60s/it]

Epoch [21/100] | Train Loss: 0.2456 | Validation Loss: 1.5638 | Train acc: 0.9011 | Val acc: 0.7077 | f1 score: 0.7077


 44%|████████████████████████████████████                                              | 22/50 [11:05<14:17, 30.62s/it]

Epoch [22/100] | Train Loss: 0.2121 | Validation Loss: 1.7042 | Train acc: 0.9130 | Val acc: 0.7087 | f1 score: 0.7087


 46%|█████████████████████████████████████▋                                            | 23/50 [11:36<13:47, 30.64s/it]

Epoch [23/100] | Train Loss: 0.1873 | Validation Loss: 1.6640 | Train acc: 0.9304 | Val acc: 0.7224 | f1 score: 0.7224


 48%|███████████████████████████████████████▎                                          | 24/50 [12:07<13:16, 30.65s/it]

Epoch [24/100] | Train Loss: 0.1841 | Validation Loss: 1.7272 | Train acc: 0.9253 | Val acc: 0.7058 | f1 score: 0.7058


 50%|█████████████████████████████████████████                                         | 25/50 [12:37<12:46, 30.67s/it]

Epoch [25/100] | Train Loss: 0.1760 | Validation Loss: 1.7837 | Train acc: 0.9287 | Val acc: 0.7097 | f1 score: 0.7097


 52%|██████████████████████████████████████████▋                                       | 26/50 [13:08<12:16, 30.67s/it]

Epoch [26/100] | Train Loss: 0.1633 | Validation Loss: 1.8265 | Train acc: 0.9372 | Val acc: 0.7009 | f1 score: 0.7009


 54%|████████████████████████████████████████████▎                                     | 27/50 [13:39<11:45, 30.69s/it]

Epoch [27/100] | Train Loss: 0.1774 | Validation Loss: 1.7493 | Train acc: 0.9279 | Val acc: 0.6970 | f1 score: 0.6970


 56%|█████████████████████████████████████████████▉                                    | 28/50 [14:10<11:15, 30.70s/it]

Epoch [28/100] | Train Loss: 0.1723 | Validation Loss: 1.7489 | Train acc: 0.9326 | Val acc: 0.6911 | f1 score: 0.6911


 58%|███████████████████████████████████████████████▌                                  | 29/50 [14:40<10:44, 30.70s/it]

Epoch [29/100] | Train Loss: 0.1411 | Validation Loss: 1.9826 | Train acc: 0.9433 | Val acc: 0.6794 | f1 score: 0.6794


 58%|███████████████████████████████████████████████▌                                  | 29/50 [15:00<10:52, 31.07s/it]


KeyboardInterrupt: 

In [None]:
print(outputs)