In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torch.autograd import Variable
import tensorflow as tf
from sklearn.model_selection import train_test_split

# Load the data

In [None]:
class HeartDeseaseDataset(Dataset):
    """Tabular dataset loaded from a numeric, comma-separated file.

    The first line of the file is skipped as a header. For every remaining
    row, columns 2..23 become the feature vector ``x`` and column 24 becomes
    the target ``y``.

    Args:
        path: path of the CSV file; every value must parse as a float.
        any_disease: when True, the target is the row-wise maximum over
            column 24 and all columns after it instead of column 24 alone.
            NOTE(review): this assumes the extra label columns follow
            column 24 in the file -- confirm against the CSV schema.
    """

    # Column layout of the CSV (0-based).
    FEATURE_COLUMNS = slice(2, 24)
    FIRST_LABEL_COLUMN = 24

    def __init__(self, path, any_disease=False):
        # ndmin=2 keeps a file with a single data row two-dimensional so the
        # column slices below keep working.
        self.data = np.loadtxt(path, delimiter=",", dtype=np.float32, skiprows=1, ndmin=2)
        self.x = torch.from_numpy(self.data[:, self.FEATURE_COLUMNS])
        if any_disease:
            # The slice must stay 2-D (``24:``, not ``24``): reducing a single
            # 1-D column along axis=1 raises an axis error.
            labels = np.amax(self.data[:, self.FIRST_LABEL_COLUMN:], axis=1)
        else:
            labels = self.data[:, self.FIRST_LABEL_COLUMN]
        self.y = torch.from_numpy(labels)

        self.len = len(self.data)

    def __len__(self):
        """Return the number of data rows."""
        return self.len

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair stored at ``idx``."""
        return self.x[idx], self.y[idx]

In [None]:
def create_dataloaders(dataset, batch_size, SEED, splits=None):
    """Split ``dataset`` into train/validation/test parts and wrap each in a DataLoader.

    Args:
        dataset: any sized, indexable dataset accepted by ``random_split``.
        batch_size: batch size of the train and validation loaders. The test
            loader always uses a batch size of 1.
        SEED: seed of the generator that drives the random split.
        splits: optional ``(train, validation, test)`` fractions. When omitted,
            the notebook-level ``TRAIN_SPLIT``, ``VALIDATION_SPLIT`` and
            ``TEST_SPLIT`` constants are used.

    Returns:
        ``(train_dataloader, val_dataloader, test_dataloader)``.

    Raises:
        ValueError: if the validation and test fractions leave a negative
            number of samples for training.
    """
    if splits is None:
        # Looked up at call time so the constants may be defined in a later cell.
        splits = (TRAIN_SPLIT, VALIDATION_SPLIT, TEST_SPLIT)
    _, val_fraction, test_fraction = splits

    n_total = len(dataset)
    n_val = round(n_total * val_fraction)
    n_test = round(n_total * test_fraction)
    # The train split takes the remainder: rounding three fractions
    # independently can miss len(dataset) by one, which random_split rejects.
    n_train = n_total - n_val - n_test
    if n_train < 0:
        raise ValueError(
            f'validation and test fractions {val_fraction} + {test_fraction} exceed the dataset size'
        )

    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        dataset,
        lengths=[n_train, n_val, n_test],
        generator=torch.Generator().manual_seed(SEED),
    )

    # Options shared by the three loaders.
    loader_options = dict(
        num_workers=4,
        prefetch_factor=2,
        persistent_workers=False,
        pin_memory=True,
    )

    train_dataloader = torch.utils.data.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, **loader_options
    )
    # Only training benefits from shuffling; a fixed order keeps validation
    # and test runs repeatable.
    val_dataloader = torch.utils.data.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False, **loader_options
    )
    test_dataloader = torch.utils.data.DataLoader(
        test_dataset, batch_size=1, shuffle=False, **loader_options
    )

    # Report sample counts (len of a DataLoader is its number of batches).
    print(f'Total dataset: {n_total}, '
          f'train dataset: {len(train_dataset)}, val dataset: {len(val_dataset)}, test_dataset: {len(test_dataset)}')
    return train_dataloader, val_dataloader, test_dataloader


In [None]:
# Input file and the seed handed to create_dataloaders for the random split.
data_path = "clean_data.csv"
SEED = 42

# Split fractions; the train fraction is whatever the other two leave over.
TEST_SPLIT = 0.2
VALIDATION_SPLIT = 0.21
TRAIN_SPLIT = 1 - TEST_SPLIT - VALIDATION_SPLIT

# accuracy() scores only the first sample of a batch, so a batch size of 1
# makes the reported accuracy cover every sample.
batch_size = 1

# any_disease=False: the target is the single label column read by the dataset.
dataset = HeartDeseaseDataset(data_path, any_disease=False)
print(dataset.x.shape)
train_dataloader, val_dataloader, test_dataloader = create_dataloaders(dataset, batch_size, SEED)

# Number of test batches.
print(len(test_dataloader))

torch.Size([11627, 22])
Total dataset: 11627, train dataset: 6860, val dataset: 2442, test_dataset: 2325
2325


  cpuset_checked))


# Define model and trainer

In [None]:
def accuracy_multi_prediction(pred, label):
    """Return the fraction of matching entries in the first row of ``pred`` and ``label``.

    Only row 0 of both inputs is compared; the number of compared positions
    is ``pred.shape[1]``.
    """
    width = pred.shape[1]
    first_pred, first_label = pred[0], label[0]
    hits = sum(
        1
        for col in range(width)
        if first_pred[col].item() == first_label[col].item()
    )
    return hits / width

In [None]:

def accuracy(pred, label):
    """Return 1 when the rounded first prediction equals the first label, else 0.

    Only element 0 of ``pred`` and ``label`` is inspected.
    """
    predicted_class = round(pred[0].item())
    return 1 if predicted_class == label[0].item() else 0

In [None]:
class LSTM(nn.Module):
    """LSTM followed by a linear head applied to the final hidden state.

    Args:
        num_classes: size of the linear head's output.
        input_size: number of features per time step.
        hidden_size: size of the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
        seq_length: stored on the instance; not used by ``forward``.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super(LSTM, self).__init__()
        self.num_classes = num_classes  # number of classes
        self.num_layers = num_layers  # number of layers
        self.input_size = input_size  # input size
        self.hidden_size = hidden_size  # hidden state
        self.seq_length = seq_length

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Run the LSTM over ``x`` and project the last layer's final hidden state.

        Args:
            x: batched input of shape ``(batch, seq, input_size)`` (the LSTM
                is built with ``batch_first=True``).

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        # Zero initial states created with the input's dtype and device.
        state_shape = (self.num_layers, x.size(0), self.hidden_size)
        h_0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c_0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)

        _, (hn, _) = self.lstm(x, (h_0, c_0))
        # hn[-1] is the last layer's hidden state after the final time step.
        return self.fc(hn[-1])
 

In [None]:
class Binary_AF:
    """Hard 0/1 step over a stored array-like, applied in place."""

    def __init__(self, x):
        self.x = x

    def forward(self):
        """Overwrite non-positive entries with 0 and positive entries with 1.

        The stored object itself is modified and returned.
        """
        values = self.x
        # Both masks are taken from the untouched values; zeroing the
        # non-positive entries cannot turn any of them positive.
        non_positive = values <= 0
        positive = values > 0
        values[non_positive] = 0
        values[positive] = 1
        return values

    def backward(self):
        """Return the stored values unchanged."""
        return self.x

In [None]:
class LSTMModel(nn.Module):
    """Stacked LSTM with a linear head and a sigmoid output.

    Args:
        input_dim: number of input features per time step.
        hidden_dim: size of the LSTM hidden state.
        layer_dim: number of stacked LSTM layers.
        output_dim: size of the model output.
        dropout_prob: dropout probability passed to ``nn.LSTM``.
    """

    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim, dropout_prob):
        super(LSTMModel, self).__init__()

        # Defining the number of layers and the nodes in each layer
        self.hidden_dim = hidden_dim
        self.layer_dim = layer_dim

        # LSTM layers
        self.lstm = nn.LSTM(
            input_dim, hidden_dim, layer_dim, batch_first=True, dropout=dropout_prob
        )

        # Fully connected layer
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.activation = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of samples to sigmoid outputs.

        Args:
            x: either ``(batch, input_dim)`` -- each row is treated as a
                sequence of length 1 -- or ``(batch, seq, input_dim)``.

        Returns:
            Tensor of shape ``(batch, output_dim)`` with values in (0, 1).
        """
        if x.dim() == 2:
            # Insert the sequence axis AFTER the batch axis. Inserting it in
            # front would turn the batch into one long sequence and no longer
            # match the per-sample initial states below.
            x = x.unsqueeze(1)

        # Zero initial hidden and cell states, one per layer and sample.
        state_shape = (self.layer_dim, x.shape[0], self.hidden_dim)
        h0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)
        c0 = torch.zeros(state_shape, dtype=x.dtype, device=x.device)

        out, _ = self.lstm(x, (h0, c0))

        # Keep only the last time step: (batch, hidden_dim).
        out = out[:, -1, :]

        # Project to (batch, output_dim) and squash.
        out = self.fc(out)
        return self.activation(out)


In [None]:
class Trainer:
    """Train, validate and evaluate a model with MSE loss and SGD.

    The learning-rate scheduler is created with ``mode='max'`` and is stepped
    with the validation accuracy; the model weights are saved whenever that
    accuracy improves.

    Args:
        model: the ``nn.Module`` to optimise.
    """

    def __init__(self, model):
        self.model = model
        self.criterion = torch.nn.MSELoss(reduction="mean")
        self.optimizer = torch.optim.SGD(model.parameters(), lr=0.05, momentum=0.3)
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, mode='max', factor=0.4, patience=2, cooldown=2)
        self.max_val_acc = float('-inf')

    def _batch_loss(self, outputs, labels):
        """Return the criterion value after aligning ``labels`` with ``outputs``."""
        # (batch, 1) outputs against (batch,) labels would broadcast to a
        # (batch, batch) difference; bring both to the same shape first.
        if labels.shape != outputs.shape and labels.numel() == outputs.numel():
            labels = labels.reshape(outputs.shape)
        return self.criterion(outputs, labels)

    def _current_lr(self):
        """Return the learning rate of the first parameter group."""
        return self.optimizer.param_groups[0]['lr']

    def fit(self, epochs, train_dataloader, val_dataloader, accuracy_function=None):
        """Train for ``epochs`` epochs, validating after each one.

        Args:
            epochs: number of passes over ``train_dataloader``.
            train_dataloader: iterable of ``(data, labels)`` training batches.
            val_dataloader: iterable of ``(inputs, labels)`` validation batches.
            accuracy_function: callable ``(outputs, labels) -> number`` scoring
                one batch. Defaults to the module-level ``accuracy`` function.
        """
        if accuracy_function is None:
            accuracy_function = accuracy

        for curr_epoch in range(epochs):
            self.model.train()
            pbar = tf.keras.utils.Progbar(target=len(train_dataloader))
            print(f'Epoch {curr_epoch} / {epochs}')
            for i, (data, labels) in enumerate(train_dataloader):
                # forward -- through the trainer's own model, not a global one
                out = self.model(data)
                loss = self._batch_loss(out, labels)

                # backward + gradient descent
                self.optimizer.zero_grad()
                loss.backward()
                self.optimizer.step()

                # Progbar averages the per-step values itself, so it is given
                # the batch loss as a plain float (no autograd graph is kept).
                pbar.update(i + 1, values=
                            [
                                ("loss", loss.item()),
                                ("lr", self._current_lr())
                            ])

            print('Validation')

            self.model.eval()
            pbar = tf.keras.utils.Progbar(target=len(val_dataloader))

            val_acc = 0
            with torch.no_grad():
                for i, (inputs, labels) in enumerate(val_dataloader):
                    outputs = self.model(inputs)
                    # Loss of THIS validation batch, not the last training loss.
                    batch_loss = self._batch_loss(outputs, labels).item()
                    acc = accuracy_function(outputs, labels)
                    pbar.update(i + 1, values=
                            [
                                ("loss", batch_loss),
                                ("lr", self._current_lr()),
                                ("acc", acc)
                            ])
                    val_acc += acc

            total_acc = val_acc / len(val_dataloader)
            # The scheduler runs in 'max' mode, so it must watch a metric that
            # should grow: the validation accuracy.
            self.scheduler.step(total_acc)

            if total_acc > self.max_val_acc:
                print(f'Model saved. Accuracy updated: {self.max_val_acc:.3f} -> {total_acc:.3f}')
                self.max_val_acc = total_acc
                torch.save(self.model.state_dict(), f'lstm_{total_acc}.pt')

    def evaluate(self, test_dataloader, accuracy_function):
        """Score the model on ``test_dataloader`` and print a summary.

        Args:
            test_dataloader: sized iterable of ``(inputs, labels)`` batches.
            accuracy_function: callable ``(pred, labels) -> number`` scoring
                one batch.

        Returns:
            ``(total_accuracy, mean_loss)``: accuracy in percent of batches and
            the mean batch loss.
        """
        # Inference mode for layers such as dropout.
        self.model.eval()
        correct = total_loss = 0.0

        with torch.no_grad():
            for inputs, labels in test_dataloader:
                pred = self.model(inputs)
                total_loss += self._batch_loss(pred, labels).item()
                correct += accuracy_function(pred, labels)

        n_batches = len(test_dataloader)
        total_accuracy = 100 * correct / n_batches
        mean_loss = total_loss / n_batches

        # "Loss" is the mean batch loss over the whole loader.
        print('Iteration: {}. Loss: {}. Accuracy: {}. total loss: {}.'.format(n_batches, mean_loss, total_accuracy, total_loss))
        return total_accuracy, mean_loss
    

# Train the model

In [None]:
# Arguments of the LSTM class; only referenced by the commented-out line below.
num_classes = 1
input_size = 22
hidden_size = 2
num_layers = 1
seq_length = 22

#model = LSTM(num_classes, input_size, hidden_size, num_layers, seq_length)

# Arguments of LSTMModel.
input_dim = 22
output_dim = 1
hidden_dim = 64
layer_dim = 3
# NOTE(review): the dataloaders were already built in an earlier cell, so this
# reassignment of batch_size does not change them.
batch_size = 64
dropout = 0.2
# NOTE(review): learning_rate is not passed to Trainer, which sets its own lr.
learning_rate = 5e-3

model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim, dropout)
trainer = Trainer(model)

In [None]:
# Train for 5 epochs on the train split, validating on the validation split
# after every epoch.
trainer.fit(5, train_dataloader, val_dataloader)

Epoch 0 / 5


  cpuset_checked))


  33/6860 [..............................] - ETA: 33s - loss: 0.2432 - lr: 0.0500

  return F.mse_loss(input, target, reduction=self.reduction)


Validation
Model saved. Loss updated: -inf -> 0.716
Epoch 1 / 5
Validation
Epoch 2 / 5
Validation
Epoch 3 / 5
Validation
Epoch 4 / 5
Validation


In [None]:
# Score the trained model on the test split; evaluate() prints loss and accuracy.
trainer.evaluate(test_dataloader, accuracy)

  cpuset_checked))
  return F.mse_loss(input, target, reduction=self.reduction)


Iteration: 2325. Loss: 0.10923855006694794. Accuracy: 71.18279569892474. total loss: 486.2972106933594.
