In [1]:
import pandas as pd
import numpy as np

In [2]:
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader

In [3]:
from sklearn.model_selection import train_test_split

# Data location

In [4]:
data_path = "data/clean_data.csv"

# Set seed

In [5]:
SEED = 42

# Load data

In [6]:
df = pd.read_csv(data_path)

In [7]:
df.shape

(11627, 40)

# DataSet

In [8]:
class HeatDeseaseDataset(Dataset): 
    
    def __init__(self, path):
        
        self.data = np.loadtxt(path, delimiter=",", dtype=np.float32, skiprows=1)
       
        self.x = torch.from_numpy(self.data[:, 1:18])
        self.y = torch.from_numpy(self.data[:, 22])
        
        self.len = len(self.data)

    def __len__(self):
        return self.len
    
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

# DataLoader

In [9]:
def create_dataloaders(dataset, batch_size):
    lengths = [round(len(dataset) * split) for split in [TRAIN_SPLIT, VALIDATION_SPLIT, TEST_SPLIT]]
    
    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(dataset, lengths=lengths, generator=torch.Generator().manual_seed(SEED))
    
    train_dataloader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
        prefetch_factor=2,
        persistent_workers=False,
        pin_memory=True
    )

    val_dataloader = torch.utils.data.DataLoader(
        val_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
        prefetch_factor=2,
        persistent_workers=False,
        pin_memory=True
    )

    test_dataloader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=1,
        shuffle=False,
        num_workers=4,
        prefetch_factor=2,
        persistent_workers=False,
        pin_memory=True
    )
    
    print(f'Total dataset: {len(train_dataloader) + len(val_dataloader) + len(test_dataloader)}, '
            f'train dataset: {len(train_dataloader)}, val dataset: {len(val_dataloader)}, test_dataset: {len(test_dataloader)}')
    return train_dataloader, val_dataloader, test_dataloader

# Trainer

In [10]:
def accuracy(prediction, label):
     
                iterator += 1

                if iterator % 500 == 0:
                    # Calculate Accuracy         
                    correct = 0
                    total = 0
                    # Iterate through test dataset
                    for data, labels in testloader:
                        # Load images to a Torch Variable
                        data = data

                        # Forward pass only to get logits/output
                        outputs = model(data)

                        # Get predictions from the maximum value
                        _, predicted = torch.max(outputs.data, 1)

                        # Total number of labels
                        total += labels.size(0)

                        # Total correct predictions
                        correct += (predicted == labels).sum()

                    accuracy = 100 * correct / total

In [19]:
class Trainer:
    def __init__(self, model, lr=3e-3, loss=torch.nn.BCELoss(), optimizer=torch.optim.SGD):
        self.model = model
        self.criterion = loss
        self.optimizer = optimizer(model.parameters(), lr=lr)
        
        self.history = {'lr': [], 'loss': [], 'val_loss': []}
        self.max_val_dice = float('-inf')
        
    def fit(self, train_dataloader, val_dataloader, nb_epochs):
        for epoch in range(nb_epochs):
            print(f'Epoch {epoch}')
            train_loss = val_loss = train_dice = val_dice = 0.0
            
            self.model.train()
            
            for i, batch in enumerate(train_dataloader):
                inputs, labels = batch

                # Clear gradients w.r.t. parameters
                self.optimizer.zero_grad()
                
                # Forward pass to get output/logits
                outputs = self.model(inputs)

                # Calculate Loss: softmax --> cross entropy loss
                loss = self.criterion(outputs, labels.unsqueeze(0))
                train_loss += loss

                # Getting gradients w.r.t. parameters
                loss.backward()

                # Updating parameters
                self.optimizer.step()

                print('Validation')
                
                self.model.eval()
                  
            with torch.no_grad():
                for i, batch in enumerate(val_dataloader):
                    inputs, labels = batch

                    outputs = self.model(inputs)

                    val_loss += loss

            train_loss = train_loss / len(train_dataloader)
            val_loss = val_loss / len(val_dataloader)
            lr = self.scheduler.optimizer.parm_group[0]['lr']
            
    def evaluate(self, test_dataloader):
        correct = total_loss = total = 0.0
        with torch.no_grad():       
            # Iterate through test dataset
            for i, (inputs, labels) in enumerate(test_dataloader):

                # Forward pass only to get logits/output
                pred = self.model(inputs)
                loss = self.criterion(pred, labels)
                total_loss += loss

                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)

                # Total number of labels
                total += labels.size(0)

                # Total correct predictions
                correct += (predicted == labels).sum()

            accuracy = 100 * correct / total

            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}. total loss'.format(iterator, loss.item(), accuracy, total_loss))

# The logistic regession function

In [20]:
class LogisticRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LogisticRegression, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        out = self.linear(x)
        return torch.sigmoid(out)

# Prepare the data

In [21]:
TEST_SPLIT = 0.2
VALIDATION_SPLIT = 0.21
TRAIN_SPLIT = 1 - TEST_SPLIT - VALIDATION_SPLIT

In [22]:
batch_size = 1

dataset = HeatDeseaseDataset(data_path)
train_dataloader, val_dataloader, test_dataloader = create_dataloaders(dataset, batch_size)

Total dataset: 11627, train dataset: 6860, val dataset: 2442, test_dataset: 2325


# Train

In [23]:
model = LogisticRegression(17, 1)

In [24]:
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
criterion = torch.nn.BCELoss()

In [25]:
trainer = Trainer(model)

In [26]:
trainer.fit(
    train_dataloader,
    val_dataloader,
    nb_epochs=10
) 

Epoch 0
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Va

Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation

Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation

Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation

Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation

Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation

Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation

Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation

Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation
Validation

KeyboardInterrupt: 

batch_size = 4

dataset = HeatDeseaseDataset()
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

testset = HeartDeseaseTestSet()
testloader = torch.utils.data.DataLoader(dataset=testset, 
                                          batch_size=batch_size, 
                                          shuffle=False)


#dataiter = iter(dataloader)
#data = dataiter.next()
len(dataset)

iterator = 0

for epoch in range(10):
    print("g pa planté " + str(epoch))
    for i, (data, labels) in enumerate(dataloader):
        print("g pa planté " + str(i))
        data = data
        labels = labels

        # Clear gradients w.r.t. parameters
        optimizer.zero_grad()

        # Forward pass to get output/logits
        outputs = model(data)

        # Calculate Loss: softmax --> cross entropy loss
        loss = criterion(outputs, labels)

        # Getting gradients w.r.t. parameters
        loss.backward()

        # Updating parameters
        optimizer.step()

        iterator += 1

        if iterator % 500 == 0:
            # Calculate Accuracy         
            correct = 0
            total = 0
            # Iterate through test dataset
            for data, labels in testloader:
                # Load images to a Torch Variable
                data = data

                # Forward pass only to get logits/output
                outputs = model(data)

                # Get predictions from the maximum value
                _, predicted = torch.max(outputs.data, 1)

                # Total number of labels
                total += labels.size(0)

                # Total correct predictions
                correct += (predicted == labels).sum()

            accuracy = 100 * correct / total

            # Print Loss
            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(iterator, loss.item(), accuracy))