In [1]:
import pandas as pd
import sklearn
import torch

In [2]:
df = pd.read_csv('pmi-data.csv')

In [3]:
%%html
<h1>1. Datasets and Dataloaders</h1>

In [4]:
train_data, valid_data = sklearn.model_selection.train_test_split(df, test_size=0.25, shuffle=True)

In [5]:
class StrokeDataset(torch.utils.data.Dataset):
    def __init__(self, data):
        self.labels = data['stroke']
        self.samples = data.drop(columns=['stroke', 'id', 'split'])

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.samples.iloc[idx], self.labels.iloc[idx]

In [6]:
train_dataloader = StrokeDataset(train_data)
valid_dataloader = StrokeDataset(valid_data)

In [7]:
%%html
<h3>I think shuffling is a good idea because if we take a look at the pmi-data.csv we can see that all the cases with stroke=1 come first in the csv so that means if we didnt shuffle, all of them would be chosen for the training and none would be left for the testing</h3>

In [8]:
%%html
<h1>2. Model</h1>

In [89]:
import torch.nn.functional as F
class Model(torch.nn.Module):
    def __init__(self, in_size, out_size):
        super().__init__()
        self.l1 = torch.nn.Linear(in_features=in_size, out_features=32)
        self.l2 = torch.nn.Linear(in_features=32, out_features=16)
        self.l3 = torch.nn.Linear(in_features=16, out_features=8)
        self.l4 = torch.nn.Linear(in_features=8, out_features=16)
        self.l5 = torch.nn.Linear(in_features=16, out_features=out_size)

    def forward(self, x):
        x = torch.Tensor(x.values)
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.relu(self.l3(x))
        x = F.relu(self.l4(x))
        x = self.l5(x)
        return x
        

In [90]:
model = Model(10, 2)

In [11]:
# for param in model.parameters():
#     print(param)

In [12]:
%%html
<h1>3. Optimizer and Loss function</h1>

In [80]:
optimizer = torch.optim.Adam(model.parameters())
loss_fn = torch.nn.BCEWithLogitsLoss()

In [14]:
%%html
<h1>4. Training </h1>

In [105]:
N_EPOCHS = 1

for epoch in range(N_EPOCHS):
    for i, data in enumerate(train_dataloader, 0):
        features, labels = data

        optimizer.zero_grad()

        outputs = model(features)
        y_true = torch.tensor(labels).to(torch.float)
        loss = loss_fn(outputs, torch.Tensor([1 if y_true == 0 else 0, 1 if y_true == 1 else 0]))
        loss.backward()
        optimizer.step()

        print(loss)

        

        
        

tensor(0.7484, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.8033, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.6723, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.7656, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.7860, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.9168, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.7698, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.7334, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.7505, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.7989, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.7506, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.8068, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.7404, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.7228, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.9583, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.7618, grad_fn=<BinaryCrossEntro