In [1]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset, random_split
import pandas as pd

In [2]:
df = pd.read_csv('../data/assignment_2/heart_attack.csv')
df

Unnamed: 0,age,gender,impluse,pressurehight,pressurelow,glucose,kcm,troponin,class
0,64,1,66,160,83,160.0,1.80,0.012,negative
1,21,1,94,98,46,296.0,6.75,1.060,positive
2,55,1,64,160,77,270.0,1.99,0.003,negative
3,64,1,70,120,55,270.0,13.87,0.122,positive
4,55,1,64,112,65,300.0,1.08,0.003,negative
...,...,...,...,...,...,...,...,...,...
1314,44,1,94,122,67,204.0,1.63,0.006,negative
1315,66,1,84,125,55,149.0,1.33,0.172,positive
1316,45,1,85,168,104,96.0,1.24,4.250,positive
1317,54,1,58,117,68,443.0,5.80,0.359,positive


In [4]:
class HeartAttackDataset(Dataset):
	def __init__(self):
		super().__init__()
		df = pd.read_csv('../data/assignment_2/heart_attack.csv')
		# map class labels negative and positive to 0 and 1
		df['class'] = df['class'].map({'negative': 0, 'positive': 1})

		tensor = torch.tensor(df.values, dtype=torch.float32)

		# convert to tensors
		self.features = tensor[:, 0:-1]
		self.labels = tensor[:, -1]
		# print(self.features.shape, self.labels.shape)
		# print(len(self.labels))

	def __len__(self):
		return len(self.labels)
		
	def __getitem__(self, idx):
		return self.features[idx], self.labels[idx]
	
	def get_splits(self, n_test=0.2):
		# determine sizes
		test_size = round(n_test * self.__len__())
		train_size = self.__len__() - test_size

		# calculate the split
		return random_split(self, [train_size, test_size])

In [24]:
class NNClassifier(nn.Module):
	def __init__(self):
		super().__init__()
		self.in_layer = nn.Linear(8, 16)
		self.hidden_1 = nn.Linear(16, 8)
		self.out_layer = nn.Linear(8, 1)
		self.activation = nn.ReLU()
	
	def forward(self, x):
		x = self.in_layer(x)
		x = self.activation(x)
		x = self.hidden_1(x)
		x = self.activation(x)
		return self.out_layer(x)

In [46]:
dataset = HeartAttackDataset()

In [47]:
dataset.features.shape, dataset.labels.shape

(torch.Size([1319, 8]), torch.Size([1319]))

In [54]:
model = NNClassifier()

In [55]:
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.003)

In [56]:
# Calculate accuracy (a classification metric)
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100 
    return acc

In [57]:
epochs = 1000
batch_size = 32

In [58]:
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size

train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)

In [59]:
for epoch in range(epochs):
	model.train()
	train_loss = 0
	acc = 0
	for i, data in enumerate(train_loader):
		
		# getting a batch of data
		inputs, labels = data

		# forward pass
		logits = model(inputs).squeeze()
		
		y_pred = torch.round(torch.sigmoid(logits))
		# calculate loss
		loss = loss_fn(logits, labels)
		train_loss += loss.item()

		# calculate accuracy
		accuracy = accuracy_fn(labels, y_pred)
		acc += accuracy

		# backward pass
		optimizer.zero_grad()
		loss.backward()
		optimizer.step()
	train_loss /= len(train_loader)
	acc /= len(train_loader)

	# add testing code
	with torch.inference_mode():
		test_loss = 0
		for i, data in enumerate(test_loader):
			inputs, labels = data
			outputs = model(inputs).squeeze()
			test_loss += loss_fn(outputs, labels).item()
		test_loss /= len(test_loader)

	if (epoch+1) % 50 == 0 or epoch == 0:
		print(f'Epoch: {epoch+1}/{epochs}, loss: {train_loss:.4f}, test_loss: {test_loss:.4f}, acc: {acc:.4f}%')

Epoch: 1/1000, loss: 1.9413, test_loss: 0.6919, acc: 56.7937%
Epoch: 50/1000, loss: 0.4606, test_loss: 0.4900, acc: 76.5885%
Epoch: 100/1000, loss: 0.4123, test_loss: 0.4462, acc: 79.3377%
Epoch: 150/1000, loss: 0.3798, test_loss: 0.3916, acc: 81.6105%
Epoch: 200/1000, loss: 0.3512, test_loss: 0.4034, acc: 83.4219%
Epoch: 250/1000, loss: 0.3380, test_loss: 0.3822, acc: 83.9809%
Epoch: 300/1000, loss: 0.3176, test_loss: 0.3629, acc: 84.6499%
Epoch: 350/1000, loss: 0.2857, test_loss: 0.3402, acc: 87.5886%
Epoch: 400/1000, loss: 0.2444, test_loss: 0.3505, acc: 89.2045%
Epoch: 450/1000, loss: 0.2224, test_loss: 0.3216, acc: 89.5772%
Epoch: 500/1000, loss: 0.2172, test_loss: 0.2997, acc: 89.8460%
Epoch: 550/1000, loss: 0.2030, test_loss: 0.3170, acc: 91.4712%
Epoch: 600/1000, loss: 0.2067, test_loss: 0.5573, acc: 90.3379%
Epoch: 650/1000, loss: 0.1760, test_loss: 0.6183, acc: 92.5098%
Epoch: 700/1000, loss: 0.1531, test_loss: 0.2172, acc: 93.5606%
Epoch: 750/1000, loss: 0.1724, test_loss: 0