In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
from torch import nn
import pandas as pd

# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

x = pd.read_csv('./data.csv')

# Encode the diagnosis label as an integer: 'M' -> 1, 'B' -> 0.
# The explicit lookup raises KeyError on any label outside this mapping
# instead of silently producing a missing value.
t = {'M': 1,'B': 0}
x['diagnosis'] = [t[label] for label in x['diagnosis']]

# Hold out 30% of the rows for testing; the fixed random_state makes the
# split reproducible.
x_train, x_test = train_test_split(x, test_size=0.3, random_state=413)
y_train, y_test = x_train['diagnosis'], x_test['diagnosis']

# Drop the target and the columns that are not used as model inputs.
# NOTE(review): 'id' and 'Unnamed: 32' are presumably a row identifier and an
# empty trailing column -- confirm against data.csv.
non_feature_columns = ['diagnosis', 'id', 'Unnamed: 32']
x_train = x_train.drop(columns=non_feature_columns)
x_test = x_test.drop(columns=non_feature_columns)

# Standardise the features. The scaler is fitted on the training split ONLY
# and then applied unchanged to the test split: re-fitting on the test data
# would scale it with different statistics than the model was trained on and
# would leak test-set information into preprocessing.
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)

# Move everything to the target device as tensors. The targets are cast to
# float64 explicitly. squeeze() only removes size-1 dimensions.
x_test = torch.tensor(x_test).to(device).squeeze()
x_train = torch.tensor(x_train).to(device).squeeze()
y_train = torch.tensor(y_train.values, dtype=torch.float64).to(device).squeeze()
y_test = torch.tensor(y_test.values, dtype=torch.float64).to(device).squeeze()

# Displayed as the cell output: a quick sanity check of the four shapes.
y_train.shape, x_train.shape, y_test.shape, x_test.shape

In [None]:
# Seed PyTorch's RNG before any layer is created so the initial weights are
# the same on every run.
torch.manual_seed(3)

# Width of the input layer = number of values in one training row.
n_inputs = len(x_train[0])

# Fully connected stack: n_inputs -> 32 -> 16 -> 1, with ReLU between the
# linear layers and no activation after the last one.
layers = [
    nn.Linear(n_inputs, 32),
    nn.ReLU(),
    nn.Linear(32, 16),
    nn.ReLU(),
    nn.Linear(16, 1),
]
model = nn.Sequential(*layers)
model = model.to(device)

In [None]:
from torchmetrics import Accuracy
# Accuracy metric configured for the binary task, moved to the same device
# that the rest of the notebook uses.
torchmetric_accuracy = Accuracy(
    task='binary',
    num_classes=2,
).to(device)

In [None]:
# Binary cross-entropy computed directly from raw logits.
loss_fn = nn.BCEWithLogitsLoss()

# Adam over all of the model's parameters with a learning rate of 0.01.
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [None]:
# Number of full passes over the training data.
epochs = 80

# Cast the model's floating-point parameters to float64; the targets are cast
# to float64 when the data is prepared.
model = model.double()

# Per-epoch history buffers, appended to by the training loop and read by the
# plotting cells.
train_loss, test_loss = [], []
train_acc, test_acc = [], []

In [None]:
# Full-batch training loop. Each epoch records loss and accuracy on both
# splits using the weights as they are BEFORE that epoch's optimizer step, so
# the train and test curves are measured on the same parameters.
for epoch in range(epochs):
    # --- Held-out evaluation -------------------------------------------
    # Run in eval mode with autograd disabled: no graph is built for the test
    # data, and a single forward pass feeds both the loss and the accuracy.
    # no_grad is used rather than inference_mode so the outputs stay ordinary
    # tensors; the same metric object is also called on training outputs
    # outside this context.
    model.eval()
    with torch.no_grad():
        test_logits = model(x_test).squeeze()
        test_loss.append(loss_fn(test_logits, y_test).item())
        # Convert logits to probabilities explicitly. The metric only applies
        # a sigmoid itself when some value lies outside [0, 1]; a batch whose
        # logits all fall inside that range would otherwise be thresholded as
        # if the logits were already probabilities.
        test_acc.append(torchmetric_accuracy(torch.sigmoid(test_logits), y_test).item())

    # --- Training step ---------------------------------------------------
    model.train()
    y_logits = model(x_train).squeeze()
    loss = loss_fn(y_logits, y_train)

    train_loss.append(loss.item())
    # detach(): the metric needs the values only, not the autograd graph.
    train_acc.append(torchmetric_accuracy(torch.sigmoid(y_logits.detach()), y_train).item())

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Progress line every 20 epochs and on the final epoch:
    # train loss, test loss, train accuracy, test accuracy.
    if epoch % 20 == 0 or epoch == epochs - 1:
        print(round(train_loss[-1], 4),
              round(test_loss[-1], 4),
              round(train_acc[-1], 4),
              round(test_acc[-1], 4))

# Leave the model in eval mode once training has finished.
model.eval()

# Final summary. Formatting the percentage directly avoids the floating-point
# noise that multiplying a rounded fraction by 100 produces, and the guard
# avoids an IndexError when no epoch has been recorded.
if train_acc and test_acc:
    print(f"\nTrain Acc: {train_acc[-1] * 100:.2f} %\nTest Acc : {test_acc[-1] * 100:.2f} %")

In [None]:
import matplotlib.pyplot as plt

# Plot the recorded train and test loss against the epoch index.
for history, series_label in ((train_loss, "Train loss"), (test_loss, "Test loss")):
    epoch_axis = range(len(history))
    plt.plot(epoch_axis, history, label=series_label)

plt.title("Loss curves")
plt.xlabel("Epochs")
plt.ylabel("Loss")
# Trailing semicolon suppresses the Legend repr in the cell output.
plt.legend();

In [None]:
# Plot the recorded train and test accuracy against the epoch index.
# The x-axis for each curve is derived from the accuracy series being drawn
# (not from the loss lists), so x and y always have the same length.
for history, series_label in ((train_acc, "Train Acc"), (test_acc, "Test Acc")):
    epoch_axis = range(len(history))
    plt.plot(epoch_axis, history, label=series_label)

plt.title("Accuracy curves")
plt.xlabel("Epochs")
plt.ylabel("Accuracy")
# Trailing semicolon suppresses the Legend repr in the cell output.
plt.legend();