In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw diabetes table into a DataFrame.
# NOTE(review): this is an absolute, machine-specific Windows path; the notebook
# will only run as-is on a machine where this exact file location exists.
df = pd.read_csv(
    filepath_or_buffer=r"C:\Users\niels\Neural network excercise\Neural-network-exercise\Data\Raw-data\diabetes.csv"
)

In [3]:
# Show the loaded frame's (rows, columns) tuple as the cell output.
df.shape

(768, 9)

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim

In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [6]:
# Feature matrix: every column except the "Outcome" label, as a float32 array.
# `astype` already returns a fresh array, so no explicit DataFrame copy is needed.
X = df.drop(columns="Outcome").to_numpy().astype("float32")

# Label vector: the "Outcome" column, also float32 so it can feed a BCE loss later.
y = df["Outcome"].to_numpy().astype("float32")

In [7]:
# Stage 1: keep 70% of the rows for training and park the remaining 30% in a
# temporary hold-out pool.
# NOTE(review): no `stratify=` argument is passed, so class balance may differ
# between the splits.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)

# Stage 2: cut the hold-out pool in half -> validation set and test set.
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

Data normaliseres: først bestemmes normaliseringen (middelværdi og standardafvigelse) på træningssættet alene, hvorefter den samme transformation anvendes på X_val og X_test, så validerings- og testdata ikke påvirker skaleringen.

In [8]:
# Learn the scaling statistics from the training split only ...
scaler = StandardScaler().fit(X_train)

# ... then apply that one fitted transformation to all three splits.
X_train, X_val, X_test = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)

In [9]:
# Convert every split to float32 tensors. Labels get a trailing axis so they
# line up with the model's single-column output.
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(-1)

X_val = torch.tensor(X_val, dtype=torch.float32)
y_val = torch.tensor(y_val, dtype=torch.float32).unsqueeze(-1)

X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(-1)

In [10]:
# Width of the network's input layer = number of feature columns (last axis).
in_features = X_train.shape[-1]

In [11]:
# Echo the input width as the cell output to confirm the feature count.
in_features

8

In [14]:
class DiabetesNet(nn.Module):
    """Small fully connected binary classifier.

    Architecture: in_features -> 16 -> 8 -> 1, with ReLU after each hidden
    layer and a Sigmoid on the output, so the forward pass returns values in
    [0, 1] (one per input row).
    """

    def __init__(self, in_features):
        """Build the layer stack.

        Args:
            in_features: number of input columns fed to the first linear layer.
        """
        super().__init__()

        # Hidden layers are generated pairwise from consecutive widths; the
        # creation order matches Linear(in, 16) -> Linear(16, 8) -> Linear(8, 1).
        widths = (in_features, 16, 8)
        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.ReLU())

        # Output head: a single unit squashed to a probability-like value.
        stack.append(nn.Linear(widths[-1], 1))
        stack.append(nn.Sigmoid())

        self.layers = nn.Sequential(*stack)

    def forward(self, x):
        """Run `x` through the sequential stack and return the sigmoid output."""
        return self.layers(x)


In [15]:
# Seed PyTorch's RNG before building the network so the randomly initialised
# layer weights (and therefore the training curve and test metrics) are the
# same on every Restart & Run All. The data splits are already pinned with
# random_state=42; without this the model itself was the remaining source of
# run-to-run variation.
torch.manual_seed(42)
model = DiabetesNet(in_features)

In [16]:
# Binary cross-entropy on probabilities; the network's final Sigmoid already
# maps outputs into [0, 1], which is what BCELoss expects.
loss_fn = nn.BCELoss(reduction="mean")

# Adam over all trainable parameters with a 1e-3 learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [17]:
# Sanity check: list the shape of every weight matrix and bias vector, in
# registration order.
for p in model.parameters():
    print(p.size())

torch.Size([16, 8])
torch.Size([16])
torch.Size([8, 16])
torch.Size([8])
torch.Size([1, 8])
torch.Size([1])


In [18]:
epochs = 1000

# Full-batch training: each epoch performs exactly one optimisation step over
# the whole training tensor (no mini-batching).
# NOTE(review): X_val / y_val are never consulted in this loop, so only the
# training loss is tracked here.
model.train()  # the mode is never switched inside the loop, so set it once up front

for epoch in range(epochs):
    # Drop gradients left over from the previous step; backward() accumulates.
    optimizer.zero_grad()

    # Forward pass and loss on the complete training set.
    y_pred = model(X_train)
    loss = loss_fn(y_pred, y_train)

    # Backpropagate, then let the optimizer apply the weight update.
    loss.backward()
    optimizer.step()

    # Report the training loss after every 100th epoch.
    if (epoch + 1) % 100 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")


Epoch 100/1000, Loss: 0.5235
Epoch 200/1000, Loss: 0.4222
Epoch 300/1000, Loss: 0.3982
Epoch 400/1000, Loss: 0.3815
Epoch 500/1000, Loss: 0.3673
Epoch 600/1000, Loss: 0.3526
Epoch 700/1000, Loss: 0.3369
Epoch 800/1000, Loss: 0.3210
Epoch 900/1000, Loss: 0.3040
Epoch 1000/1000, Loss: 0.2903


In [19]:
from sklearn.metrics import roc_auc_score, roc_curve

In [20]:
# Put the model in evaluation mode and score the test split without building
# an autograd graph.
model.eval()
with torch.no_grad():
    y_pred_test = model(X_test)

# Threshold the predicted probabilities at 0.5 to get hard 0/1 labels, then
# take the fraction that matches the ground truth.
y_pred_labels = y_pred_test.ge(0.5).float()
correct = y_pred_labels.eq(y_test).float().mean()
print(f"Test accuracy: {correct.item() * 100:.2f}%")

Test accuracy: 72.41%


In [21]:
# Somers' D is computed here from the test-set ROC AUC as D = 2 * AUC - 1,
# using the predicted probabilities (not the thresholded labels), and is
# printed as a percentage rounded to two decimals.
print(f"Somers'D: {round(100*(2*roc_auc_score(y_test, y_pred_test)-1),2)}%")

Somers'D: 52.66%
