In [18]:
# Data handling
import pandas as pd
import numpy as np

# Train / test split
from sklearn.model_selection import train_test_split

# Preprocessing
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

# Metrics
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader,Dataset




In [19]:
df = pd.read_csv(r"C:\Users\Admin\Downloads\diabetes.csv")

In [20]:
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1
...,...,...,...,...,...,...,...,...,...
763,10,101,76,48,180,32.9,0.171,63,0
764,2,122,70,27,0,36.8,0.340,27,0
765,5,121,72,23,112,26.2,0.245,30,0
766,1,126,60,0,0,30.1,0.349,47,1


In [21]:
X = df.drop(columns="Outcome")
y = df["Outcome"]

In [22]:
df.columns


Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],
      dtype='object')

In [23]:
num_cols =['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',
       'BMI', 'DiabetesPedigreeFunction', 'Age']
preprocesor = ColumnTransformer(
    transformers=[
        ("num",StandardScaler(),num_cols)
    ]
)

In [24]:
test_size = 0.15
X_trainval,X_test,y_trainval,y_test = train_test_split (X,y,test_size=0.15,random_state=42,stratify=y)

val_size = 0.15
val_ratio = val_size / (1-test_size)

X_train,X_val,y_train,y_val = train_test_split(X_trainval,y_trainval,test_size=val_ratio,random_state=42,stratify=y_trainval)

X_train = preprocesor.fit_transform(X_train)
X_val = preprocesor.transform(X_val)
X_test = preprocesor.transform(X_test)

In [29]:
class DiabetesDataset(Dataset):
    def __init__(self,X,y):
        self.X = torch.tensor(X,dtype=torch.float32)
        self.y = torch.tensor(y.to_numpy().reshape(-1,1),dtype=torch.float32)

    def __len__(self):
        return len(self.X)

    def __getitem__(self,idx):
        return self.X[idx],self.y[idx]

In [31]:
train_dataset = DiabetesDataset(X_train,y_train)
test_dataset = DiabetesDataset(X_test,y_test)
val_dataset = DiabetesDataset(X_val,y_val)

train_loader = DataLoader(train_dataset,batch_size=32,shuffle=True)
test_loader = DataLoader(test_dataset,batch_size=32)
val_loader = DataLoader(val_dataset,batch_size=32)

In [34]:
class DiabetisNN(nn.Module):
    def __init__(self,input_dim):
        super().__init__()
        
        self.model = nn.Sequential(
            nn.Linear(input_dim,32),
            nn.ReLU(),
            nn.Linear(32,16),
            nn.ReLU(),
            nn.Linear(16,8),
            nn.ReLU(),
            nn.Linear(8,1),
            nn.Sigmoid()
        )

    def forward(self,x):
        return self.model(x)
        

In [38]:
input_dim = X_train.shape[1]
model = DiabetisNN(input_dim)

criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(),lr=000.1)

In [41]:
def train_model(model, train_loader, val_loader, optimizer, criterion, epochs=20):
    for epoch in range(epochs):
        model.train()  # training mode
        total_loss = 0.0

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        avg_train_loss = total_loss / len(train_loader)

        # Validation
        model.eval()  # evaluation mode
        val_loss = 0.0
        all_preds, all_targets = [], []

        with torch.no_grad():
            for X_val_batch, y_val_batch in val_loader:
                outputs = model(X_val_batch)
                loss = criterion(outputs, y_val_batch)
                val_loss += loss.item()
                preds = (outputs >= 0.5).float()  # threshold for BCELoss
                all_preds.extend(preds.squeeze().numpy())
                all_targets.extend(y_val_batch.squeeze().numpy())

        avg_val_loss = val_loss / len(val_loader)
        val_acc = accuracy_score(all_targets, all_preds)

        print(f"Epoch {epoch+1}/{epochs} | "
              f"Train Loss: {avg_train_loss:.4f} | "
              f"Val Loss: {avg_val_loss:.4f} | "
              f"Val Acc: {val_acc:.4f}")


In [42]:
train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    optimizer=optimizer,
    criterion=criterion,
    epochs=20  # you can change this
)


Epoch 1/20 | Train Loss: 0.7154 | Val Loss: 0.6355 | Val Acc: 0.6466
Epoch 2/20 | Train Loss: 0.5751 | Val Loss: 0.6307 | Val Acc: 0.6466
Epoch 3/20 | Train Loss: 0.5077 | Val Loss: 0.6231 | Val Acc: 0.6810
Epoch 4/20 | Train Loss: 0.5067 | Val Loss: 0.5947 | Val Acc: 0.7155
Epoch 5/20 | Train Loss: 0.4965 | Val Loss: 0.7097 | Val Acc: 0.7069
Epoch 6/20 | Train Loss: 0.4975 | Val Loss: 0.6108 | Val Acc: 0.7241
Epoch 7/20 | Train Loss: 0.4898 | Val Loss: 0.5903 | Val Acc: 0.6724
Epoch 8/20 | Train Loss: 0.4789 | Val Loss: 0.6260 | Val Acc: 0.7241
Epoch 9/20 | Train Loss: 0.4600 | Val Loss: 0.5865 | Val Acc: 0.6638
Epoch 10/20 | Train Loss: 0.4894 | Val Loss: 0.5630 | Val Acc: 0.6983
Epoch 11/20 | Train Loss: 0.4816 | Val Loss: 0.5638 | Val Acc: 0.6724
Epoch 12/20 | Train Loss: 0.4801 | Val Loss: 0.6133 | Val Acc: 0.7672
Epoch 13/20 | Train Loss: 0.4715 | Val Loss: 0.5475 | Val Acc: 0.7672
Epoch 14/20 | Train Loss: 0.4788 | Val Loss: 0.7352 | Val Acc: 0.6983
Epoch 15/20 | Train Loss: 0.4

In [43]:
from sklearn.metrics import confusion_matrix, classification_report

model.eval()
all_preds, all_targets = [], []

with torch.no_grad():
    for X_batch, y_batch in test_loader:
        outputs = model(X_batch)
        preds = (outputs >= 0.5).float()
        all_preds.extend(preds.squeeze().numpy())
        all_targets.extend(y_batch.squeeze().numpy())

print("Confusion Matrix:")
print(confusion_matrix(all_targets, all_preds))
print("\nClassification Report:")
print(classification_report(all_targets, all_preds))


Confusion Matrix:
[[48 28]
 [ 4 36]]

Classification Report:
              precision    recall  f1-score   support

         0.0       0.92      0.63      0.75        76
         1.0       0.56      0.90      0.69        40

    accuracy                           0.72       116
   macro avg       0.74      0.77      0.72       116
weighted avg       0.80      0.72      0.73       116

