In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np
import kagglehub
from torch.utils.data import DataLoader


In [2]:
path = kagglehub.dataset_download("iammustafatz/diabetes-prediction-dataset")
diabetes_df = pd.read_csv(path + '/diabetes_prediction_dataset.csv')
diabetes_df.head()

Downloading from https://www.kaggle.com/api/v1/datasets/download/iammustafatz/diabetes-prediction-dataset?dataset_version_number=1...


100%|██████████| 734k/734k [00:00<00:00, 866kB/s]

Extracting files...





Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level,diabetes
0,Female,80.0,0,1,never,25.19,6.6,140,0
1,Female,54.0,0,0,No Info,27.32,6.6,80,0
2,Male,28.0,0,0,never,27.32,5.7,158,0
3,Female,36.0,0,0,current,23.45,5.0,155,0
4,Male,76.0,1,1,current,20.14,4.8,155,0


In [3]:
# Apply OneHot Encoding
diabetes_df['gender'] = diabetes_df['gender'].replace({'Male':1, 'Female':0, 'Other': np.nan})
diabetes_df['smoking_history'] = diabetes_df['smoking_history'].replace(
    {'never': 0, 'current': 1, 'former': 2, 'ever': 3, 'not current': 4, 'No Info': np.nan}
)

diabetes_df.head()
diabetes_df.dropna(inplace=True)
diabetes_df.head()

  diabetes_df['gender'] = diabetes_df['gender'].replace({'Male':1, 'Female':0, 'Other': np.nan})
  diabetes_df['smoking_history'] = diabetes_df['smoking_history'].replace(


Unnamed: 0,gender,age,hypertension,heart_disease,smoking_history,bmi,HbA1c_level,blood_glucose_level,diabetes
0,0.0,80.0,0,1,0.0,25.19,6.6,140,0
2,1.0,28.0,0,0,0.0,27.32,5.7,158,0
3,0.0,36.0,0,0,1.0,23.45,5.0,155,0
4,1.0,76.0,1,1,1.0,20.14,4.8,155,0
5,0.0,20.0,0,0,0.0,27.32,6.6,85,0


In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [5]:
X = diabetes_df.drop('diabetes', axis=1)
y = diabetes_df['diabetes']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.1, random_state=42, shuffle=True)
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=42, shuffle=True)



def create_dataloader(x_set, y_set, batch_size):
  X = torch.tensor(X_train, dtype=torch.float32).view(-1, 1, 8).to(device)
  y = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1).to(device)

  return DataLoader(list(zip(X, y)), shuffle=True, batch_size=batch_size)

train_loader = create_dataloader(X_train, y_train, 64)
valid_loader = create_dataloader(X_valid, y_valid, len(X_valid))
test_loader = create_dataloader(X_test, y_test, len(X_test))


In [None]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()

        self.conv1 = nn.Conv1d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.AvgPool1d(kernel_size=2, stride=2)

        self.fc1 = nn.Linear(64 * 2, 128)  # Flattened dimension after pooling
        self.fc2 = nn.Linear(128, 1)       # Output layer for binary classification

        self.sigmoid = nn.Sigmoid()         # Sigmoid activation for binary classification

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = self.pool(x)

        x = torch.relu(self.conv2(x))
        x = self.pool(x)

        x = x.view(-1, 64 * 2)  # Flatten the output of the convolutional layers
        # x = x.view(-1, 1, 8)

        x = torch.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))  # Output layer

        return x


In [None]:
model = CNNModel().to(device)
criterion = nn.BCEWithLogitsLoss()  # Binary cross-entropy loss for binary classification
optimizer = optim.Adam(model.parameters(), lr=0.01)


In [None]:
epochs = 1500
for epoch in range(epochs):
    model.train()  # Set the model to training mode
    optimizer.zero_grad()  # Zero the gradients

    for x_batch, y_batch in train_loader:
      output = model(x_batch)
      output = torch.round(output)
      loss = criterion(output, y_batch)

      loss.backward()
      optimizer.step()

    if (epoch + 1) % 100 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

    model.eval()
    with torch.no_grad():
      for valid_x, valid_y in valid_loader:
        valid_output = model(valid_x)
        valid_output = torch.round(valid_output)
        valid_loss = criterion(valid_output, valid_y)

    if (epoch + 1) % 100 == 0:
        print(f"Valid Loss: {valid_loss.item():.4f}")
        accuracy = accuracy_score(valid_y.cpu().numpy(), valid_output.cpu().numpy())
        print(f"Valid Accuracy: {accuracy * 100:.2f}%\n")


Epoch [100/1500], Loss: 0.6931
Valid Loss: 0.6931
Valid Accuracy: 89.05%

Epoch [200/1500], Loss: 0.6931
Valid Loss: 0.6931
Valid Accuracy: 88.56%

Epoch [300/1500], Loss: 0.6931
Valid Loss: 0.6931
Valid Accuracy: 88.82%

Epoch [400/1500], Loss: 0.6931
Valid Loss: 0.6931
Valid Accuracy: 88.60%

Epoch [500/1500], Loss: 0.6931
Valid Loss: 0.6931
Valid Accuracy: 88.08%

Epoch [600/1500], Loss: 0.6931
Valid Loss: 0.6931
Valid Accuracy: 88.46%

Epoch [700/1500], Loss: 0.6931
Valid Loss: 0.6931
Valid Accuracy: 88.42%

Epoch [800/1500], Loss: 0.6931
Valid Loss: 0.6931
Valid Accuracy: 89.46%

Epoch [900/1500], Loss: 0.6931
Valid Loss: 0.6931
Valid Accuracy: 89.51%

Epoch [1000/1500], Loss: 0.6931
Valid Loss: 0.6931
Valid Accuracy: 88.46%

Epoch [1100/1500], Loss: 0.6931
Valid Loss: 0.6931
Valid Accuracy: 88.67%

Epoch [1200/1500], Loss: 0.6931
Valid Loss: 0.6931
Valid Accuracy: 89.20%

Epoch [1300/1500], Loss: 0.6931
Valid Loss: 0.6931
Valid Accuracy: 88.98%

Epoch [1400/1500], Loss: 0.6931
Va

In [None]:
model.eval()
with torch.no_grad():
  for test_x, test_y in test_loader:
      test_output = model(test_x)
      test_output = torch.round(test_output)
      test_loss = criterion(test_output, test_y)

      print(f"test Loss: {test_loss.item():.4f}")
      accuracy = accuracy_score(test_y.cpu().numpy(), test_output.cpu().numpy())
      print(f"test Accuracy: {accuracy * 100:.2f}%\n")

test Loss: 0.6931
test Accuracy: 89.20%

test Loss: 0.6931
test Accuracy: 89.34%

test Loss: 0.6931
test Accuracy: 89.17%

test Loss: 0.6931
test Accuracy: 88.16%

test Loss: 0.6931
test Accuracy: 88.75%

test Loss: 0.6931
test Accuracy: 89.09%

test Loss: 0.6931
test Accuracy: 88.47%

test Loss: 0.6931
test Accuracy: 89.48%

test Loss: 0.6931
test Accuracy: 87.07%

