<a href="https://colab.research.google.com/github/HK-Sepuri/My_Small_Projects/blob/main/Diabetes%20Prediction%20using%20Pytorch/Diabetes_Prediction_using_Pytorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Diabetes Prediction using Pytorch

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
import numpy as np
import pandas as pd

In [2]:
# Data Collection and Analysis
# Load the dataset
diabetes_dataset = pd.read_csv("/content/diabetes.csv")

In [3]:
# Display the first few rows of the dataset
print("First few rows of the dataset:")
print(diabetes_dataset.head())

First few rows of the dataset:
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [4]:
# Display dataset statistics
print("\nDataset Statistics:")
print(diabetes_dataset.describe())


Dataset Statistics:
       Pregnancies     Glucose  BloodPressure  SkinThickness     Insulin  \
count   768.000000  768.000000     768.000000     768.000000  768.000000   
mean      3.845052  120.894531      69.105469      20.536458   79.799479   
std       3.369578   31.972618      19.355807      15.952218  115.244002   
min       0.000000    0.000000       0.000000       0.000000    0.000000   
25%       1.000000   99.000000      62.000000       0.000000    0.000000   
50%       3.000000  117.000000      72.000000      23.000000   30.500000   
75%       6.000000  140.250000      80.000000      32.000000  127.250000   
max      17.000000  199.000000     122.000000      99.000000  846.000000   

              BMI  DiabetesPedigreeFunction         Age     Outcome  
count  768.000000                768.000000  768.000000  768.000000  
mean    31.992578                  0.471876   33.240885    0.348958  
std      7.884160                  0.331329   11.760232    0.476951  
min      0.000

In [5]:
# Display class distribution
print("\nClass Distribution:")
print(diabetes_dataset["Outcome"].value_counts())


Class Distribution:
0    500
1    268
Name: Outcome, dtype: int64


In [6]:
# Data Preprocessing
# Creating feature matrix X and target vector y
X = diabetes_dataset.drop(columns="Outcome", axis=1)
y = diabetes_dataset["Outcome"]

In [7]:
# Standardize the data
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [8]:
# Convert to PyTorch tensors
X_tensor = torch.Tensor(X)
y_tensor = torch.Tensor(y.values).view(-1, 1)

In [9]:
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_tensor, y_tensor, test_size=0.2, stratify=y, random_state=5)

In [10]:
# Define the neural network
class DiabetesModel(nn.Module):
    def __init__(self):
        super(DiabetesModel, self).__init__()
        self.fc1 = nn.Linear(8, 16)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(16, 8)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(8, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return x

In [11]:
# Instantiate the model, define loss and optimizer
model = DiabetesModel()
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [12]:
# Training loop
num_epochs = 1000
for epoch in range(num_epochs):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


    with torch.no_grad():
        y_train_pred = (model(X_train) > 0.5).float()
    train_accuracy = accuracy_score(y_train.numpy(), y_train_pred.numpy())
    if epoch % 100 == 0:
      # Print training accuracy for each epoch
      print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}, Training Accuracy: {train_accuracy:.4f}")


Epoch [1/1000], Loss: 0.6731, Training Accuracy: 0.6336
Epoch [101/1000], Loss: 0.5858, Training Accuracy: 0.7720
Epoch [201/1000], Loss: 0.4670, Training Accuracy: 0.8046
Epoch [301/1000], Loss: 0.4274, Training Accuracy: 0.8143
Epoch [401/1000], Loss: 0.4123, Training Accuracy: 0.8176
Epoch [501/1000], Loss: 0.3975, Training Accuracy: 0.8192
Epoch [601/1000], Loss: 0.3837, Training Accuracy: 0.8225
Epoch [701/1000], Loss: 0.3673, Training Accuracy: 0.8290
Epoch [801/1000], Loss: 0.3504, Training Accuracy: 0.8388
Epoch [901/1000], Loss: 0.3359, Training Accuracy: 0.8599


In [13]:
# Evaluation on test data
with torch.no_grad():
    y_test_pred = (model(X_test) > 0.5).float()
test_accuracy = accuracy_score(y_test.numpy(), y_test_pred.numpy())

In [14]:
print("\nTraining and Test Accuracy:")
print(f"Training Accuracy: {train_accuracy}")
print(f"Test Accuracy: {test_accuracy}")


Training and Test Accuracy:
Training Accuracy: 0.8648208469055375
Test Accuracy: 0.7272727272727273
