<a href="https://colab.research.google.com/github/07umar07/GlucoGuard/blob/main/Algorithm(Google_Colab).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import sklearn
import pandas as pd
import imblearn as imb
from sklearn.model_selection import train_test_split
from google.colab import drive

# Diabetes Prediction with Neural Network
# Machine Learning Models
# Date Created: October 28, 2024
# By Umar Abdul Hakim Robbani

# Make Google Drive visible to this Colab runtime; the dataset is read from it.
drive.mount('/content/drive')

# 1. Data loading and feature preparation
DATASET_CSV = '/content/drive/My Drive/Datasets/Diabetes_Datasets/diabetes.csv'
df = pd.read_csv(DATASET_CSV)
# print(df.isnull().sum())
# zero_counts = df.eq(0).sum()
# print(zero_counts)

# Zeros in these columns are treated as missing-value placeholders and are
# replaced by the column median. The median is taken over the non-zero entries
# only: computing it over the whole column would count the placeholders
# themselves and bias the imputed value.
# NOTE(review): the medians are still computed over all rows, i.e. before the
# train/dev/test split further down, so dev/test rows influence the imputed
# values. Consider fitting them on the training split only.
ZERO_AS_MISSING_COLUMNS = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
for column in ZERO_AS_MISSING_COLUMNS:
  observed = df.loc[df[column] != 0, column]
  if observed.empty:
    # Nothing to estimate a median from; leave the column as it is.
    continue
  df[column] = df[column].replace(0, observed.median())

# Feature matrix: every column except the target and the two predictors that
# are deliberately excluded from the model. Target vector: the Outcome column.
X = df.drop(columns=['Outcome', 'SkinThickness', 'Insulin']).values
y = df['Outcome'].values

# Split the data in two stages with a fixed seed:
#   1. hold out 15% of all rows as the test set;
#   2. take 25% of the remaining rows as the dev set (about 21% of all rows).
# (A second-stage test_size of about 0.176 would instead give a dev set of
# roughly 15% of all rows.)
X_train_dev, X_test, y_train_dev, y_test = train_test_split(
  X, y, test_size=0.15, random_state=42
)
X_train, X_dev, y_train, y_dev = train_test_split(
  X_train_dev, y_train_dev, test_size=0.25, random_state=42
)

# Report each split's shape and its share of the full dataset.
split_features = {"X_train": X_train, "X_dev": X_dev, "X_test": X_test}
total_rows = X.shape[0]
data_shapes = {
  "Dataset": list(split_features),
  "Shape": [part.shape for part in split_features.values()],
  "Percentage": [100 / total_rows * part.shape[0] for part in split_features.values()],
}

print(pd.DataFrame(data_shapes))

# Quick look at the (imputed) source table.
print(df.head())

# print(X_train.head())


# X_train = X_train.values
# X_test = X_test.values
# y_train = y_train.values
# y_test = y_test.values


# Standardize the features (z-score).
# The mean and standard deviation come from the training split only and are
# then reused for the dev and test splits, so no statistics from held-out data
# enter the transformation.
mean = np.mean(X_train, axis=0, keepdims=True)
std = np.std(X_train, axis=0, keepdims=True)
# A feature that is constant in the training split has std == 0; divide by 1
# in that case so the result is 0 rather than NaN/inf.
std = np.where(std == 0, 1.0, std)

X_train = (X_train - mean) / std
X_dev = (X_dev - mean) / std
X_test = (X_test - mean) / std

# 1.1. Class balance
# Print the per-class counts of the training labels, then randomly oversample
# the training split only; dev and test keep their original distribution.
# The sampler is seeded (same seed as the splits above) so that the resampled
# training set is identical on every run.
print(np.bincount(y_train))
ROS = imb.over_sampling.RandomOverSampler(random_state=42)
X_train, y_train = ROS.fit_resample(X_train, y_train)






Mounted at /content/drive
   Dataset     Shape  Percentage
0  X_train  (489, 6)   63.671875
1    X_dev  (163, 6)   21.223958
2   X_test  (116, 6)   15.104167
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35     30.5  33.6   
1            1       85             66             29     30.5  26.6   
2            8      183             64             23     30.5  23.3   
3            1       89             66             23     94.0  28.1   
4            0      137             40             35    168.0  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  
[322 167]


In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input, Dropout, BatchNormalization
from tensorflow.keras.regularizers import L2
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.callbacks import ReduceLROnPlateau
# from initialization import X_train, X_dev, X_test, y_train, y_dev, y_test

# Seed Python, NumPy and TensorFlow RNGs so that weight initialization,
# dropout masks and batch shuffling are repeatable between runs. Without this,
# two runs of the same configuration cannot be compared.
tf.keras.utils.set_random_seed(42)

# Learning-rate schedule: when val_loss has not improved for 5 epochs, multiply
# the learning rate by 0.2, but never go below 1e-5.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001)

# Early stopping: stop after 10 epochs without val_loss improvement and roll
# the model back to the weights of the best epoch.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# L2 penalty factor shared by every hidden Dense layer.
L2_STRENGTH = 0.8

# Neural network architecture: a fully connected binary classifier.
# The input width of 6 matches the feature matrix built earlier in the
# notebook (all predictors except SkinThickness and Insulin).
model = Sequential(
  [
    Input(shape=(6,)),
    Dense(units = 64, activation = 'relu', kernel_regularizer = L2(L2_STRENGTH)), # hidden 1
    BatchNormalization(),
    Dense(units = 32, activation = 'relu', kernel_regularizer = L2(L2_STRENGTH)), # hidden 2
    BatchNormalization(),
    Dropout(0.3),
    Dense(units = 32, activation = 'relu', kernel_regularizer = L2(L2_STRENGTH)), # hidden 3
    BatchNormalization(),
    Dropout(0.25),
    Dense(units = 32, activation = 'relu', kernel_regularizer = L2(L2_STRENGTH)), # hidden 4
    Dense(units = 32, activation = 'relu', kernel_regularizer = L2(L2_STRENGTH)), # hidden 5
    BatchNormalization(),
    Dropout(0.3),
    Dense(units = 64, activation = 'relu', kernel_regularizer = L2(L2_STRENGTH)), # hidden 6
    # Single sigmoid unit: outputs a value in (0, 1) for the positive class.
    Dense(units = 1, activation = 'sigmoid'),
  ]
)

# Configure training: Adam with a learning rate of 1e-4, binary cross-entropy
# loss, accuracy as the reported metric.
# NOTE(review): the original optimizer line carried the remark "1, 3, 5, 7";
# its meaning is not evident from the code.
model.compile(
  optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0001),
  loss = tf.keras.losses.binary_crossentropy,
  metrics = ['accuracy'],
)

# Train on the (oversampled) training split and validate on the dev split.
# The callbacks may end training before 100 epochs and lower the learning rate.
history = model.fit(
  x = X_train,
  y = y_train,
  validation_data = (X_dev, y_dev),
  batch_size = 16,
  epochs = 100,
  callbacks = [early_stopping, reduce_lr],
)

# Report accuracy on each split, in the order test, train, dev.
for split_name, features, labels in (
  ("Test", X_test, y_test),
  ("Train", X_train, y_train),
  ("Dev", X_dev, y_dev),
):
  loss, accuracy = model.evaluate(features, labels)
  print(f"{split_name} Accuracy: {accuracy * 100:.2f}%")

# Persist the trained model to Drive. The file-name suffix is the run label.
# Run log (author's notes):
#   gd2, gd2_1 : good
#   gd2_2      : great
#   2_3        : not like before, but great
#   2_31       : 2_2 still better
#   2_32       : 89-77-76
#   2_4        : best so far, 90/79/78
#   2_41       : 86/78/77
#   2_5        : 84/78/78
#   2_51       : this run
model.save('/content/drive/My Drive/Ml_Models_tensorflow/diabetes_det_gd2_51.keras')

NameError: name 'X_train' is not defined