<a href="https://colab.research.google.com/github/PRAISE-KING/FINAL_PROJECT_PLP/blob/main/diabetes_prediction_tf_keras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [73]:
# 1. DATA COLLECTION

import pandas as pd
import tensorflow as tf

url = 'https://raw.githubusercontent.com/plotly/datasets/master/diabetes.csv'
df = pd.read_csv(url)

print('First 5 rows :\n', df.head())
print('\n\n number of (rows , columns) :\n', df.shape)
print('\n\n Info about the dataset :\n', df.info())
print('\n\n Number of columns :\n', df.columns.to_list())

First 5 rows :
    Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


 number of (rows , columns) :
 (768, 9)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               

In [74]:
# 2. DATA PREPROCESSING

# checking and counting the number of zeros in the whole dataset
num_of_zeros = (df == 0).sum()
print('Number of 0s in the whole dataset :\n' ,num_of_zeros)
# columns in the dataset that shouldnt have 0s
zero_free_columns = ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']
print('\n\n columns that have invalid 0s :\n' ,zero_free_columns)

Number of 0s in the whole dataset :
 Pregnancies                 111
Glucose                       5
BloodPressure                35
SkinThickness               227
Insulin                     374
BMI                          11
DiabetesPedigreeFunction      0
Age                           0
Outcome                     500
dtype: int64


 columns that have invalid 0s :
 ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']


In [75]:
import numpy as np

# changing all 0s to NANs
df[zero_free_columns] = df[zero_free_columns].replace(0, np.nan)
print('Number of 0s as NANs :\n', df.isnull().sum())

# clearing 0s in the zero free columns to desired numbers...I'll go with median because of outliers
df[zero_free_columns] = df[zero_free_columns].fillna(df[zero_free_columns].median())
print('\n\n Number of 0s after clearing :\n', df.isnull().sum())

Number of 0s as NANs :
 Pregnancies                   0
Glucose                       5
BloodPressure                35
SkinThickness               227
Insulin                     374
BMI                          11
DiabetesPedigreeFunction      0
Age                           0
Outcome                       0
dtype: int64


 Number of 0s after clearing :
 Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64


In [76]:
from sklearn.model_selection import train_test_split

# splitting into feature(x) and target(y)
x = df.drop('Outcome', axis=1)
y = df['Outcome']

# train test split
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.2, random_state=42)

In [77]:
# 3. BUILDING NEURAL NETWORK

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Dropout

# building the model
model = Sequential([
    Dense(32, activation = 'relu', input_shape = (8,)),
    Dropout(0.3),
    Dense(16, activation = 'relu'),
    Dropout(0.2),
    Dense(8, activation = 'relu'),
    Dense(4, activation = 'relu'),
    Dense(1, activation = 'sigmoid')
])

# compiling the model
model.compile(
    optimizer = 'adam',
    loss = 'binary_crossentropy',
    metrics = (['accuracy'])
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [78]:
# 4. TRAINING THE MODEL
from sklearn.utils.class_weight import compute_class_weight

weights = compute_class_weight(class_weight='balanced', classes=np.unique(y_train), y=y_train)
class_weights = {0: weights[0], 1: weights[1]}
model.fit(x_train, y_train, epochs=100, validation_split=0.2, class_weight=class_weights, verbose=1)

Epoch 1/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 23ms/step - accuracy: 0.4053 - loss: 5.3141 - val_accuracy: 0.6179 - val_loss: 0.6943
Epoch 2/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5706 - loss: 1.4274 - val_accuracy: 0.6098 - val_loss: 0.6904
Epoch 3/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.5694 - loss: 1.0299 - val_accuracy: 0.6098 - val_loss: 0.6898
Epoch 4/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6353 - loss: 0.8510 - val_accuracy: 0.6098 - val_loss: 0.6893
Epoch 5/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.6743 - loss: 0.7550 - val_accuracy: 0.6098 - val_loss: 0.6891
Epoch 6/100
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 11ms/step - accuracy: 0.6268 - loss: 0.7791 - val_accuracy: 0.6098 - val_loss: 0.6888
Epoch 7/100
[1m16/16[0m [32

<keras.src.callbacks.history.History at 0x7f9cfaae1f90>

In [79]:
# 5. MODEL EVALUATION

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# make predictions
y_prediction = (model.predict(x_test) > 0.5).astype('int32')

print("Accuracy :\n", accuracy_score(y_test, y_prediction))
print("\n\n confusion_matrix :\n", accuracy_score(y_test, y_prediction))
print("\n\n Classification Report :\n", classification_report(y_test, y_prediction))

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Accuracy :
 0.6428571428571429


 confusion_matrix :
 0.6428571428571429


 Classification Report :
               precision    recall  f1-score   support

           0       0.64      1.00      0.78        99
           1       0.00      0.00      0.00        55

    accuracy                           0.64       154
   macro avg       0.32      0.50      0.39       154
weighted avg       0.41      0.64      0.50       154



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [80]:
# savin the model
model.save('diabetes_prediction.h5')
# loading the model



In [91]:
# 6. DEPLOYMENT
# using FASTAPI & keras

from google.colab import files
files.download("diabetes_prediction.h5")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>