In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import tensorflow as tf
from tensorflow import keras


In [28]:
df = pd.read_csv("diabetes.csv")
df.head()


Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [29]:
X = df.drop("Outcome", axis=1)
y = df["Outcome"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

X.replace(0, np.nan, inplace=True)
X.fillna(X.mean(), inplace=True)

# Check
print(X.isna().sum())
print(y.unique())  # Should be [0, 1]


print("y_train mean:", y_train.mean())
print("y_test mean:", y_test.mean())
print("y_train unique labels:", y_train.unique())
print("y_test unique labels:", y_test.unique())



Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
dtype: int64
[1 0]
y_train mean: 0.3469055374592834
y_test mean: 0.35714285714285715
y_train unique labels: [0 1]
y_test unique labels: [0 1]


In [30]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [31]:
model = keras.Sequential([
    keras.layers.Dense(16, activation='relu', input_shape=(X_train.shape[1],)),
    keras.layers.Dense(8, activation='relu'),
    keras.layers.Dense(1, activation='sigmoid')
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy']
)

model.summary()


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [32]:
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=8,
    validation_split=0.2,
    verbose=1
)


Epoch 1/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 15ms/step - accuracy: 0.3827 - loss: 0.7420 - val_accuracy: 0.6098 - val_loss: 0.6809
Epoch 2/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6614 - loss: 0.6763 - val_accuracy: 0.6911 - val_loss: 0.6658
Epoch 3/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7632 - loss: 0.6487 - val_accuracy: 0.6911 - val_loss: 0.6489
Epoch 4/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7197 - loss: 0.6345 - val_accuracy: 0.6829 - val_loss: 0.6300
Epoch 5/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7647 - loss: 0.5959 - val_accuracy: 0.6911 - val_loss: 0.6046
Epoch 6/100
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7706 - loss: 0.5624 - val_accuracy: 0.6911 - val_loss: 0.5822
Epoch 7/100
[1m62/62[0m [32m━━

In [33]:
loss, acc = model.evaluate(X_test, y_test)
print("Test Accuracy:", acc)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7424 - loss: 0.5641 
Test Accuracy: 0.7402597665786743


In [34]:
model.predict(X_test[:5])


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step


array([[0.4188405 ],
       [0.05193955],
       [0.04464507],
       [0.3864501 ],
       [0.2960895 ]], dtype=float32)

In [35]:
print(X_train.shape, X_test.shape)
print(y_train.mean(), y_test.mean())


(614, 8) (154, 8)
0.3469055374592834 0.35714285714285715


In [36]:
print("y_train mean:", y_train.mean())
print("y_test mean:", y_test.mean())
print("y_train unique labels:", y_train.unique())
print("y_test unique labels:", y_test.unique())


y_train mean: 0.3469055374592834
y_test mean: 0.35714285714285715
y_train unique labels: [0 1]
y_test unique labels: [0 1]


In [37]:
loss, acc = model.evaluate(X_test, y_test)
print("Test Accuracy:", acc)


[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7424 - loss: 0.5641 
Test Accuracy: 0.7402597665786743


In [38]:
predictions = model.predict(X_test[:5])
predictions


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step


array([[0.4188405 ],
       [0.05193955],
       [0.04464507],
       [0.3864501 ],
       [0.2960895 ]], dtype=float32)