dataset: https://www.kaggle.com/datasets/uciml/pima-indians-diabetes-database

In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

In [11]:
data = pd.read_csv('diabetes.csv')

In [12]:
data.shape

(768, 9)

In [13]:
for col in ['Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI']:
    data[col] = data[col].replace(0, np.nan)
    data[col] = data[col].fillna(data[col].mean())

In [14]:
X = data.drop('Outcome', axis=1)
y = data['Outcome']

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [16]:
scaler = StandardScaler()

X_train_scaled = scaler.fit_transform(X_train)

In [17]:
X_test_scaled = scaler.transform(X_test)

In [56]:
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.25),
    Dense(32, activation='relu'),
    Dropout(0.25),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])

In [57]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [58]:
model.fit(X_train_scaled, y_train, epochs=20, batch_size=32, validation_split=0.2, verbose=1)

Epoch 1/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 49ms/step - accuracy: 0.4827 - loss: 0.7101 - val_accuracy: 0.6016 - val_loss: 0.6761
Epoch 2/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.6864 - loss: 0.6520 - val_accuracy: 0.6504 - val_loss: 0.6427
Epoch 3/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step - accuracy: 0.6843 - loss: 0.6100 - val_accuracy: 0.6423 - val_loss: 0.6004
Epoch 4/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - accuracy: 0.7149 - loss: 0.5735 - val_accuracy: 0.6585 - val_loss: 0.5569
Epoch 5/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 21ms/step - accuracy: 0.7434 - loss: 0.5286 - val_accuracy: 0.7073 - val_loss: 0.5259
Epoch 6/20
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.7291 - loss: 0.5216 - val_accuracy: 0.7480 - val_loss: 0.4949
Epoch 7/20
[1m16/16[0m [32m━━━━

<keras.src.callbacks.history.History at 0x1d536654670>

In [59]:
loss, accuracy = model.evaluate(X_test_scaled, y_test)
print(f'Test Accuracy: {accuracy:.4f}')

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step - accuracy: 0.7792 - loss: 0.4943 
Test Accuracy: 0.7792
