In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import StratifiedKFold
import numpy as np

#  Load the Dataset
data = pd.read_csv("diabetes2.0.csv")

#  Preprocess the Data

target_column = 'target'
if target_column not in data.columns:
    target_column = data.columns[-1]

X = data.drop(target_column, axis=1)
y = data[target_column]

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

#  Split the Data
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

#  Define ANN Model Function
def create_model():
    model = Sequential([
        Dense(64, activation='relu', input_shape=(X_train.shape[1],)),
        Dropout(0.2),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

#  Cross-Validation
kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
cv_scores = []

for train, val in kfold.split(X_train, y_train):
    model = create_model()
    model.fit(X_train[train], y_train.iloc[train], epochs=50, batch_size=32, verbose=0)
    scores = model.evaluate(X_train[val], y_train.iloc[val], verbose=0)
    cv_scores.append(scores[1])

#  Evaluate the Model
print("Cross-Validation Accuracy:", np.mean(cv_scores))

#  Evaluate on Test Set
final_model = create_model()
final_model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)
test_loss, test_accuracy = final_model.evaluate(X_test, y_test)
print("Test Accuracy:", test_accuracy)


Cross-Validation Accuracy: 0.7654804706573486
Test Accuracy: 0.7402597665786743
