In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
import tensorflow as tf

def load_and_preprocess_data(filepath, *, target_column='Outcome', test_size=0.2, random_state=28):
    """Load a CSV dataset, split it into train/test sets and standardize the features.

    Args:
        filepath: Location of the CSV file, passed straight to ``pandas.read_csv``.
        target_column: Name of the label column; every other remaining column
            is used as a feature.
        test_size: Value forwarded to ``train_test_split`` as ``test_size``
            (0.2 holds out 20% of the rows).
        random_state: Seed forwarded to ``train_test_split`` so the split is
            reproducible between runs.

    Returns:
        A tuple ``(X_train_scaled, X_test_scaled, y_train, y_test)``. The two
        feature matrices are the outputs of ``StandardScaler``; the two target
        objects are the matching slices of ``target_column``.

    Raises:
        KeyError: If ``target_column`` is not a column of the loaded file.
    """
    df = pd.read_csv(filepath)
    # pandas labels a header-less first column 'Unnamed: 0' (presumably an index
    # written by an earlier to_csv call). errors='ignore' keeps files that do
    # not carry that column loadable instead of failing with KeyError.
    df = df.drop(columns=['Unnamed: 0'], errors='ignore')

    X = df.drop(columns=[target_column])
    y = df[target_column]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Fit the scaler on the training rows only, then reuse its statistics for
    # the test rows so no information from the test set leaks into training.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled, y_train, y_test

def build_and_train_nn_model(X_train_scaled, y_train, *, epochs=100, batch_size=64,
                             validation_split=0.2, seed=None):
    """Build and fit a small feed-forward binary classifier.

    The network is ``Input -> Dense(64, relu) -> Dense(32, relu) ->
    Dense(1, sigmoid)``, compiled with the Adam optimizer, binary
    cross-entropy loss and accuracy as the only metric.

    Args:
        X_train_scaled: 2-D feature array; its second dimension sets the
            input width of the network.
        y_train: Training labels passed to ``model.fit`` (binary labels are
            assumed, given the sigmoid output and binary cross-entropy loss).
        epochs: Number of training epochs.
        batch_size: Mini-batch size used during training.
        validation_split: Fraction of the training data Keras holds out for
            validation during ``fit``.
        seed: Optional integer. When given, the global random seeds are set
            before the model is created so weight initialisation and shuffling
            are repeatable. ``None`` (the default) leaves the global random
            state untouched.

    Returns:
        A tuple ``(model, history)``: the fitted Keras model and the object
        returned by ``model.fit``.
    """
    if seed is not None:
        # Seeds Python's `random`, NumPy and TensorFlow in one call. This is a
        # process-wide side effect, hence opt-in only.
        tf.keras.utils.set_random_seed(seed)

    n_features = X_train_scaled.shape[1]
    model = Sequential([
        Input(shape=(n_features,)),
        Dense(64, activation='relu'),
        Dense(32, activation='relu'),
        Dense(1, activation='sigmoid'),
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    history = model.fit(
        X_train_scaled,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        validation_split=validation_split,
        verbose=0,  # keep per-epoch progress bars out of the output
    )
    return model, history

def evaluate_model(model, X_test_scaled, y_test):
    """Score a fitted model on held-out data.

    Args:
        model: Object exposing ``evaluate(x, y, verbose=...)`` that returns
            exactly two values.
        X_test_scaled: Test features forwarded to ``model.evaluate``.
        y_test: Test labels forwarded to ``model.evaluate``.

    Returns:
        A ``(loss, accuracy)`` tuple holding the two values from
        ``model.evaluate``, in the order they were returned.
    """
    # verbose=0 silences the progress output of evaluate().
    metrics = model.evaluate(X_test_scaled, y_test, verbose=0)
    # Unpacking requires exactly two values and fails loudly otherwise.
    test_loss, test_accuracy = metrics
    return test_loss, test_accuracy

def build_and_evaluate_ensemble(X_train_scaled, X_test_scaled, y_train, y_test):
    """Fit a soft-voting ensemble and score it on the test split.

    The ensemble combines a random forest and a gradient-boosting classifier
    (100 estimators each, both seeded with 28) using ``voting='soft'``.

    Args:
        X_train_scaled: Training features.
        X_test_scaled: Test features.
        y_train: Training labels.
        y_test: Test labels used for scoring.

    Returns:
        A tuple ``(accuracy, conf_matrix)`` computed by ``accuracy_score`` and
        ``confusion_matrix`` on the test predictions.
    """
    base_learners = [
        ('rf', RandomForestClassifier(n_estimators=100, random_state=28)),
        ('gb', GradientBoostingClassifier(n_estimators=100, random_state=28)),
    ]
    ensemble = VotingClassifier(estimators=base_learners, voting='soft')
    ensemble.fit(X_train_scaled, y_train)

    predictions = ensemble.predict(X_test_scaled)
    return accuracy_score(y_test, predictions), confusion_matrix(y_test, predictions)

# Driver: train a neural network and a voting ensemble on the same train/test
# split and print their test-set metrics.

# Load the CSV and obtain the scaled train/test features plus their labels.
X_train_scaled, X_test_scaled, y_train, y_test = load_and_preprocess_data('diabetes_final.csv')

# Build and train the neural network, then score it on the held-out test set.
# `history` is kept but not used below.
nn_model, history = build_and_train_nn_model(X_train_scaled, y_train)
nn_loss, nn_accuracy = evaluate_model(nn_model, X_test_scaled, y_test)

# Fit and score the ensemble on the same split as the network.
ensemble_accuracy, ensemble_conf_matrix = build_and_evaluate_ensemble(X_train_scaled, X_test_scaled, y_train, y_test)

# Output results: test loss/accuracy for the network, test accuracy and the
# confusion matrix for the ensemble.
print(f"Neural Network Loss: {nn_loss}, Accuracy: {nn_accuracy}")
print(f"Ensemble Model Accuracy: {ensemble_accuracy}")
print(f"Ensemble Model Confusion Matrix:\n{ensemble_conf_matrix}")


Neural Network Loss: 0.5791845321655273, Accuracy: 0.7467532753944397
Ensemble Model Accuracy: 0.7922077922077922
Ensemble Model Confusion Matrix:
[[89 14]
 [18 33]]
