In [5]:
# imoort python libraries: numpy, pandas, matplotlib, seaborn, keras, sklearn
import pandas as pd
import numpy as np
import os


# import libraries for machine learning
from sklearn.model_selection import cross_val_predict, StratifiedKFold ,cross_val_score
from sklearn.model_selection import KFold

#no Warning
import warnings
warnings.filterwarnings("ignore")

In [6]:
#StandardScaler
from sklearn.preprocessing import StandardScaler


# Import libraries for deep learning
from tensorflow.keras.models import Sequential  # For initializing the neural network
from tensorflow.keras.layers import Dense, Input , Dropout  # For adding layers in the neural network
from scikeras.wrappers import KerasClassifier
from tensorflow.keras.utils import to_categorical  # For using Keras with numpy

In [7]:
# Load the dataset no column name is provided
file_path = 'DATA/pima_diabetes.csv'
df = pd.read_csv(file_path, header=None)

# Display the first few rows of the dataset
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [5]:
# Assigning column names
column_names = ['preg', 'plas', 'pres', 'skin', 'insu', 'mass', 'pedi', 'age', 'class']
df.columns = column_names

# Display the first few rows of the dataset with proper column names
df.head()

Unnamed: 0,preg,plas,pres,skin,insu,mass,pedi,age,class
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [6]:
# Separate features and target variable
X = df.drop(columns=['class'])
y = df['class']

In [7]:
random_seed = 7
np.random.seed(random_seed)

In [10]:
# Normalize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [8]:
# Define the baseline model
def baseline_model():
    model = Sequential()
    model.add(Input(shape=(8,)))  # Adjust the shape according to your input features
    model.add(Dense(12, activation='relu'))
    model.add(Dense(7, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

#### Experiment with more layers and different activation functions.

In [9]:
# Define the improved model
def improved_model():
    model = Sequential()
    model.add(Input(shape=(8,)))
    model.add(Dense(16, activation='relu'))  # Increased neurons
    model.add(Dense(12, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

#### Regularization Techniques
##### Add dropout layers and/or L2 regularization.

In [10]:
# Define the regularized model
def regularized_model():
    model = Sequential()
    model.add(Input(shape=(8,)))
    model.add(Dense(16, activation='relu', kernel_regularizer='l2'))
    model.add(Dropout(0.5))  # Dropout layer to prevent overfitting
    model.add(Dense(12, activation='relu', kernel_regularizer='l2'))
    model.add(Dense(8, activation='relu', kernel_regularizer='l2'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [11]:
# Function to evaluate model
def evaluate_model(model_fn, X, y):
    model = KerasClassifier(build_fn=model_fn, epochs=150, batch_size=10, verbose=0)
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
    results = cross_val_score(model, X, y, cv=kfold)
    return results.mean(), results.std()


In [26]:
# Evaluate the baseline model
baseline_accuracy, baseline_std = evaluate_model(baseline_model, X_scaled, y)


In [27]:

# Evaluate the improved model
improved_accuracy, improved_std = evaluate_model(improved_model, X_scaled, y)


In [30]:

# Evaluate the regularized model
regularized_accuracy, regularized_std = evaluate_model(regularized_model, X_scaled, y)


In [31]:

# Print the results
print(f"Baseline Model Accuracy: {baseline_accuracy*100:.2f}% (+/- {baseline_std*100:.2f}%)")
print(f"Improved Model Accuracy: {improved_accuracy*100:.2f}% (+/- {improved_std*100:.2f}%)")
print(f"Regularized Model Accuracy: {regularized_accuracy*100:.2f}% (+/- {regularized_std*100:.2f}%)")

Baseline Model Accuracy: 75.25% (+/- 4.98%)
Improved Model Accuracy: 73.82% (+/- 3.51%)
Regularized Model Accuracy: 76.17% (+/- 2.36%)
