# Enhancing Deep Learning Predictions

Try different network architectures like deeper or wider networks, adding dropout or batch normalization for regularization. This can help improve accuracy.

Tune hyperparameters such as the learning rate, batch size, and number of epochs through grid search or Bayesian optimization to find good values.

Use k-fold cross-validation to get a more reliable estimate of model performance and to detect overfitting.

Try ensembling models like averaging predictions from multiple models. This can boost performance.

Use more advanced optimization algorithms such as Adam or RMSprop instead of basic SGD.

Add momentum to help accelerate training.

Try different activation functions such as ReLU or LeakyReLU.

Use regularization techniques like L1/L2 regularization, dropout to reduce overfitting.

Get more data if possible. More data usually results in better deep learning models.

Visualize activations and feature maps to better understand what the network is learning.

# MLP

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
from scipy import stats

from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from mlxtend.plotting import plot_confusion_matrix
from sklearn.linear_model import LogisticRegressionCV
from sklearn.svm import SVC
from sklearn import svm
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.utils import shuffle
from sklearn.svm import SVC, LinearSVC
from sklearn.ensemble import VotingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score , classification_report,ConfusionMatrixDisplay,precision_score,recall_score, f1_score,roc_auc_score,roc_curve, balanced_accuracy_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from mlxtend.feature_selection import ExhaustiveFeatureSelector as EFS
import tensorflow as tf 
tf.random.set_seed(3)
from tensorflow import keras

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error


# Load the dataset and separate the feature columns from the 'Diabetes' target.
diabetes_data_2 = pd.read_csv("Diabetes_dataset.csv")

X = diabetes_data_2.drop(columns='Diabetes')
Y = diabetes_data_2['Diabetes']

# Split BEFORE scaling so that no test-set statistics leak into training.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Standardize: fit on the training split only, then apply to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the MLP model: three ReLU hidden layers and one sigmoid output unit.
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model. A sigmoid output is paired with binary cross-entropy,
# matching the later cells in this notebook.
# NOTE(review): assumes 'Diabetes' is encoded as 0/1 - confirm.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model; the held-out split is used only for monitoring.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)

# Make predictions (sigmoid outputs, one column)
y_pred = model.predict(X_test)

# Evaluate the predicted probabilities against the true labels
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Mean Squared Error: ", mse)
print("Mean Absolute Error: ", mae)

# MLP Wider network

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error


# Load the dataset and separate the feature columns from the 'Diabetes' target.
diabetes_data_2 = pd.read_csv("Diabetes_dataset.csv")

X = diabetes_data_2.drop(columns='Diabetes')
Y = diabetes_data_2['Diabetes']

# Split first; the scaler must only ever see the training rows.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Standardize with statistics learned from the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the wider MLP: two 512-unit ReLU layers and one sigmoid output unit.
model = Sequential()
model.add(Dense(512, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model. Binary cross-entropy is used with the sigmoid output,
# matching the later cells in this notebook.
# NOTE(review): assumes 'Diabetes' is encoded as 0/1 - confirm.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model; the held-out split is used only for monitoring.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)

# Make predictions (sigmoid outputs, one column)
y_pred = model.predict(X_test)

# Evaluate the predicted probabilities against the true labels
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Mean Squared Error: ", mse)
print("Mean Absolute Error: ", mae)

# MLP Tune Hyperparameters 

In [None]:
# Grid search
from sklearn.base import BaseEstimator, ClassifierMixin
from keras.optimizers import Adam


def build_mlp(n_features, learning_rate):
    """Return a freshly initialised, compiled wide MLP for binary classification.

    A new network is built for every trial so that weights learned under one
    hyperparameter setting never carry over into the next.
    """
    net = Sequential()
    net.add(Dense(512, input_dim=n_features, activation='relu'))
    net.add(Dense(512, activation='relu'))
    net.add(Dense(1, activation='sigmoid'))
    net.compile(loss='binary_crossentropy',
                optimizer=Adam(learning_rate=learning_rate),
                metrics=['accuracy'])
    return net


class KerasMLPClassifier(ClassifierMixin, BaseEstimator):
    """Minimal scikit-learn adapter around ``build_mlp``.

    GridSearchCV needs an estimator it can clone (through the ``__init__``
    parameters), fit, and score; a raw Keras model offers none of these.
    ``score`` is inherited from ``ClassifierMixin``.
    """

    def __init__(self, learning_rate=0.001, batch_size=32, epochs=10):
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.epochs = epochs

    def fit(self, X, y):
        """Train a new network on ``X``/``y`` and return ``self``."""
        X = np.asarray(X, dtype='float32')
        y = np.asarray(y)
        self.classes_ = np.unique(y)
        self.model_ = build_mlp(X.shape[1], self.learning_rate)
        self.model_.fit(X, y, epochs=self.epochs,
                        batch_size=self.batch_size, verbose=0)
        return self

    def predict(self, X):
        """Return hard 0/1 labels by thresholding the sigmoid output at 0.5."""
        # NOTE(review): assumes the target is encoded as 0/1 - confirm.
        proba = self.model_.predict(np.asarray(X, dtype='float32'), verbose=0)
        return (proba.ravel() >= 0.5).astype(int)


batch_size = [16, 32, 64]
learning_rate = [0.01, 0.001, 0.0001]
param_grid = dict(batch_size=batch_size, learning_rate=learning_rate)
grid = GridSearchCV(estimator=KerasMLPClassifier(), param_grid=param_grid)
grid_result = grid.fit(X_train, y_train)

# Bayesian optimization
from bayes_opt import BayesianOptimization


def fitness(learning_rate, epochs):
    """Objective to maximise: final validation accuracy of a fresh MLP.

    ``epochs`` is proposed as a float by the optimiser and is rounded to an
    integer here. Validation data is carved out of the training split so the
    test set is never used for hyperparameter selection.
    """
    net = build_mlp(X_train.shape[1], learning_rate)
    history = net.fit(np.asarray(X_train, dtype='float32'), np.asarray(y_train),
                      validation_split=0.2,
                      epochs=int(round(epochs)),
                      batch_size=32,
                      verbose=0)
    return history.history['val_accuracy'][-1]


bayes_opt = BayesianOptimization(fitness, {'learning_rate': (0.001, 0.1),
                                           'epochs': (5, 30)})
bayes_opt.maximize()
# 'epochs' in best_params is still a float; round it before reuse.
best_params = bayes_opt.max['params']

# Using a score function for hyperparameter tuning

In [None]:
from keras.models import Model
from keras.layers import Dense, Flatten
from keras.optimizers import Adam
import numpy as np

# Load the dataset and separate the feature columns from the 'Diabetes' target.
diabetes_data_2 = pd.read_csv("Diabetes_dataset.csv")

x = diabetes_data_2.drop(columns='Diabetes')
y = diabetes_data_2['Diabetes']

# Split first so the scaler is fitted on training rows only (no leakage).
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


class MyModel(Model):
    """Single sigmoid Dense layer with a scikit-learn style ``score`` helper."""

    def __init__(self, num_classes):
        """Create the layers; ``num_classes`` is the number of output units."""
        super(MyModel, self).__init__()
        # Layers are created once here rather than inside call(), which would
        # instantiate a new Flatten object on every forward pass.
        self.flatten = Flatten()
        self.dense = Dense(num_classes, activation='sigmoid')

    def call(self, x):
        """Flatten the input and apply the sigmoid Dense layer."""
        return self.dense(self.flatten(x))

    def score(self, x, y):
        """Return the fraction of samples in ``x`` whose predicted label equals ``y``.

        A single output column is rounded at 0.5; with several output columns
        the index of the largest one is taken as the label. Both sides are
        flattened to 1-D before comparison so that an (n, 1) prediction is
        never broadcast against an (n,) label vector.
        """
        y_pred = np.asarray(self.predict(x))
        y_true = np.asarray(y).ravel()
        if y_pred.shape[-1] == 1:
            labels = np.round(y_pred).ravel()
        else:
            labels = np.argmax(y_pred, axis=-1)
        return np.mean(labels == y_true)


# One sigmoid unit is enough for a binary target.
# NOTE(review): assumes 'Diabetes' is encoded as 0/1 - confirm.
model = MyModel(num_classes=1)
model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])

# Train before scoring; an untrained model only reports chance-level accuracy.
model.fit(x_train, np.asarray(y_train), epochs=10, batch_size=32)

accuracy = model.score(x_test, y_test)
print("Accuracy: ", accuracy)

# Using a GAN

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

# Load the diabetes dataset
data = pd.read_csv('Diabetes_dataset.csv')

# Separate features and target
X = data.drop('Diabetes', axis=1)
y = data['Diabetes']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Define the generator and discriminator networks
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, Input, LeakyReLU
from tensorflow.keras.models import Model

n_features = X_train.shape[1]

# The generator ends in tanh, so the real samples shown to the discriminator
# are rescaled to [-1, 1]; the scaler is fitted on the training split only.
gan_scaler = MinMaxScaler(feature_range=(-1, 1))
gan_scaler.fit(X_train)

# The synthetic rows are labelled 1 further down, so the GAN is trained on the
# real rows of that class only.
# NOTE(review): assumes 'Diabetes' is encoded 0/1 and that the training split
# contains at least one row with label 1 - confirm.
real_pool = gan_scaler.transform(X_train[y_train == 1]).astype('float32')

# Generator network: noise vector -> synthetic feature vector in [-1, 1]
generator_input = Input(shape=(n_features,))
generator_hidden = Dense(128, activation='relu')(generator_input)
generator_hidden = Dense(64, activation='relu')(generator_hidden)
generator_output = Dense(n_features, activation='tanh')(generator_hidden)

generator = Model(inputs=generator_input, outputs=generator_output)

# Discriminator network: feature vector -> probability that it is real
discriminator_input = Input(shape=(n_features,))
discriminator_hidden = Dense(128, activation='relu')(discriminator_input)
discriminator_hidden = Dense(64, activation='relu')(discriminator_hidden)
discriminator_output = Dense(1, activation='sigmoid')(discriminator_hidden)

discriminator = Model(inputs=discriminator_input, outputs=discriminator_output)

# The two networks are optimised with an explicit loop (below) because the
# generator's loss must be computed THROUGH the discriminator; calling
# generator.train_on_batch on its own cannot express that.
bce = tf.keras.losses.BinaryCrossentropy()
generator_opt = tf.keras.optimizers.Adam()
discriminator_opt = tf.keras.optimizers.Adam()

# Train the generator and discriminator (one batch per "epoch")
batch_size = 32
epochs = 100

real_labels = tf.ones((batch_size, 1))
fake_labels = tf.zeros((batch_size, 1))

for epoch in range(epochs):
    # Draw a fresh random batch of real rows every step.
    rows = np.random.randint(0, len(real_pool), size=batch_size)
    real_batch = real_pool[rows]

    # Discriminator step: real rows -> 1, generated rows -> 0.
    noise = tf.random.normal((batch_size, n_features))
    with tf.GradientTape() as tape:
        fake_batch = generator(noise, training=True)
        d_loss = (bce(real_labels, discriminator(real_batch, training=True))
                  + bce(fake_labels, discriminator(fake_batch, training=True)))
    d_vars = discriminator.trainable_variables
    discriminator_opt.apply_gradients(zip(tape.gradient(d_loss, d_vars), d_vars))

    # Generator step: only the generator's weights are updated, pushing the
    # discriminator's verdict on generated rows towards "real".
    noise = tf.random.normal((batch_size, n_features))
    with tf.GradientTape() as tape:
        verdict = discriminator(generator(noise, training=True), training=True)
        g_loss = bce(real_labels, verdict)
    g_vars = generator.trainable_variables
    generator_opt.apply_gradients(zip(tape.gradient(g_loss, g_vars), g_vars))

# Generate synthetic rows with the trained generator and map them back to the
# original feature scale so they are comparable with X_train.
synthetic_scaled = generator.predict(
    np.random.normal(size=(batch_size, n_features)), verbose=0)
synthetic_data = gan_scaler.inverse_transform(synthetic_scaled)

# Augment the training data with synthetic data (all labelled 1)
X_augmented = np.concatenate([X_train.to_numpy(), synthetic_data], axis=0)
y_augmented = np.concatenate([y_train.to_numpy(), np.ones(batch_size)], axis=0)

# Train the model with augmented data
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(n_estimators=100)
model.fit(X_augmented, y_augmented)

# Evaluate the model on the test set (plain array, matching how it was fitted)
y_pred = model.predict(X_test.to_numpy())

print('Accuracy:', accuracy_score(y_test, y_pred))
print('Recall:', recall_score(y_test, y_pred))
print('Precision:', precision_score(y_test, y_pred))
print('F1 score:', f1_score(y_test, y_pred))

# K Fold

In [None]:
from keras.models import Model
from keras.layers import Dense, Flatten
from keras.optimizers import Adam
import numpy as np

# Load the dataset and separate the feature columns from the 'Diabetes' target.
diabetes_data_2 = pd.read_csv("Diabetes_dataset.csv")

X = diabetes_data_2.drop(columns='Diabetes')
y = diabetes_data_2['Diabetes']

# Standardize the data. X is kept as one scaled NumPy array because the
# k-fold loop later in this notebook indexes it by row position.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Split the data into training and test sets.
# Uses X defined above; the lowercase `x` previously passed here belongs to a
# different cell and is stale or undefined depending on execution order.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
from sklearn.model_selection import KFold
from keras.models import Sequential
from keras.layers import Dense

# A fixed seed makes the fold assignment reproducible between runs.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# KFold yields row POSITIONS, so index plain arrays; indexing a pandas Series
# with them is label-based and only works by accident on a default index.
X_values = np.asarray(X)
y_values = np.asarray(y)

histories = []
for train_index, test_index in kf.split(X_values):
    X_train, X_test = X_values[train_index], X_values[test_index]
    y_train, y_test = y_values[train_index], y_values[test_index]

    # Build a fresh network per fold so no weights carry over between folds.
    model = Sequential()
    model.add(Dense(512, input_dim=X_train.shape[1], activation='relu'))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    # Binary cross-entropy is used with the sigmoid output, matching the
    # later cells in this notebook.
    # NOTE(review): assumes the target is encoded as 0/1 - confirm.
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

    # Train the model, validating on the held-out fold
    history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)
    histories.append(history)

# Final-epoch validation accuracy of every fold, then the mean across folds.
performance = [history.history['val_accuracy'][-1] for history in histories]
print("Average 5-Fold Validation Accuracy: %.2f" % (sum(performance)/len(performance)))

# Combining two models (ensemble testing)

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error


# Load the dataset and separate the feature columns from the 'Diabetes' target.
diabetes_data_2 = pd.read_csv("Diabetes_dataset.csv")

X = diabetes_data_2.drop(columns='Diabetes')
Y = diabetes_data_2['Diabetes']

# Split BEFORE scaling so that no test-set statistics leak into training.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Standardize: fit on the training split only, then apply to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the MLP model


In [None]:
# Train two different models
# The input width is read from the data instead of being hard-coded.
n_features = X_train.shape[1]

model1 = Sequential()
model1.add(Dense(64, input_dim=n_features, activation='relu'))
model1.add(Dense(1, activation='sigmoid'))

model2 = Sequential()
model2.add(Dense(32, input_dim=n_features, activation='relu'))
model2.add(Dense(1, activation='sigmoid'))

# Both members must be compiled and fitted before they predict; averaging the
# outputs of untrained networks only averages random initialisations.
for member in (model1, model2):
    member.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    member.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)

# Make predictions and average
pred1 = model1.predict(X_test)
pred2 = model2.predict(X_test)

y_pred = (pred1 + pred2) / 2

In [None]:
# Score y_pred against the held-out labels with both error metrics.
mse, mae = (
    metric(y_test, y_pred)
    for metric in (mean_squared_error, mean_absolute_error)
)

print("Mean Squared Error: ", mse)
print("Mean Absolute Error: ", mae)

In [None]:
# Compile the model. model1 ends in a sigmoid unit, so binary cross-entropy is
# used here, matching the later cells in this notebook.
# NOTE(review): assumes the target is encoded as 0/1 - confirm.
model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model; the held-out split is used only for monitoring.
history = model1.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)


# Adding an explicit learning rate to the model

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error


# Adam is imported here so this cell does not depend on another cell having
# been run first.
from keras.optimizers import Adam

# Load the dataset and separate the feature columns from the 'Diabetes' target.
diabetes_data_2 = pd.read_csv("Diabetes_dataset.csv")

X = diabetes_data_2.drop(columns='Diabetes')
Y = diabetes_data_2['Diabetes']

# Split BEFORE scaling so that no test-set statistics leak into training.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Standardize: fit on the training split only, then apply to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the MLP model: three ReLU hidden layers and one sigmoid output unit.
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Compile the model with an explicit learning rate for Adam.
model.compile(loss='binary_crossentropy',
              optimizer=Adam(learning_rate=0.01),
              metrics=['accuracy'])

# Train the model; the held-out split is used only for monitoring.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)

# Make predictions (sigmoid outputs, one column)
y_pred = model.predict(X_test)

# Evaluate the predicted probabilities against the true labels
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Mean Squared Error: ", mse)
print("Mean Absolute Error: ", mae)

# Adding momentum and SGD

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error


# Load the dataset and separate the feature columns from the 'Diabetes' target.
diabetes_data_2 = pd.read_csv("Diabetes_dataset.csv")

X = diabetes_data_2.drop(columns='Diabetes')
Y = diabetes_data_2['Diabetes']

# Split BEFORE scaling so that no test-set statistics leak into training.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Standardize: fit on the training split only, then apply to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the MLP model: three ReLU hidden layers and one sigmoid output unit.
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))


from tensorflow.keras.optimizers import SGD

# Plain SGD with a momentum term of 0.9.
opt = SGD(learning_rate=0.01, momentum=0.9)

model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Train the model; the held-out split is used only for monitoring.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)

# Make predictions (sigmoid outputs, one column)
y_pred = model.predict(X_test)

# Evaluate the predicted probabilities against the true labels
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Mean Squared Error: ", mse)
print("Mean Absolute Error: ", mae)

# LeakyReLU Activation function:

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error


# Load the dataset and separate the feature columns from the 'Diabetes' target.
diabetes_data_2 = pd.read_csv("Diabetes_dataset.csv")

X = diabetes_data_2.drop(columns='Diabetes')
Y = diabetes_data_2['Diabetes']

# Split BEFORE scaling so that no test-set statistics leak into training.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Standardize: fit on the training split only, then apply to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the MLP model

model = Sequential()
from tensorflow.keras.layers import LeakyReLU

# The input width belongs on the FIRST layer; on a later layer it does not
# describe that layer's real input (the 64 units before it).
model.add(Dense(64, input_dim=X_train.shape[1], activation=LeakyReLU(alpha=0.05)))
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))


from tensorflow.keras.optimizers import SGD

# Plain SGD with a momentum term of 0.9.
opt = SGD(learning_rate=0.01, momentum=0.9)

model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Train the model; the held-out split is used only for monitoring.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)

# Make predictions (sigmoid outputs, one column)
y_pred = model.predict(X_test)

# Evaluate the predicted probabilities against the true labels
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Mean Squared Error: ", mse)
print("Mean Absolute Error: ", mae)

# Adding Regularization:

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error
from tensorflow.keras.layers import LSTM, Dense, Dropout


# Load the dataset and separate the feature columns from the 'Diabetes' target.
diabetes_data_2 = pd.read_csv("Diabetes_dataset.csv")

X = diabetes_data_2.drop(columns='Diabetes')
Y = diabetes_data_2['Diabetes']

# Split BEFORE scaling so that no test-set statistics leak into training.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Standardize: fit on the training split only, then apply to both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the MLP model

model = Sequential()
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.regularizers import l2

# L2-regularised first layer followed by dropout. The input width belongs on
# this first layer. It is given a ReLU activation: without a non-linearity it
# would merge into the next Dense layer as a single linear map.
# NOTE(review): ReLU chosen to match the other hidden layers - confirm.
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu',
                kernel_regularizer=l2(0.01)))
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dense(512, activation='relu'))
model.add(Dense(1, activation='sigmoid'))


from tensorflow.keras.optimizers import SGD

# Plain SGD with a momentum term of 0.9.
opt = SGD(learning_rate=0.01, momentum=0.9)

model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Train the model; the held-out split is used only for monitoring.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)

# Make predictions (sigmoid outputs, one column)
y_pred = model.predict(X_test)

# Evaluate the predicted probabilities against the true labels
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Mean Squared Error: ", mse)
print("Mean Absolute Error: ", mae)