<a href="https://colab.research.google.com/github/Jirtus-sanasam/MLP-Diabetes/blob/main/Diabetes7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [25]:
# Initial import
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [26]:
df = pd.read_csv("/content/diabetes_data2.csv")

In [27]:
# Separate the independent and dependent variable

X = df.drop("Outcome", axis=1)
Y = df['Outcome']

In [28]:
# Split full dataset into training and testing sets
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y,
    test_size=0.2,
    random_state=42,
    stratify=Y  # important for imbalanced medical data
)


In [29]:
# Split training data into train and validation
X_train, X_val, Y_train, Y_val = train_test_split(
    X_train, Y_train,
    test_size=0.2,
    random_state=42,
    stratify=Y_train
)


In [30]:
# Initialize scaler
scaler = StandardScaler()

# Fit only on training data
X_train_scaled = scaler.fit_transform(X_train)

# Transform validation and test data
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)


In [31]:
Y_train = np.array(Y_train)
Y_val = np.array(Y_val)
Y_test = np.array(Y_test)


In [32]:
# Data for GA hyperparameter tuning
X_ga_train = X_train_scaled
Y_ga_train = Y_train

X_ga_val = X_val_scaled
Y_ga_val = Y_val


In [38]:
import random
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
import numpy as np

class GeneticAlgorithmMLPTuner:
    def __init__(self, X_train, y_train, X_val, y_val, population_size=10, generations=10, mutation_rate=0.1):
        self.X_train = X_train
        self.y_train = y_train
        self.X_val = X_val
        self.y_val = y_val
        self.input_dim = X_train.shape[1]
        self.population_size = population_size
        self.generations = generations
        self.mutation_rate = mutation_rate

    def _create_individual(self):
        # Define ranges for hyperparameters
        num_hidden_layers = np.random.randint(1, 4)  # 1 to 3 hidden layers
        hidden_layer_sizes = [int(2**np.random.randint(4, 8)) for _ in range(num_hidden_layers)] # 16 to 128 units
        learning_rate = 10**np.random.uniform(-4, -2) # 0.0001 to 0.01
        dropout_rate = np.random.uniform(0.0, 0.5)
        activation = np.random.choice(['relu', 'tanh', 'sigmoid'])
        optimizer_choice = np.random.choice(['adam', 'rmsprop'])

        return {
            'num_hidden_layers': num_hidden_layers,
            'hidden_layer_sizes': hidden_layer_sizes,
            'learning_rate': learning_rate,
            'dropout_rate': dropout_rate,
            'activation': activation,
            'optimizer_choice': optimizer_choice
        }

    def _build_model(self, individual):
        model = Sequential()
        # The first hidden layer
        model.add(Dense(individual['hidden_layer_sizes'][0], activation=individual['activation'], input_dim=self.input_dim))
        if individual['dropout_rate'] > 0:
            model.add(Dropout(individual['dropout_rate']))

        # Additional hidden layers
        for i in range(1, individual['num_hidden_layers']):
            # Ensure the index 'i' is within the bounds of hidden_layer_sizes
            # This check becomes redundant with the fix in _crossover
            if i < len(individual['hidden_layer_sizes']):
                model.add(Dense(individual['hidden_layer_sizes'][i], activation=individual['activation']))
                if individual['dropout_rate'] > 0:
                    model.add(Dropout(individual['dropout_rate']))
            else:
                # This else block should ideally not be reached with the crossover fix
                # but provides robustness
                break

        model.add(Dense(1, activation='sigmoid')) # Binary classification

        if individual['optimizer_choice'] == 'adam':
            optimizer = Adam(learning_rate=individual['learning_rate'])
        else:
            optimizer = tf.keras.optimizers.RMSprop(learning_rate=individual['learning_rate'])

        model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
        return model

    def _evaluate_individual(self, individual):
        model = self._build_model(individual)
        model.fit(self.X_train, self.y_train, epochs=10, batch_size=32, verbose=0, validation_data=(self.X_val, self.y_val))
        loss, accuracy = model.evaluate(self.X_val, self.y_val, verbose=0)
        return accuracy

    def _select_parents(self, population, fitnesses):
        # Tournament selection
        tournament_size = 3
        parents = []
        for _ in range(2): # Select two parents
            contenders = np.random.choice(len(population), tournament_size, replace=False)
            winner_idx = contenders[np.argmax([fitnesses[i] for i in contenders])]
            parents.append(population[winner_idx])
        return parents

    def _crossover(self, parent1, parent2):
        child = {}
        # Ensure num_hidden_layers and hidden_layer_sizes come from the same parent
        if random.random() < 0.5:
            child['num_hidden_layers'] = parent1['num_hidden_layers']
            child['hidden_layer_sizes'] = parent1['hidden_layer_sizes']
        else:
            child['num_hidden_layers'] = parent2['num_hidden_layers']
            child['hidden_layer_sizes'] = parent2['hidden_layer_sizes']

        # Crossover other parameters independently
        for key in parent1.keys():
            if key not in ['num_hidden_layers', 'hidden_layer_sizes']:
                child[key] = random.choice([parent1[key], parent2[key]])
        return child

    def _mutate(self, individual):
        for key in individual.keys():
            if np.random.rand() < self.mutation_rate:
                if key == 'num_hidden_layers':
                    # When num_hidden_layers mutates, hidden_layer_sizes must be re-generated
                    new_num_hidden_layers = np.random.randint(1, 4)
                    individual['num_hidden_layers'] = new_num_hidden_layers
                    individual['hidden_layer_sizes'] = [int(2**np.random.randint(4, 8)) for _ in range(new_num_hidden_layers)]
                elif key == 'hidden_layer_sizes':
                    # If only hidden_layer_sizes mutates, re-generate based on current num_hidden_layers
                    individual[key] = [int(2**np.random.randint(4, 8)) for _ in range(individual['num_hidden_layers'])]
                elif key == 'learning_rate':
                    individual[key] = 10**np.random.uniform(-4, -2)
                elif key == 'dropout_rate':
                    individual[key] = np.random.uniform(0.0, 0.5)
                elif key == 'activation':
                    individual[key] = np.random.choice(['relu', 'tanh', 'sigmoid'])
                elif key == 'optimizer_choice':
                    individual[key] = np.random.choice(['adam', 'rmsprop'])
        return individual

    def evolve(self, verbose=1):
        population = [self._create_individual() for _ in range(self.population_size)]
        best_individual = None
        best_fitness = -np.inf

        for generation in range(self.generations):
            fitnesses = [self._evaluate_individual(individual) for individual in population]
            current_best_gen_fitness = max(fitnesses)
            current_best_gen_individual = population[np.argmax(fitnesses)]

            if current_best_gen_fitness > best_fitness:
                best_fitness = current_best_gen_fitness
                best_individual = current_best_gen_individual

            if verbose > 0:
                print(f"Generation {generation+1}/{self.generations} - Best Fitness: {best_fitness:.4f}")

            new_population = [best_individual] # Keep the best individual (elitism)

            while len(new_population) < self.population_size:
                parent1, parent2 = self._select_parents(population, fitnesses)
                child = self._crossover(parent1, parent2)
                child = self._mutate(child)
                new_population.append(child)

            population = new_population

        return best_individual, best_fitness

In [None]:
ga_param_grid = {
    'population_size': [10, 15, 20],
    'generations': [5, 8, 10],
    'mutation_rate': [0.1, 0.2, 0.3]
}

ga_param_tuner = GAParameterTuner(
    X_train, Y_train,
    X_val, Y_val
)

best_ga_params, ga_score = ga_param_tuner.grid_search(
    ga_param_grid,
    n_trials=2
)

print("\nBest GA Parameters:")
print(best_ga_params)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **

GA Params: {'population_size': 10, 'generations': 5, 'mutation_rate': 0.1} → Avg Fitness: 0.7520


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **

GA Params: {'population_size': 10, 'generations': 5, 'mutation_rate': 0.2} → Avg Fitness: 0.7276


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **