In [3]:
# Question 9: Hyperparameter Optimization for Hidden Layers
# Description: Optimize the number of units in the hidden layers using grid search.

In [4]:
import tensorflow as tf
from tensorflow import keras
from sklearn.model_selection import GridSearchCV
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from scikeras.wrappers import KerasClassifier # For Keras models with scikit-learn

# 1. Build a synthetic binary-classification dataset (swap in your real data here).
# The keyword arguments below fix the dataset layout:
#   n_samples      -> how many rows are generated
#   n_features     -> how many columns each row has
#   n_informative  -> how many of those columns are informative
#   n_redundant    -> how many of those columns are redundant
# random_state pins the generator so every run produces the same data.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    random_state=42,
)

# Hold out 20% of the rows as a test set; the split is seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 2. Define a function to create the Keras model
# The scikeras wrapper calls this function to build a fresh model for every
# hyperparameter candidate, passing the candidate's 'n_units' value.
def create_model(n_units=32, meta=None):
    """Build and compile a single-hidden-layer binary classifier.

    Args:
        n_units: Number of neurons in the hidden (ReLU) layer.
        meta: Optional mapping of fit-time metadata. scikeras passes it
            automatically to build functions that declare a ``meta``
            parameter; its ``"n_features_in_"`` entry is used as the input
            width. When it is not supplied, the width falls back to the
            notebook-level ``X_train``.

    Returns:
        A compiled ``keras.Sequential`` model (Adam optimizer, binary
        cross-entropy loss, accuracy metric).
    """
    # Prefer the feature count reported by the wrapper so the builder does not
    # depend on notebook globals; keep the old global lookup as a fallback so
    # direct calls such as create_model(64) still work.
    if meta is not None and "n_features_in_" in meta:
        n_features = meta["n_features_in_"]
    else:
        n_features = X_train.shape[1]

    model = keras.Sequential([
        # Declare the input explicitly instead of passing `input_shape` to the
        # first Dense layer (deprecated for Sequential models in Keras 3).
        keras.Input(shape=(n_features,)),
        # Hidden layer with 'n_units' neurons and ReLU activation.
        keras.layers.Dense(n_units, activation='relu'),
        # Output layer with 1 neuron and sigmoid activation for binary classification.
        keras.layers.Dense(1, activation='sigmoid'),
    ])
    # Compile the model with Adam optimizer, binary cross-entropy loss, and accuracy metric.
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# 3. Create a KerasClassifier for use with GridSearchCV
# The KerasClassifier wrapper makes the Keras model compatible with scikit-learn's API.
# IMPORTANT: You need to install scikeras: pip install scikeras
# NOTE(review): the installed scikeras and scikit-learn releases must be
# compatible with each other. An "AttributeError: 'super' object has no
# attribute '__sklearn_tags__'" raised during fit looks like a scikeras release
# that predates scikit-learn's newer estimator-tags API -- confirm, then pin
# matching versions (for example an older scikit-learn, or a newer scikeras).
#
# The default hidden-layer width is given under the routed name
# 'model__n_units', the same key the search space below tunes, so the estimator
# carries a single setting for it rather than an extra un-prefixed 'n_units'.
keras_model = KerasClassifier(model=create_model, verbose=0, model__n_units=32)

# 4. Define the hyperparameter search space
# This dictionary specifies the hyperparameters and the values to explore.
# Keys prefixed with 'model__' are routed to create_model; the others are
# training options of the wrapper itself.
param_grid = {
    'model__n_units': [16, 32, 64, 128], # Number of units (neurons) in the hidden layer
    'batch_size': [32, 64],              # Batch size for training
    'epochs': [10, 20]                   # Number of training epochs
}

# 5. Set up the exhaustive search
# Every combination in param_grid is scored with 3-fold cross-validation.
grid_search = GridSearchCV(
    keras_model,
    param_grid,
    scoring='accuracy',  # metric being maximised (alternatives: e.g. neg_log_loss)
    cv=3,                # cross-validation folds per candidate
    verbose=2,           # report progress while the search runs
    n_jobs=-1,           # spread the work over all available CPU cores
)

# 6. Run the search
# Each candidate is trained and evaluated on the training split only.
# NOTE(review): the recorded run stopped right after the first message below
# with "AttributeError: 'super' object has no attribute '__sklearn_tags__'".
# This looks like a scikeras / scikit-learn version mismatch rather than a
# problem in this cell -- confirm the installed versions.
print("Starting Grid Search...")
grid_search.fit(X_train, y_train)
print("Grid Search completed.")

# 7. Report the winning configuration and its mean cross-validated accuracy
print("\n--- Grid Search Results ---")
print(f"Best parameters found: {grid_search.best_params_}")
print(f"Best cross-validation accuracy: {grid_search.best_score_:.4f}")

# 8. Evaluate the best model on the test set
# Retrieve the best estimator found by GridSearchCV; with the default
# refit behaviour it has already been retrained on the whole training split.
best_model = grid_search.best_estimator_
# score() on the scikit-learn-style wrapper returns one number (the accuracy),
# not a Keras-style (loss, accuracy) pair, so it must not be unpacked into two
# variables.
test_accuracy = best_model.score(X_test, y_test)
print(f"Test accuracy of the best model: {test_accuracy:.4f}")

Starting Grid Search...


AttributeError: 'super' object has no attribute '__sklearn_tags__'