ClimateWins Deep Learning Hyperparameter Optimization
Exercise 2.4 - Part 2: CNN/RNN Bayesian Optimization

Task: Find optimized hyperparameters for Deep Learning models to predict 
      safe flying days for Air Ambulance company using Bayesian Optimization

Author: Data Science Bootcamp - Exercise 2.4
Estimated Time: 2-3 hours (includes 30-60 min optimization runtime)

In [1]:
# ============================================================================
# SECTION 1: IMPORTS AND SETUP
# ============================================================================
print("="*80)
print("SECTION 1: IMPORTS AND SETUP FOR DEEP LEARNING OPTIMIZATION")
print("="*80)

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

# Standard ML libraries
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.metrics import make_scorer, accuracy_score, confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler
from math import floor

# NEW LIBRARIES FOR BAYESIAN OPTIMIZATION (as per task requirements)
print("\n→ Importing Keras and optimization libraries...")

# IMPORTANT: Install these libraries first if not already installed:
# pip install tensorflow
# pip install bayesian-optimization
# pip install scikeras

# Modern TensorFlow/Keras imports (TensorFlow 2.x)
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, Dense, Dropout, BatchNormalization, Flatten, MaxPooling1D
from tensorflow.keras.layers import LSTM, SimpleRNN, LeakyReLU
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adadelta, Adagrad, Adamax, Nadam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# For TensorFlow 2.x, use scikeras instead of keras.wrappers
try:
    from scikeras.wrappers import KerasClassifier
    print("✓ Using scikeras (TensorFlow 2.x compatible)")
except ImportError:
    print("⚠ scikeras not found. Installing...")
    import subprocess
    subprocess.check_call(['pip', 'install', 'scikeras'])
    from scikeras.wrappers import KerasClassifier

# Bayesian Optimization
try:
    from bayes_opt import BayesianOptimization
    print("✓ Bayesian Optimization library loaded")
except ImportError:
    print("⚠ bayesian-optimization not found. Installing...")
    import subprocess
    subprocess.check_call(['pip', 'install', 'bayesian-optimization'])
    from bayes_opt import BayesianOptimization

# Utility for checking target type
from sklearn.utils.multiclass import type_of_target

import time

# Set random seeds for reproducibility
np.random.seed(42)
import tensorflow as tf
tf.random.set_seed(42)

# Set plotting style
plt.style.use('fivethirtyeight')
sns.set_palette("husl")
pd.set_option("display.max_columns", None)

print("✓ All libraries imported successfully")
print("✓ Keras and Bayesian Optimization libraries loaded")


SECTION 1: IMPORTS AND SETUP FOR DEEP LEARNING OPTIMIZATION

→ Importing Keras and optimization libraries...
✓ Using scikeras (TensorFlow 2.x compatible)
✓ Bayesian Optimization library loaded
✓ All libraries imported successfully
✓ Keras and Bayesian Optimization libraries loaded


In [2]:
# ============================================================================
# SECTION 2: LOAD THE CLEANED DATA FROM EXERCISE 2.2
# ============================================================================
print("\n" + "="*80)
print("SECTION 2: LOADING CLEANED DATA FROM EXERCISE 2.2")
print("="*80)

# Define file paths
data_path = '/Users/davidscheider/anaconda_projects/ClimateWins/02 Data/Original Data/'
cleaned_file = 'Dataset-weather-prediction-dataset-CLEANED.csv'
pleasant_file = 'Dataset-Answers-Weather_Prediction_Pleasant_Weather.csv'

# Load the cleaned weather data
print("\n→ Loading cleaned weather observations...")
df_weather = pd.read_csv(data_path + cleaned_file)
print(f"✓ Weather data loaded: {df_weather.shape}")

# Load pleasant weather labels
df_pleasant = pd.read_csv(data_path + pleasant_file)
print(f"✓ Pleasant weather labels loaded: {df_pleasant.shape}")

# Merge data
df_merged = pd.merge(df_weather, df_pleasant, on='DATE', how='inner')
print(f"✓ Merged data shape: {df_merged.shape}")

# Get pleasant weather columns
pleasant_cols = [col for col in df_merged.columns if 'pleasant_weather' in col]
print(f"✓ Found {len(pleasant_cols)} weather stations")

# Create target variable (majority vote)
df_merged['pleasant_weather_majority'] = (df_merged[pleasant_cols].mean(axis=1) >= 0.5).astype(int)

print(f"\n✓ Target variable distribution:")
print(f"  Pleasant days: {df_merged['pleasant_weather_majority'].sum()}")
print(f"  Not pleasant days: {(df_merged['pleasant_weather_majority'] == 0).sum()}")
print(f"  Percentage pleasant: {df_merged['pleasant_weather_majority'].mean()*100:.1f}%")


SECTION 2: LOADING CLEANED DATA FROM EXERCISE 2.2

→ Loading cleaned weather observations...
✓ Weather data loaded: (22950, 137)
✓ Pleasant weather labels loaded: (22950, 16)
✓ Merged data shape: (22950, 152)
✓ Found 15 weather stations

✓ Target variable distribution:
  Pleasant days: 3993
  Not pleasant days: 18957
  Percentage pleasant: 17.4%


In [3]:
# ============================================================================
# SECTION 3: PREPARE DATA FOR DEEP LEARNING
# ============================================================================
print("\n" + "="*80)
print("SECTION 3: PREPARING DATA FOR DEEP LEARNING MODEL")
print("="*80)

# Extract date information
df_merged['DATE'] = pd.to_datetime(df_merged['DATE'], format='%Y%m%d')
df_merged['year'] = df_merged['DATE'].dt.year

print(f"✓ Date range: {df_merged['year'].min()} to {df_merged['year'].max()}")

# Separate features and target
exclude_cols = ['pleasant_weather_majority', 'DATE', 'date', 'year', 'MONTH', 'PADDING_0', 'PADDING_1']
exclude_cols.extend(pleasant_cols)

feature_cols = [col for col in df_merged.columns 
                if col not in exclude_cols and df_merged[col].dtype in ['int64', 'float64']]

X = df_merged[feature_cols].copy()
y = df_merged['pleasant_weather_majority'].copy()

# Handle missing values
X = X.fillna(X.mean())
y = y.fillna(y.mode()[0])

print(f"\n✓ Initial shapes:")
print(f"  X: {X.shape}")
print(f"  y: {y.shape}")



SECTION 3: PREPARING DATA FOR DEEP LEARNING MODEL
✓ Date range: 1960 to 2022

✓ Initial shapes:
  X: (22950, 133)
  y: (22950,)


In [4]:
# ============================================================================
# SECTION 4: RESHAPE DATA FOR DEEP LEARNING (REQUIRED FORMAT)
# ============================================================================
print("\n" + "="*80)
print("SECTION 4: RESHAPING DATA TO REQUIRED FORMAT")
print("="*80)

print("\n→ Task requirement: Reshape X to (observations, timesteps, features)")
print("  Target shape: (22950, 15, 9)")
print("  This represents: 15 weather stations with 9 measurements each")

# Determine actual dimensions from our data
n_observations = len(X)
n_total_features = len(feature_cols)

print(f"\n✓ Current data dimensions:")
print(f"  Observations: {n_observations}")
print(f"  Total features: {n_total_features}")

# Calculate appropriate reshaping
# We need to reshape into (observations, timesteps, features)
# Assuming timesteps = number of stations, features = measurements per station

# Try to identify station structure
# Count how many unique station prefixes we have in feature names
station_prefixes = set()
for col in feature_cols:
    if '_' in col:
        station_prefixes.add(col.split('_')[0])

n_stations = len(station_prefixes) if len(station_prefixes) > 0 else 15
n_features_per_station = n_total_features // n_stations if n_stations > 0 else 9

print(f"\n✓ Detected structure:")
print(f"  Estimated stations (timesteps): {n_stations}")
print(f"  Features per station: {n_features_per_station}")

# Reshape X for CNN/RNN
# Format: (observations, timesteps, features)
timesteps = n_stations
input_dim = n_features_per_station

# Ensure we can reshape properly
if n_total_features % timesteps != 0:
    print(f"\n⚠ Warning: {n_total_features} features cannot be evenly divided by {timesteps} timesteps")
    print(f"→ Adjusting to nearest valid configuration...")
    
    # Pad or trim features to make it divisible
    target_features = timesteps * input_dim
    if n_total_features < target_features:
        # Pad with zeros
        padding_needed = target_features - n_total_features
        X_padded = np.zeros((n_observations, target_features))
        X_padded[:, :n_total_features] = X.values
        X = pd.DataFrame(X_padded)
        print(f"✓ Added {padding_needed} zero-padded features")
    else:
        # Trim excess features
        X = X.iloc[:, :target_features]
        print(f"✓ Trimmed to {target_features} features")

# Now reshape
X_reshaped = X.values.reshape(n_observations, timesteps, input_dim)

print(f"\n✓ Reshaped X to: {X_reshaped.shape}")
print(f"  Format: (observations={X_reshaped.shape[0]}, timesteps={X_reshaped.shape[1]}, features={X_reshaped.shape[2]})")



SECTION 4: RESHAPING DATA TO REQUIRED FORMAT

→ Task requirement: Reshape X to (observations, timesteps, features)
  Target shape: (22950, 15, 9)
  This represents: 15 weather stations with 9 measurements each

✓ Current data dimensions:
  Observations: 22950
  Total features: 133

✓ Detected structure:
  Estimated stations (timesteps): 15
  Features per station: 8

→ Adjusting to nearest valid configuration...
✓ Trimmed to 120 features

✓ Reshaped X to: (22950, 15, 8)
  Format: (observations=22950, timesteps=15, features=8)


In [5]:
# ============================================================================
# SECTION 5: PREPARE TARGET VARIABLE FOR BAYESIAN OPTIMIZATION
# ============================================================================
print("\n" + "="*80)
print("SECTION 5: PREPARING TARGET VARIABLE (CRITICAL FOR BAYESIAN OPTIMIZATION)")
print("="*80)

# Check current type of target
print(f"\n→ Checking target variable format...")
print(f"  Current y shape: {y.shape}")
print(f"  Current y type: {type_of_target(y)}")

# Task requirement: Bayesian optimization needs "multiclass" or "binary" format
# NOT "multilabel-indicator" format

# Our target should already be in correct format (single column with class labels)
Y_for_optimization = y.values

print(f"\n✓ Target for Bayesian Optimization:")
print(f"  Shape: {Y_for_optimization.shape}")
print(f"  Type: {type_of_target(Y_for_optimization)}")
print(f"  Unique values: {np.unique(Y_for_optimization)}")

# Verify it's in correct format
assert type_of_target(Y_for_optimization) in ['binary', 'multiclass'], \
    "Target must be in 'binary' or 'multiclass' format for Bayesian optimization!"

print("✓ Target format verified: Ready for Bayesian Optimization")

# For the final model training, we'll need one-hot encoding
# But we'll do that AFTER optimization
Y_one_hot = to_categorical(y, num_classes=2)
print(f"\n✓ One-hot encoded target for final model: {Y_one_hot.shape}")


SECTION 5: PREPARING TARGET VARIABLE (CRITICAL FOR BAYESIAN OPTIMIZATION)

→ Checking target variable format...
  Current y shape: (22950,)
  Current y type: binary

✓ Target for Bayesian Optimization:
  Shape: (22950,)
  Type: binary
  Unique values: [0 1]
✓ Target format verified: Ready for Bayesian Optimization

✓ One-hot encoded target for final model: (22950, 2)


In [6]:
# ============================================================================
# SECTION 6: SPLIT DATA (BEFORE OPTIMIZATION)
# ============================================================================
print("\n" + "="*80)
print("SECTION 6: SPLITTING DATA INTO TRAIN/TEST SETS")
print("="*80)

# Split the data
# Use the reshaped X and non-one-hot y for optimization
X_train, X_test, Y_train, Y_test = train_test_split(
    X_reshaped, Y_for_optimization, 
    test_size=0.2, 
    random_state=42,
    stratify=Y_for_optimization
)

print(f"\n✓ Training set:")
print(f"  X_train: {X_train.shape}")
print(f"  Y_train: {Y_train.shape}")

print(f"\n✓ Testing set:")
print(f"  X_test: {X_test.shape}")
print(f"  Y_test: {Y_test.shape}")

# Verify target distribution
print(f"\n✓ Class distribution in training set:")
unique, counts = np.unique(Y_train, return_counts=True)
for val, count in zip(unique, counts):
    print(f"  Class {val}: {count} ({count/len(Y_train)*100:.1f}%)")



SECTION 6: SPLITTING DATA INTO TRAIN/TEST SETS

✓ Training set:
  X_train: (18360, 15, 8)
  Y_train: (18360,)

✓ Testing set:
  X_test: (4590, 15, 8)
  Y_test: (4590,)

✓ Class distribution in training set:
  Class 0: 15166 (82.6%)
  Class 1: 3194 (17.4%)


In [7]:
# ============================================================================
# SECTION 7: BASELINE MODEL (BEFORE OPTIMIZATION)
# ============================================================================
print("\n" + "="*80)
print("SECTION 7: BASELINE CNN MODEL (FROM EXERCISE 2.2)")
print("="*80)

print("\n→ Creating baseline CNN model...")

# Define baseline model architecture
def create_baseline_cnn():
    model = Sequential()
    
    # CNN layers
    model.add(Conv1D(
        filters=64,
        kernel_size=2,
        activation='relu',
        input_shape=(timesteps, input_dim)
    ))
    model.add(BatchNormalization())
    model.add(MaxPooling1D(pool_size=2))
    model.add(Dropout(0.3))
    
    # Dense layers
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.3))
    model.add(Dense(2, activation='softmax'))
    
    # Compile
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer=Adam(learning_rate=0.001),
        metrics=['accuracy']
    )
    
    return model

baseline_model = create_baseline_cnn()
print("\n✓ Baseline model architecture:")
baseline_model.summary()

# Train baseline model
print("\n→ Training baseline model...")
history_baseline = baseline_model.fit(
    X_train, Y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
    verbose=1
)

# Evaluate baseline
baseline_loss, baseline_accuracy = baseline_model.evaluate(X_test, Y_test, verbose=0)
print(f"\n✓ Baseline Model Performance:")
print(f"  Accuracy: {baseline_accuracy:.4f} ({baseline_accuracy*100:.2f}%)")
print(f"  Loss: {baseline_loss:.4f}")

# Baseline predictions
Y_pred_baseline = baseline_model.predict(X_test)
Y_pred_baseline_classes = np.argmax(Y_pred_baseline, axis=1)

print("\n✓ Baseline Classification Report:")
print(classification_report(Y_test, Y_pred_baseline_classes,
                           target_names=['Not Pleasant', 'Pleasant']))



SECTION 7: BASELINE CNN MODEL (FROM EXERCISE 2.2)

→ Creating baseline CNN model...

✓ Baseline model architecture:



→ Training baseline model...
Epoch 1/20
[1m459/459[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 871us/step - accuracy: 0.9018 - loss: 0.2229 - val_accuracy: 0.9202 - val_loss: 0.1680
Epoch 2/20
[1m459/459[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 683us/step - accuracy: 0.9215 - loss: 0.1792 - val_accuracy: 0.9248 - val_loss: 0.1610
Epoch 3/20
[1m459/459[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 686us/step - accuracy: 0.9257 - loss: 0.1695 - val_accuracy: 0.9262 - val_loss: 0.1584
Epoch 4/20
[1m459/459[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 697us/step - accuracy: 0.9274 - loss: 0.1644 - val_accuracy: 0.9267 - val_loss: 0.1562
Epoch 5/20
[1m459/459[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 697us/step - accuracy: 0.9265 - loss: 0.1627 - val_accuracy: 0.9278 - val_loss: 0.1556
Epoch 6/20
[1m459/459[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 700us/step - accuracy: 0.9291 - loss: 0.1573 - val_accuracy: 0.9292 - val

In [12]:
# ============================================================================
# SECTION 8: DEFINE BAYESIAN OPTIMIZATION FUNCTION
# ============================================================================
print("\n" + "="*80)
print("SECTION 8: SETTING UP BAYESIAN OPTIMIZATION FUNCTION")
print("="*80)

print("\n→ Creating Bayesian optimization function (this is complex!)...")

# Define scoring function for cross-validation
score_acc = make_scorer(accuracy_score)

# Define the Bayesian optimization function
# This function will be called by the Bayesian optimizer
def bayesian_optimization_function(neurons, activation, kernel, optimizer, 
                                   learning_rate, batch_size, epochs,
                                   layers1, layers2, normalization, 
                                   dropout, dropout_rate):
    """
    Function to optimize CNN hyperparameters using Bayesian Optimization.
    This creates a flexible CNN architecture and returns its cross-validated score.
    """
    
    # Define optimizer options (removing Ftrl as it's less common)
    optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Adam']
    optimizerD = {
        'Adam': Adam(learning_rate=learning_rate),
        'SGD': SGD(learning_rate=learning_rate),
        'RMSprop': RMSprop(learning_rate=learning_rate),
        'Adadelta': Adadelta(learning_rate=learning_rate),
        'Adagrad': Adagrad(learning_rate=learning_rate),
        'Adamax': Adamax(learning_rate=learning_rate),
        'Nadam': Nadam(learning_rate=learning_rate)
    }
    
    # Define activation options
    activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 
                   'selu', 'elu', 'exponential', 'relu']
    
    # Round hyperparameters to integers where needed
    neurons = round(neurons)
    kernel = round(kernel)
    activation_idx = round(activation)
    optimizer_idx = round(optimizer)
    batch_size = round(batch_size)
    epochs = round(epochs)
    layers1 = round(layers1)
    layers2 = round(layers2)
    
    # Select activation and optimizer
    activation_func = activationL[activation_idx]
    optimizer_func = optimizerD[optimizerL[optimizer_idx]]
    
    # Define the CNN model builder
    def cnn_model():
        model = Sequential()
        
        # Initial Conv1D layer
        model.add(Conv1D(
            neurons,
            kernel_size=kernel,
            activation=activation_func,
            input_shape=(timesteps, input_dim)
        ))
        
        # Batch normalization (if enabled)
        if normalization > 0.5:
            model.add(BatchNormalization())
        
        # Additional dense layers (layers1)
        for i in range(layers1):
            model.add(Dense(neurons, activation=activation_func))
        
        # Dropout (if enabled)
        if dropout > 0.5:
            model.add(Dropout(dropout_rate, seed=123))
        
        # More dense layers (layers2)
        for i in range(layers2):
            model.add(Dense(neurons, activation=activation_func))
        
        # Pooling and flattening
        model.add(MaxPooling1D())
        model.add(Flatten())
        
        # Output layer
        model.add(Dense(2, activation='softmax'))
        
        # Compile model
        model.compile(
            loss='sparse_categorical_crossentropy',
            optimizer=optimizer_func,
            metrics=['accuracy']
        )
        
        return model
    
    # Early stopping callback
    es = EarlyStopping(
        monitor='accuracy',
        mode='max',
        verbose=0,
        patience=20
    )
    
    # Create Keras classifier (scikeras syntax)
    nn = KerasClassifier(
        model=cnn_model,
        epochs=epochs,
        batch_size=batch_size,
        verbose=0,
        callbacks=[es]
    )
    
    # Stratified K-Fold cross-validation
    kfold = StratifiedKFold(n_splits=3, shuffle=True, random_state=123)
    
    # Calculate cross-validated score
    try:
        score = cross_val_score(
            nn, X_train, Y_train,
            scoring=score_acc,
            cv=kfold
        ).mean()
        return score
    except Exception as e:
        print(f"⚠ Error in optimization: {e}")
        return 0.0

print("✓ Bayesian optimization function defined successfully")



SECTION 8: SETTING UP BAYESIAN OPTIMIZATION FUNCTION

→ Creating Bayesian optimization function (this is complex!)...
✓ Bayesian optimization function defined successfully


In [13]:
# ============================================================================
# SECTION 9: DEFINE HYPERPARAMETER SEARCH SPACE
# ============================================================================
print("\n" + "="*80)
print("SECTION 9: DEFINING HYPERPARAMETER SEARCH SPACE")
print("="*80)

print("\n→ Setting up hyperparameter ranges for Bayesian search...")

# SMALL SEARCH SPACE (for quick testing - 5-10 minutes)
params_small = {
    'neurons': (10, 50),
    'kernel': (1, 2),
    'activation': (0, 5),
    'optimizer': (0, 3),
    'learning_rate': (0.001, 0.01),
    'batch_size': (32, 64),
    'epochs': (10, 20),
    'layers1': (1, 2),
    'layers2': (1, 2),
    'normalization': (0, 1),
    'dropout': (0, 1),
    'dropout_rate': (0.1, 0.3)
}

# MEDIUM SEARCH SPACE (recommended - 20-30 minutes)
params_medium = {
    'neurons': (10, 100),
    'kernel': (1, 3),
    'activation': (0, 9),
    'optimizer': (0, 7),
    'learning_rate': (0.001, 0.1),
    'batch_size': (32, 200),
    'epochs': (20, 50),
    'layers1': (1, 3),
    'layers2': (1, 3),
    'normalization': (0, 1),
    'dropout': (0, 1),
    'dropout_rate': (0, 0.3)
}

# LARGE SEARCH SPACE (for final optimization - 45-60 minutes)
params_large = {
    'neurons': (10, 100),
    'kernel': (1, 3),
    'activation': (0, 9),
    'optimizer': (0, 7),
    'learning_rate': (0.01, 1),
    'batch_size': (200, 1000),
    'epochs': (20, 100),
    'layers1': (1, 3),
    'layers2': (1, 3),
    'normalization': (0, 1),
    'dropout': (0, 1),
    'dropout_rate': (0, 0.3)
}

# SELECT WHICH PARAMETER SPACE TO USE
# Start with 'small' for testing, then use 'medium' or 'large' for final run
PARAM_SPACE = 'small'  # Change to 'medium' or 'large' for better results

if PARAM_SPACE == 'small':
    params = params_small
    init_points = 3
    n_iter = 2
    print("✓ Using SMALL search space (quick test - ~5-10 minutes)")
elif PARAM_SPACE == 'medium':
    params = params_medium
    init_points = 10
    n_iter = 10
    print("✓ Using MEDIUM search space (recommended - ~20-30 minutes)")
else:
    params = params_large
    init_points = 15
    n_iter = 10
    print("✓ Using LARGE search space (full optimization - ~45-60 minutes)")

print(f"\n✓ Search parameters:")
print(f"  Initial points: {init_points}")
print(f"  Iterations: {n_iter}")
print(f"  Total evaluations: {init_points + n_iter}")

print(f"\n✓ Hyperparameter ranges:")
for param, (min_val, max_val) in params.items():
    print(f"  {param}: {min_val} to {max_val}")



SECTION 9: DEFINING HYPERPARAMETER SEARCH SPACE

→ Setting up hyperparameter ranges for Bayesian search...
✓ Using SMALL search space (quick test - ~5-10 minutes)

✓ Search parameters:
  Initial points: 3
  Iterations: 2
  Total evaluations: 5

✓ Hyperparameter ranges:
  neurons: 10 to 50
  kernel: 1 to 2
  activation: 0 to 5
  optimizer: 0 to 3
  learning_rate: 0.001 to 0.01
  batch_size: 32 to 64
  epochs: 10 to 20
  layers1: 1 to 2
  layers2: 1 to 2
  normalization: 0 to 1
  dropout: 0 to 1
  dropout_rate: 0.1 to 0.3


In [14]:
# ============================================================================
# SECTION 10: RUN BAYESIAN OPTIMIZATION
# ============================================================================
print("\n" + "="*80)
print("SECTION 10: RUNNING BAYESIAN OPTIMIZATION")
print("="*80)

print("\n" + "!"*70)
print("! IMPORTANT: This will take time! Grab a coffee/lunch!")
print(f"! Estimated time: {5 if PARAM_SPACE=='small' else 25 if PARAM_SPACE=='medium' else 50} minutes")
print("!"*70)

print("\n→ Starting Bayesian Optimization...")
start_time = time.time()

# Initialize Bayesian Optimization
nn_opt = BayesianOptimization(
    f=bayesian_optimization_function,
    pbounds=params,
    random_state=42,
    verbose=2
)

# Run optimization
try:
    nn_opt.maximize(
        init_points=init_points,
        n_iter=n_iter
    )
    optimization_successful = True
except StopIteration as e:
    print(f"\n⚠ StopIteration encountered: {e}")
    print("→ Checking if optimization completed partially...")
    optimization_successful = False
except Exception as e:
    print(f"\n⚠ Optimization error: {e}")
    optimization_successful = False

optimization_time = (time.time() - start_time) / 60

print(f"\n✓ Optimization completed in {optimization_time:.2f} minutes")



SECTION 10: RUNNING BAYESIAN OPTIMIZATION

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
! IMPORTANT: This will take time! Grab a coffee/lunch!
! Estimated time: 5 minutes
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

→ Starting Bayesian Optimization...
|   iter    |  target   |  neurons  |  kernel   | activa... | optimizer | learni... | batch_... |  epochs   |  layers1  |  layers2  | normal... |  dropout  | dropou... |
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
| [39m1        [39m | [39mnan      [39m | [39m24.981604[39m | [39m1.9507143[39m | [39m3.6599697[39m | [39m1.7959754[39m | [39m0.0024041[39m | [39m36.991824[39m | [39m10.580836[39m | [39m1.8661761[39m | [39m1.6011150[39m | [39m0.7080725[39m | [39m0.0205844[39m | [39m0.2939819[39m |
| [39m2        [39m | [39mnan      [39m 

In [15]:
# ============================================================================
# SECTION 11: EXTRACT BEST HYPERPARAMETERS
# ============================================================================
print("\n" + "="*80)
print("SECTION 11: BEST HYPERPARAMETERS FOUND")
print("="*80)

if optimization_successful or len(nn_opt.max['params']) > 0:
    best_params = nn_opt.max['params']
    best_score = nn_opt.max['target']
    
    print(f"\n✓ Best Cross-Validation Score: {best_score:.4f} ({best_score*100:.2f}%)")
    print(f"\n✓ Best Hyperparameters:")
    
    # Round parameters appropriately
    best_params_rounded = {
        'neurons': round(best_params['neurons']),
        'kernel': round(best_params['kernel']),
        'activation': round(best_params['activation']),
        'optimizer': round(best_params['optimizer']),
        'learning_rate': best_params['learning_rate'],
        'batch_size': round(best_params['batch_size']),
        'epochs': round(best_params['epochs']),
        'layers1': round(best_params['layers1']),
        'layers2': round(best_params['layers2']),
        'normalization': best_params['normalization'],
        'dropout': best_params['dropout'],
        'dropout_rate': best_params['dropout_rate']
    }
    
    for param, value in best_params_rounded.items():
        print(f"  {param}: {value}")
else:
    print("\n⚠ Optimization did not complete successfully")
    print("→ Using baseline parameters instead")
    best_params_rounded = {
        'neurons': 64,
        'kernel': 2,
        'activation': 0,  # relu
        'optimizer': 1,  # Adam
        'learning_rate': 0.001,
        'batch_size': 32,
        'epochs': 20,
        'layers1': 1,
        'layers2': 1,
        'normalization': 1.0,
        'dropout': 1.0,
        'dropout_rate': 0.3
    }



SECTION 11: BEST HYPERPARAMETERS FOUND

✓ Best Cross-Validation Score: nan (nan%)

✓ Best Hyperparameters:
  neurons: 25
  kernel: 2
  activation: 4
  optimizer: 2
  learning_rate: 0.002404167763981929
  batch_size: 37
  epochs: 11
  layers1: 2
  layers2: 2
  normalization: 0.7080725777960455
  dropout: 0.020584494295802447
  dropout_rate: 0.2939819704323989


In [16]:
# ============================================================================
# SECTION 12: BUILD OPTIMIZED MODEL
# ============================================================================
print("\n" + "="*80)
print("SECTION 12: BUILDING OPTIMIZED CNN MODEL")
print("="*80)

print("\n→ Creating optimized model with best hyperparameters...")

# Define optimizer and activation lists
optimizerL = ['SGD', 'Adam', 'RMSprop', 'Adadelta', 'Adagrad', 'Adamax', 'Nadam', 'Adam']
optimizerD = {
    'Adam': Adam(learning_rate=best_params_rounded['learning_rate']),
    'SGD': SGD(learning_rate=best_params_rounded['learning_rate']),
    'RMSprop': RMSprop(learning_rate=best_params_rounded['learning_rate']),
    'Adadelta': Adadelta(learning_rate=best_params_rounded['learning_rate']),
    'Adagrad': Adagrad(learning_rate=best_params_rounded['learning_rate']),
    'Adamax': Adamax(learning_rate=best_params_rounded['learning_rate']),
    'Nadam': Nadam(learning_rate=best_params_rounded['learning_rate'])
}

activationL = ['relu', 'sigmoid', 'softplus', 'softsign', 'tanh', 
               'selu', 'elu', 'exponential', 'relu']

# Get selected optimizer and activation
selected_optimizer = optimizerD[optimizerL[best_params_rounded['optimizer']]]
selected_activation = activationL[best_params_rounded['activation']]

print(f"✓ Using optimizer: {optimizerL[best_params_rounded['optimizer']]}")
activation_name = activationL[best_params_rounded['activation']]
print(f"✓ Using activation: {activation_name}")

# Build optimized model
optimized_model = Sequential()

# Initial Conv1D layer
optimized_model.add(Conv1D(
    best_params_rounded['neurons'],
    kernel_size=best_params_rounded['kernel'],
    activation=selected_activation,
    input_shape=(timesteps, input_dim)
))

# Batch normalization
if best_params_rounded['normalization'] > 0.5:
    optimized_model.add(BatchNormalization())
    print("✓ Added Batch Normalization")

# Additional dense layers (layers1)
for i in range(best_params_rounded['layers1']):
    optimized_model.add(Dense(best_params_rounded['neurons'], activation=selected_activation))
print(f"✓ Added {best_params_rounded['layers1']} dense layer(s) (phase 1)")

# Dropout
if best_params_rounded['dropout'] > 0.5:
    optimized_model.add(Dropout(best_params_rounded['dropout_rate']))
    print(f"✓ Added Dropout ({best_params_rounded['dropout_rate']})")

# More dense layers (layers2)
for i in range(best_params_rounded['layers2']):
    optimized_model.add(Dense(best_params_rounded['neurons'], activation=selected_activation))
print(f"✓ Added {best_params_rounded['layers2']} dense layer(s) (phase 2)")

# Pooling and flattening
optimized_model.add(MaxPooling1D())
optimized_model.add(Flatten())

# Output layer
optimized_model.add(Dense(2, activation='softmax'))

# Compile
optimized_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=selected_optimizer,
    metrics=['accuracy']
)

print("\n✓ Optimized model architecture:")
optimized_model.summary()



SECTION 12: BUILDING OPTIMIZED CNN MODEL

→ Creating optimized model with best hyperparameters...
✓ Using optimizer: RMSprop
✓ Using activation: tanh
✓ Added Batch Normalization
✓ Added 2 dense layer(s) (phase 1)
✓ Added 2 dense layer(s) (phase 2)

✓ Optimized model architecture:


In [17]:
# ============================================================================
# SECTION 13: TRAIN OPTIMIZED MODEL
# ============================================================================
print("\n" + "="*80)
print("SECTION 13: TRAINING OPTIMIZED MODEL")
print("="*80)

print("\n→ Training optimized model...")
print(f"  Epochs: {best_params_rounded['epochs']}")
print(f"  Batch size: {best_params_rounded['batch_size']}")

history_optimized = optimized_model.fit(
    X_train, Y_train,
    epochs=best_params_rounded['epochs'],
    batch_size=best_params_rounded['batch_size'],
    validation_split=0.2,
    verbose=1
)

print("\n✓ Training complete!")



SECTION 13: TRAINING OPTIMIZED MODEL

→ Training optimized model...
  Epochs: 11
  Batch size: 37
Epoch 1/11
[1m397/397[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.9148 - loss: 0.1900 - val_accuracy: 0.9126 - val_loss: 0.2062
Epoch 2/11
[1m397/397[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 905us/step - accuracy: 0.9268 - loss: 0.1675 - val_accuracy: 0.9131 - val_loss: 0.1990
Epoch 3/11
[1m397/397[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 887us/step - accuracy: 0.9277 - loss: 0.1619 - val_accuracy: 0.9082 - val_loss: 0.2034
Epoch 4/11
[1m397/397[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 914us/step - accuracy: 0.9301 - loss: 0.1578 - val_accuracy: 0.9096 - val_loss: 0.1968
Epoch 5/11
[1m397/397[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 892us/step - accuracy: 0.9293 - loss: 0.1562 - val_accuracy: 0.9126 - val_loss: 0.1910
Epoch 6/11
[1m397/397[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 900us/

In [20]:
# ============================================================================
# SECTION 14: EVALUATE OPTIMIZED MODEL
# ============================================================================
print("\n" + "="*80)
print("SECTION 14: EVALUATING OPTIMIZED MODEL")
print("="*80)

# Evaluate on test set
optimized_loss, optimized_accuracy = optimized_model.evaluate(X_test, Y_test, verbose=0)

print(f"\n✓ Optimized Model Performance:")
print(f"  Accuracy: {optimized_accuracy:.4f} ({optimized_accuracy*100:.2f}%)")
print(f"  Loss: {optimized_loss:.4f}")

print(f"\n✓")


SECTION 14: EVALUATING OPTIMIZED MODEL

✓ Optimized Model Performance:
  Accuracy: 0.9176 (91.76%)
  Loss: 0.1923

✓
