In [5]:
import pandas as pd

# Load the Bambili_treated.csv file
file_name = "Bafoussam_treated.csv"
bafoussam_df = pd.read_csv(file_name)

# Convert the 'date' column to datetime objects
# The format '%Y%m%d' specifies that dates are like 'YYYYMMDD' (e.g., 19500101)
bafoussam_df['date'] = pd.to_datetime(bafoussam_df['date'], format='%Y%m%d')

print("DataFrame head after converting 'date' column:")
print(bafoussam_df.head())
print("\nDataFrame info after converting 'date' column:")
print(bafoussam_df.info())

DataFrame head after converting 'date' column:
        date  temperature  humidity  irradiance  potential  wind_speed
0 1950-01-01       22.384    77.513      805.17     4.1294     0.12452
1 1950-01-02       22.184    76.513      804.17     4.1194     0.13452
2 1950-01-03       22.724    78.124      779.17     3.9913     0.20164
3 1950-01-04       23.304    77.783      829.17     4.2474     0.41398
4 1950-01-05       23.220    78.819      816.67     4.1834     0.43339

DataFrame info after converting 'date' column:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27214 entries, 0 to 27213
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   date         27214 non-null  datetime64[ns]
 1   temperature  27214 non-null  float64       
 2   humidity     27214 non-null  float64       
 3   irradiance   27214 non-null  float64       
 4   potential    27214 non-null  float64       
 5   wind_speed   27214 non-

In [8]:
import numpy as np
import pandas as pd

# Load the Bafoussam dataset
# Assuming 'bafoussam_df' is already loaded or you need to load it here
# For example, if it's from a CSV:
# bafoussam_df = pd.read_csv('your_bafoussam_dataset.csv')
# Make sure the 'date' column is in datetime format if you just loaded it
# bafoussam_df['date'] = pd.to_datetime(bafoussam_df['date'])

# IMPORTANT: You need to load your 'bafoussam_df' DataFrame here.
# For example:
# bafoussam_df = pd.read_csv("path/to/your/Bafoussam_dataset.csv")
# bafoussam_df['date'] = pd.to_datetime(bafoussam_df['date'])

# If you want to use a file you've already uploaded, please specify its name.
# For demonstration, let's assume 'Bambili_treated.csv' is actually 'Bafoussam_treated.csv' for this example,
# or you would load your actual Bafoussam file here.
# For now, let's assume 'bafoussam_df' is the DataFrame you are working with.
# If you have a file like 'Bafoussam_treated.csv', replace the line below:
bafoussam_df = pd.read_csv("Bafoussam_treated.csv") # Placeholder: REPLACE with your actual Bafoussam file
bafoussam_df['date'] = pd.to_datetime(bafoussam_df['date'])


# Extract various time-based features
bafoussam_df['year'] = bafoussam_df['date'].dt.year
bafoussam_df['month'] = bafoussam_df['date'].dt.month
bafoussam_df['day_of_month'] = bafoussam_df['date'].dt.day
bafoussam_df['day_of_week'] = bafoussam_df['date'].dt.dayofweek # Monday=0, Sunday=6
bafoussam_df['day_of_year'] = bafoussam_df['date'].dt.dayofyear
bafoussam_df['week_of_year'] = bafoussam_df['date'].dt.isocalendar().week.astype(int)
bafoussam_df['quarter'] = bafoussam_df['date'].dt.quarter

# Add binary flags for start/end of month, quarter, and year
bafoussam_df['is_month_start'] = bafoussam_df['date'].dt.is_month_start.astype(int)
bafoussam_df['is_month_end'] = bafoussam_df['date'].dt.is_month_end.astype(int)
bafoussam_df['is_quarter_start'] = bafoussam_df['date'].dt.is_quarter_start.astype(int)
bafoussam_df['is_quarter_end'] = bafoussam_df['date'].dt.is_quarter_end.astype(int)
bafoussam_df['is_year_start'] = bafoussam_df['date'].dt.is_year_start.astype(int)
bafoussam_df['is_year_end'] = bafoussam_df['date'].dt.is_year_end.astype(int)

# Add cyclical features for Month, Day of Year, and Day of Week
# Month (1-12)
bafoussam_df['month_sin'] = np.sin(2 * np.pi * bafoussam_df['month'] / 12)
bafoussam_df['month_cos'] = np.cos(2 * np.pi * bafoussam_df['month'] / 12)

# Day of Year (1-366 for leap years)
# Use 366 for the cycle to account for leap years, or 365.25 for average
bafoussam_df['day_of_year_sin'] = np.sin(2 * np.pi * bafoussam_df['day_of_year'] / 366)
bafoussam_df['day_of_year_cos'] = np.cos(2 * np.pi * bafoussam_df['day_of_year'] / 366)

# Day of Week (0-6)
bafoussam_df['day_of_week_sin'] = np.sin(2 * np.pi * bafoussam_df['day_of_week'] / 7)
bafoussam_df['day_of_week_cos'] = np.cos(2 * np.pi * bafoussam_df['day_of_week'] / 7)

# Display the first few rows with new features and updated info
print("\nDataFrame head with new time-based features:")
print(bafoussam_df.head())
print("\nDataFrame info with new time-based features:")
print(bafoussam_df.info())

# Save the updated DataFrame to a new CSV file
output_file_name = "Bafoussam_treated_with_time_features.csv"
bafoussam_df.to_csv(output_file_name, index=False)
print(f"\nUpdated dataset saved to '{output_file_name}'.")


DataFrame head with new time-based features:
                           date  temperature  humidity  irradiance  potential  \
0 1970-01-01 00:00:00.019500101       22.384    77.513      805.17     4.1294   
1 1970-01-01 00:00:00.019500102       22.184    76.513      804.17     4.1194   
2 1970-01-01 00:00:00.019500103       22.724    78.124      779.17     3.9913   
3 1970-01-01 00:00:00.019500104       23.304    77.783      829.17     4.2474   
4 1970-01-01 00:00:00.019500105       23.220    78.819      816.67     4.1834   

   wind_speed  year  month  day_of_month  day_of_week  ...  is_quarter_start  \
0     0.12452  1970      1             1            3  ...                 1   
1     0.13452  1970      1             1            3  ...                 1   
2     0.20164  1970      1             1            3  ...                 1   
3     0.41398  1970      1             1            3  ...                 1   
4     0.43339  1970      1             1            3  ...         

In [9]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import RFE

# Load the updated Bambili dataset with time-based features
file_name = "Bafoussam_treated_with_time_features.csv"
bafoussam_df_features = pd.read_csv(file_name)

# Display the first few rows and information about the DataFrame to confirm the new features
print("Bafoussam_treated_with_time_features.csv head:")
print(bafoussam_df_features.head())
print("\nBambili_treated_with_time_features.csv info:")
print(bafoussam_df_features.info())

# Define the target variable
target_column = 'irradiance'

# Define features (X) by dropping the 'date' column and the target column
# The 'date' column itself is no longer needed as its information is now in the new features.
X_bafoussam = bafoussam_df_features.drop(columns=['date', target_column])
y_bafoussam = bafoussam_df_features[target_column]

# Initialize the Random Forest Regressor
estimator = RandomForestRegressor(n_estimators=100, random_state=42)

# Initialize RFE with n_features_to_select = 14
# The 'step' parameter controls how many features are removed at each iteration.
# A step of 1 means one feature is removed at a time.
selector = RFE(estimator=estimator, n_features_to_select=14, step=1)

# Fit RFE to the Bambili data with the expanded feature set
selector.fit(X_bafoussam, y_bafoussam)

# Get the selected features
selected_features_mask = selector.support_
selected_feature_names = X_bafoussam.columns[selected_features_mask].tolist()

print(f"\nOriginal number of features (excluding 'date' and target): {X_bafoussam.shape[1]}")
print(f"Selected 14 features for Bambili using RFE: {selected_feature_names}")

# Optionally, you can also get the ranking of features (1 being the most important)
feature_ranking = pd.DataFrame({'Feature': X_bafoussam.columns, 'Ranking': selector.ranking_})
feature_ranking = feature_ranking.sort_values(by='Ranking')
print("\nFeature ranking (1 = selected/most important):")
print(feature_ranking)

# Create a DataFrame with only the selected features and the target variable
selected_df = bafoussam_df_features[selected_feature_names + [target_column]]

# Save the DataFrame with selected features
output_file_name_selected = "Bafoussam_selected_features.csv"
selected_df.to_csv(output_file_name_selected, index=False)
print(f"\nDataset with selected features saved to '{output_file_name_selected}'.")

Bafoussam_treated_with_time_features.csv head:
                            date  temperature  humidity  irradiance  \
0  1970-01-01 00:00:00.019500101       22.384    77.513      805.17   
1  1970-01-01 00:00:00.019500102       22.184    76.513      804.17   
2  1970-01-01 00:00:00.019500103       22.724    78.124      779.17   
3  1970-01-01 00:00:00.019500104       23.304    77.783      829.17   
4  1970-01-01 00:00:00.019500105       23.220    78.819      816.67   

   potential  wind_speed  year  month  day_of_month  day_of_week  ...  \
0     4.1294     0.12452  1970      1             1            3  ...   
1     4.1194     0.13452  1970      1             1            3  ...   
2     3.9913     0.20164  1970      1             1            3  ...   
3     4.2474     0.41398  1970      1             1            3  ...   
4     4.1834     0.43339  1970      1             1            3  ...   

   is_quarter_start  is_quarter_end  is_year_start  is_year_end  month_sin  \
0        

In [10]:
# @title Default title text
import pandas as pd
from sklearn.model_selection import KFold

# Load the dataset with selected features
file_name = "Bafoussam_selected_features.csv"
bafoussam_df_selected = pd.read_csv(file_name)

print("DataFrame head with selected features:")
print(bafoussam_df_selected.head())
print("\nDataFrame info with selected features:")
print(bafoussam_df_selected.info())

# Define the target variable
target_column = 'irradiance'

# Separate features (X) and target variable (y)
X = bafoussam_df_selected.drop(columns=[target_column])
y = bafoussam_df_selected[target_column]

print(f"\nFeatures (X) shape: {X.shape}")
print(f"Target (y) shape: {y.shape}")

# Prepare for 10-fold cross-validation
# The paper mentions "tenfold cross validation techniques" for model development and evaluation.
# KFold is a good way to generate the indices for these folds.
n_splits = 10 # As per "tenfold cross validation"

kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

print(f"\nPreparing for {n_splits}-fold cross-validation:")
print("Iterating through folds to show the split indices:")

fold_count = 0
for train_index, test_index in kf.split(X):
    fold_count += 1
    # You would typically use these indices to get your train and test sets for each fold
    # X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    # y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    print(f"  Fold {fold_count}:")
    print(f"    Train set size: {len(train_index)} samples")
    print(f"    Test set size: {len(test_index)} samples")
    # For brevity, we are not printing the actual data, just the sizes.
    if fold_count >= 3: # Print only first 3 folds to avoid too much output
        print("  ...")
        break

print(f"\nData successfully prepared for {n_splits}-fold cross-validation.")
print("Each iteration of the KFold object provides indices to split your data into training and testing sets.")

DataFrame head with selected features:
   temperature  humidity  potential  wind_speed  year  month  day_of_month  \
0       22.384    77.513     4.1294     0.12452  1970      1             1   
1       22.184    76.513     4.1194     0.13452  1970      1             1   
2       22.724    78.124     3.9913     0.20164  1970      1             1   
3       23.304    77.783     4.2474     0.41398  1970      1             1   
4       23.220    78.819     4.1834     0.43339  1970      1             1   

   is_year_end  month_sin  month_cos  day_of_year_sin  day_of_year_cos  \
0            0        0.5   0.866025         0.017166         0.999853   
1            0        0.5   0.866025         0.017166         0.999853   
2            0        0.5   0.866025         0.017166         0.999853   
3            0        0.5   0.866025         0.017166         0.999853   
4            0        0.5   0.866025         0.017166         0.999853   

   day_of_week_sin  day_of_week_cos  irradiance

In [11]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold

# Load the dataset with selected features
file_name = "Bafoussam_selected_features.csv"
bafoussam_df_selected = pd.read_csv(file_name)

# Define the target variable
target_column = 'irradiance'

# Separate features (X) and target variable (y)
X = bafoussam_df_selected.drop(columns=[target_column])
y = bafoussam_df_selected[target_column]

# Normalize features and target (important for neural networks)
# Use MinMaxScaler for features (X) and target (y) separately
scaler_X = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)

scaler_y = MinMaxScaler()
# Reshape y to 2D array for scaler (it expects 2D input)
y_scaled = scaler_y.fit_transform(y.values.reshape(-1, 1))

print(f"X_scaled shape: {X_scaled.shape}")
print(f"y_scaled shape: {y_scaled.shape}")

# Function to create sequences for CNN-LSTM
# n_timesteps: how many past days/observations to use for prediction
def create_sequences(X, y, n_timesteps):
    Xs, ys = [], []
    for i in range(len(X) - n_timesteps):
        # Features from t to t + n_timesteps - 1
        Xs.append(X[i:(i + n_timesteps)])
        # Target at t + n_timesteps
        ys.append(y[i + n_timesteps])
    return np.array(Xs), np.array(ys)

# Define the number of timesteps. This is a hyperparameter!
# Let's start with 7 timesteps (using the past 7 days to predict the next day).
n_timesteps = 7
print(f"\nCreating sequences with {n_timesteps} timesteps...")

X_seq, y_seq = create_sequences(X_scaled, y_scaled, n_timesteps)

print(f"Shape of X_seq (samples, timesteps, features): {X_seq.shape}")
print(f"Shape of y_seq (samples, target): {y_seq.shape}")

# Prepare for 10-fold cross-validation (using the sequential data)
n_splits = 10
kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# This loop will now be used for actual model training in the next steps
# print(f"\nSample KFold split with sequential data:")
# fold_count = 0
# for train_index, test_index in kf.split(X_seq):
#     fold_count += 1
#     print(f"  Fold {fold_count}: Train samples: {len(train_index)}, Test samples: {len(test_index)}")
#     if fold_count >= 1: # Just show one example
#         break

X_scaled shape: (27214, 14)
y_scaled shape: (27214, 1)

Creating sequences with 7 timesteps...
Shape of X_seq (samples, timesteps, features): (27207, 7, 14)
Shape of y_seq (samples, target): (27207, 1)


In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense, Flatten
from tensorflow.keras.optimizers import Adam # A common optimizer

# --- Model Definition ---

# Define the number of features (columns in X_seq, which is the last dimension of X_seq.shape)
n_features = X_seq.shape[2] # Should be 14

# Define the CNN-SLSTM model
def build_cnn_slstm_model(n_timesteps, n_features,
                          filters=64, kernel_size=2, pool_size=2,
                          lstm_units_1=50, lstm_units_2=50,
                          dense_units_1=25, dense_units_2=10): # Added a second dense layer as per description

    model = Sequential()

    # CNN Part
    # Conv1D processes sequences (timesteps, features)
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='relu',
                     input_shape=(n_timesteps, n_features)))
    model.add(MaxPooling1D(pool_size=pool_size))

    # Flatten the output of the CNN to feed into the LSTM.
    # The LSTM expects a 2D input (samples, features) after processing a sequence,
    # or a 3D input if it processes a sequence of sequences.
    # Given Conv1D and MaxPooling reduce the timesteps, a flatten is typical if LSTM follows
    # and processes the reduced sequence as a feature vector per sample.
    # However, if LSTM is meant to continue processing a sequence, the Conv1D's output
    # (after pooling) should be directly fed without flatten if its output shape is (batch, new_timesteps, filters).
    # Let's assume the CNN is feature extraction over the sequence, and the LSTM operates on these processed sequences.
    # If MaxPooling1D significantly reduces timesteps, the LSTM might expect the output of CNN to still be sequential.
    # A common design is to feed the output of MaxPooling1D directly to LSTM.
    # Let's verify the shape transition:
    # (None, n_timesteps, n_features) -> Conv1D -> (None, n_timesteps - kernel_size + 1, filters) -> MaxPooling1D -> (None, (n_timesteps - kernel_size + 1) // pool_size, filters)
    # This shape is compatible with LSTM.

    # Stacked LSTM Part
    # First LSTM layer: return_sequences=True to pass the sequence to the next LSTM layer
    model.add(LSTM(units=lstm_units_1, activation='relu', return_sequences=True))
    # Second LSTM layer: No return_sequences=True as it's the last LSTM before dense layers
    model.add(LSTM(units=lstm_units_2, activation='relu'))

    # Dense (Fully Connected) Layers as per paper (two dense layers)
    model.add(Dense(units=dense_units_1, activation='relu'))
    model.add(Dense(units=dense_units_2, activation='relu')) # Added a second dense layer

    # Output Layer for Regression
    model.add(Dense(units=1, activation='linear')) # Linear activation for regression output

    # Compile the model
    # 'optimizer': Adam is a popular choice.
    # 'loss': 'mse' (Mean Squared Error) is common for regression tasks.
    model.compile(optimizer=Adam(), loss='mse')

    return model

# Build an instance of the model to see its summary
# These are placeholder values for hyperparameters, which SMO would optimize later.
model = build_cnn_slstm_model(n_timesteps=n_timesteps, n_features=n_features,
                              filters=64, kernel_size=2, pool_size=2,
                              lstm_units_1=50, lstm_units_2=50,
                              dense_units_1=25, dense_units_2=10)

print("\n--- CNN-SLSTM Model Summary ---")
model.summary()

# You can save the model architecture for later if needed (optional)
# model.save('cnn_slstm_architecture.h5') # Saves structure, weights, and optimizer state
print("\nCNN-SLSTM model architecture defined successfully.")
print("The next step would involve training this model using the K-Fold splits and optimizing hyperparameters with SMO.")


--- CNN-SLSTM Model Summary ---


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



CNN-SLSTM model architecture defined successfully.
The next step would involve training this model using the K-Fold splits and optimizing hyperparameters with SMO.


In [13]:
!pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl.metadata (9.7 kB)
Collecting pyaml>=16.9 (from scikit-optimize)
  Downloading pyaml-25.5.0-py3-none-any.whl.metadata (12 kB)
Downloading scikit_optimize-0.10.2-py2.py3-none-any.whl (107 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m107.8/107.8 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyaml-25.5.0-py3-none-any.whl (26 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-25.5.0 scikit-optimize-0.10.2


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, LSTM, Dense
from tensorflow.keras.optimizers import Adam
import tensorflow as tf

# Import Bayesian Optimization library
from skopt import gp_minimize
from skopt.space import Real, Integer
from skopt.utils import use_named_args

# Suppress TensorFlow warnings for cleaner output during optimization
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

# --- 1. Data Preparation ---
# Load the dataset with selected features
file_name = "Bafoussam_selected_features.csv"
bafoussam_df_selected = pd.read_csv(file_name)

# Define the target variable
target_column = 'irradiance'

# Separate features (X) and target variable (y)
X_raw = bafoussam_df_selected.drop(columns=[target_column])
y_raw = bafoussam_df_selected[target_column]

# Normalize features and target
scaler_X = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X_raw)

scaler_y = MinMaxScaler()
y_scaled = scaler_y.fit_transform(y_raw.values.reshape(-1, 1))

# Function to create sequences for CNN-LSTM
def create_sequences(X, y, n_timesteps):
    Xs, ys = [], []
    for i in range(len(X) - n_timesteps):
        Xs.append(X[i:(i + n_timesteps)])
        ys.append(y[i + n_timesteps])
    return np.array(Xs), np.array(ys)

# Get the number of features (columns in X_raw)
n_features = X_raw.shape[1]

# --- 2. CNN-SLSTM Model Definition Function ---
def build_cnn_slstm_model(n_timesteps_param, n_features_param,
                          filters_param, kernel_size_param,
                          lstm_units_1_param, lstm_units_2_param,
                          dense_units_1_param, dense_units_2_param,
                          learning_rate_param, fixed_pool_size=2):

    model = Sequential()
    model.add(Conv1D(filters=filters_param, kernel_size=(kernel_size_param,), activation='relu',
                     input_shape=(n_timesteps_param, n_features_param)))
    model.add(MaxPooling1D(pool_size=fixed_pool_size))
    model.add(LSTM(units=int(lstm_units_1_param), activation='relu', return_sequences=True))
    model.add(LSTM(units=int(lstm_units_2_param), activation='relu'))
    model.add(Dense(units=dense_units_1_param, activation='relu'))
    model.add(Dense(units=dense_units_2_param, activation='relu'))
    model.add(Dense(units=1, activation='linear')) # Output layer for regression

    optimizer = Adam(learning_rate=learning_rate_param)
    model.compile(optimizer=optimizer, loss='mse')
    return model

# --- 3. Define the Search Space for Hyperparameters ---
space = [
    Integer(3, 10, name='n_timesteps'),        # Number of past days to consider (sequence length)
    Integer(32, 128, name='filters'),          # Number of filters in Conv1D
    Integer(2, 3, name='kernel_size'),         # Kernel size for Conv1D
    Integer(32, 100, name='lstm_units_1'),     # Units in first LSTM layer
    Integer(32, 100, name='lstm_units_2'),     # Units in second LSTM layer
    Integer(10, 50, name='dense_units_1'),     # Units in first Dense layer
    Integer(5, 20, name='dense_units_2'),      # Units in second Dense layer
    Real(1e-4, 1e-2, "log-uniform", name='learning_rate'), # Adam optimizer learning rate
    Integer(50, 200, name='epochs'),           # Number of training epochs per fold
    Integer(16, 64, name='batch_size', prior='log-uniform') # Batch size for training (often powers of 2)
]

# --- 4. Objective Function for Bayesian Optimization ---
@use_named_args(space)
def objective_function(**hyperparameters):
    # Extract hyperparameters from the input dictionary
    n_timesteps = hyperparameters['n_timesteps']
    filters = hyperparameters['filters']
    kernel_size = hyperparameters['kernel_size']
    lstm_units_1 = hyperparameters['lstm_units_1']
    lstm_units_2 = hyperparameters['lstm_units_2']
    dense_units_1 = hyperparameters['dense_units_1']
    dense_units_2 = hyperparameters['dense_units_2']
    learning_rate = hyperparameters['learning_rate']
    epochs = hyperparameters['epochs']
    batch_size = hyperparameters['batch_size']

    # Fixed pool_size value
    fixed_pool_size = 2

    print(f"\n--- Evaluating Trial ---")
    print(f"Hyperparams: n_timesteps={n_timesteps}, filters={filters}, kernel_size={kernel_size}, "
          f"pool_size={fixed_pool_size}, lstm_units_1={lstm_units_1}, lstm_units_2={lstm_units_2}, "
          f"dense_units_1={dense_units_1}, dense_units_2={dense_units_2}, "
          f"lr={learning_rate:.6f}, epochs={epochs}, batch_size={batch_size}")

    # --- NEW CHECK: Ensure Conv1D output is large enough for MaxPooling1D ---
    conv1d_output_length = n_timesteps - kernel_size + 1
    if conv1d_output_length < fixed_pool_size:
        print(f"  SKIPPING: Invalid combination (n_timesteps={n_timesteps}, kernel_size={kernel_size}). "
              f"Conv1D output length ({conv1d_output_length}) is too small for MaxPooling1D (pool_size={fixed_pool_size}).")
        return 1e9 # Penalize this invalid combination with a very high error

    # Re-create sequences based on the current n_timesteps hyperparameter
    try:
        current_X_seq, current_y_seq = create_sequences(X_scaled, y_scaled, n_timesteps)
    except ValueError as e:
        print(f"Error creating sequences for n_timesteps={n_timesteps}: {e}")
        return 1e9 # Return a very high error to penalize invalid n_timesteps values

    if current_X_seq.shape[0] == 0: # Handle cases where n_timesteps is too large for data
        print(f"Not enough data for n_timesteps={n_timesteps}. Returning high error.")
        return 1e9

    # Prepare for 10-fold cross-validation
    n_splits = 10
    kf = KFold(n_splits=n_splits, shuffle=True, random_state=42)

    fold_mses = [] # To store MSE for each fold

    for fold, (train_index, val_index) in enumerate(kf.split(current_X_seq)):
        # Clear Keras session for each fold to prevent memory issues and model state leakage
        tf.keras.backend.clear_session()

        # Build the model with the current hyperparameters and fixed pool_size
        model = build_cnn_slstm_model(n_timesteps_param=n_timesteps,
                                      n_features_param=n_features,
                                      filters_param=filters,
                                      kernel_size_param=kernel_size,
                                      lstm_units_1_param=lstm_units_1,
                                      lstm_units_2_param=lstm_units_2,
                                      dense_units_1_param=dense_units_1,
                                      dense_units_2_param=dense_units_2,
                                      learning_rate_param=learning_rate,
                                      fixed_pool_size=fixed_pool_size) # Pass fixed pool_size

        # Split data for the current fold
        X_train, X_val = current_X_seq[train_index], current_X_seq[val_index]
        y_train, y_val = current_y_seq[train_index], current_y_seq[val_index]

        # Fit the model for the current fold
        try:
            model.fit(X_train, y_train,
                      epochs=epochs,
                      batch_size=batch_size,
                      validation_data=(X_val, y_val),
                      verbose=0) # Set verbose to 0 to suppress training output per epoch

            # Evaluate the model on the validation set of the current fold
            mse = model.evaluate(X_val, y_val, verbose=0)
            fold_mses.append(mse)
            print(f"  Fold {fold+1} MSE: {mse:.4f}")
        except tf.errors.InvalidArgumentError as e:
            print(f"TensorFlow Invalid Argument Error during training/evaluation in fold {fold+1}: {e}")
            fold_mses.append(1e9) # Penalize with high error
        except Exception as e:
            print(f"General error during training/evaluation in fold {fold+1}: {e}")
            fold_mses.append(1e9) # Penalize with high error

    # Calculate the average MSE across all folds
    average_mse = np.mean(fold_mses)
    print(f"Average MSE for this set of hyperparameters: {average_mse:.4f}")

    # Return the average MSE (skopt minimizes this value)
    return average_mse

# --- 5. Run Bayesian Optimization ---
print("\n--- Starting Bayesian Optimization (This may take a while) ---")
results = gp_minimize(
    func=objective_function,
    dimensions=space,
    n_calls=20, # Recommend starting small, then increase for production
    n_random_starts=5,
    random_state=42,
    verbose=True,
    n_jobs=1 # Set to 1 to avoid potential issues with TensorFlow multi-threading if not carefully managed
)

# --- Display Results ---
print("\n--- Bayesian Optimization Results ---")
print(f"Best validation MSE found: {results.fun:.4f}")
print("Best hyperparameters:")
best_hyperparameters = {dim.name: value for dim, value in zip(space, results.x)}
print(best_hyperparameters)


--- Starting Bayesian Optimization (This may take a while) ---
Iteration No: 1 started. Evaluating function at random point.

--- Evaluating Trial ---
Hyperparams: n_timesteps=9, filters=50, kernel_size=3, pool_size=2, lstm_units_1=73, lstm_units_2=62, dense_units_1=14, dense_units_2=12, lr=0.000465, epochs=71, batch_size=39


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
