In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import KFold, RandomizedSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from scikeras.wrappers import KerasRegressor
from keras.models import Sequential
from keras.layers import Conv1D, Flatten, Dense
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import joblib

# -----------------------------
# Load and preprocess the data
# -----------------------------
# Load dataset
df = pd.read_csv(r"C:\ML_work\Data Engineering work\uasin_gishu_weather_data.csv", parse_dates=["time"])

# Sort by date
df = df.sort_values(by='time')

# Features and target
features = ['temperature_2m_max', 'temperature_2m_min', 'windspeed_10m_max']
target = 'precipitation_sum'

# Check for missing values and fill them with column mean
df[features + [target]] = df[features + [target]].fillna(df[features + [target]].mean())

# Extract input and output
X = df[features].values
y = df[target].values

# Normalize features
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Reshape input for CNN: (samples, timesteps, features)
X_scaled = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# -----------------------------
# Build CNN model function
# -----------------------------
def build_model(filters=64, kernel_size=2, dense_units=32, learning_rate=0.001, input_shape=(1, 3)):
    model = Sequential()
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='relu', input_shape=input_shape))
    model.add(Flatten())
    model.add(Dense(dense_units, activation='relu'))
    model.add(Dense(1))  # Output layer
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse')
    return model

# -----------------------------
# Wrap model for scikit-learn
# -----------------------------
regressor = KerasRegressor(
    model=build_model,
    model__input_shape=(X_scaled.shape[1], X_scaled.shape[2]),
    verbose=0
)

# -----------------------------
# Hyperparameter space
# -----------------------------
param_dist = {
    'model__filters': [32, 64, 128],
    'model__kernel_size': [1, 2, 3],
    'model__dense_units': [16, 32, 64],
    'model__learning_rate': [0.001, 0.0001],
    'batch_size': [16, 32],
    'epochs': [30, 50]
}

# -----------------------------
# Cross-validation and search
# -----------------------------
kfold = KFold(n_splits=3, shuffle=True, random_state=42)

random_search = RandomizedSearchCV(
    estimator=regressor,
    param_distributions=param_dist,
    cv=kfold,
    n_iter=5,
    scoring='neg_mean_squared_error',
    random_state=42
)

early_stop = EarlyStopping(monitor='loss', patience=5, restore_best_weights=True)

# -----------------------------
# Train the model
# -----------------------------
random_search.fit(X_scaled, y, callbacks=[early_stop])

print("Best Parameters:", random_search.best_params_)

# -----------------------------
# Save best model and scaler
# -----------------------------
best_model = random_search.best_estimator_.model_
best_model.save("cnn_weather_model.h5")

joblib.dump(scaler, "weather_scaler.pkl")

print("Model and scaler saved successfully.")


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  super().__init__(activity_regularizer=activity_regularizer, **

Best Parameters: {'model__learning_rate': 0.001, 'model__kernel_size': 1, 'model__filters': 128, 'model__dense_units': 32, 'epochs': 30, 'batch_size': 32}
Model and scaler saved successfully.
