In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from catboost import CatBoostRegressor
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import RandomizedSearchCV # Import RandomizedSearchCV from sklearn.model_selection
import joblib
from sklearn.metrics import r2_score
import numpy as np

In [2]:
# Load dataset
url = "concrete_data.csv"
df = pd.read_csv(url)


In [3]:

# Splitting features and target
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [4]:
# Train-test split (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:

# Standardize data for neural network
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [7]:
param_grid = {
    'iterations': [500, 1000],
    'learning_rate': [0.01, 0.05, 0.1],
    'depth': [4, 6, 8, 10],
    'l2_leaf_reg': [1, 3, 5, 7],
    'subsample': [0.5, 0.7, 1.0],
    'colsample_bylevel': [0.5, 0.7, 1.0],
    'bagging_temperature': [0, 1, 2],
}

# # Create CatBoost model
catboost_model = CatBoostRegressor(random_state=42, verbose=0)

# # Randomized search for hyperparameter tuning
tuner = RandomizedSearchCV(
    estimator=catboost_model,
    param_distributions=param_grid,
    n_iter=50,
    scoring='neg_mean_squared_error',
    cv=3,
    random_state=42,
    verbose=1
)

# # Fit the model
tuner.fit(X_train, y_train, eval_set=(X_test, y_test), early_stopping_rounds=50)

catboost_pred = tuner.predict(X_test)


Fitting 3 folds for each of 50 candidates, totalling 150 fits


In [10]:
nn_model = Sequential()
nn_model.add(Dense(64, input_dim=X_train_scaled.shape[1], activation='relu'))
nn_model.add(Dense(64, activation='relu'))
nn_model.add(Dense(1))  # Output layer for regression

nn_model.compile(optimizer='adam', loss='mean_squared_error')
nn_model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_data=(X_test_scaled, y_test))

# Get predictions from Neural Network
nn_pred = nn_model.predict(X_test_scaled)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 1421.8662 - val_loss: 1300.2670
Epoch 2/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 1317.0062 - val_loss: 983.9893
Epoch 3/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 916.2848 - val_loss: 558.7830
Epoch 4/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 497.3824 - val_loss: 267.1996
Epoch 5/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 294.1883 - val_loss: 202.0736
Epoch 6/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 208.8287 - val_loss: 186.4835
Epoch 7/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 207.0289 - val_loss: 175.6758
Epoch 8/100
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 186.5522 - val_loss: 167.4508
Epoch 9/100


In [12]:
combined_preds = np.column_stack((catboost_pred, nn_pred.flatten()))


In [13]:
meta_model = LinearRegression()
meta_model.fit(combined_preds, y_test)

In [15]:
final_preds = meta_model.predict(combined_preds)


In [16]:
mse_hybrid = mean_squared_error(y_test, final_preds)
print(f"Hybrid Model RMSE: {mse_hybrid**0.5}")
r2 = r2_score(y_test, final_preds)
print(f"R² Score: {r2}")

Hybrid Model RMSE: 3.8586206783425046
R² Score: 0.9422185152606593
