**Working with:**

1. *XGBoost + Optuna*
2. *LightGBM + Optuna*
3. *CatBoost + Optuna*
4. *Histogram-Based Gradient Boosting (HistGBM) + Optuna*
5. *Extra Trees + Optuna*
6. *Random Forest + Optuna*
7. *AdaBoost + Optuna*
8. *Blending*
9. *Stacking*

### Rough Structure for (Model + Optuna) 

In [None]:
import datetime

# Log the wall-clock time at which this notebook run began.
print(f"Code started at: {datetime.datetime.now():%Y-%m-%d %H:%M:%S}")

In [None]:
! pip install optuna

In [None]:
import sklearn.model_selection
import sklearn.ensemble
import optuna
import numpy as np
import pandas as pd

# Dataset placeholders.
# NOTE(review): these are plain strings here; presumably they are meant to be
# replaced with the real training / testing DataFrames before running — confirm.
train = "training data"
test = "testing data"

# Feature matrix (everything except the 'targets' column) and target vector.
x = train.drop('targets', axis=1)
y = train['targets']

# Hold out 20% of the training rows; the fixed seed keeps the split repeatable.
(x_train, x_test,
 y_train, y_test) = sklearn.model_selection.train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

def objective(trial):
    """Score one Optuna trial for a random-forest regressor.

    Samples a hyperparameter set from ``trial``, cross-validates a
    ``RandomForestRegressor`` on ``x_train`` / ``y_train`` with ``cv=5`` and
    returns the mean squared error averaged over the folds (lower is better).
    """
    # Sample in a fixed order; every key doubles as the estimator keyword.
    sampled = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 200),
        'max_depth': trial.suggest_int('max_depth', 10, 30, log=True),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 4),
        'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', 0.5]),
        'bootstrap': trial.suggest_categorical('bootstrap', [True, False]),
    }
    rf_model = sklearn.ensemble.RandomForestRegressor(random_state=42, **sampled)

    fold_scores = sklearn.model_selection.cross_val_score(
        rf_model,
        x_train,
        y_train,
        cv=5,
        scoring='neg_mean_squared_error',
    )
    # The scorer reports negated MSE; flip the sign so the study minimizes MSE.
    return -fold_scores.mean()

# Optimize hyperparameters (stops after 50 trials or the 600 s timeout)
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50, timeout=600)

# Refit on the training split with the best hyperparameters found
best_rf_model = sklearn.ensemble.RandomForestRegressor(**study.best_params, random_state=42)
best_rf_model.fit(x_train, y_train)

# Predict on the hold-out split as a quick sanity check
predictions = best_rf_model.predict(x_test)
print(f"Predictions on X_test: {predictions[:10]}")

# Submission: score the actual test set rather than the hold-out split.
# The hold-out predictions have one row per x_test row, which is not
# guaranteed to line up with the ids in sample_submission.csv.
# NOTE(review): assumes `test` is a DataFrame that contains every feature
# column of x_train — confirm.
test_predictions = best_rf_model.predict(test[x_train.columns])
sub = pd.read_csv("sample_submission.csv")
output = pd.DataFrame({"id": sub["id"], "Premium Amount": test_predictions})
output.to_csv('submission_final.csv', index=False)

# Last expression in the cell: shows the first submission rows in the notebook
output.head()

In [None]:
# Convert all trial results into a pandas DataFrame for further analysis
print("\nAll Trials as DataFrame:")
df = study.trials_dataframe()
print(df.head())  # Display the first few rows of the DataFrame

# Print the best hyperparameters
print("Best Hyperparameters:")
print(study.best_params)

# Print the best objective value. The objective returns the sign-flipped
# 'neg_mean_squared_error' score, so study values are plain (positive) MSE
# and are labelled as such.
print("\nBest Objective Value (MSE):")
print(study.best_value)

# Print details of the best trial
print("\nDetails of the Best Trial:")
best_trial = study.best_trial
print(f"  Trial Number: {best_trial.number}")
print(f"  Best Params: {best_trial.params}")
print(f"  Best Value (MSE): {best_trial.value}")

# Print user-defined and system attributes of the best trial
print("\nAdditional Attributes of the Best Trial:")
print(f"  User Attributes: {best_trial.user_attrs}")
print(f"  System Attributes: {best_trial.system_attrs}")

## Models + Optuna

In [8]:
! pip install lightgbm



In [None]:
import lightgbm as lgbm


# Train a LightGBM booster.
# NOTE(review): `param` and `dtrain` are not defined in the visible cells —
# presumably a parameter dict and a LightGBM Dataset; confirm before running.
gbm = lgbm.train(params=param, train_set=dtrain)

In [None]:
!

In [None]:
import sklearn.ensemble

# Initialize a RandomForestRegressor model with default parameters
randomforest_model = sklearn.ensemble.RandomForestRegressor()

# Candidate values for hyperparameter search. Kept as a separate dict: the
# estimator constructor takes single hyperparameter values and has no
# `rf_hyperparam_grid` keyword, so the grid cannot be passed into it.
# NOTE: scikit-learn rejects oob_score=True together with bootstrap=False, so
# that combination has to be excluded (or tolerated as a failed fit) by
# whatever search consumes this grid.
rf_hyperparam_grid = {
    'n_estimators': [50, 100, 200],          # Number of trees to evaluate.
    'max_depth': [None, 10, 20, 30],         # Different depths to test tree complexity.
    'min_samples_split': [2, 5, 10],         # Adjusts minimum samples required to split.
    'min_samples_leaf': [1, 2, 4],           # Tests minimum samples per leaf.
    'max_features': ['sqrt', 'log2', 0.5],   # Explore feature subsets for splitting.
    'bootstrap': [True, False],              # Tests bootstrap and non-bootstrap methods.
    'oob_score': [True, False],              # Includes out-of-bag scoring in evaluation.
    'random_state': [42],                    # Ensures reproducibility during tuning.
}

In [None]:
import sklearn.tree  # this cell uses sklearn.tree; import it explicitly

# Single extremely randomized tree regressor with default parameters.
extratree_model = sklearn.tree.ExtraTreeRegressor()

# Candidate values for hyperparameter search. Kept as a separate dict: the
# estimator constructor takes single hyperparameter values, not a grid of
# candidate lists.
# NOTE(review): "auto" for max_features is not accepted by recent
# scikit-learn releases — check against the installed version.
extratree_hyperparam_grid = {
    "criterion": ["squared_error", "friedman_mse", "absolute_error", "poisson"],
    "splitter": ["random", "best"],
    "max_depth": [None, 10, 20, 50, 100],
    "min_samples_split": [2, 5, 10, 20],
    "min_samples_leaf": [1, 2, 5, 10],
    "max_features": [None, "auto", "sqrt", "log2", 0.5, 1.0],
    "min_impurity_decrease": [0.0, 0.01, 0.1],
    "max_leaf_nodes": [None, 10, 50, 100],
    "ccp_alpha": [0.0, 0.01, 0.1, 1.0],
}

In [None]:
# AdaBoost regressor with default parameters.
adaboost_model = sklearn.ensemble.AdaBoostRegressor()

# Candidate values for hyperparameter search. Kept as a separate dict: a dict
# passed positionally to the constructor would be taken as the base estimator
# rather than as a set of hyperparameters.
adaboost_hyperparam_grid = {
    "n_estimators": [50, 100, 200, 500],
    "learning_rate": [0.01, 0.1, 1.0, 10.0],
    "loss": ["linear", "square", "exponential"],
    "random_state": [None, 0, 42, 100],
}

In [None]:
# Gradient boosting regressor with default parameters.
gbm_model = sklearn.ensemble.GradientBoostingRegressor()

# Candidate values for hyperparameter search. Kept as a separate dict: bare
# "key": value pairs are not valid call arguments, and the constructor takes
# single hyperparameter values rather than lists of candidates.
gbm_hyperparam_grid = {
    "loss": ["squared_error", "absolute_error", "huber", "quantile"],
    "learning_rate": [0.01, 0.1, 0.2, 0.5],
    "n_estimators": [50, 100, 200],
    "max_depth": [3, 5, 10, None],
    "subsample": [0.6, 0.8, 1.0],
}

In [None]:
import optuna

# XGBoost regressor whose hyperparameters are sampled from an Optuna trial.
# NOTE(review): `xgb` and `trial` are not defined in the visible cells —
# presumably `xgb` is the xgboost package and this statement belongs inside an
# objective(trial) function; confirm before running.
# NOTE(review): 'gpu_hist' is reported as deprecated by newer XGBoost
# releases — check against the installed version.
xgb_model = xgb.XGBRegressor(
    tree_method='gpu_hist',  # GPU acceleration for faster training
    # The scikit-learn wrapper names the regularization terms reg_lambda and
    # reg_alpha; `lambda_` is not a recognized parameter.
    reg_lambda=trial.suggest_float('lambda', 1e-3, 10.0, log=True),  # L2 regularization
    reg_alpha=trial.suggest_float('alpha', 1e-3, 10.0, log=True),  # L1 regularization
    colsample_bytree=trial.suggest_categorical('colsample_bytree', [0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]),  # Fraction of features used
    subsample=trial.suggest_categorical('subsample', [0.4, 0.5, 0.6, 0.7, 0.8, 1.0]),  # Fraction of data used
    learning_rate=trial.suggest_categorical('learning_rate', [0.008, 0.01, 0.012, 0.014, 0.016, 0.018, 0.02]),  # Learning rate
    n_estimators=10000,  # Number of trees (max iterations)
    max_depth=trial.suggest_categorical('max_depth', [5, 7, 9, 11, 13, 15, 17]),  # Depth of trees
    random_state=trial.suggest_categorical('random_state', [2020]),  # Random seed
    min_child_weight=trial.suggest_int('min_child_weight', 1, 300),  # Minimum weight of child
)

In [None]:
import sklearn.ensemble

# Histogram-based gradient boosting regressor with default parameters.
histgbm_model = sklearn.ensemble.HistGradientBoostingRegressor()
# The estimator is not an XGBoost model; the earlier variable name is kept as
# an alias so any code that still refers to it keeps working.
xgboost_model = histgbm_model

# Candidate values for hyperparameter search. Kept as a separate dict: a dict
# passed positionally to the constructor would be taken as the `loss`
# argument rather than as a set of hyperparameters.
histgbm_hyperparam_grid = {
    "loss": ["squared_error", "absolute_error", "poisson", "quantile"],  # Loss function options
    "quantile": [None, 0.1, 0.2, 0.3, 0.4, 0.5],  # Quantile for "quantile" loss, only applicable for quantile loss
    "learning_rate": [0.01, 0.05, 0.1, 0.2],  # Learning rate options
    "max_iter": [50, 100, 200],  # Number of boosting iterations
    "max_leaf_nodes": [31, 50, 100, None],  # Max leaf nodes per tree, None means unlimited
    "max_depth": [None, 5, 10, 20],  # Max depth of each tree
    "min_samples_leaf": [5, 10, 20, 50],  # Minimum samples per leaf node
    "l2_regularization": [0.0, 0.1, 0.5, 1.0],  # Regularization strength to prevent overfitting
    "early_stopping": [False, "auto"],  # Enable early stopping
    "scoring": ["loss", "neg_mean_squared_error", "r2"],  # Scoring metric for early stopping
}


In [None]:
! pip install lightgbm

import lightgbm as lgb

In [None]:
import catboost 
from catboost import CatBoostRegressor
import optuna

# CatBoost regressor whose hyperparameters are sampled from an Optuna trial.
# NOTE(review): `trial` is not defined at cell level — presumably this
# statement belongs inside an objective(trial) function; confirm.
# suggest_float replaces the deprecated suggest_loguniform / suggest_uniform
# helpers; the sampled ranges and distributions are unchanged.
model = CatBoostRegressor(
    loss_function='RMSE',
    task_type='GPU',
    l2_leaf_reg=trial.suggest_float('l2_leaf_reg', 1e-3, 10.0, log=True),
    max_bin=trial.suggest_int('max_bin', 200, 400),
    # NOTE(review): the trial parameter is named 'bagging_fraction' while the
    # estimator keyword is `subsample`, so study.best_params cannot be passed
    # straight back into the constructor — confirm whether that is intended.
    subsample=trial.suggest_float('bagging_fraction', 0.4, 1.0),
    learning_rate=trial.suggest_float('learning_rate', 0.006, 0.018),
    n_estimators=25000,
    max_depth=trial.suggest_categorical('max_depth', [5, 7, 9, 11, 13, 15]),
    random_state=trial.suggest_categorical('random_state', [2020]),
    min_data_in_leaf=trial.suggest_int('min_data_in_leaf', 1, 300),
)

In [None]:
'''
LightGBM + Optuna

CatBoost + Optuna

Stacking

Blending
'''