In [None]:
import pickle

import pandas as pd
from sklearn.metrics import mean_squared_error

from Method.GradientBoost import XGBoostRegressor as xgb

In [None]:
# Read the pre-processed train and validation splits from disk in one step.
train_data, validation_data = (
    pd.read_csv("Processed Data/train.csv"),
    pd.read_csv("Processed Data/validation.csv"),
)

In [None]:
# Training features: every column except the 'Attrition_rate' target.
X_train = train_data.drop('Attrition_rate', axis=1).values
# Training target: the 'Attrition_rate' column on its own.
y_train = train_data.loc[:, 'Attrition_rate'].values

In [None]:
# Validation features: every column except the 'Attrition_rate' target.
X_val = validation_data.drop('Attrition_rate', axis=1).values
# Validation target: the 'Attrition_rate' column on its own.
y_val = validation_data.loc[:, 'Attrition_rate'].values

In [None]:
import optuna
def objective(trial):
    """Optuna objective: fit one model with sampled settings and score it.

    Samples a hyper-parameter set from ``trial``, fits a fresh ``xgb`` model
    on the notebook-level ``X_train``/``y_train`` arrays, and evaluates its
    predictions on ``X_val``/``y_val``.

    Args:
        trial: The Optuna trial used to sample the hyper-parameters.

    Returns:
        float: Root-mean-squared error of the model on the validation set.
    """
    params = {
        'subsample_cols': trial.suggest_float('subsample_cols', 0.6, 0.8),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
        'depth': trial.suggest_int('depth', 3, 5),
        'min_leaf': trial.suggest_int('min_leaf', 2, 5),
        'learning_rate': trial.suggest_float('learning_rate', 0.1, 0.3),
        'boosting_rounds': trial.suggest_int('boosting_rounds', 5, 15),
        'lambda_': trial.suggest_float('lambda_', 1, 1.5),
        'gamma': trial.suggest_float('gamma', 0.5, 1.3),
        # low == high, so this always yields 0.1; it is still recorded as a
        # trial parameter and therefore shows up in study.best_params.
        'eps': trial.suggest_float('eps', 0.1, 0.1),
        # Fixed, not sampled: this key is NOT part of study.best_params.
        'early_stopping_rounds': 5
    }
    print("Current Parameters:", params)

    model = xgb()
    model.fit(X_train, y_train, **params)

    y_pred_val = model.predict(X_val)
    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6.
    # Taking the square root of the MSE gives the same RMSE for a
    # single-target regression and works on every scikit-learn version.
    rmse = float(mean_squared_error(y_val, y_pred_val) ** 0.5)

    return rmse


In [None]:
# Search for the hyper-parameters that minimize the value returned by
# objective() over 10 trials.
# The sampler is seeded so a fresh "Restart & Run All" proposes the same
# sequence of trials. TPESampler is Optuna's default sampler, so only the
# seeding changes. NOTE(review): any randomness inside the model itself is not
# controlled by this seed.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=10)

In [None]:
# Hyper-parameters sampled in the best trial. This dict only holds values that
# came from trial.suggest_*; fixed settings used in objective() are absent.
best_params = study.best_params

# Refit with exactly the configuration the best trial was scored with.
# 'early_stopping_rounds' was a fixed entry in objective()'s params, so it has
# to be passed explicitly here, otherwise the refit model differs from the
# one that produced the best score.
best_model = xgb()
best_model.fit(X_train, y_train, **best_params, early_stopping_rounds=5)
y_pred = best_model.predict(X_val)
# `squared=False` was removed in scikit-learn 1.6; sqrt(MSE) is the same RMSE
# for a single-target regression and is version-independent.
final_rmse = float(mean_squared_error(y_val, y_pred) ** 0.5)

In [None]:
# Summarize the search: the winning hyper-parameters, then the refit model's
# validation RMSE, one per line.
print(
    f"Best Trial Parameters: {study.best_params}",
    f"Final RMSE on Validation Set: {final_rmse}",
    sep="\n",
)

In [None]:
# Persist the refit model. `save_model` is neither imported nor defined in
# this notebook, so the original call raised NameError on a fresh kernel.
# NOTE(review): pickle is assumed from the '.pkl' extension — confirm that the
# model object is picklable and that this is the intended format.
with open('GD_is_the_best.pkl', 'wb') as model_file:
    pickle.dump(best_model, model_file)