In [9]:
#!pip install optuna
#!pip install lightgbm
import optuna
from sklearn.model_selection import cross_val_score
import lightgbm as lgb
import xgboost as xgb
import numpy as np
import seaborn as sns
#!pip install --upgrade shap
import matplotlib.pyplot as plt
import shap
import pandas as pd
from sklearn.linear_model import LinearRegression

# Load the CSV file into a pandas DataFrame
train = pd.read_csv("/content/drive/MyDrive/CS 301/house-prices-advanced-regression-techniques/train.csv")

# Split the data into X (features) and y (target) 
X = train[["MSSubClass", "LotFrontage", "LotArea", "YearBuilt", "YearRemodAdd", "MasVnrArea", "BsmtFinSF1", "BsmtFinSF2", "BsmtUnfSF", "TotalBsmtSF", "1stFlrSF", "2ndFlrSF", "GrLivArea"]]

y = train[["SalePrice"]]

model = xgb.XGBRegressor().fit(X, y)

# Create an explainer object using the XGBoost model
explainer = shap.Explainer(model)

# Generate SHAP values for your dataset
shap_values = explainer(X)

def objective(trial):
    params = {
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'max_depth': trial.suggest_int('max_depth', 2, 64),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.001, 1),
        'n_estimators': trial.suggest_int('n_estimators', 50, 1000),
        'min_child_samples': trial.suggest_int('min_child_samples', 1, 100),
        'reg_alpha': trial.suggest_loguniform('reg_alpha', 1e-9, 10.0),
        'reg_lambda': trial.suggest_loguniform('reg_lambda', 1e-9, 10.0),
        'random_state': 42,
        'objective': 'regression',
        'metric': 'rmse'
    }
    lgbm = lgb.LGBMRegressor(**params)
    return cross_val_score(lgbm, X, y, cv=5, scoring='neg_root_mean_squared_error').mean()

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=100)
print('Best Hyperparameters:', study.best_params)
print('Best RMSE:', -study.best_value)
best_params = study.best_params
best_lgbm = lgb.LGBMRegressor(**best_params)
best_lgbm.fit(X, y)


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
[32m[I 2023-04-12 22:36:40,127][0m A new study created in memory with name: no-name-22c027ff-115e-4f59-b65f-a1f29bccf17a[0m
suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.
suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.
suggest_loguniform has been deprecated in v3.0.0. This feature will be removed in v6.0.0. See https://github.com/optuna/optuna/releases/tag/v3.0.0. Use :func:`~optuna.trial.Trial.suggest_float` instead.
[32m[I 2023-04-12 22:36:41,736][0m Trial 0 finished with value: -37536.725523477864 and parameters: {'num_leaves': 241, 'max_depth': 21, 'learning_rate': 0.006682615651605047, 'n_estimato

Best Hyperparameters: {'num_leaves': 169, 'max_depth': 44, 'learning_rate': 0.0010124339167544556, 'n_estimators': 83, 'min_child_samples': 25, 'reg_alpha': 0.0006338598354043378, 'reg_lambda': 1.0120470962516766e-05}
Best RMSE: 74499.10688260473
