<a href="https://colab.research.google.com/github/Davron030901/Scikit_learning/blob/main/Mastering_Hyperparameter_Tuning_with_Optuna_Boost_Your_Machine_Learning_Models.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import seaborn as sns

In [None]:
import pandas as pd

In [None]:
# Load seaborn's "healthexp" example dataset (columns: Year, Country, Spending_USD, Life_Expectancy).
healthexp=sns.load_dataset('healthexp')

In [None]:
# Preview the first rows to check the loaded columns and values.
healthexp.head()

Unnamed: 0,Year,Country,Spending_USD,Life_Expectancy
0,1970,Germany,252.311,70.6
1,1970,France,192.143,72.2
2,1970,Great Britain,123.993,71.9
3,1970,Japan,150.437,72.0
4,1970,USA,326.961,70.9


In [None]:
# One-hot encode the non-numeric Country column into integer 0/1 indicator
# columns (Country_<name>). NOTE: this rebinds healthexp, so the raw frame
# shown by the earlier preview is replaced by the encoded one.
healthexp=pd.get_dummies(healthexp,dtype=int)

In [None]:
# Confirm the encoding: Country is replaced by one indicator column per country.
healthexp.head()

Unnamed: 0,Year,Spending_USD,Life_Expectancy,Country_Canada,Country_France,Country_Germany,Country_Great Britain,Country_Japan,Country_USA
0,1970,252.311,70.6,0,0,1,0,0,0
1,1970,192.143,72.2,0,1,0,0,0,0
2,1970,123.993,71.9,0,0,0,1,0,0
3,1970,150.437,72.0,0,0,0,0,1,0
4,1970,326.961,70.9,0,0,0,0,0,1


In [None]:
# Feature matrix: every column of the encoded frame except the prediction target.
X = healthexp.drop(columns=['Life_Expectancy'])

In [None]:
# Target vector: the Life_Expectancy column that the models predict.
y=healthexp['Life_Expectancy']

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for the final evaluation; the fixed seed keeps
# the split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=54,
)

In [None]:
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Baseline model: random forest with default hyperparameters; the seed makes the fit reproducible.
rfr=RandomForestRegressor(random_state=34)

In [None]:
# Fit the baseline on the training split only.
rfr.fit(X_train,y_train)

In [None]:
# Baseline predictions for the held-out test rows.
y_pred=rfr.predict(X_test)

In [None]:
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score

In [None]:
# Baseline test-set mean absolute error (in the same units as Life_Expectancy).
mean_absolute_error(y_test,y_pred)

0.31138181818180044

In [None]:
# Baseline test-set mean squared error.
mean_squared_error(y_test,y_pred)

0.1553235999999905

In [None]:
# Baseline test-set R^2 (coefficient of determination).
r2_score(y_test,y_pred)

0.9836234548107303

In [None]:
!pip install optuna



In [None]:
import optuna

In [None]:
from sklearn.model_selection import cross_val_score

In [None]:
def objective(trial):
  """Optuna objective: mean 5-fold CV score of a random forest for one trial.

  Samples n_estimators, max_depth, min_samples_split and min_samples_leaf
  from the trial and cross-validates a RandomForestRegressor built with
  them on the training split.

  Args:
    trial: Optuna trial object; hyperparameters are drawn with suggest_int.

  Returns:
    Mean 'neg_mean_squared_error' over the 5 folds. The score is negated
    MSE, so larger is better and the study must maximize it.
  """
  n_estimators = trial.suggest_int('n_estimators', 100, 1000)
  max_depth = trial.suggest_int('max_depth', 10, 50)
  min_samples_split = trial.suggest_int('min_samples_split', 2, 32)
  min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 32)
  # Seed the forest so identical hyperparameters always receive the same
  # score; 34 matches the seed used for the baseline model.
  model = RandomForestRegressor(
      n_estimators=n_estimators,
      max_depth=max_depth,
      min_samples_split=min_samples_split,
      min_samples_leaf=min_samples_leaf,
      random_state=34,
  )
  # Tune on the training split only: cross-validating on the full X, y
  # would let the held-out test rows influence hyperparameter selection.
  score = cross_val_score(model, X_train, y_train, n_jobs=-1, cv=5, scoring='neg_mean_squared_error').mean()
  return score

In [None]:
# direction='maximize' because the objective returns negated MSE (higher is better).
# The explicitly passed, seeded RandomSampler makes this a random search with a
# repeatable sequence of sampled hyperparameters; without the sampler argument
# Optuna would use its default TPE sampler, not random search.
study = optuna.create_study(direction='maximize', sampler=optuna.samplers.RandomSampler(seed=42))
study.optimize(objective, n_trials=100)

[I 2024-12-09 16:18:57,037] A new study created in memory with name: no-name-f5222964-8945-4027-ac72-7343cb206c1e
[I 2024-12-09 16:19:01,714] Trial 0 finished with value: -4.545156363742535 and parameters: {'n_estimators': 437, 'max_depth': 48, 'min_samples_split': 24, 'min_samples_leaf': 20}. Best is trial 0 with value: -4.545156363742535.
[I 2024-12-09 16:19:04,121] Trial 1 finished with value: -5.156849067939029 and parameters: {'n_estimators': 240, 'max_depth': 16, 'min_samples_split': 3, 'min_samples_leaf': 28}. Best is trial 0 with value: -4.545156363742535.
[I 2024-12-09 16:19:08,909] Trial 2 finished with value: -5.557062527409069 and parameters: {'n_estimators': 641, 'max_depth': 39, 'min_samples_split': 2, 'min_samples_leaf': 32}. Best is trial 0 with value: -4.545156363742535.
[I 2024-12-09 16:19:14,178] Trial 3 finished with value: -3.0354376796950095 and parameters: {'n_estimators': 850, 'max_depth': 18, 'min_samples_split': 7, 'min_samples_leaf': 6}. Best is trial 3 with 

In [None]:
# Hyperparameter values of the trial with the best (highest) objective value.
study.best_params

{'n_estimators': 358,
 'max_depth': 34,
 'min_samples_split': 2,
 'min_samples_leaf': 2}

In [None]:
# Keep the best trial's hyperparameters; they are used below to build the final model.
best_params=study.best_params

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Objective value of each trial over the course of the study.
optuna.visualization.plot_optimization_history(study)

In [None]:
# Parallel-coordinate view relating the sampled hyperparameter values to the objective value.
optuna.visualization.plot_parallel_coordinate(study)

In [None]:
# One slice plot per listed hyperparameter: sampled value against objective value.
optuna.visualization.plot_slice(study,params=['n_estimators','max_depth','min_samples_split','min_samples_leaf'])

In [None]:
# Estimated importance of each hyperparameter for the objective value.
optuna.visualization.plot_param_importances(study)

In [None]:
# Unpack the best trial's hyperparameters into individually named variables.
(best_n_estimators,
 best_max_depth,
 best_min_samples_split,
 best_min_samples_leaf) = (
    best_params['n_estimators'],
    best_params['max_depth'],
    best_params['min_samples_split'],
    best_params['min_samples_leaf'],
)

In [None]:
# Rebuild the forest with the hyperparameters of the best Optuna trial.
# random_state=34 matches the baseline model's seed, so the tuned-vs-baseline
# comparison is reproducible and not blurred by run-to-run forest randomness.
best_model=RandomForestRegressor(n_estimators=best_n_estimators,
                                 max_depth=best_max_depth,
                                 min_samples_split=best_min_samples_split,
                                 min_samples_leaf=best_min_samples_leaf,
                                 random_state=34)

In [None]:
# Fit the tuned model on the same training split as the baseline.
best_model.fit(X_train,y_train)

In [None]:
# Tuned-model predictions for the held-out test rows.
# NOTE: this rebinds y_pred, so the baseline predictions are overwritten.
y_pred=best_model.predict(X_test)

In [None]:
# Tuned-model test-set mean absolute error (compare with the baseline value above).
mean_absolute_error(y_test,y_pred)

0.37089871965683663

In [None]:
# Tuned-model test-set mean squared error.
mean_squared_error(y_test,y_pred)

0.20439877287319483

In [None]:
# Tuned-model test-set R^2 (coefficient of determination).
r2_score(y_test,y_pred)

0.9784492135091555