# Hyperparameter Tuning with Optuna

In [2]:
import pandas as pd
import seaborn as sns

from sklearn import metrics
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor

import optuna

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
df = sns.load_dataset('healthexp')

## Data Exploration

In [4]:
df.head()

Unnamed: 0,Year,Country,Spending_USD,Life_Expectancy
0,1970,Germany,252.311,70.6
1,1970,France,192.143,72.2
2,1970,Great Britain,123.993,71.9
3,1970,Japan,150.437,72.0
4,1970,USA,326.961,70.9


In [5]:
df.shape

(274, 4)

In [6]:
df.describe()

Unnamed: 0,Year,Spending_USD,Life_Expectancy
count,274.0,274.0,274.0
mean,1996.992701,2789.338905,77.909489
std,14.180933,2194.939785,3.276263
min,1970.0,123.993,70.6
25%,1985.25,1038.357,75.525
50%,1998.0,2295.578,78.1
75%,2009.0,4055.61,80.575
max,2020.0,11859.179,84.7


## Model Preparation

### Create Binary Numbers for Categorical Column

In [7]:
df = pd.get_dummies(df)

In [8]:
df

Unnamed: 0,Year,Spending_USD,Life_Expectancy,Country_Canada,Country_France,Country_Germany,Country_Great Britain,Country_Japan,Country_USA
0,1970,252.311,70.6,False,False,True,False,False,False
1,1970,192.143,72.2,False,True,False,False,False,False
2,1970,123.993,71.9,False,False,False,True,False,False
3,1970,150.437,72.0,False,False,False,False,True,False
4,1970,326.961,70.9,False,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...
269,2020,6938.983,81.1,False,False,True,False,False,False
270,2020,5468.418,82.3,False,True,False,False,False,False
271,2020,5018.700,80.4,False,False,False,True,False,False
272,2020,4665.641,84.7,False,False,False,False,True,False


### Assign the Features into A Set 'X' and Assign the Target Variables to 'Y'

In [9]:
X = df.drop(['Life_Expectancy'], axis =1)

In [10]:
y = df['Life_Expectancy']

### Split the Data Between Training & Set

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.2 , random_state = 54)

### Model Results Predection

In [12]:
def modelresults(predictions):
    mae = mean_absolute_error(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)
    
    print('Mean absolute error on model is {:.4f}'.format(mae))
    print('')
    print('Mean squared error on model is {:.4f}'.format(mse))
    print('')
    print('The r2 score on model is {:.4f}'.format(r2))

### Random Forest Regressor Model

In [13]:
rfr = RandomForestRegressor(random_state = 34)

In [14]:
rfr.fit(X_train, y_train)

In [15]:
y_pred_rfr_fit = rfr.predict(X_test)

In [16]:
modelresults(y_pred_rfr_fit)

Mean absolute error on model is 0.3114

Mean squared error on model is 0.1553

The r2 score on model is 0.9836


The results for this model is great. but for practice purposes, we will adapt the **Optuna model** in order to imporve the results by modifying the hyper parameters.

## Optuna Model with 200 Trails

In [17]:

def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 100, 1000)
    max_depth = trial.suggest_int('max_depth', 10, 50)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 32)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 32)
    max_features = trial.suggest_categorical('max_features', ['sqrt', 'log2'])
    criterion = trial.suggest_categorical('criterion', ["squared_error", "absolute_error", "friedman_mse", "poisson"])


    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        criterion=criterion,
        random_state=34  # Set a random state for reproducibility
    )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # You can choose any metric you want to optimize, for example, mean_squared_error
    score = mean_squared_error(y_test, y_pred)
    
    return score

study = optuna.create_study(direction='minimize', sampler=optuna.samplers.RandomSampler(seed=42))
study.optimize(objective, n_trials=200)

# Print the best parameters found by Optuna
print("Best trial:")
trial = study.best_trial

print("Value: {:.4f}".format(trial.value))

print("Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


[I 2024-01-30 22:30:08,286] A new study created in memory with name: no-name-2fd61ce8-87cb-46f3-a8ec-9f2c01243b79
[I 2024-01-30 22:30:09,116] Trial 0 finished with value: 3.0322819936505816 and parameters: {'n_estimators': 437, 'max_depth': 48, 'min_samples_split': 24, 'min_samples_leaf': 20, 'max_features': 'sqrt', 'criterion': 'absolute_error'}. Best is trial 0 with value: 3.0322819936505816.
[I 2024-01-30 22:30:09,320] Trial 1 finished with value: 1.2943417839701201 and parameters: {'n_estimators': 118, 'max_depth': 49, 'min_samples_split': 27, 'min_samples_leaf': 7, 'max_features': 'log2', 'criterion': 'absolute_error'}. Best is trial 1 with value: 1.2943417839701201.
[I 2024-01-30 22:30:10,249] Trial 2 finished with value: 1.6878654638265322 and parameters: {'n_estimators': 651, 'max_depth': 15, 'min_samples_split': 11, 'min_samples_leaf': 12, 'max_features': 'log2', 'criterion': 'friedman_mse'}. Best is trial 1 with value: 1.2943417839701201.
[I 2024-01-30 22:30:10,967] Trial 3 f

Best trial:
Value: 0.1314
Params: 
    n_estimators: 366
    max_depth: 19
    min_samples_split: 3
    min_samples_leaf: 1
    max_features: sqrt
    criterion: poisson


In [18]:
study.best_params

{'n_estimators': 366,
 'max_depth': 19,
 'min_samples_split': 3,
 'min_samples_leaf': 1,
 'max_features': 'sqrt',
 'criterion': 'poisson'}

In [19]:
study.best_params

{'n_estimators': 366,
 'max_depth': 19,
 'min_samples_split': 3,
 'min_samples_leaf': 1,
 'max_features': 'sqrt',
 'criterion': 'poisson'}

In [20]:
best_params = study.best_params

## Optuna Visualizations

### Optimization History Plot

In [21]:
optuna.visualization.plot_optimization_history(study)


The Optimization History Plot visualizes the evolution of hyperparameter search, offering insights into the effectiveness and efficiency of the optimization process. This graphical representation aids in assessing whether the objective function improves over trials or quickly converges to an optimal solution. By analyzing the plot, decisions can be made regarding the need for additional trials or whether a satisfactory result has been achieved.

The graphical analysis indicates that our model attained its **optimal value by the 107th trial**, with subsequent trials maintaining the same objective values. In summary, this plot serves as a visual summary of the hyperparameter optimization study, showcasing how the objective function evolves with each trial and providing valuable information about the optimization process's efficiency and effectiveness.






### Parallel Coordinate Plot

Parallel Coordinate Plot is used to generate a parallel coordinates plot, which is a type of visualization that helps to understand the **relationships between hyperparameters and their corresponding objective function values** in a hyperparameter optimization study.

this is a useful tool for gaining insights into **the relationships between hyperparameters and their impact on the objective function during an optimization study.** It helps us to **identify promising hyperparameter configurations** and understand **the trade-offs and interactions between different hyperparameters.** This can aid in making decisions on which hyperparameter settings to prioritize for further optimization or model tuning.

In [22]:
optuna.visualization.plot_parallel_coordinate(study)

The plot illustrates serval important aspects:

1) **Hyperparameter Relationships:** Each vertical axis represents a hyperparameter, and the horizontal lines connecting different axes illustrate how the values of these hyperparameters relate to each other during the optimization process. You can see how certain hyperparameter values are chosen together or in relation to one another.

2. **Objective Function Values:** The color and thickness of each line segment in the plot represent the objective function value for a specific trial or combination of hyperparameters. Darker, thicker lines often indicate better objective function values, while lighter, thinner lines correspond to poorer results.

3. **Optimal Configurations:** By visually inspecting the plot, we can identify regions of the plot where the lines converge, indicating successful configurations of hyperparameters that led to good objective function values. This helps you find optimal or promising combinations of hyperparameters.

4. **Divergence and Exploration:** The spread of lines across the plot can also indicate how widely you explored the hyperparameter search space. Tight clusters suggest that the search was focused, while scattered lines may indicate that the search space was thoroughly explored.

### Slice Plot

These Plots provide insights into **the relationship between specific hyperparameters and the objective function value.** It visualizes how a particular hyperparameter or a combination of hyperparameters affects the optimization process.

In [23]:
optuna.visualization.plot_slice(study, params = ['n_estimators',
                                                 'max_depth',
                                                 'min_samples_split',
                                                 'min_samples_leaf',
                                                 'max_features',
                                                 'criterion',])

### Hyperparameter Importances

This plot can provide valuable insights into **which hyperparameters had the most significant impact on the objective function** and the overall model performance. 

In [24]:
optuna.visualization.plot_param_importances(study)

### Assigning the Best Hyperparameters

In [25]:
best_n_estimators = best_params['n_estimators']
best_n_estimators

366

In [26]:
best_max_depth = best_params['max_depth']
best_max_depth


19

In [27]:
best_min_samples_split = best_params['min_samples_split']
best_min_samples_split

3

In [28]:
best_min_samples_leaf = best_params['min_samples_leaf']
best_min_samples_leaf

1

In [29]:
best_max_features = best_params['max_features']
best_max_features

'sqrt'

In [30]:
best_criterion = best_params['criterion']
best_criterion

'poisson'

## Ramdom Forest Model with The Best HyperParatmeters

In [31]:
best_model = RandomForestRegressor( n_estimators = best_n_estimators,
                                  max_depth = best_max_depth,
                                  min_samples_split = best_min_samples_split,
                                  min_samples_leaf =  best_min_samples_leaf,
                                  )

In [32]:
%%time
best_model.fit(X_train, y_train)

CPU times: total: 734 ms
Wall time: 1.18 s


In [33]:
y_pred_rfr_optuna_200_trails = best_model.predict(X_test)

In [34]:
modelresults(y_pred_rfr_optuna_200_trails)

Mean absolute error on model is 0.3235

Mean squared error on model is 0.1662

The r2 score on model is 0.9825


We achieved a better MAE & MSE scores by tuning our hyperparameters with the Optuna model. However, the r2 score is not as high as the previous models we used for this dataset.

## Optuna Model with 1000 Trails

In [35]:
def objective(trial):
    n_estimators = trial.suggest_int('n_estimators', 100, 1000)
    max_depth = trial.suggest_int('max_depth', 10, 50)
    min_samples_split = trial.suggest_int('min_samples_split', 2, 32)
    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 32)
    max_features = trial.suggest_categorical('max_features', ['sqrt', 'log2'])
    criterion = trial.suggest_categorical('criterion', ["squared_error", "absolute_error", "friedman_mse", "poisson"])


    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        criterion=criterion,
        random_state=34  # Set a random state for reproducibility
    )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # You can choose any metric you want to optimize, for example, mean_squared_error
    score = mean_squared_error(y_test, y_pred)
    
    return score

study = optuna.create_study(direction='minimize', sampler=optuna.samplers.RandomSampler(seed=42))
study.optimize(objective, n_trials=1000)

# Print the best parameters found by Optuna
print("Best trial:")
trial = study.best_trial

print("Value: {:.4f}".format(trial.value))

print("Params: ")
for key, value in trial.params.items():
    print("    {}: {}".format(key, value))


[I 2024-01-30 22:32:27,787] A new study created in memory with name: no-name-cf693ed9-e861-4d18-a3fe-a7cbc988628d
[I 2024-01-30 22:32:28,664] Trial 0 finished with value: 3.0322819936505816 and parameters: {'n_estimators': 437, 'max_depth': 48, 'min_samples_split': 24, 'min_samples_leaf': 20, 'max_features': 'sqrt', 'criterion': 'absolute_error'}. Best is trial 0 with value: 3.0322819936505816.
[I 2024-01-30 22:32:28,913] Trial 1 finished with value: 1.2943417839701201 and parameters: {'n_estimators': 118, 'max_depth': 49, 'min_samples_split': 27, 'min_samples_leaf': 7, 'max_features': 'log2', 'criterion': 'absolute_error'}. Best is trial 1 with value: 1.2943417839701201.
[I 2024-01-30 22:32:29,691] Trial 2 finished with value: 1.6878654638265322 and parameters: {'n_estimators': 651, 'max_depth': 15, 'min_samples_split': 11, 'min_samples_leaf': 12, 'max_features': 'log2', 'criterion': 'friedman_mse'}. Best is trial 1 with value: 1.2943417839701201.
[I 2024-01-30 22:32:30,363] Trial 3 f

Best trial:
Value: 0.1314
Params: 
    n_estimators: 366
    max_depth: 19
    min_samples_split: 3
    min_samples_leaf: 1
    max_features: sqrt
    criterion: poisson


In [36]:
optuna.visualization.plot_optimization_history(study)


In [37]:
optuna.visualization.plot_parallel_coordinate(study)

In [38]:
optuna.visualization.plot_slice(study, params = ['n_estimators',
                                                 'max_depth',
                                                 'min_samples_split',
                                                 'min_samples_leaf',
                                                 'max_features',
                                                 'criterion',])

In [39]:
optuna.visualization.plot_param_importances(study)

In [40]:

best_n_estimators = best_params['n_estimators']
best_n_estimators

366

In [41]:
best_max_depth = best_params['max_depth']
best_max_depth


19

In [42]:
best_min_samples_split = best_params['min_samples_split']
best_min_samples_split

3

In [43]:

best_min_samples_leaf = best_params['min_samples_leaf']
best_min_samples_leaf

1

In [44]:
best_max_features = best_params['max_features']
best_max_features

'sqrt'

In [45]:
best_criterion = best_params['criterion']
best_criterion

'poisson'

In [46]:
best_model = RandomForestRegressor( n_estimators = best_n_estimators,
                                  max_depth = best_max_depth,
                                  min_samples_split = best_min_samples_split,
                                  min_samples_leaf =  best_min_samples_leaf,
                                  )


In [47]:
best_model.fit(X_train, y_train)


In [48]:
y_pred_rfr_optuna_1000_trails = best_model.predict(X_test)

In [49]:
modelresults(y_pred_rfr_optuna_1000_trails)

Mean absolute error on model is 0.3310

Mean squared error on model is 0.1696

The r2 score on model is 0.9821


## Create a Table of Results

In [50]:



def modelresults(predictions, model_name):
    mae = mean_absolute_error(y_test, predictions)
    mse = mean_squared_error(y_test, predictions)
    r2 = r2_score(y_test, predictions)

    results_dict = {
        'Model': model_name,
        'MAE': mae,
        'MSE': mse,
        'R2 Score': r2
    }
    
    print('Results for {}:'.format(model_name))
    print('Mean absolute error on model is {:.4f}'.format(mae))
    print('Mean squared error on model is {:.4f}'.format(mse))
    print('The r2 score on model is {:.4f}'.format(r2))
    
    return results_dict



results_fit = modelresults(y_pred_rfr_fit, 'Random Forest Fit')
results_optuna_200 = modelresults(y_pred_rfr_optuna_200_trails, 'Random Forest Optuna (200 trials)')
results_optuna_1000 = modelresults(y_pred_rfr_optuna_1000_trails, 'Random Forest Optuna (1000 trials)')

results_df = pd.DataFrame([results_fit, results_optuna_200, results_optuna_1000])
print(results_df)


Results for Random Forest Fit:
Mean absolute error on model is 0.3114
Mean squared error on model is 0.1553
The r2 score on model is 0.9836
Results for Random Forest Optuna (200 trials):
Mean absolute error on model is 0.3235
Mean squared error on model is 0.1662
The r2 score on model is 0.9825
Results for Random Forest Optuna (1000 trials):
Mean absolute error on model is 0.3310
Mean squared error on model is 0.1696
The r2 score on model is 0.9821
                                Model       MAE       MSE  R2 Score
0                   Random Forest Fit  0.311382  0.155324  0.983623
1   Random Forest Optuna (200 trials)  0.323495  0.166158  0.982481
2  Random Forest Optuna (1000 trials)  0.331046  0.169620  0.982116
