# Spaceship. Part 5. (continued)
## Hyperparameter tuning

Here we'll continue the hyperparameter search process described in ['05_hyperparameters.ipynb'](05_hyperparameters.ipynb).

This notebook can be re-run over and over to continue searching.

Choose running time:

In [None]:
# Time budget for this session, split into hours / minutes / seconds
HOURS = 0
MINUTES = 0
SECONDS = 10

# The same budget expressed as a single number of seconds
RUNNING_TIME = 3600 * HOURS + 60 * MINUTES + SECONDS

Let's load our data and our Optuna study, and define all the necessary functions:

In [None]:
# Random seed for reproducibility: passed as `random_state` to both the
# random forest and the stratified K-fold splitter in train_evaluate()
SEED = 123

import pandas as pd
import numpy as np
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import StratifiedKFold
import joblib

# Load the prepared train/test tables, the scores table and the test Ids;
# the first CSV column is used as the index in every case
train = pd.read_csv('04_train_prepared.csv', index_col=0)
test = pd.read_csv('04_test_prepared.csv', index_col=0)
scores_df = pd.read_csv('05_scores_df.csv', index_col=0)
test_Ids = pd.read_csv('test_Ids.csv', index_col=0).reset_index(drop=True)

# Encode the target as integers: truthy values become 1, falsy values become 0
train['Transported'] = [int(bool(flag)) for flag in train['Transported']]

# Restore the study saved by a previous session so the search can continue
study = joblib.load("05_RF.pkl")

# Report where the search stood before this session started
print('Before current session: ')
print("Best trial:", study.best_trial.number)
print("Best average cross-validation ROC AUC:", study.best_trial.value)
print("Best hyperparameters:", study.best_params)

def train_evaluate(params):
    '''
    Cross-validate a random forest configured with ``params``.

    ``params`` is a mapping of keyword arguments forwarded to the
    classifier's ``set_params``; it overrides the defaults set below.
    The data comes from the module-level ``train`` frame, whose
    ``'Transported'`` column is the target.

    Returns the mean ROC AUC over 6 stratified folds.
    '''
    from sklearn.ensemble import RandomForestClassifier

    # Baseline estimator, then apply the hyperparameters under evaluation
    classifier = RandomForestClassifier(
        random_state=SEED,
        n_estimators=100,
        n_jobs=-1,
    )
    classifier.set_params(**params)

    # Shuffled, seeded splitter that keeps the target proportion in each fold
    splitter = StratifiedKFold(n_splits=6, shuffle=True, random_state=SEED)

    # Separate features from the target once; folds are taken by position
    features = train.drop('Transported', axis=1)
    target = train['Transported']

    fold_scores = []
    for fit_rows, holdout_rows in splitter.split(features, target):
        # Refit on the training part of the fold
        classifier.fit(features.iloc[fit_rows], target.iloc[fit_rows])

        # Score the held-out part using the probability of the positive class
        holdout_proba = classifier.predict_proba(features.iloc[holdout_rows])[:, 1]
        fold_scores.append(roc_auc_score(target.iloc[holdout_rows], holdout_proba))

    return np.mean(fold_scores)
        
