In [94]:
import numpy as np
import pandas as pd
#import time

from sklearn.ensemble.forest import RandomForestClassifier
from sklearn.ensemble.forest import RandomForestRegressor

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import f1_score
from sklearn.metrics import mean_squared_error

from hyperopt import hp, space_eval, fmin, tpe, hp, STATUS_OK, Trials


### Get Data and Split into Train/Test

In [2]:
# Load the raw dataset; the path is relative to the notebook's working directory.
# The 'target' column of this frame is used as the label further down.
df = pd.read_csv("../Dataset/heart_target.csv")

In [3]:
def split_df(df, target, val_size=0.3, random_state=None):
    """Split a DataFrame into train / hold-out features and target.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both the feature columns and the target column.
    target : str
        Name of the column to predict; every other column is a feature.
    val_size : float or int, default 0.3
        Forwarded to ``train_test_split`` as ``test_size``.
    random_state : int or None, default None
        Forwarded to ``train_test_split``. Pass an integer to make the
        split reproducible; ``None`` keeps the original unseeded behaviour.

    Returns
    -------
    X_train, X_test, y_train, y_test
        In the order produced by ``train_test_split``.

    Raises
    ------
    ValueError
        If ``target`` is not a column of ``df``.
    """
    # Fail with a readable message instead of the opaque
    # "list.remove(x): x not in list" the bare list manipulation produced.
    if target not in df.columns:
        raise ValueError(
            "target column {!r} not found in DataFrame columns".format(target)
        )

    feature_cols = [col for col in df.columns if col != target]
    X = df[feature_cols]
    y = df[target]
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=val_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test
# Split on the 'target' column and show the shape of each resulting piece
# (train features, test features, train labels, test labels).
X_train, X_test, y_train, y_test = split_df(df, 'target')
print(*(part.shape for part in (X_train, X_test, y_train, y_test)))

### Define param_space

In [100]:
def _random_forest_space(criteria):
    """Build the hyperopt search space shared by both random-forest variants.

    Only the list of split criteria differs between the classifier and the
    regressor; depth, feature count and tree count use the same ranges.
    """
    n_columns = len(X_train.columns)
    return {
        # depths 1..19
        'max_depth': hp.choice('max_depth', range(1, 20)),
        # 1..(number of feature columns - 1)
        'max_features': hp.choice('max_features', range(1, n_columns)),
        # 100..499 trees
        'n_estimators': hp.choice('n_estimators', range(100, 500)),
        'criterion': hp.choice('criterion', criteria),
    }


rf_cls_param_space = _random_forest_space(["gini", "entropy"])
rf_reg_param_space = _random_forest_space(["mse", "mae"])

# Search spaces keyed by estimator class name; the tuning cell looks the
# space up from the name at the start of the estimator's string form.
param_space = {
    'RandomForestClassifier': rf_cls_param_space,
    'RandomForestRegressor': rf_reg_param_space,
}

### Hyperparameter Tuning

In [73]:
'''
For this to work we need:
1. The algorithm object having .set_params method
2. str(algo_type).split('(')[0] Must return the name of the ObjectType
'''

################### For test
# Hard-coded stand-ins for the estimator and task type used while testing.
# Alternative configuration (classification), currently disabled:
# algo_type = RandomForestClassifier()
# task_type = 'cls'
algo_type = RandomForestRegressor() # to be given (the estimator to tune)
task_type = 'reg'                   # to be decided ('reg' or 'cls'; selects the objective and final metric)
################################

In [88]:
def score_model_classification(model,params):
    """Return the negated mean 5-fold cross-validated F1 of ``model``.

    ``params`` are applied to ``model`` via ``set_params`` and the model is
    scored on the module-level ``X_train`` / ``y_train``. The mean F1 is
    negated so that a minimiser ends up maximising F1.
    """
    configured = model.set_params(**params)
    fold_scores = cross_val_score(configured, X_train, y_train, cv=5, scoring='f1')
    return -1 * fold_scores.mean()
def score_model_regression(model,params):
    """Return ``mse - 1`` from 5-fold cross-validation of ``model``.

    ``params`` are applied to ``model`` via ``set_params`` and the model is
    scored on the module-level ``X_train`` / ``y_train``.
    """
    configured = model.set_params(**params)
    # cross_val_score() returns the *negative* MSE for this scorer, so
    # subtracting it from -1 yields (mse - 1): lower is better.
    neg_mse = cross_val_score(
        configured, X_train, y_train, cv=5, scoring='neg_mean_squared_error'
    ).mean()
    return -1 - neg_mse

def f_cls(params):
    """Hyperopt objective for classification.

    Scores the module-level ``algo_type`` with ``params``, logs the result,
    and returns it in the ``{'loss', 'status'}`` dict form.
    """
    loss = score_model_classification(algo_type, params)
    print('score config (-f1):', loss, params)
    return dict(loss=loss, status=STATUS_OK)
def f_reg(params):
    """Hyperopt objective for regression.

    Scores the module-level ``algo_type`` with ``params``, logs the result,
    and returns it in the ``{'loss', 'status'}`` dict form.
    """
    loss = score_model_regression(algo_type, params)
    print('score config (-1 + mse):', loss, params)
    return dict(loss=loss, status=STATUS_OK)


# Run the TPE search once, choosing the objective from the task type.
# An unknown task type is rejected up front; previously neither branch ran,
# `best` was never assigned, and the failure surfaced as a NameError at print.
objectives = {'reg': f_reg, 'cls': f_cls}
if task_type not in objectives:
    raise ValueError(
        "unknown task_type {!r}; expected one of {}".format(task_type, sorted(objectives))
    )

# The search space is keyed by the estimator's class name, taken from the
# text before the first '(' in its string form (e.g. "RandomForestRegressor").
algo_name = str(algo_type).split('(')[0]

trials = Trials()
best = fmin(
    objectives[task_type],
    param_space[algo_name],
    algo=tpe.suggest,
    max_evals=100,
    trials=trials,
)
print('best:')
print(best)

score config:                                                                                                          
-0.8518204682800099                                                                                                    
{'criterion': 'mse', 'max_depth': 12, 'max_features': 11, 'n_estimators': 327}                                         
score config:                                                                                                          
-0.850614042772778                                                                                                     
{'criterion': 'mae', 'max_depth': 5, 'max_features': 7, 'n_estimators': 230}                                           
score config:                                                                                                          
-0.8529378281223051                                                                                                    
{'criterion': 'mae', 'max_depth': 17, 'm

{'criterion': 'mse', 'max_depth': 12, 'max_features': 3, 'n_estimators': 374}                                          
score config:                                                                                                          
-0.8633031959211785                                                                                                    
{'criterion': 'mse', 'max_depth': 18, 'max_features': 2, 'n_estimators': 460}                                          
score config:                                                                                                          
-0.8592654207455315                                                                                                    
{'criterion': 'mse', 'max_depth': 18, 'max_features': 3, 'n_estimators': 339}                                          
score config:                                                                                                          
-0.8512688035189153                     

-0.8505613569691243                                                                                                    
{'criterion': 'mse', 'max_depth': 2, 'max_features': 7, 'n_estimators': 203}                                           
score config:                                                                                                          
-0.8604552882034562                                                                                                    
{'criterion': 'mae', 'max_depth': 19, 'max_features': 2, 'n_estimators': 435}                                          
score config:                                                                                                          
-0.8569660859365691                                                                                                    
{'criterion': 'mse', 'max_depth': 11, 'max_features': 6, 'n_estimators': 146}                                          
score config:                           

score config:                                                                                                          
-0.862167987667888                                                                                                     
{'criterion': 'mse', 'max_depth': 18, 'max_features': 2, 'n_estimators': 409}                                          
score config:                                                                                                          
-0.8501633582194471                                                                                                    
{'criterion': 'mse', 'max_depth': 2, 'max_features': 4, 'n_estimators': 407}                                           
score config:                                                                                                          
-0.8610900635407448                                                                                                    
{'criterion': 'mse', 'max_depth': 11, 'm

{'criterion': 'mae', 'max_depth': 19, 'max_features': 5, 'n_estimators': 214}                                          
score config:                                                                                                          
-0.8620993620279549                                                                                                    
{'criterion': 'mse', 'max_depth': 11, 'max_features': 2, 'n_estimators': 383}                                          
score config:                                                                                                          
-0.8556238816818379                                                                                                    
{'criterion': 'mse', 'max_depth': 18, 'max_features': 7, 'n_estimators': 153}                                          
score config:                                                                                                          
-0.8545972714768042                     

### Train Model with optimized Hyperparameters

In [108]:
# Translate the `best` assignment returned by fmin back into concrete
# hyperparameter values from the search space, then configure and fit the
# estimator on the training split. The fitted estimator is the cell's output.
algo_key = str(algo_type).split('(')[0]
params = space_eval(param_space[algo_key], best)
algo_type.set_params(**params).fit(X_train, y_train)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=18,
           max_features=2, max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=422, n_jobs=None, oob_score=False,
           random_state=None, verbose=0, warm_start=False)

### Final Evaluation

In [109]:
def test_classification(model,params):
    """Predict on the module-level ``X_test`` and score with F1 against ``y_test``.

    ``params`` is accepted but not used. Returns ``(predictions, f1)``.
    """
    predictions = model.predict(X_test)
    score = f1_score(y_test, predictions)
    return predictions, score
def test_regression(model,params):
    """Predict on the module-level ``X_test`` and score with MSE against ``y_test``.

    ``params`` is accepted but not used. Returns ``(predictions, mse)``.
    """
    predictions = model.predict(X_test)
    error = mean_squared_error(y_test, predictions)
    return predictions, error

In [110]:
# Evaluate the fitted model on the hold-out split and print the predictions
# together with the metric.
# The F1 value is bound to `f1`, not `f1_score`: assigning to `f1_score`
# would overwrite the imported sklearn.metrics.f1_score function with a float
# and make any later call to test_classification fail with a TypeError.
if task_type == 'cls':
    res, f1 = test_classification(algo_type, best)
    print(res, f1)
else:
    res, mse = test_regression(algo_type, best)
    print(res, mse)

[0.90995261 0.5521327  0.68957346 0.03317536 0.66824645 0.48341232
 0.80805687 0.12796209 0.09952607 0.59004739 0.04976303 0.8056872
 0.97630332 0.10900474 0.92417062 0.87440758 0.61848341 0.51895735
 0.98341232 0.68483412 0.17535545 0.96445498 0.83175355 0.69905213
 0.56398104 0.8056872  0.38151659 0.6943128  0.8436019  0.90758294
 0.05687204 0.75829384 0.40521327 0.992891   0.68720379 0.7985782
 0.39099526 0.47156398 0.44075829 0.18246445 0.34834123 0.62559242
 0.51895735 0.13981043 0.42890995 0.9549763  0.90047393 0.8436019
 0.58293839 0.08056872 0.68483412 0.09952607 0.90758294 0.59952607
 0.77014218 0.0971564  0.57345972 0.44312796 0.91232227 0.76540284
 0.59241706 0.68246445 0.53080569 0.69905213 0.20616114 0.32938389
 0.61848341 0.96445498 0.00236967 0.53317536 0.31279621 0.9478673
 0.62559242 0.507109   0.01184834 0.06635071 0.88862559 0.23696682
 0.38388626 0.88625592 0.4028436  0.76066351 0.71327014 0.84597156
 0.88388626 0.12085308 0.59241706 0.93838863 0.2464455  0.51895735

In [111]:
#Final Evaluation
def test_classification(model,params):
    """Predict on ``X_test`` and return ``(predictions, f1)``; ``params`` is unused."""
    y_pred = model.predict(X_test)
    return y_pred, f1_score(y_test, y_pred)
def test_regression(model,params):
    """Predict on ``X_test`` and return ``(predictions, mse)``; ``params`` is unused."""
    y_pred = model.predict(X_test)
    return y_pred, mean_squared_error(y_test, y_pred)

y_pred = None
if task_type == 'cls':
    # Bind the metric to `f1` so the imported f1_score function is not
    # overwritten by a float (which would break re-running this cell).
    y_pred, f1 = test_classification(algo_type, best)
    print(f1)
else:
    y_pred, mse = test_regression(algo_type,best)
    print(mse)
# `return` is a SyntaxError outside a function; a bare expression as the
# last line of the cell displays the fitted estimator instead.
algo_type

0.1122393531537531


RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=18,
           max_features=2, max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           n_estimators=422, n_jobs=None, oob_score=False,
           random_state=None, verbose=0, warm_start=False)