## Initialization

In [41]:
from rotation_forest import RotationForestClassifier
import numpy as np 
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score, confusion_matrix

## Data loading

In [42]:
data = load_breast_cancer()
X = data.data
y = data.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3)
# scaler_x = MinMaxScaler().fit(X_train)
# X_train, X_test = scaler_x.transform(X_train), scaler_x.transform(X_test)

## Rotation Forest - Grid search

In [43]:
model = RotationForestClassifier(bootstrap=True)
hyperparameters = {'n_estimators':np.arange(5, 100, 5), 'criterion':['gini', 'entropy']}
new_model = GridSearchCV(model, hyperparameters)
new_model.fit(X_train, y_train)

GridSearchCV(estimator=RotationForestClassifier(bootstrap=True),
             param_grid={'criterion': ['gini', 'entropy'],
                         'n_estimators': array([ 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85,
       90, 95])})

## Evaluation

In [44]:
print('Best number of estimators:', new_model.best_estimator_.get_params()['n_estimators'])
print('Best criterion:', new_model.best_estimator_.get_params()['criterion'])
y_pred_acc = new_model.predict(X_test)
print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred_acc)))
print('Precision Score : ' + str(precision_score(y_test,y_pred_acc)))
print('Recall Score : ' + str(recall_score(y_test,y_pred_acc)))
print('F1 Score : ' + str(f1_score(y_test,y_pred_acc)))
print(confusion_matrix(y_pred_acc, y_test))

Best number of estimators: 50
Best criterion: gini
Accuracy Score : 0.9532163742690059
Precision Score : 0.9428571428571428
Recall Score : 0.9801980198019802
F1 Score : 0.9611650485436893
[[64  2]
 [ 6 99]]


## Random Forest - Grid search

In [45]:
model = RandomForestClassifier()
hyperparameters = {'n_estimators':np.arange(5, 100, 5), 'criterion':['gini', 'entropy']}
new_model = GridSearchCV(model, hyperparameters)
new_model.fit(X_train, y_train)

GridSearchCV(estimator=RandomForestClassifier(),
             param_grid={'criterion': ['gini', 'entropy'],
                         'n_estimators': array([ 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85,
       90, 95])})

## Evaluation

In [46]:
print('Best number of estimators:', new_model.best_estimator_.get_params()['n_estimators'])
print('Best criterion:', new_model.best_estimator_.get_params()['criterion'])
y_pred_acc = new_model.predict(X_test)
print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred_acc)))
print('Precision Score : ' + str(precision_score(y_test,y_pred_acc)))
print('Recall Score : ' + str(recall_score(y_test,y_pred_acc)))
print('F1 Score : ' + str(f1_score(y_test,y_pred_acc)))
print(confusion_matrix(y_pred_acc, y_test))

Best number of estimators: 30
Best criterion: gini
Accuracy Score : 0.9473684210526315
Precision Score : 0.9423076923076923
Recall Score : 0.9702970297029703
F1 Score : 0.9560975609756097
[[64  3]
 [ 6 98]]
