In [1]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import make_scorer, accuracy_score, precision_score, recall_score
import pandas as pd

In [2]:
dataset = pd.read_csv('processedData.csv')
target = dataset['target']
data = dataset.drop(columns='target')

In [3]:
gridParameters = [{
    'n_estimators': [10, 50, 100],
    'criterion': ['gini', 'entropy'],
    'min_samples_split': [2,4,6,8,10],
    'min_samples_leaf': [1,2,3,4,5],
    'bootstrap': [True, False],
    'max_features': [None, 'sqrt', 'log2']
}]

In [4]:
scoring = {
    'Accuracy': make_scorer(accuracy_score), 
    'Precision': make_scorer(precision_score, average='macro'), 
    'Recall': make_scorer(recall_score, average='macro')
}
gs = GridSearchCV(
    RandomForestClassifier(),
    gridParameters,
    verbose=10,
    cv=5,
    n_jobs=-1,
    scoring= scoring,
    refit = "Recall"
)
gs.fit(X=data, y=target)

#bestEstimators = gs.best_estimator_
#finalAcuraccy = gs.cv_results_["mean_test_score"][gs.best_index_]
#stdDeviation = gs.cv_results_["std_test_score"][gs.best_index_]

Fitting 5 folds for each of 900 candidates, totalling 4500 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:    3.3s
[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:    4.1s
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:    4.9s
[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed:    5.7s
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:    6.7s
[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed:    7.8s
[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed:    8.7s
[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed:   10.1s
[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed:   11.8s
[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed:   13.4s
[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed:   15.1s
[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed:   16.6s
[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed:   18.4s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:   20.0s
[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed:   

GridSearchCV(cv=5, estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid=[{'bootstrap': [True, False],
                          'criterion': ['gini', 'entropy'],
                          'max_features': [None, 'sqrt', 'log2'],
                          'min_samples_leaf': [1, 2, 3, 4, 5],
                          'min_samples_split': [2, 4, 6, 8, 10],
                          'n_estimators': [10, 50, 100]}],
             refit='Recall',
             scoring={'Accuracy': make_scorer(accuracy_score),
                      'Precision': make_scorer(precision_score, average=macro),
                      'Recall': make_scorer(recall_score, average=macro)},
             verbose=10)

In [5]:
bestEstimators = gs.best_estimator_
finalAcuraccy = gs.cv_results_["mean_test_Accuracy"][gs.best_index_]
finalPrecision = gs.cv_results_["mean_test_Precision"][gs.best_index_]
finalRecall = gs.cv_results_["mean_test_Recall"][gs.best_index_]
stdDeviAcuraccy = gs.cv_results_["std_test_Accuracy"][gs.best_index_] 
stdDeviPrecision = gs.cv_results_["std_test_Precision"][gs.best_index_] 
stdDeviRecall = gs.cv_results_["std_test_Recall"][gs.best_index_] 

In [6]:
print(f'Accuracy: {round(finalAcuraccy, 3)}')
print(f'std Deviation Accuracy: {round(stdDeviAcuraccy, 2)}')
print(f'Recall: {round(finalRecall, 3)}')
print(f'std Deviation Accuracy: {round(stdDeviRecall, 2)}')
print(f'Accuracy: {round(finalPrecision, 3)}')
print(f'std Deviation Accuracy: {round(stdDeviPrecision, 2)}')

Accuracy: 0.89
std Deviation Accuracy: 0.03
Recall: 0.748
std Deviation Accuracy: 0.05
Accuracy: 0.794
std Deviation Accuracy: 0.07


In [7]:
arquivoResultados = "RandomForestResults.txt"
objFile = open(arquivoResultados, "a")
objFile.write("RF Results \n")
objFile.write(str(bestEstimators).replace("\n", "").replace(" ", "") + "\n")
objFile.write(str(finalAcuraccy)+"\n")
objFile.write(str(stdDeviAcuraccy)+"\n")
objFile.write(str(finalRecall)+"\n")
objFile.write(str(stdDeviRecall)+"\n")
objFile.write(str(finalPrecision)+"\n")
objFile.write(str(stdDeviPrecision)+"\n")
objFile.close()