In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.model_selection import cross_val_score
import optuna
import warnings
warnings.filterwarnings('ignore')

In [2]:
dataset = pd.read_csv('new_data.csv')

In [3]:
X = dataset.drop(axis=1, columns=['Y'])
y = dataset['Y']

### Optuna

In [4]:
def objective(trial):
    min_samples_split = trial.suggest_int("min_samples_split", 7, 11)
    n_estimators = trial.suggest_int("n_estimators", 150, 350)
    max_features = trial.suggest_float("max_features", 0.3, 0.9)
    criterion = trial.suggest_categorical("criterion", ['entropy', 'gini'])
    bootstrap = trial.suggest_categorical("bootstrap", [True, False])
   
    rf_model = RandomForestClassifier(
        criterion = criterion,
        bootstrap = bootstrap,
        max_features = max_features,
        min_samples_split = min_samples_split,
        n_estimators = n_estimators)
    score = cross_val_score(rf_model, X, y,  cv=5).mean()
    return score

In [5]:
study = optuna.create_study(direction = "maximize")
study.optimize(objective, n_trials = 10)
trial = study.best_trial

[32m[I 2022-04-19 00:26:44,664][0m A new study created in memory with name: no-name-2e2edd85-d4a7-4640-ba15-9b5fa1097443[0m
[32m[I 2022-04-19 00:28:18,479][0m Trial 0 finished with value: 0.7374654482740614 and parameters: {'min_samples_split': 8, 'n_estimators': 322, 'max_features': 0.468721255424552, 'criterion': 'entropy', 'bootstrap': True}. Best is trial 0 with value: 0.7374654482740614.[0m
[32m[I 2022-04-19 00:29:25,886][0m Trial 1 finished with value: 0.7353361418203024 and parameters: {'min_samples_split': 9, 'n_estimators': 229, 'max_features': 0.5439998909222009, 'criterion': 'entropy', 'bootstrap': False}. Best is trial 0 with value: 0.7374654482740614.[0m
[32m[I 2022-04-19 00:30:51,310][0m Trial 2 finished with value: 0.7371495867470583 and parameters: {'min_samples_split': 9, 'n_estimators': 319, 'max_features': 0.5115407792379894, 'criterion': 'entropy', 'bootstrap': False}. Best is trial 0 with value: 0.7374654482740614.[0m
[32m[I 2022-04-19 00:31:43,315][0

In [6]:
print("Best Score: ", trial.value)
print("Best Params: ")
for key, value in trial.params.items():
    print("  {}: {}".format(key, value))

Best Score:  0.7401456239454185
Best Params: 
  min_samples_split: 11
  n_estimators: 271
  max_features: 0.4950768786025981
  criterion: gini
  bootstrap: False
