In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import numpy as np
import optuna
from sklearn.model_selection  import train_test_split, cross_validate
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix

In [None]:
# Load dataset.csv, parsing every comma-separated field as a 32-bit integer.
training_data = np.genfromtxt(
    'dataset.csv',
    dtype=np.int32,
    delimiter=',',
)

In [None]:
# Every column except the last is a feature; the last column is the target.
X, y = training_data[:, :-1], training_data[:, -1]

In [None]:
# Hold out 20% of the rows for the final evaluation; the fixed seed keeps the
# shuffled split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
    shuffle=True,
)

In [None]:
class Objective_DTC:
    """Optuna objective that scores a decision tree by cross-validated accuracy.

    Each call samples one hyperparameter configuration from the trial, builds
    a ``DecisionTreeClassifier`` with it and returns the mean test accuracy
    obtained by cross-validating on the data supplied at construction time.

    Args:
        X: Feature matrix used for cross-validation.
        y: Target labels aligned with ``X``.
        cv: Cross-validation setting forwarded to ``cross_validate``.
            Defaults to 5, the value that used to be hard-coded.
        random_state: Seed forwarded to every ``DecisionTreeClassifier`` so
            that tree construction is deterministic. Defaults to 101, the
            same seed this notebook uses for its train/test split. Pass
            ``None`` to get the previous unseeded behaviour.
    """

    def __init__(self, X, y, cv=5, random_state=101):
        self.X = X
        self.y = y
        self.cv = cv
        self.random_state = random_state

    def __call__(self, trial):
        """Evaluate one trial.

        Args:
            trial: Object providing ``suggest_categorical`` and
                ``suggest_int`` (an Optuna trial).

        Returns:
            Mean of the per-fold test accuracies.
        """
        # Search space; max_features is not tuned and stays at the estimator
        # default.
        params = {
            'criterion': trial.suggest_categorical('criterion', ['gini', 'entropy']),
            'splitter': trial.suggest_categorical('splitter', ['best', 'random']),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 64),
            'max_depth': trial.suggest_int('max_depth', 2, 64),
        }

        # Without a seed the tree is randomised on every fit (not only for
        # splitter='random'), so identical hyperparameters could receive
        # different scores and mislead the optimiser.
        model = DecisionTreeClassifier(random_state=self.random_state, **params)

        scores = cross_validate(
            model,
            self.X,
            self.y,
            cv=self.cv,
            scoring='accuracy',
            n_jobs=-1,
        )

        return scores['test_score'].mean()

In [None]:
objective = Objective_DTC(X_train, y_train)
# Seed the sampler (TPE is Optuna's default algorithm) so that, for the same
# objective values, the sequence of suggested hyperparameters is repeatable.
# The search is still bounded by 60 seconds of wall-clock time, so the number
# of completed trials can differ between machines and runs.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=101),
)
study.optimize(objective, timeout=60)
print('params:', study.best_params)

In [None]:
# Refit the best configuration found by the study on the full training split.
# The fixed seed makes the fitted tree, and therefore its predictions,
# repeatable between runs.
model = DecisionTreeClassifier(random_state=101, **study.best_params)

model.fit(X_train, y_train)
pred = model.predict(X_test)

In [None]:
# Report hold-out metrics as percentages, followed by the confusion matrix.
# Each metric is computed right before it is printed, one after another.

# Accuracy
accuracy_pct = 100 * accuracy_score(y_test, pred)
print('正解率: {:.5f}%'.format(accuracy_pct))

# Precision
precision_pct = 100 * precision_score(y_test, pred)
print('適合率: {:.5f}%'.format(precision_pct))

# Recall
recall_pct = 100 * recall_score(y_test, pred)
print('再現率: {:.5f}%'.format(recall_pct))

conf_mat = confusion_matrix(y_test, pred)
print(conf_mat)