Reference: https://www.youtube.com/watch?v=HdlDYng8g9s

In [None]:
import pandas as pd

from sklearn import datasets
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, StratifiedKFold
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score, cohen_kappa_score
from sklearn.svm import SVC

In [None]:
def load_data():
    """Load the iris dataset as pandas objects.

    Returns:
        A ``(X, y)`` tuple. ``X`` is a DataFrame built from the dataset's
        ``data`` entry with its ``feature_names`` as column labels; ``y`` is
        a Series built from the ``target`` entry and named ``'target'``.
    """
    iris = datasets.load_iris()

    features = pd.DataFrame(iris['data'], columns=iris['feature_names'])
    labels = pd.Series(iris['target'], name='target')

    return features, labels

In [None]:
# Build the feature matrix and label vector used by the searches below.
X, y = load_data()

# Display both shapes as a quick sanity check.
X.shape, y.shape

In [None]:
# Class distribution: number of samples per target label.
y.value_counts()

In [None]:
def eval_classif(y_true, y_pred):
    """Print an evaluation summary for a set of class predictions.

    Prints, in order: the confusion matrix (index named ``True``, columns
    named ``Pred``), a blank line, scikit-learn's classification report, a
    ROC-AUC computed from one-hot encoded hard labels, and Cohen's kappa.

    Args:
        y_true: Ground-truth class labels (1-D array-like).
        y_pred: Predicted class labels, aligned with ``y_true``.

    Returns:
        None. All output is written to stdout.
    """
    cofmat_df = pd.DataFrame(confusion_matrix(y_true, y_pred))
    cofmat_df.index.name   = 'True'
    cofmat_df.columns.name = 'Pred'

    # Encode both label vectors against the classes found in y_true.
    # Encoding them independently yields indicator matrices of different
    # widths whenever the model never predicts one of the classes, which
    # makes roc_auc_score fail on the shape mismatch.
    classes = pd.Categorical(y_true).categories
    true_onehot = pd.get_dummies(pd.Categorical(y_true, categories=classes))
    pred_onehot = pd.get_dummies(pd.Categorical(y_pred, categories=classes))

    roc_auc = roc_auc_score(true_onehot, pred_onehot, multi_class='ovr')
    kappa   = cohen_kappa_score(y_true, y_pred)

    print(cofmat_df)
    print()
    print(classification_report(y_true, y_pred, digits=5))
    print(f'ROC-AUC: {roc_auc : .5f}')
    print(f'Kappa:   {kappa :.5f}')

# Best Hyperparameter (GridSearchCV)

In [None]:
# Exhaustive search over every C / kernel / gamma combination for an SVC,
# scored by weighted F1 with 10-fold stratified cross-validation.
search = GridSearchCV(
    SVC(random_state=0),
    dict(
        C=[1, 10, 20, 30, 40, 50],
        kernel=['linear', 'poly', 'rbf', 'sigmoid'],
        gamma=['scale', 'auto'],
    ),
    cv=StratifiedKFold(n_splits=10),
    scoring='f1_weighted',
    verbose=1,
    n_jobs=-1,
)
search.fit(X, y)

In [None]:
# Tabulate the cross-validation results, highest mean test score first.
result_df = pd.DataFrame(data=search.cv_results_)
result_df.loc[:, ['params', 'mean_test_score', 'rank_test_score']].sort_values(
    'mean_test_score', ascending=False
)

In [None]:
# Show the parameter combination the search selected as best.
search.best_params_

In [None]:
# The search refits the winning configuration on all of X, y by default
# (refit=True), so reuse that fitted estimator instead of training an
# identical SVC a second time.
svc = search.best_estimator_

# NOTE: these metrics are computed on the same samples the model was fitted
# on, so they are optimistic; the cross-validated score from the search is
# the better estimate of generalisation.
eval_classif(y, svc.predict(X))

# Best Hyperparameter (RandomizedSearchCV)

In [None]:
# Randomised search: evaluate 10 sampled C / kernel / gamma combinations
# for an SVC, scored by weighted F1 with 10-fold stratified cross-validation.
search = RandomizedSearchCV(
    SVC(random_state=0),
    dict(
        C=[1, 10, 20, 30, 40, 50],
        kernel=['linear', 'poly', 'rbf', 'sigmoid'],
        gamma=['scale', 'auto'],
    ),
    n_iter=10,
    random_state=0,
    cv=StratifiedKFold(n_splits=10),
    scoring='f1_weighted',
    verbose=1,
    n_jobs=-1,
)
search.fit(X, y)

In [None]:
# Tabulate the sampled candidates, highest mean test score first.
result_df = pd.DataFrame(data=search.cv_results_)
result_df.loc[:, ['params', 'mean_test_score', 'rank_test_score']].sort_values(
    'mean_test_score', ascending=False
)

In [None]:
# Show the best parameter combination among the sampled candidates.
search.best_params_

In [None]:
# The search refits the winning configuration on all of X, y by default
# (refit=True), so reuse that fitted estimator instead of training an
# identical SVC a second time.
svc = search.best_estimator_

# NOTE: these metrics are computed on the same samples the model was fitted
# on, so they are optimistic; the cross-validated score from the search is
# the better estimate of generalisation.
eval_classif(y, svc.predict(X))

# Best Model

In [None]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
# Candidate model families, each with the estimator to tune and the
# hyperparameter space to sample from.
model_dict = {
    'svc': {
        'model': SVC(random_state=0),
        'param_dict': {
            'C': [1, 10, 20, 30, 40, 50],
            'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
            'gamma': ['scale', 'auto'],
        }
    },
    'random_forest': {
        'model': RandomForestClassifier(random_state=0, n_jobs=-1),
        'param_dict': {
            'n_estimators': [10, 50, 100, 200, 300],
            'criterion': ['gini', 'entropy'],
            'max_depth': [None, 5, 10, 15, 20],
            'min_samples_split': [2, 5, 10, 15, 20],
            'min_samples_leaf': [1, 5, 10, 15, 20],
            # 'auto' was an alias of 'sqrt' for classifiers and has been
            # removed from scikit-learn; dropping it loses no distinct
            # configuration and avoids candidates that can only fail.
            'max_features': ['sqrt', 'log2'],
            'max_leaf_nodes': [None, 10, 20, 30],
            'bootstrap': [True, False]
        }
    },
    'logistic_regression': {
        'model': LogisticRegression(max_iter=1_000, random_state=0, n_jobs=-1),
        'param_dict': {
            # None (no regularisation) replaces the string 'none', which
            # current scikit-learn releases reject. Requires scikit-learn 1.2+.
            'penalty': ['l1', 'l2', 'elasticnet', None],
            'C': [1, 10, 20, 30, 40, 50],
            'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
            'l1_ratio': [None, .5, .25, .75]
        }
    }
}

# Run one randomised search per model family and keep its best score and
# parameters for the comparison table.
# NOTE(review): several penalty / solver / l1_ratio combinations are not
# supported by LogisticRegression; with the search's default error handling
# those candidates are expected to be scored as NaN rather than abort the
# run -- confirm against the installed scikit-learn version.
results = []
for k,v in model_dict.items():
    print(f'Searching {k}:')

    search = RandomizedSearchCV(
        estimator=v['model'],
        param_distributions=v['param_dict'],
        scoring='f1_weighted',
        cv=StratifiedKFold(n_splits=10),
        n_jobs=-1,
        verbose=1,
        n_iter=100,
        random_state=0
    )
    search.fit(X, y)

    results.append({
        'model': k,
        'best_score': search.best_score_,
        'best_params': search.best_params_,
    })

In [None]:
# Rank the model families by their best cross-validated score.
result_df = pd.DataFrame(data=results)
result_df.sort_values('best_score', ascending=False)