# Machine Learning Model Selection


In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

In [4]:
# return_X_y=True yields the (data, target) pair directly.
features, targets = load_iris(return_X_y=True)

# Hold out 30% of the samples for the final evaluation. Stratifying on the
# labels keeps the class proportions equal in both partitions, and the fixed
# seed makes the partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    features, targets, stratify=targets, test_size=0.3, random_state=987654321
)

## SVC Model

In [5]:
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC
from scipy.stats import uniform

# Search space for the SVC.
# scipy's uniform(loc, scale) spans [loc, loc + scale], so C is drawn from
# [0.1, 10.1].
# NOTE(review): `degree` only influences the "poly" kernel, yet it is sampled
# (and reported in best_params_) for "linear"/"rbf" candidates too — confirm
# that this is intended.
params = dict(
    C=uniform(loc=0.1, scale=10),
    kernel=["linear", "rbf", "poly"],
    degree=range(1, 5),
)

model = SVC(random_state=42)

# 100 sampled candidates, each scored with 5-fold cross-validation on the
# training split only; n_jobs=-1 runs the fits in parallel on all cores.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=params,
    n_iter=100,
    cv=5,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Last expression of the cell: display the winning hyper-parameters.
random_search.best_params_

{'C': 6.274815096277165, 'degree': 2, 'kernel': 'rbf'}

In [6]:
# Score the fitted SVC search on the held-out split.
# NOTE(review): `random_search` is rebound by the later model sections, so this
# cell only reports the SVC result when the notebook is run top to bottom.
random_search.score(X=X_test, y=y_test)

0.9777777777777777

## GradientBoostingClassifier Model

In [7]:
from sklearn.ensemble import GradientBoostingClassifier

# Search space for gradient boosting.
# scipy's uniform(loc, scale) spans [loc, loc + scale]:
#   learning_rate -> [0.1, 1.1]
#   subsample     -> [0.5, 1.0]
# NOTE(review): if learning_rate was meant to stop at 1.0, the scale should be
# 0.9 — confirm the intended upper bound.
params = dict(
    learning_rate=uniform(loc=0.1, scale=1),
    n_estimators=range(10, 100),
    max_depth=range(1, 5),
    min_samples_split=range(2, 10),
    min_samples_leaf=range(1, 5),
    max_features=[1.0, "sqrt", "log2"],
    subsample=uniform(loc=0.5, scale=0.5),
)

model = GradientBoostingClassifier(loss="log_loss", random_state=42)

# 100 sampled candidates, each scored with 5-fold cross-validation on the
# training split only; n_jobs=-1 runs the fits in parallel on all cores.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=params,
    n_iter=100,
    cv=5,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Last expression of the cell: display the winning hyper-parameters.
random_search.best_params_

{'learning_rate': 0.8901755405312056,
 'max_depth': 3,
 'max_features': 1.0,
 'min_samples_leaf': 2,
 'min_samples_split': 8,
 'n_estimators': 50,
 'subsample': 0.9574798377718904}

In [8]:
# Score the fitted gradient-boosting search on the held-out split.
# NOTE(review): `random_search` is shared with the other model sections, so
# this cell only reports the gradient-boosting result when the notebook is run
# top to bottom.
random_search.score(X=X_test, y=y_test)

0.9555555555555556

## RandomForestClassifier Model

In [9]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier

model = RandomForestClassifier(random_state=42)

# Search space for the random forest: every entry is a finite set of values
# (np.arange excludes its upper bound, so the integer ranges end at 10).
param_distributions = {
    "max_depth": np.arange(1, 11),
    "min_samples_split": np.arange(2, 11),
    "min_samples_leaf": np.arange(1, 11),
    "max_features": ["sqrt", "log2"],
}

# 100 sampled candidates, each scored with 5-fold cross-validation on the
# training split only; n_jobs=-1 runs the fits in parallel on all cores.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_distributions,
    n_iter=100,
    cv=5,
    n_jobs=-1,
    random_state=42,
)
random_search.fit(X_train, y_train)

# Last expression of the cell: display the winning hyper-parameters.
random_search.best_params_

{'min_samples_split': 2,
 'min_samples_leaf': 9,
 'max_features': 'log2',
 'max_depth': 1}

In [10]:
# Score the fitted random-forest search on the held-out split.
random_search.score(X=X_test, y=y_test)

0.9333333333333333