In [1]:
# Import necessary libraries
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from scipy.stats import expon, reciprocal
import joblib

In [2]:
# Fetch the Iris bunch and pull out the feature matrix and the class labels
iris = load_iris()
X = iris.data
y = iris.target

# Hold out 20% of the samples as a test set; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Grid Search cross validation

In [8]:
# Exhaustive grid search over SVC hyper-parameters, used as the final step of a
# scaling pipeline.
# NOTE(review): the scaler sits outside the search, so it is fitted on all of X_train
# before GridSearchCV builds its folds; each validation fold is therefore scaled with
# statistics that include it. Wrapping the whole pipeline in GridSearchCV would avoid this.
svc_param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],  # only affects the 'rbf' kernel; redundant for 'linear'
    'kernel': ['rbf', 'linear'],
}
svc_grid_search = GridSearchCV(
    estimator=SVC(random_state=42),
    param_grid=svc_param_grid,
    cv=5,
    verbose=0,
)
SVM_grid_search = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', svc_grid_search),
])
SVM_grid_search.fit(X_train, y_train)

# Report the winning parameter combination and its mean cross-validated score
fitted_grid_search = SVM_grid_search.named_steps['svm']
print(f"GridSearchCV Best parameters: {fitted_grid_search.best_params_}")
print(f"GridSearchCV Best cross-validation score: {fitted_grid_search.best_score_:.3f}")

GridSearchCV Best parameters: {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
GridSearchCV Best cross-validation score: 0.967


## Randomized Search cross validation

In [9]:
# Create a pipeline integrating RandomizedSearchCV: instead of trying every point of a
# fixed grid, 50 (C, gamma) candidates are sampled from continuous distributions and
# each is scored with 5-fold cross-validation. The SVC kernel is left at its default.
SVM_rand_search = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', RandomizedSearchCV(
        estimator=SVC(random_state=42),
        param_distributions={
            'C': reciprocal(0.1, 100),   # log-uniform over [0.1, 100]
            'gamma': expon(scale=1.0)    # exponential distribution with mean 1
        },
        n_iter=50,
        cv=5,
        verbose=0,
        random_state=42  # makes the sampled candidates reproducible
        )
    )
])
SVM_rand_search.fit(X_train, y_train)

# Label the results with the search that actually produced them
print(f"RandomizedSearchCV Best parameters: {SVM_rand_search.named_steps['svm'].best_params_}")
print(f"RandomizedSearchCV Best cross-validation score: {SVM_rand_search.named_steps['svm'].best_score_:.3f}")

GridSearchCV Best parameters: {'C': 5.9874749104613985, 'gamma': 0.047563849756408545}
GridSearchCV Best cross-validation score: 0.967


## Saving and loading trained estimators

The next two code cells show that it is possible to save a trained estimator and load it again later, when we want to use it. The code below uses a Python library called *joblib* to achieve this. Don't worry, we will cover this in more detail later; the code below is just a first look.

In [8]:
def _best_fitted_pipeline(search_pipeline):
    """Build a ready-to-use pipeline from a fitted scaler + search pipeline.

    `search_pipeline` is a fitted Pipeline whose 'scaler' step is the fitted
    scaler and whose 'svm' step is a fitted GridSearchCV / RandomizedSearchCV.
    A Pipeline itself has no `best_estimator_` attribute; that attribute lives
    on the search step, so it has to be read from there.

    Returns a Pipeline made of the already-fitted scaler followed by the
    search's `best_estimator_` (the SVC refitted with the best parameters),
    so the saved object can score raw, unscaled samples directly.
    """
    return Pipeline([
        ('scaler', search_pipeline.named_steps['scaler']),
        ('svm', search_pipeline.named_steps['svm'].best_estimator_),
    ])


# Save the best GridSearchCV estimator
joblib.dump(_best_fitted_pipeline(SVM_grid_search), 'grid_search_best_pipeline.joblib')

# Save the best RandomizedSearchCV estimator
joblib.dump(_best_fitted_pipeline(SVM_rand_search), 'random_search_best_pipeline.joblib')

['grid_search_best_pipeline.joblib']

In [10]:
# Reload the persisted grid-search model and measure its accuracy on the held-out test set.
# joblib files are pickle-based, so only load files that come from a trusted source.
grid_model_path = 'grid_search_best_pipeline.joblib'
loaded_grid_model = joblib.load(grid_model_path)
grid_test_score = loaded_grid_model.score(X_test, y_test)
print(f"Loaded GridSearchCV estimator Test set score: {grid_test_score:.3f}")

# Same round trip for the randomized-search model
random_model_path = 'random_search_best_pipeline.joblib'
loaded_random_model = joblib.load(random_model_path)
random_test_score = loaded_random_model.score(X_test, y_test)
print(f"Loaded RandomizedSearchCV estimator Test set score: {random_test_score:.3f}")

Loaded GridSearchCV Model Test set score: 0.967
