In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import make_scorer, accuracy_score, classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import f1_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import GridSearchCV, ShuffleSplit
from sklearn.svm import SVC

In [2]:
# Read the train/test feature and label CSVs in one pass.
# The first column of every file is used as the DataFrame index.
X_train, y_train, X_test, y_test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        '../Resources/Datasets/X_train.csv',
        '../Resources/Datasets/y_train.csv',
        '../Resources/Datasets/X_test.csv',
        '../Resources/Datasets/y_test.csv',
    )
)

In [3]:
# Hyperparameter tuning for the SVM model.
#
# Exhaustive grid over 5 values of C x 5 values of gamma with an RBF kernel
# (25 candidates). With 3 CV splits this is 75 model fits.
# NOTE(review): the original comment said this grid is "based on the results
# of random search"; no random search is visible in this notebook -- confirm.
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf']
}

# Base estimator; C, gamma and kernel are overridden per candidate by the search.
svc = SVC()

# Build the CV splits explicitly so they can be seeded: GridSearchCV itself has
# no random_state argument. Each of the 3 splits holds out 33% of the rows.
# NOTE(review): ShuffleSplit does not stratify by class; if the labels are
# imbalanced, StratifiedShuffleSplit may give more stable scores -- confirm.
cv_sets = ShuffleSplit(n_splits=3, test_size=.33, random_state=8)

# Candidates are ranked by micro-averaged F1 on the held-out part of each split.
grid_search = GridSearchCV(estimator=svc,
                           param_grid=param_grid,
                           scoring=make_scorer(f1_score, average='micro'),
                           cv=cv_sets,
                           verbose=1)

# y_train was loaded as a DataFrame; flatten it to the 1-D label array that
# the estimator expects (assumes a single label column -- TODO confirm).
grid_search.fit(X_train, y_train.values.ravel())

# Report the best-scoring parameter combination.
print(grid_search.best_params_)

Fitting 3 folds for each of 25 candidates, totalling 75 fits
