In [2]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

In [4]:
from sklearn import datasets
iris = datasets.load_iris()
data = iris.data

In [5]:
X, y = datasets.load_iris(return_X_y=True)

# Split the dataset

In [9]:
# numbers from 1 to 150 randomly shuffled
random_inidices = np.random.choice(150,size=150, replace=False)

In [10]:
# assign the first 105 random indicies for training and the rest for testing
training_inices, test_indices = random_inidices[:105], random_inidices[105:]

In [12]:
# get the training data from the randomly picked indicies
training_data = np.zeros((105,4))
for num, index in enumerate(training_inices):
    training_data[num,:] = data[index]

In [13]:
# get the training targets from the randomly picked indicies
training_target = np.zeros((105))
for num, index in enumerate(training_inices):
    training_target[num] = iris.target[index]

In [19]:
# get the test data from the randomly picked inidicies
test_data = np.zeros((45,4))
for num, index in enumerate(test_indices):
    test_data[num,:] = data[index]

In [21]:
# get the test targets from the randomly picked inidicies
test_target = np.zeros(45)
for num, index in enumerate(test_indices):
    test_target[num] = iris.target[index]

# 5-fold cross-validation

In [14]:
param_grid = {'C': [0.001, 0.01, 0.1, 1, 10], 'gamma': [0.01, 0.1, 1, 10], 'kernel': ['rbf']}

In [18]:
accuracy_matrix = np.zeros((len(param_grid["C"]),len(param_grid["gamma"])))

# iterate over all C-gamma combinations and all folds
for c_index, c in enumerate(param_grid["C"]):  
    for g_index, gamma in enumerate(param_grid["gamma"]):
        
        # array for the accuracies in fold i
        acc_folds = np.zeros(5)
        
        # split the data into 5 subarrays
        fold_data_arrays = np.split(training_data, 5)
        fold_target_arrays = np.split(training_target, 5)
        
        for fold_i in range(5):
            
            #copy folds array to make no changes at the original list of arrays
            fold_i_data = fold_data_arrays.copy()
            fold_i_target = fold_target_arrays.copy()
            
            #test data for the i-th fold
            fold_i_test_d = fold_i_data.pop(fold_i)
            fold_i_test_t = fold_i_target.pop(fold_i)
            
            # training data for the i-th fold
            fold_i_training_d = np.concatenate(fold_i_data)
            fold_i_training_t = np.concatenate(fold_i_target)
            
            # calculate the pobability
            acc = SVC(C=c, gamma=gamma)
            acc.fit(fold_i_training_d, fold_i_training_t)
            acc_folds[fold_i] = acc.score(fold_i_test_d, fold_i_test_t)
        
        # calculate the average accuracy
        accuracy_matrix[c_index, g_index] = np.mean(acc_folds)

# best values for C and gamma
best_C = param_grid["C"][np.argmax(accuracy_matrix)//4]
best_gamma = param_grid["gamma"][np.argmax(accuracy_matrix)%4]

print(f"The accuracy of the C-gamma combinations is shown in the following matrix (C,gamma):\n\n\
{accuracy_matrix}")
print()
print(f"The highest accuracy is {np.amax(accuracy_matrix)}. \n\
It was achieved with (C,gamma) = {(best_C,best_gamma)}")

The accuracy of the C-gamma combinations is shown in the following matrix (C,gamma):

[[0.22857143 0.22857143 0.22857143 0.22857143]
 [0.22857143 0.22857143 0.22857143 0.22857143]
 [0.37142857 0.80952381 0.93333333 0.24761905]
 [0.8952381  0.95238095 0.95238095 0.92380952]
 [0.96190476 0.98095238 0.94285714 0.92380952]]

The highest accuracy is 0.980952380952381. 
It was achieved with (C,gamma) = (10, 0.1)


# train the whole trainig data with the optimal parameters from cross-validation

In [26]:
final_model = SVC(C=best_C,gamma=best_gamma, kernel='rbf')

In [27]:
final_model.fit(training_data, training_target)

SVC(C=10, gamma=0.1)

In [28]:
final_model.score(test_data, test_target)

0.9777777777777777