SVM classification has two main types.
1. One Versus One Classification
2. One Versus All Classification

in one versus one classification

In [4]:
import pandas as pd
import numpy as np
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression, LogisticRegression
from sklearn.compose import make_column_transformer, make_column_selector
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, MinMaxScaler, StandardScaler
from sklearn.model_selection import train_test_split, StratifiedKFold, GridSearchCV, KFold
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, confusion_matrix, log_loss, roc_auc_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.decomposition import PCA
from sklearn.svm import SVC

import warnings
warnings.filterwarnings('ignore')

In [10]:
df = pd.read_csv("Satellite.csv", sep = ";")
df

Unnamed: 0,x.1,x.2,x.3,x.4,x.5,x.6,x.7,x.8,x.9,x.10,...,x.28,x.29,x.30,x.31,x.32,x.33,x.34,x.35,x.36,classes
0,92,115,120,94,84,102,106,79,84,102,...,104,88,121,128,100,84,107,113,87,grey soil
1,84,102,106,79,84,102,102,83,80,102,...,100,84,107,113,87,84,99,104,79,grey soil
2,84,102,102,83,80,102,102,79,84,94,...,87,84,99,104,79,84,99,104,79,grey soil
3,80,102,102,79,84,94,102,79,80,94,...,79,84,99,104,79,84,103,104,79,grey soil
4,84,94,102,79,80,94,98,76,80,102,...,79,84,103,104,79,79,107,109,87,grey soil
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6430,60,83,96,85,64,87,100,88,64,83,...,92,66,87,108,89,63,83,104,85,red soil
6431,64,79,100,85,56,71,96,85,56,68,...,85,66,83,100,85,63,83,100,81,red soil
6432,56,68,91,81,56,64,91,81,53,64,...,81,59,87,96,81,63,83,92,74,vegetation stubble
6433,56,68,87,74,60,71,91,81,60,64,...,74,59,83,92,74,59,83,92,70,vegetation stubble


In [12]:
x, y = df.drop("classes", axis = 'columns'), df['classes']

In [14]:
x_train, x_test, y_train, y_test = train_test_split(x, y,
                                                    random_state = 24,
                                                    test_size = 0.3,
                                                    stratify = y)


In [40]:
# ______________________________________________________________________________________
# Pipeline

# one hot encoder
ohe = OneHotEncoder(
    handle_unknown="ignore", 
    sparse_output=False, 
    drop='first'
).set_output(transform='pandas')

trans_ohe = make_column_transformer(
    ('passthrough', make_column_selector(dtype_exclude=object)),
    (ohe, make_column_selector(dtype_include=object)),
    verbose_feature_names_out=False
).set_output(transform='pandas')


# scaler
scl_std = StandardScaler().set_output(transform = "pandas")
scl_mm = MinMaxScaler().set_output(transform = "pandas")


# model
svm = SVC(kernel = "linear",
         random_state = 24,
         probability = True)

pipe_pca = Pipeline([("SCL", scl_std), ("SVM", svm)])


# _____________________________________________________________________________________
# GCV

params = {
    'SVM__C' : np.linspace(0.001, 5, 5),
    'SVM__decision_function_shape' : ["ovo", 'ovr'],
}

kfolds = StratifiedKFold(n_splits = 5, 
                        random_state = 24, 
                        shuffle = True)

gcv = GridSearchCV(pipe_pca,
                  param_grid = params,
                  scoring = "neg_log_loss",
                  cv = kfolds,
                  verbose = 3)

gcv.fit(x, y)
# gcv.fit(x, y)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.400 total time=   3.5s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.392 total time=   3.3s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.418 total time=   3.4s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.392 total time=   3.2s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovo;, score=-0.379 total time=   3.2s
[CV 1/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.400 total time=   3.2s
[CV 2/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.392 total time=   3.2s
[CV 3/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.418 total time=   3.3s
[CV 4/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.392 total time=   3.5s
[CV 5/5] END SVM__C=0.001, SVM__decision_function_shape=ovr;, score=-0.379 total time=   3.5s

In [41]:
# y_pred = gcv.predict(x_test)
# y_pred_prob_ovo = gcv.predict_proba(x_test)
y_pred_prob = gcv.predict_proba(x_test)

print(f"Best Score : {gcv.best_score_}")
print(f"Best Params : {gcv.best_params_}\n")

# print(f"Accuracy Score : {accuracy_score(y_test, y_pred)}")
# print(f"ROC_AUC Score : {roc_auc_score(y_test, y_pred_prob)}")
print(f"Log loss Score : {log_loss(y_test, y_pred_prob)}")

Best Score : -0.33696460799422745
Best Params : {'SVM__C': 1.2507499999999998, 'SVM__decision_function_shape': 'ovo'}

Log loss Score : 0.31582080303747834
