# Support Vector Machines with Polynomial and RBF Kernels

In [1]:
# Widen the notebook's main container to 90% of the browser window so that
# wide outputs are not squeezed into the default narrow column.
from IPython.display import display, HTML, Markdown

wide_container_css = HTML("<style>.container { width:90% !important; }</style>")
display(wide_container_css)

In [2]:
import joblib
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.svm import SVC

## Separate features and target

In [3]:
# Read the pre-split training and test sets from the working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# The target is the "Score" column; every other column is kept as a feature.
X_train, y_train = train.drop(columns="Score"), train["Score"]
X_test, y_test = test.drop(columns="Score"), test["Score"]

## Transformations

In [4]:
# Column groups: numeric columns are standardised, categorical columns are
# one-hot encoded.
numeric_features = ["WhiteElo", "EloDif"]
categorical_features = ["Opening_name", "Time_format", "Increment_binary"]

# Zero-mean / unit-variance scaling for the numeric columns.
numeric_transformer = Pipeline(steps=[("scaler", StandardScaler())])

# handle_unknown="ignore": a category not seen during fit is encoded as an
# all-zero row instead of raising at transform time.
categorical_transformer = Pipeline(
    steps=[("onehot", OneHotEncoder(handle_unknown="ignore"))]
)

# Route each column group to its transformer. Columns that are in neither
# list are dropped (ColumnTransformer's default remainder behaviour).
preprocessor = ColumnTransformer(
    transformers=[
        ("num", numeric_transformer, numeric_features),
        ("cat", categorical_transformer, categorical_features),
    ]
)

## GridSearch

# End-to-end pipeline: the preprocessing is refit together with the SVM inside
# every cross-validation fold, so no validation-fold statistics leak into it.
pipe = Pipeline([
  ("preproc", preprocessor),
  ("svc", SVC())
])

# One sub-grid per kernel. `degree` is only used by the polynomial kernel, so
# crossing it with "rbf" would fit every RBF candidate three times with
# identical results. Splitting the grid searches the same distinct models with
# 48 candidates instead of 72.
param_grid = [
  {
    "svc__kernel": ["rbf"],
    "svc__C": [0.1, 1, 10],
    "svc__gamma": ["scale", "auto"],
    "svc__class_weight": [None, "balanced"]
  },
  {
    "svc__kernel": ["poly"],
    "svc__C": [0.1, 1, 10],
    "svc__gamma": ["scale", "auto"],
    "svc__degree": [2, 3, 4],
    "svc__class_weight": [None, "balanced"]
  }
]

# 5-fold CV on accuracy, using all available cores.
grid = GridSearchCV(pipe, param_grid, cv=5, scoring="accuracy", n_jobs=-1)
grid.fit(X_train, y_train)
print("Best params:", grid.best_params_)
# grid.score uses the estimator refit on the full training set with the best
# parameters and the `scoring` metric given above (accuracy).
print("Test set score:", grid.score(X_test, y_test))


In [5]:

# Learn the scaling statistics and one-hot categories from the training split
# only, then apply that same fitted transformation to both splits.
preprocessor.fit(X_train)
X_train_t = preprocessor.transform(X_train)
X_test_t = preprocessor.transform(X_test)


## RBF Kernel

In [6]:
# Hyper-parameter grid for the RBF-kernel SVM (gamma is left at SVC's default).
param_grid_rbf = {
    "svc__C": [0.1, 1],
    "svc__class_weight": [None, "balanced"]
}

# Preprocessing lives inside the pipeline so that GridSearchCV refits the
# scaler/encoder on the training part of every fold (no leakage from the
# validation part into the CV scores), and so that best_estimator_ can be
# applied directly to raw feature frames.
rbf_pipe = Pipeline([
    ("preproc", preprocessor),
    ("svc", SVC(kernel="rbf", random_state=42))
])

grid_rbf = GridSearchCV(
    rbf_pipe,
    param_grid=param_grid_rbf,
    cv=3,             # fewer folds → ~40% less work
    scoring="accuracy",
    n_jobs=1          # on a weak PC, 1 job is often faster & less memory-hungry
)
# GridSearchCV clones the pipeline, so the shared `preprocessor` object itself
# is not modified by this fit.
grid_rbf.fit(X_train, y_train)

print("RBF best params:", grid_rbf.best_params_)
print("RBF test accuracy: {:.3f}".format(grid_rbf.score(X_test, y_test)))


RBF best params: {'svc__C': 0.1, 'svc__class_weight': None}
RBF test accuracy: 0.542


In [7]:
# Persist the RBF model selected by the grid search. best_estimator_ is the
# estimator that GridSearchCV refit on the whole training data using the best
# parameter combination.
best_svc_rbf = grid_rbf.best_estimator_
joblib.dump(best_svc_rbf, 'best_svc_rbf.joblib')

['best_svc_rbf.joblib']

## Polynomial Kernel

In [8]:
# Hyper-parameter grid for the polynomial-kernel SVM (gamma and coef0 are left
# at SVC's defaults).
param_grid_poly = {
    "svc__C": [0.1, 1],
    "svc__degree": [2, 3],
    "svc__class_weight": [None, "balanced"]
}

# Preprocessing lives inside the pipeline so that GridSearchCV refits the
# scaler/encoder on the training part of every fold (no leakage from the
# validation part into the CV scores), and so that best_estimator_ can be
# applied directly to raw feature frames.
poly_pipe = Pipeline([
    ("preproc", preprocessor),
    ("svc", SVC(kernel="poly", random_state=42))
])

grid_poly = GridSearchCV(
    poly_pipe,
    param_grid=param_grid_poly,
    cv=3,             # 3-fold CV to limit the number of SVM fits
    scoring="accuracy",
    n_jobs=-1         # use all available cores
)
# GridSearchCV clones the pipeline, so the shared `preprocessor` object itself
# is not modified by this fit.
grid_poly.fit(X_train, y_train)

print("Poly best params:", grid_poly.best_params_)
print("Poly test accuracy: {:.3f}".format(grid_poly.score(X_test, y_test)))


Poly best params: {'svc__C': 0.1, 'svc__class_weight': None, 'svc__degree': 3}
Poly test accuracy: 0.534


In [9]:
# Persist the polynomial-kernel model selected by the grid search.
# best_estimator_ is the estimator that GridSearchCV refit on the whole
# training data using the best parameter combination.
best_svc_poly = grid_poly.best_estimator_
joblib.dump(best_svc_poly, 'best_svc_poly.joblib')

['best_svc_poly.joblib']

### The test accuracy of the SVM models with the Radial Basis Function (RBF) kernel and the polynomial kernel (0.542 and 0.534, respectively) is similar to that of our linear models.