# SVM Model

In [2]:
import pandas as pd
import pickle
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV
import os
import numpy as np
import matplotlib.pyplot as plt

In [3]:
def format_best_svm_model(best_params):
    fixed_args = {
        "probability": True,
        "class_weight": "'balanced'",
        "random_state": 42
    }

    print("model_svm = SVC(")

    # Fixed arguments
    for k, v in fixed_args.items():
        print(f"    {k}={v},")

    # GridSearch-tuned parameters
    for k, v in best_params.items():
        if isinstance(v, str):
            print(f"    {k}='{v}',")
        else:
            print(f"    {k}={v},")
            
    print(")")

## Load Data

In [10]:
X_train = pd.read_csv('../Data/FINAL_SPLIT/Football-Training-2010_2025_ONEHOT_train.csv')
X_test  = pd.read_csv('../Data/FINAL_SPLIT/Football-Training-2010_2025_ONEHOT_test.csv')

y_train = pd.read_csv('../Data/FINAL_SPLIT/PL_dataset_2010-2025_train.csv')['MatchResult']
y_test  = pd.read_csv('../Data/FINAL_SPLIT/PL_dataset_2010-2025_test.csv')['MatchResult']

In [11]:
X_train = X_train.dropna()
y_train = y_train.loc[X_train.index]

X_test = X_test.dropna()
y_test = y_test.loc[X_test.index]

print(X_train.shape, X_test.shape)

(2687, 110) (933, 110)


In [12]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Train Model

In [13]:
# model_svm = SVC(
#     kernel="rbf",
#     C=0.1,
#     gamma="scale",
#     class_weight="balanced",
#     probability=True,
#     random_state=42
# )

model_svm = SVC(
    probability=True,
    class_weight='balanced',
    random_state=42,
    C=0.1,
    gamma='scale',
    kernel='linear',
)

In [14]:
_ = model_svm.fit(X_train_scaled, y_train)

In [15]:
model_svm.column_list = X_train.columns.tolist()

In [13]:
with open("saved_models_result/svm_model.pkl", "wb") as f:
    pickle.dump(model_svm, f)

## Test Model

In [16]:
y_pred = model_svm.predict(X_test_scaled)

## Using Gridsearch

In [12]:
param_grid = {
    "C": [0.01, 0.1, 0.05],
    "gamma": ["scale", 0.001, 0.01, 0.1],
    "kernel": ["rbf", "linear"]
}

grid_search = GridSearchCV(
    estimator=SVC(probability=True, class_weight="balanced", random_state=42),
    param_grid=param_grid,
    scoring="accuracy",
    cv=5,
    verbose=2,
    n_jobs=-1
)

grid_search.fit(X_train_scaled, y_train)
model_svm_best = grid_search.best_estimator_

Fitting 5 folds for each of 24 candidates, totalling 120 fits
[CV] END .................C=0.01, gamma=scale, kernel=linear; total time=   1.7s
[CV] END .................C=0.01, gamma=scale, kernel=linear; total time=   1.7s
[CV] END .................C=0.01, gamma=scale, kernel=linear; total time=   1.8s
[CV] END .................C=0.01, gamma=scale, kernel=linear; total time=   1.8s
[CV] END .................C=0.01, gamma=scale, kernel=linear; total time=   1.8s
[CV] END ....................C=0.01, gamma=scale, kernel=rbf; total time=   3.3s
[CV] END ....................C=0.01, gamma=scale, kernel=rbf; total time=   3.4s
[CV] END ....................C=0.01, gamma=scale, kernel=rbf; total time=   3.5s
[CV] END ....................C=0.01, gamma=scale, kernel=rbf; total time=   3.6s
[CV] END ....................C=0.01, gamma=0.001, kernel=rbf; total time=   3.4s
[CV] END ....................C=0.01, gamma=0.001, kernel=rbf; total time=   3.4s
[CV] END .................C=0.01, gamma=0.001, 

In [13]:
best_model = grid_search.best_estimator_
print("Best Parameters:", grid_search.best_params_)
format_best_svm_model(grid_search.best_params_)

Best Parameters: {'C': 0.05, 'gamma': 'scale', 'kernel': 'linear'}
model_svm = SVC(
    probability=True,
    class_weight='balanced',
    random_state=42,
    C=0.05,
    gamma='scale',
    kernel='linear',
)


In [14]:
y_pred_best = best_model.predict(X_test_scaled)
print("Accuracy: ", accuracy_score(y_test, y_pred_best))
print("Classification Report:\n", classification_report(y_test, y_pred_best))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred_best))

# Check for overfitting
train_acc = best_model.score(X_train_scaled, y_train)
test_acc = best_model.score(X_test_scaled, y_test)

print(f"\nTraining Accuracy: {train_acc:.4f}")
print(f"Testing Accuracy:  {test_acc:.4f}")

Accuracy:  0.6028368794326241
Classification Report:
               precision    recall  f1-score   support

           0       0.74      0.67      0.70       316
           1       0.34      0.44      0.39       163
           2       0.68      0.63      0.65       226

    accuracy                           0.60       705
   macro avg       0.59      0.58      0.58       705
weighted avg       0.63      0.60      0.61       705

Confusion Matrix:
 [[211  84  21]
 [ 46  72  45]
 [ 29  55 142]]

Training Accuracy: 0.6395
Testing Accuracy:  0.6028
