<a href="https://colab.research.google.com/github/Sujal-Patnaik/SVM-Classifier-from-Scratch/blob/main/SVM(Classifier)_from_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [19]:
import numpy as np

class SVM:
    def __init__(self, kernel='linear', C=1.0, tol=1e-3, max_passes=5, max_iter=1000, degree=3, gamma=None):
        self.C = C
        self.tol = tol
        self.max_passes = max_passes
        self.max_iter = max_iter
        self.kernel_type = kernel
        self.degree = degree
        self.gamma = gamma
        self.b = 0.0

    def kernel(self, x, y):
        if self.kernel_type == 'linear':
            return x @ y.T
        elif self.kernel_type == 'polynomial':
            return (x @ y.T + 1) ** self.degree
        elif self.kernel_type == 'rbf':
            if self.gamma is None:
                self.gamma = 1 / x.shape[1]
            x_norm = np.sum(x**2, axis=-1).reshape(-1, 1)
            y_norm = np.sum(y**2, axis=-1).reshape(1, -1)
            return np.exp(-self.gamma * (x_norm + y_norm - 2 * x @ y.T))
        else:
            raise ValueError("Unknown kernel")

    def fit(self, X, y):
        n_samples, n_features = X.shape
        y = y.astype(np.float64)
        y[y == 0] = -1  # convert labels to {-1, 1}

        self.X = X
        self.y = y
        self.alphas = np.zeros(n_samples)  #initialize the alphas as a 0 vector
        self.b = 0.0 #initialize b to 0
        self.K = self.kernel(X, X)

        passes = 0
        iters = 0

        while passes < self.max_passes and iters < self.max_iter:
            num_changed_alphas = 0
            for i in range(n_samples):
                E_i = self._decision_function_index(i) - y[i] #Error in the ith sample

                #KKT condition checking
                cond1 = (y[i] * E_i < -self.tol) and (self.alphas[i] < self.C)
                cond2 = (y[i] * E_i > self.tol) and (self.alphas[i] > 0)

                #The i is chose for which alpha[i] violates KKT
                if cond1 or cond2:
                    non_zero_alphas = np.where((self.alphas > 0) & (self.alphas < self.C))[0]
                    if len(non_zero_alphas) > 1:
                        E_list = self._decision_function_index(non_zero_alphas) - y[non_zero_alphas]
                        j = non_zero_alphas[np.argmax(np.abs(E_list - E_i))]
                    else:
                        j = np.random.randint(0, n_samples)
                        while j == i:
                            j = np.random.randint(0, n_samples)

                    #j is chosen to maximize |E_i - E_j| if possible (i.e., from non-bound alphas).
                    #Otherwise, it is randomly selected.

                    E_j = self._decision_function_index(j) - y[j]

                    alpha_i_old, alpha_j_old = self.alphas[i], self.alphas[j]

                    #Finding the Lower and Upper Bounds to clip the alpha[j]
                    if y[i] != y[j]:
                        L = max(0, alpha_j_old - alpha_i_old)
                        H = min(self.C, self.C + alpha_j_old - alpha_i_old)
                    else:
                        L = max(0, alpha_i_old + alpha_j_old - self.C)
                        H = min(self.C, alpha_i_old + alpha_j_old)

                    if L == H:
                        continue

                    eta = 2.0 * self.K[i, j] - self.K[i, i] - self.K[j, j]
                    if eta >= 0:
                        continue  #This ensures that we update the pair (i,j) only if the curvature of the objective function is suitable for descent

                    self.alphas[j] -= y[j] * (E_i - E_j) / eta #updating the alpha[j]
                    self.alphas[j] = np.clip(self.alphas[j], L, H)

                    if abs(self.alphas[j] - alpha_j_old) < 1e-5:
                        continue

                    self.alphas[i] += y[i] * y[j] * (alpha_j_old - self.alphas[j]) #updating the alpha[i] after alpha[j] is found out

                    b1 = self.b - E_i - y[i] * (self.alphas[i] - alpha_i_old) * self.K[i, i] - y[j] * (self.alphas[j] - alpha_j_old) * self.K[i, j]
                    b2 = self.b - E_j - y[i] * (self.alphas[i] - alpha_i_old) * self.K[i, j] - y[j] * (self.alphas[j] - alpha_j_old) * self.K[j, j]

                    if 0 < self.alphas[i] < self.C:
                        self.b = b1    #This means alpha[i] is a support vector
                    elif 0 < self.alphas[j] < self.C:
                        self.b = b2    #This means alpha[j] is a support vector
                    else:
                        self.b = (b1 + b2) / 2  #Since neither alpha[i] nor alpha[j] are support vectors, b1 and b2 are averaged to get b

                    num_changed_alphas += 1

            if num_changed_alphas == 0:
                passes += 1
            else:
                passes = 0

            iters += 1

        self.support_ = np.where(self.alphas > 1e-5)[0]
        self.alpha_sv = self.alphas[self.support_]
        self.X_sv = X[self.support_]
        self.y_sv = y[self.support_]
        print(f"Training complete (SMO). Support vectors found: {len(self.support_)}")

    def _decision_function_index(self, i):
        return np.sum(self.alphas * self.y * self.K[i]) + self.b

    def _kernel_matrix(self, X1, X2):
        return self.kernel(X1, X2)

    def project(self, X):
        K = self._kernel_matrix(X, self.X)
        return np.dot((self.alphas * self.y), K.T) + self.b

    def predict(self, X):
        return np.sign(self.project(X))

    def fit_gd(self, X, y, lr=0.001, epochs=1000):
        n_samples, n_features = X.shape
        y = y.astype(np.float64)
        y[y == 0] = -1  # convert labels to {-1, 1}

        self.w = np.zeros(n_features)
        self.b = 0.0

        for epoch in range(epochs):
            margin = y * (X @ self.w + self.b)
            mask = margin < 1  # indices where hinge loss is active

            dw = self.w - self.C * (X[mask].T @ y[mask])  #gradient wrt to w
            db = -self.C * np.sum(y[mask]) #gradient wrt to b

            self.w -= lr * dw #update rule for weight
            self.b -= lr * db #update rule for b


        print("Training complete (vectorized gradient descent).")
        self.support_ = np.where(y * (X @ self.w + self.b) < 1)[0]
        self.X_sv = X[self.support_]
        self.y_sv = y[self.support_]

    def predict_gd(self, X):
        if not hasattr(self, 'w'):
            raise AttributeError("Model has not been trained using gradient descent.")
        return np.sign(X @ self.w + self.b)

    def project_gd(self, X):
        return X @ self.w + self.b



In [20]:
from sklearn.datasets import load_digits
digits = load_digits()
mask = (digits.target == 1) | (digits.target == 7)
X = digits.data[mask, :10]  # First 10 features
y = digits.target[mask]
y = np.where(y == 1, 1, -1)  # 1 and -1


In [21]:

from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# scratch built SVM
custom_svm = SVM(kernel='linear', C=1.0)
custom_svm.fit(X_train, y_train)
y_pred_custom = custom_svm.predict(X_test)

# Scikit-learn's SVM
from sklearn.svm import SVC
sklearn_svm = SVC(kernel='linear', C=1.0)
sklearn_svm.fit(X_train, y_train)
y_pred_sklearn = sklearn_svm.predict(X_test)


Training complete (SMO). Support vectors found: 70


In [22]:
from sklearn.metrics import accuracy_score, classification_report

print("Custom SVM Accuracy:", accuracy_score(y_test, y_pred_custom))
print("Sklearn SVM Accuracy:", accuracy_score(y_test, y_pred_sklearn))

print("\nCustom SVM Report:\n", classification_report(y_test, y_pred_custom))
print("\nSklearn SVM Report:\n", classification_report(y_test, y_pred_sklearn))


Custom SVM Accuracy: 0.8807339449541285
Sklearn SVM Accuracy: 0.8715596330275229

Custom SVM Report:
               precision    recall  f1-score   support

          -1       0.86      0.88      0.87        50
           1       0.90      0.88      0.89        59

    accuracy                           0.88       109
   macro avg       0.88      0.88      0.88       109
weighted avg       0.88      0.88      0.88       109


Sklearn SVM Report:
               precision    recall  f1-score   support

          -1       0.85      0.88      0.86        50
           1       0.89      0.86      0.88        59

    accuracy                           0.87       109
   macro avg       0.87      0.87      0.87       109
weighted avg       0.87      0.87      0.87       109



In [23]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


X, y = load_breast_cancer(return_X_y=True)

y = np.where(y == 0, -1, 1)


scaler = StandardScaler()
X = scaler.fit_transform(X)


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

#Training the sklearn SVM
clf = SVC(C=1.0, kernel='linear')
clf.fit(X_train, y_train)
y_pred_sklearn = clf.predict(X_test)

acc_sklearn = accuracy_score(y_test, y_pred_sklearn)
print(f"Sklearn SVM Accuracy: {acc_sklearn:.4f}")

#Training the SVM built from scratch
my_svm = SVM(C=1.0)
my_svm.fit(X_train, y_train)
y_pred_custom = my_svm.predict(X_test)

acc_custom = accuracy_score(y_test, y_pred_custom)
print(f"Custom SVM Accuracy: {acc_custom:.4f}")


Sklearn SVM Accuracy: 0.9561
Training complete (SMO). Support vectors found: 36
Custom SVM Accuracy: 0.9561


In [24]:

from sklearn.datasets import load_iris
data = load_iris()
X = data.data
y = data.target  # classes: 0, 1, 2


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# ========== Your SVM with Manual One-vs-Rest ==========
classes = np.unique(y_train)
custom_svms = []

# Train one binary SVM per class
for cls in classes:
    y_binary = np.where(y_train == cls, 1, -1)
    clf = SVM(kernel='linear', C=1.0)
    clf.fit(X_train, y_binary)
    custom_svms.append(clf)

# Predict using decision function from all classifiers
custom_preds = []
for x in X_test:
    scores = [clf.project(x.reshape(1, -1))[0] for clf in custom_svms]
    pred = np.argmax(scores)
    custom_preds.append(pred)

custom_acc = accuracy_score(y_test, custom_preds)
print(f"\nCustom SVM (OvR) Accuracy: {custom_acc:.4f}")

# ========== Sklearn SVC ==========
sklearn_clf = SVC(kernel='linear')
sklearn_clf.fit(X_train, y_train)
sklearn_preds = sklearn_clf.predict(X_test)
sklearn_acc = accuracy_score(y_test, sklearn_preds)
print(f"Sklearn SVC Accuracy: {sklearn_acc:.4f}")



Training complete (SMO). Support vectors found: 3
Training complete (SMO). Support vectors found: 69
Training complete (SMO). Support vectors found: 20

Custom SVM (OvR) Accuracy: 0.9556
Sklearn SVC Accuracy: 1.0000


The above cell is an example of multiclass classificaction using binary classifier.

In [25]:
from sklearn.datasets import load_digits
digits = load_digits()
X = digits.data
y = digits.target  # Classes: 0 to 9


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# SVM built from scratch (Manual One-vs-Rest)
classes = np.unique(y_train)
custom_svms = []

for cls in classes:
    y_binary = np.where(y_train == cls, 1, -1)
    clf = SVM(kernel='linear', C=1.0)
    clf.fit_gd(X_train, y_binary)
    custom_svms.append(clf)

custom_preds = []
for x in X_test:
    scores = [clf.project_gd(x.reshape(1, -1))[0] for clf in custom_svms]
    pred = np.argmax(scores)
    custom_preds.append(pred)

custom_acc = accuracy_score(y_test, custom_preds)
print(f"\nCustom SVM (OvR) Accuracy on Digits: {custom_acc:.4f}")

# ========== Scikit-learn SVM ==========
sklearn_clf = SVC(kernel='linear')
sklearn_clf.fit(X_train, y_train)
sklearn_preds = sklearn_clf.predict(X_test)
sklearn_acc = accuracy_score(y_test, sklearn_preds)
print(f"Sklearn SVC Accuracy on Digits: {sklearn_acc:.4f}")

Training complete (vectorized gradient descent).
Training complete (vectorized gradient descent).
Training complete (vectorized gradient descent).
Training complete (vectorized gradient descent).
Training complete (vectorized gradient descent).
Training complete (vectorized gradient descent).
Training complete (vectorized gradient descent).
Training complete (vectorized gradient descent).
Training complete (vectorized gradient descent).
Training complete (vectorized gradient descent).

Custom SVM (OvR) Accuracy on Digits: 0.9370
Sklearn SVC Accuracy on Digits: 0.9796


For the above dataset I have used the gradient descent method to fit my model rather than the standard SMO algorithm, because gradient descent would work much faster here and the Kernel is linear with which gradient descent algorithm is highly compatible.

In [33]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [36]:
import zipfile
with zipfile.ZipFile("/content/titanic.zip", 'r') as zip_ref:
    zip_ref.extractall('/content/titanic')

Please download the titanic dataset from Kaggle and upload the entire the zip file for this to work.

In [37]:
import pandas as pd
df = pd.read_csv('/content/titanic/train.csv')
display(df.head())

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [38]:
#--- Preprocessing ---#


df = df[['Survived', 'Pclass', 'Sex', 'Age', 'Fare']]  # simple features
df.dropna(inplace=True)


df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})

X = df[['Pclass', 'Sex', 'Age', 'Fare']].values
y = df['Survived'].values


y_custom = np.where(y == 1, 1, -1) # convert the data to 1 and -1


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
y_train_custom = np.where(y_train == 1, 1, -1)
y_test_custom = np.where(y_test == 1, 1, -1)


scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

#--- Model Training of my SVM from scratch
custom_svm = SVM(kernel='rbf', C=1.0, gamma=0.5)
custom_svm.fit(X_train, y_train_custom)
y_pred_custom = custom_svm.predict(X_test)
y_pred_custom_converted = np.where(y_pred_custom == 1, 1, 0)  # convert back to 0/1

#--- Model Training of sklearn SVM
sk_svm = SVC(kernel='rbf', C=1.0, gamma=0.5)
sk_svm.fit(X_train, y_train)
y_pred_sklearn = sk_svm.predict(X_test)

#---- Accuracy comparision
acc_custom = accuracy_score(y_test, y_pred_custom_converted)
acc_sklearn = accuracy_score(y_test, y_pred_sklearn)

print(f"Custom SVM Accuracy: {acc_custom:.4f}")
print(f"Sklearn SVM Accuracy: {acc_sklearn:.4f}")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Sex'] = df['Sex'].map({'male': 0, 'female': 1})


Training complete (SMO). Support vectors found: 258
Custom SVM Accuracy: 0.7413
Sklearn SVM Accuracy: 0.7413
