In [9]:
import torch
import pandas as pd
import torch.nn as nn
import numpy as np

In [None]:
path = 'datasets/'
data = pd.read_csv(path + 'iris.csv')[:100] # Binary classification
data.shape # 150, 5
data.keys() # 'sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'
features = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
label = 'species'
train_idx = list(range(0, 40)) + list(range(50, 90)) #+ list(range(100, 140))
val_idx = list(range(40, 50)) + list(range(90, 100)) #+ list(range(140, 150))
type(data['sepal_length'][val_idx])

# from text labels to id labels
label_idx = {}
idx = 0
for i in range(data.shape[0]):
    label_name = data[label][i]
    if label_name not in label_idx:
        label_idx[label_name] = idx
        idx += 1
print(label_idx)

{'setosa': 0, 'versicolor': 1}


In [8]:
X_train = torch.zeros((len(train_idx), len(features)))
y_train = torch.zeros(len(train_idx))

X_val = torch.zeros((len(val_idx), len(features)))
y_val = torch.zeros(len(val_idx))

for i, k in enumerate(features):
    X_train[:, i] = torch.tensor(data[k][train_idx].values, dtype=torch.float32)
    X_val[:, i] = torch.tensor(data[k][val_idx].values, dtype=torch.float32)

for i, j in enumerate(train_idx):
    idx = label_idx[ data[label][j] ]
    y_train[i] = torch.tensor(idx)

for i, j in enumerate(val_idx):
    idx = label_idx[ data[label][j] ]
    y_val[i] = torch.tensor(idx)

print("Training:", X_train.shape, y_train.shape)
print("Evaluation:", X_val.shape, y_val.shape)

Training: torch.Size([80, 4]) torch.Size([80])
Evaluation: torch.Size([20, 4]) torch.Size([20])


In [31]:
class SupportVectorMachine(nn.Module):
    def __init__(self, C:float, X_train:torch.Tensor, y_train:torch.Tensor):
        super(SupportVectorMachine, self).__init__()
        assert len(X_train.shape) == 2
        self.n = X_train.shape[0]
        self.x_dim = X_train.shape[1]
        assert self.n > 10*self.x_dim # 10x more datapoints than datadim
        assert len(y_train.shape) == 1
        assert X_train.shape[0] == y_train.shape[0]
        self.C = C # regularization parameter
        # normalize
        self.X_mean = X_train.mean(dim=0)
        self.X_std = X_train.std(dim=0)
        self.nor_X = (X_train - self.X_mean)/self.X_std
        self.y_train = y_train
        self.net = nn.Linear(X_train.shape[1], 1)
        self.optim = torch.optim.SGD(self.net.parameters(), lr=0.01)

    def forward(self, x):
        return self.net(x)

    def hinge_loss(self, y_true:torch.Tensor, y_pred:torch.Tensor):
        assert y_pred.shape == y_true.shape, f"y_pred {y_pred.shape}    y_true {y_true.shape}" # must be 1 dim
        return (1 - y_true * y_pred).clamp(min=0).mean(dim=0)


    def train(self, num_epochs:int = 1000, batch_size:int = 16):
        for epoch in range(num_epochs):
            idx = np.random.randint(low=0, high=self.n, size=batch_size)
            x = self.nor_X[idx]
            # print(idx, x.shape)
            y_true = self.y_train[idx]
            y_pred = self.forward(x)[:, 0]
            loss = self.hinge_loss(y_true=y_true, y_pred=y_pred)
            loss.backward()
            self.optim.step()
            self.optim.zero_grad()
            if epoch % 100 == 0:
                print(f"Epoch {epoch}:  loss: {loss.detach().item():.4f}")

    def predict(self, X):
        # normalize
        nor_X = (X - self.X_mean)/self.X_std
        with torch.no_grad():
            y_pred = self.forward(nor_X)
        y_pred = [torch.tensor([0]) if y < 0 else torch.tensor([1]) for y in y_pred]
        y_pred = torch.vstack(y_pred)
        return y_pred

svm = SupportVectorMachine(C=1.0, X_train=X_train, y_train=y_train)        
print(svm)

SupportVectorMachine(
  (net): Linear(in_features=4, out_features=1, bias=True)
)


In [32]:
svm.train()

Epoch 0:  loss: 1.5390
Epoch 100:  loss: 0.6176
Epoch 200:  loss: 0.2606
Epoch 300:  loss: 0.4375
Epoch 400:  loss: 0.6250
Epoch 500:  loss: 0.2500
Epoch 600:  loss: 0.3750
Epoch 700:  loss: 0.3750
Epoch 800:  loss: 0.5625
Epoch 900:  loss: 0.6250


In [41]:
# Accuracy on the training set

y_pred = svm.predict(X_train)
print(y_pred.shape, y_train.shape)
accuracy = torch.mean((y_pred[:,0] == y_train).float()).item()
print(accuracy)

torch.Size([80, 1]) torch.Size([80])
1.0


In [42]:
# Accuracy on the validation set

y_pred = svm.predict(X_val)
accuracy = torch.mean((y_pred[:,0] == y_val).float()).item()
print(accuracy)
# print(y_pred[:,0], y_val)

0.949999988079071
