In [1]:
from pathlib import Path
from principal_components import load_training_data
import numpy as np
import cvxpy as cp
from sklearn.decomposition import PCA
from sklearn.svm import SVC, LinearSVC
from sklearn.pipeline import make_pipeline

# Base folder of the face images and the sub-folder names used for the
# training and test loads below.
my_dir = Path.home().joinpath("data", "Faces")
train_dir, test_dir = "fa_H", "fb_H"

### Support Vector Classifier

$$
\begin{aligned}
\text{minimize} \quad & \| a \|_2 + \gamma (\mathbf 1^T u + \mathbf 1^T v) \\
\text{subject to} \quad & a^T x_i - b \geq 1 - u_i, \quad i = 1, \ldots, N\\
& a^T y_i - b \leq -(1 - v_i), \quad i = 1, \ldots, M\\
& u \succeq 0, \quad v \succeq 0.
\end{aligned}
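$$

Here $x_1, \ldots, x_N$ and $y_1, \ldots, y_M$ are the feature vectors of the two classes, $u$ and $v$ are slack variables that absorb margin violations, and $\gamma \geq 0$ weights the total slack against $\| a \|_2$.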

In [2]:
# Load the training split through the configured folder name instead of a
# repeated "fa_H" literal, so that editing train_dir changes what is loaded
# (the test cell already goes through test_dir).
train_X, train_labs, im_shape = load_training_data(my_dir / train_dir)

In [3]:
# Load the test split; the third return value is not needed here and is discarded.
test_X, test_labs, _ = load_training_data(my_dir.joinpath(test_dir))

In [4]:
# Distinct training labels (sorted by np.unique) and how often each one occurs.
class_ids, class_counts = np.unique(
    train_labs,
    return_counts=True,
)

In [7]:
# Intruder detection
# The first N_AUTHORIZED classes are treated as authorized people. The last
# N_REMOVED classes are removed from the training set entirely. Every class that
# is not authorized counts as unauthorized.
N_AUTHORIZED = 100
N_REMOVED = 100

# The two groups are taken from opposite ends of class_ids; if they overlapped,
# authorized people would silently be removed from the training set.
assert N_AUTHORIZED + N_REMOVED <= len(class_ids), (
    "authorized and removed classes overlap"
)

authorized = class_ids[:N_AUTHORIZED]
# Slice from an explicit start index so that N_REMOVED = 0 yields an empty array
# (class_ids[-0:] would return every class).
removed = class_ids[len(class_ids) - N_REMOVED:]

In [8]:
# Keep only the training samples whose label is not in `removed`.
# np.isin replaces the deprecated np.in1d; invert=True is the same as negating the mask.
train_idx = np.isin(train_labs, removed, invert=True)
train_X_rm = train_X[train_idx]
train_labs_rm = train_labs[train_idx]

# Binary target for the remaining samples: True where the label is an authorized class.
authorized_idx = np.isin(train_labs_rm, authorized)

In [9]:
# Ground truth for the test set: True where the label is an authorized class.
# np.isin replaces the deprecated np.in1d.
test_authorized_idx = np.isin(test_labs, authorized)

In [10]:
# Authorized-vs-unauthorized classifier: project onto 100 principal components,
# then fit a linear SVM. SVC(kernel="linear") can be swapped in for LinearSVC.
# random_state is fixed on both steps so that re-running the cell reproduces
# the same score.
clf = make_pipeline(
    PCA(n_components=100, random_state=0),
    LinearSVC(random_state=0),
)
clf.fit(train_X_rm, authorized_idx)
# Mean accuracy on the test set (the cell's displayed value).
clf.score(test_X, test_authorized_idx)



0.6095317725752508

In [68]:
# Number of distinct classes found in the training labels.
c = len(class_ids)

In [71]:
def svm_solver(x, y, g=0.1):
    """Fit a soft-margin linear classifier separating the rows of ``x`` from the rows of ``y``.

    Solves the convex program

        minimize    ||a||_2 + g * (sum(u) + sum(v))
        subject to  a @ x_i - b >=   1 - u_i     for every row x_i of x
                    a @ y_i - b <= -(1 - v_i)    for every row y_i of y
                    u >= 0,  v >= 0

    Parameters
    ----------
    x : 2-D array, one sample per row
        Samples that should end up on the positive side (``a @ x_i - b >= 1``).
    y : 2-D array with the same number of columns as ``x``
        Samples that should end up on the negative side (``a @ y_i - b <= -1``).
    g : float, default 0.1
        Non-negative weight on the total slack. CVXPY raises ``ValueError`` for a
        negative value because the parameter is declared ``nonneg``.

    Returns
    -------
    tuple
        ``(a.value, b.value)`` as left by CVXPY after ``solve()``: the weight
        vector (one entry per column of ``x``) and the offset.
    """
    m1, N = x.shape
    m2 = y.shape[0]

    # Separating hyperplane a @ z = b.
    a = cp.Variable(N)
    b = cp.Variable()

    # Slack variables: one per sample, measuring how far it violates its margin.
    u = cp.Variable(m1, nonneg=True)
    v = cp.Variable(m2, nonneg=True)

    gamma = cp.Parameter(nonneg=True, value=g)

    # No bound on ||a||_2 is imposed. Such a bound belongs to the max-margin
    # formulation (maximize t subject to ||a||_2 <= 1). Combined with the fixed
    # margins of +/-1 used here, it would forbid slabs narrower than 2 and force
    # slack onto classes that are separable but close together.
    constraints = [
        a @ x.T - b >= 1 - u,
        a @ y.T - b <= -(1 - v),
    ]

    obj = cp.Minimize(cp.norm2(a) + gamma * (cp.sum(u) + cp.sum(v)))

    prob = cp.Problem(obj, constraints)
    prob.solve()
    return a.value, b.value

In [72]:
# One-vs-one training: fit one linear separator per unordered pair of classes.
# `weights` holds one (a, b) tuple per pair (i, j) with i < j, appended in the
# order (0, 1), (0, 2), ..., (0, c-1), (1, 2), ..., (c-2, c-1).
# NOTE(review): train_X_pca is not defined in any cell shown in this notebook —
# presumably the PCA projection of train_X; confirm it exists before Restart & Run All.
weights = []

# Row indices of every class, computed once instead of rescanning train_labs
# for each of the c * (c - 1) / 2 pairs.
class_rows = [np.flatnonzero(train_labs == class_id) for class_id in class_ids]

for i in range(c-1):
    print(i, "/", c)
    # Samples of class i do not change across the inner loop.
    mask_i = class_rows[i]
    x = train_X_pca[mask_i]
    for j in range(i+1, c):
        mask_j = class_rows[j]
        y = train_X_pca[mask_j]

        params = svm_solver(x, y)
        weights.append(params)

0 / 867
1 / 867
2 / 867
3 / 867
4 / 867
5 / 867
6 / 867
7 / 867
8 / 867
9 / 867
10 / 867
11 / 867


KeyboardInterrupt: 

In [76]:
# Linear-kernel SVC for the multi-class problem. This rebinds `clf`, which an
# earlier cell used for the PCA + LinearSVC pipeline.
clf = SVC(kernel="linear")

In [79]:
# Fit on all training labels (multi-class).
# NOTE(review): train_X_pca is not defined in any cell shown in this notebook —
# presumably the PCA projection of train_X; confirm before Restart & Run All.
clf.fit(train_X_pca, train_labs)

SVC(kernel='linear')

In [84]:
# Transform the test samples with the already-fitted `my_pca` object.
# NOTE(review): my_pca is not defined in any cell shown in this notebook —
# presumably the PCA that produced train_X_pca; confirm before Restart & Run All.
test_X_pca = my_pca.transform(test_X)

In [86]:
# Score the fitted linear SVC on the transformed test set against the true
# labels (the cell's displayed value).
clf.score(test_X_pca, test_labs)

0.681438127090301