In [1]:
import numpy as np
import os
import cv2
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_absolute_error

In [2]:
def load_PCA_vectors(path, size=(100,100)):
    """Load saved feature vectors from a ``path/<person>/<file>`` directory tree.

    Every file found inside each sub-directory of ``path`` is read with
    ``np.load``; the sub-directory name is used as the label of that sample.

    Parameters
    ----------
    path : str
        Root directory containing one sub-directory per person.
    size : tuple, optional
        Unused. Kept only so existing callers that pass it keep working.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked vectors and the matching array of labels, in sorted
        (person, file name) order.
    """
    X, y = [], []
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and any seeded split computed from it) reproducible.
    for person in sorted(os.listdir(path)):
        folder = os.path.join(path, person)
        # Skip stray files in the root (e.g. .DS_Store) instead of crashing
        # when trying to list them as a directory.
        if not os.path.isdir(folder):
            continue
        for file in sorted(os.listdir(folder)):
            X.append(np.load(os.path.join(folder, file)))
            y.append(person)
    return np.array(X), np.array(y)

In [3]:
# Read every stored feature vector together with its person label.
X, y = load_PCA_vectors(path='Datasets/PCA_vectors')

In [18]:
# Hold out 20% of the samples for testing; the split is stratified on the
# labels and seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [5]:
class SVM:
    """Binary linear SVM trained by per-sample sub-gradient descent.

    The update rule corresponds to the hinge loss with an L2 penalty
    ``lambda_param * ||w||^2``.

    Parameters
    ----------
    learning_rate : float
        Step size applied to every per-sample update.
    lambda_param : float
        Weight of the L2 penalty on ``w``.
    n_iters : int
        Number of full passes over the training data.
    """

    def __init__(self, learning_rate = 0.001, lambda_param = 0.01,n_iters = 1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        # Honour the caller's value (it used to be pinned to 1000).
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self,X,y):
        """Fit the weight vector ``w`` and bias ``b`` on ``X`` / ``y``.

        ``X`` is a 2-D array (samples x features). Labels ``<= 0`` are
        treated as the negative class (-1); everything else as +1.
        """
        n_samples,n_features = X.shape
        y_ = np.where(y<=0,-1,1)
        self.w = np.zeros(n_features)
        self.b = 0
        for _ in range(self.n_iters):
            for idx,x_i in enumerate(X):
                # Sample is on the correct side of the margin: only the
                # regularisation term contributes to the gradient.
                condition = y_[idx] * (np.dot(x_i,self.w) + self.b) >= 1
                if condition:
                    dw = 2 * self.lambda_param * self.w
                    db = 0
                else:
                    # Margin violated: add the hinge-loss sub-gradient.
                    dw = 2 * self.lambda_param * self.w - np.dot(x_i,y_[idx])
                    db = -y_[idx]
                self.w -= self.lr * dw
                self.b -= self.lr * db

    def predict(self,X):
        """Return the sign of ``X @ w + b`` (0 where the output is exactly 0)."""
        linear_output = np.dot(X,self.w) + self.b
        return np.sign(linear_output)


In [6]:
class MultiClassSVM:
    """One-vs-rest multi-class classifier assembled from binary ``SVM`` models.

    ``fit`` trains one binary model per distinct label (that label against
    all others); ``predict`` returns, for each sample, the label whose model
    yields the largest raw score ``X @ w + b``.
    """

    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        # Hyper-parameters are stored as given and handed to every binary model.
        self.lr, self.lambda_param, self.n_iters = learning_rate, lambda_param, n_iters
        self.models = {}
        self.classes_ = None

    def fit(self, X, y):
        """Train one binary model per unique value of ``y``, keyed by label."""
        self.classes_ = np.unique(y)
        for label in self.classes_:
            print(f"Training class {label}")
            binary_svm = SVM(self.lr, self.lambda_param, self.n_iters)
            # Current label -> +1, every other label -> -1.
            binary_svm.fit(X, np.where(y == label, 1, -1))
            self.models[label] = binary_svm

    def predict(self, X):
        """Return the label with the highest decision score for each row of ``X``."""
        per_class = [
            np.dot(X, self.models[label].w) + self.models[label].b
            for label in self.classes_
        ]
        # One column per class, in the same order as ``classes_``.
        score_matrix = np.stack(per_class, axis=1)
        return self.classes_[score_matrix.argmax(axis=1)]

In [21]:
# Train the multi-class SVM on the training split, 1000 epochs per binary model.
clf = MultiClassSVM(learning_rate=0.001,lambda_param=0.001,n_iters = 1000)
clf.fit(X_train,y_train)

Training class s1
Training class s10
Training class s11
Training class s12
Training class s13
Training class s14
Training class s15
Training class s16
Training class s17
Training class s18
Training class s19
Training class s2
Training class s20
Training class s21
Training class s22
Training class s23
Training class s24
Training class s25
Training class s26
Training class s27
Training class s28
Training class s29
Training class s3
Training class s30
Training class s31
Training class s32
Training class s33
Training class s34
Training class s35
Training class s36
Training class s37
Training class s38
Training class s39
Training class s4
Training class s40
Training class s5
Training class s6
Training class s7
Training class s8
Training class s9


In [22]:
# Evaluate on the held-out split: accuracy is the fraction of test samples
# whose predicted label equals the true one.
y_pred = clf.predict(X_test)
acc = np.mean(y_test == y_pred)
print(f'Độ chính xác trên tập test {acc}')

Độ chính xác trên tập test 0.95
