In [1]:
import torch
from tqdm import trange

import utils as ut
import evaluate as eva

In [7]:
class SVM():
    def __init__(self, kwargs):
        self.model_name = 'SVM'
        self.W = torch.randn(kwargs['feature_dim']+1, kwargs['class_num'])
        self.N = kwargs['class_num']
        self.init_weight()
        
    def init_weight(self):
        torch.nn.init.xavier_normal_(self.W)
        
    def train(self, X, Y, alpha=0.01, reg=2.5e4, vec=True):
        """
        功能: 完成训练过程，包括(1)求解损失, 计算梯度. (2) 正则化，计算梯度，(3)更新参数 
        输入:
            X(Tensor):(N, K:3*32*32+1)
            Y(Tensor):(N)
            alpha(float):                   # 学习率
            reg(float):                     # 正则化系数
        输出:
            L(int):(1)                      # 损失，范围给主程序以打印显示        
        """
        
        # 计算梯度与正则化
        if vec:
            L, dW = self.cal_dw_with_vec(X, Y, reg)
        else:
            L, dW = self.cal_dw_with_loop(X, Y, reg)
            
        # 更新参数
        self.W -= alpha * dW
        return L
    
    def cal_dw_with_loop(self, X, Y, reg):
        """
        功能： 计算损失和梯度
        输入:
            X(Tensor):(N, K:3*32*32+1)
            Y(Tensor):(N)
            reg(float):                    # 正则化系数
        输出:
            L(int): 1                      # 损失               
            dW(Tensor):(K+1,C)             # 参数梯度       
        """
        L = 0.0
        N = X.size(0)
        F, C = self.W.size()
        dW = torch.zeros(F, C)
        
        # (1) 求解损失
        for idx, Xi in enumerate(X):
            yi = Y[idx]
            scores = Xi.matmul(self.W)
            syi = scores[yi]
            for j in range(self.N):
                if j == yi:
                    continue
                sj = scores[j]
                if syi - sj - 1 < 0:
                    L += (sj - syi + 1).item()
                    dW[:,j] += Xi.t()
                    dW[:,yi] -= Xi.t()
        
        # (2) 正则化
        L = L / N +  0.5*reg*torch.sum(torch.pow(self.W, 2)).item()
        dW = dW / N +  reg*self.W
        
        return L, dW
    
    def cal_dw_with_vec(self, X, Y, reg):
        """
        功能： 计算损失和梯度
        输入:
            X(Tensor):(N, K:3*32*32+1)
            Y(Tensor):(N)
            reg(float):                    # 正则化系数
        输出:
            L(int): 1                      # 损失               
            dW(Tensor):(K+1,C)             # 参数梯度       
        """
        
        N = X.size(0)
        F, C = self.W.size()
        
        score = X.matmul(self.W)                                       # (N, C)
        correct = score[range(N), Y.tolist()].unsqueeze(1)             # (N, 1)
        score = torch.relu(score-correct+1)                            # (N, C)
        score[range(N), Y.tolist()] = 0
        
        L = torch.sum(score).item()
        L = L / N +  0.5*reg*torch.sum(torch.pow(self.W, 2)).item()
        
        
        dW = torch.zeros(F, C)
        mask = torch.zeros(N, C)
        mask[score>0] = 1                                              # (N,C)
        mask[range(N), Y.tolist()] = -torch.sum(mask, 1)               # (N,C)
        dW = X.t().matmul(mask)                                        # (F,C)

        dW = dW / N + reg*self.W
        return L, dW
        
    def predict(self, X):
        """
        功能: 预测输入数据标签
        输入:
            X(Tensor): (N, 3*32*32)
        输出:
            labels(Tensor): (N)
        """
        S = X.matmul(self.W)  # (N, C)
        return torch.max(S, 1)[1]
        
        

In [3]:
train_set, test_set = ut.data_load('./data')

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


In [4]:
batch_num = 200
opt = {"feature_dim":3*32*32, "class_num":10}
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_num, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_num, shuffle=False, num_workers=4)

In [5]:
def train(alpha, reg, epoches):
    """
    功能：完成训练过程
    输入:
        alpha(int):(1)     # 学习率
        reg(int):(1)       # 正则化系数
        epoches(int):(1)   # 迭代次数
    输出:
        svmEr(class) 训练好的模型
        alpha
        reg
    """
    svmEr = SVM(opt)
    for epoch in range(epoches):
        train_data_interator = enumerate(train_loader)
        train_steps = test_steps = (len(train_set) + batch_num - 1) // batch_num

        t = trange(train_steps)
        loss_avg = ut.RunningAverage()
        print("epoch:{}".format(epoch))
        for i in t:
            idx, data = next(train_data_interator)
            X_batch, Y_batch = data
            X_batch = X_batch.view(X_batch.size(0), -1)
            X_batch = torch.cat((torch.ones(X_batch.size(0),1), X_batch), 1)   

            loss = svmEr.train(X_batch, Y_batch, alpha=1e-4, reg=1, vec=True)
            loss_avg.update(loss)
            t.set_postfix(loss='{:05.3f}/{:05.3f}'.format(loss_avg(), loss))
        print(loss_avg())
    return svmEr

def evaluate(svmEr):
    """
    功能：使用训练好的模型进行预测，并评测结果
    输入: 
        svmEr(class) 训练好的模型
    输出: 
        acc(int):(1) 模型准确率
    """
    test_data_interator = enumerate(test_loader)
    test_steps = test_steps = (len(test_set) + batch_num - 1) // batch_num

    t = trange(test_steps)
    Y_predict = []
    Y_true = []
    for i in t:
        idx, data = next(test_data_interator)
        X_batch, Y_batch = data
        Y_true.extend(Y_batch.tolist())
        X_batch = X_batch.view(X_batch.size(0), -1)
        X_batch = torch.cat((torch.ones(X_batch.size(0),1), X_batch), 1)   

        y = svmEr.predict(X_batch)
        Y_predict.extend(y.tolist())
        
    Y_predict = torch.LongTensor(Y_predict)
    Y_true = torch.LongTensor(Y_true)
    acc = torch.sum(Y_predict == Y_true).item() /len(Y_predict)
    
    return acc
    

In [6]:
lrs = [1e-2, 1e-3, 1e-4, 1e-5]
reg_strs = [0, 1, 10, 100, 1000]

result = {}

best_lr = None
best_reg = None
best_svm = None
best_acc = -1

for lr in lrs:
    for reg in reg_strs:
        svmEr = train(lr, reg, 25)
        acc = evaluate(svmEr)
        print("lr:{}; reg:{}; acc:{}".format(lr, reg, acc))
        if acc > best_acc:
            best_lr = lr
            best_reg = reg
            best_svm = svmEr
        result[(lr, reg)] = acc
print("the best: lr:{}; reg:{}; acc:{}".format(best_lr, best_reg, best_acc))

  3%|▎         | 7/250 [00:00<00:03, 67.91it/s, loss=19.389/19.424]

epoch:0


100%|██████████| 250/250 [00:01<00:00, 129.25it/s, loss=18.016/17.094]


18.01562213958741


  4%|▍         | 10/250 [00:00<00:02, 97.63it/s, loss=17.022/16.814]

epoch:1


100%|██████████| 250/250 [00:02<00:00, 123.84it/s, loss=16.391/15.977]

16.390617867126466



  5%|▌         | 13/250 [00:00<00:02, 115.95it/s, loss=15.721/15.361]

epoch:2


100%|██████████| 250/250 [00:02<00:00, 121.91it/s, loss=15.407/15.120]

15.40744403762817



  4%|▍         | 11/250 [00:00<00:02, 106.64it/s, loss=14.958/14.689]

epoch:3


100%|██████████| 250/250 [00:02<00:00, 121.24it/s, loss=14.691/14.609]


14.69140471282959


  4%|▍         | 10/250 [00:00<00:02, 99.67it/s, loss=14.353/14.064]

epoch:4


 29%|██▉       | 73/250 [00:00<00:01, 117.73it/s, loss=14.260/14.209]

KeyboardInterrupt: 

 29%|██▉       | 73/250 [00:19<00:01, 117.73it/s, loss=14.260/14.209]