In [None]:
import torch
from tqdm import trange

import utils as ut
import evaluate as eva

In [None]:
class SoftMax():
    """Linear softmax (multinomial logistic regression) classifier.

    Attributes:
        model_name(str): fixed identifier, 'SoftMax'.
        W(Tensor): (C, K) weight matrix, C = class_num and K = feature_dim + 1.
                   The extra column pairs with the constant-1 bias feature that
                   the training / evaluation code prepends to every sample.
        N(int): number of classes (C).
    """

    def __init__(self, kwargs):
        """
        Build the model and initialise its weights.

        Args:
            kwargs(dict): must contain 'class_num' (C) and 'feature_dim' (K - 1).
        """
        self.model_name = 'SoftMax'
        self.W = torch.randn(kwargs['class_num'], kwargs['feature_dim']+1,)
        self.N = kwargs['class_num']
        self.init_weight()

    def init_weight(self):
        """Re-initialise W in place with Xavier (Glorot) normal initialisation."""
        torch.nn.init.xavier_normal_(self.W)

    def train(self, X, Y, alpha=0.01, reg=2.5e4, vec=True):
        """
        Perform one gradient-descent step on a batch.

        Args:
            X(Tensor): (K, N) batch, one sample per column (bias feature included).
            Y(Tensor): (C, N) one-hot labels, one sample per column.
            alpha(float): learning rate.
            reg(float): L2 regularisation strength.
            vec(bool): True -> vectorised gradient, False -> per-sample loop.
        Returns:
            float: regularised batch loss, evaluated before W is updated.
        """
        grad_fn = self.cal_dw_with_vec if vec else self.cal_dw_with_loop
        L, dW = grad_fn(X, Y, reg)

        # In-place update so external references to W stay valid.
        self.W -= alpha * dW
        return L

    def cal_dw_with_loop(self, X, Y, reg):
        """
        Reference (per-sample loop) computation of loss and gradient.

        Args:
            X(Tensor): (K, N) batch, one sample per column.
            Y(Tensor): (C, N) one-hot labels.
            reg(float): L2 regularisation strength.
        Returns:
            L(float): mean cross-entropy + 0.5 * reg * ||W||^2.
            dW(Tensor): (C, K) gradient of L with respect to W.
        """
        N = X.size(1)
        L = 0.0
        dW = torch.zeros_like(self.W)                 # (C, K), same layout as W

        # (1) data loss and its gradient, accumulated sample by sample
        for i in range(N):
            x = X[:, i].unsqueeze(1)                  # (K, 1)
            y = Y[:, i].unsqueeze(1)                  # (C, 1)
            s = self.W.matmul(x)                      # (C, 1) class scores
            # logsumexp subtracts the max internally, so large scores cannot
            # overflow the way log(sum(exp(s))) does.
            L += (torch.logsumexp(s, 0) - (y * s).sum()).item()
            dW = dW + (torch.softmax(s, 0) - y) * x.t()   # (C,1)*(1,K) -> (C,K)

        # (2) average over the batch and add the L2 penalty
        L = L / N + 0.5*reg*torch.sum(torch.pow(self.W, 2)).item()
        dW = dW / N + reg*self.W

        return L, dW

    def cal_dw_with_vec(self, X, Y, reg):
        """
        Vectorised computation of loss and gradient.

        Args:
            X(Tensor): (K, N) batch, one sample per column.
            Y(Tensor): (C, N) one-hot labels.
            reg(float): L2 regularisation strength.
        Returns:
            L(float): mean cross-entropy + 0.5 * reg * ||W||^2.
            dW(Tensor): (C, K) gradient of L with respect to W.
        """
        N = X.size(1)
        S = self.W.matmul(X)                          # (C, N) class scores

        # Score of the true class is picked out by the one-hot mask; this avoids
        # building an (N, N) matrix only to read its diagonal.
        true_class_score = (Y * S).sum()
        # Numerically stable log-partition term, one value per sample.
        log_partition = torch.logsumexp(S, 0).sum()
        L = (log_partition - true_class_score).item()

        dW = (torch.softmax(S, 0) - Y).matmul(X.t())  # (C, K)

        L = L / N + 0.5*reg*torch.sum(torch.pow(self.W, 2)).item()
        dW = dW / N + reg*self.W
        return L, dW

    def predict(self, X):
        """
        Predict a class index for every sample.

        Args:
            X(Tensor): (K, N) batch, one sample per column.
        Returns:
            Tensor: (N) index of the highest-scoring class per sample.
        """
        S = X.t().matmul(self.W.t())  # (N, C)
        return torch.max(S, 1)[1]
        
        

In [None]:
# Load the training and test splits from ./data through the project helper.
# NOTE(review): ut.data_load is defined outside this notebook; both results are
# later passed to torch DataLoader and len(), so they are presumably map-style
# torch Datasets yielding (image, label) pairs — confirm against utils.
train_set, test_set = ut.data_load('./data')

In [None]:
# Number of samples per mini-batch (despite the name, this is the batch size,
# not the number of batches); also used to derive the steps per epoch.
batch_num = 200
# Options consumed by SoftMax: input feature count (3*32*32) and number of classes.
opt = {"feature_dim":3*32*32, "class_num":10}
# Training batches are shuffled; test batches keep the dataset order.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=batch_num, shuffle=True, num_workers=4)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=batch_num, shuffle=False, num_workers=4)

In [None]:
def train(alpha, reg, epoches):
    """
    Train a fresh SoftMax model with mini-batch gradient descent.

    Args:
        alpha(float): learning rate applied at every update step.
        reg(float): L2 regularisation strength.
        epoches(int): number of full passes over ``train_loader``.
    Returns:
        SoftMax: the trained model.
    """
    softMaxEr = SoftMax(opt)
    # Batches per epoch (ceil division); does not change between epochs.
    train_steps = (len(train_set) + batch_num - 1) // batch_num

    for epoch in range(epoches):
        train_data_interator = iter(train_loader)

        t = trange(train_steps)
        loss_avg = ut.RunningAverage()
        print("epoch:{}".format(epoch))
        for _ in t:
            X_batch, Y = next(train_data_interator)
            # Flatten each image, prepend the constant-1 bias feature, and put
            # samples in columns: (N, ...) -> (K, N).
            X_batch = X_batch.view(X_batch.size(0), -1)
            X_batch = torch.cat((torch.ones(X_batch.size(0), 1), X_batch), 1)
            X_batch = X_batch.t()

            # One-hot encode the labels as (C, N); C comes from the model so it
            # always matches the number of rows in W.
            C, N = softMaxEr.N, X_batch.size(1)
            Y_batch = torch.zeros(C, N)
            Y_batch[Y.tolist(), range(N)] = 1

            # Forward the caller's hyper-parameters; they were previously
            # ignored in favour of hard-coded alpha=1e-4, reg=1.
            loss = softMaxEr.train(X_batch, Y_batch, alpha=alpha, reg=reg, vec=True)
            loss_avg.update(loss)
            t.set_postfix(loss='{:05.3f}/{:05.3f}'.format(loss_avg(), loss))
        print(loss_avg())
    return softMaxEr

def evaluate(model):
    """
    Score a trained model on the test loader.

    Args:
        model: classifier exposing ``predict(X)``, where X is a (K, N) tensor
               with one bias-augmented, flattened sample per column.
    Returns:
        float: fraction of test samples whose predicted label equals the
               true label.
    """
    batches = enumerate(test_loader)
    test_steps = (len(test_set) + batch_num - 1) // batch_num

    predicted, expected = [], []
    for _ in trange(test_steps):
        _, (features, targets) = next(batches)
        expected += targets.tolist()

        # Flatten, prepend the constant-1 bias column, then put samples in columns.
        flat = features.view(features.size(0), -1)
        bias = torch.ones(flat.size(0), 1)
        inputs = torch.cat((bias, flat), 1).t()

        predicted += model.predict(inputs).tolist()

    predicted = torch.LongTensor(predicted)
    expected = torch.LongTensor(expected)
    hits = torch.sum(predicted == expected).item()

    return hits / len(predicted)
    

In [None]:
# Grid search over learning rate and L2 regularisation strength.
# NOTE(review): accuracy comes from evaluate(), which reads test_loader, so the
# selected pair is tuned on the test split — consider a held-out validation set.
lrs = [1e-2, 1e-3, 1e-4, 1e-5]
reg_strs = [0, 1, 10, 100, 1000]

result = {}          # (lr, reg) -> accuracy

best_lr = None
best_reg = None
best_model = None
best_acc = -1

for lr in lrs:
    for reg in reg_strs:
        model = train(lr, reg, 100)
        acc = evaluate(model)
        print("lr:{}; reg:{}; acc:{}".format(lr, reg, acc))
        result[(lr, reg)] = acc
        if acc > best_acc:
            # Record the new best score too; otherwise every run replaces the
            # "best" model and the summary below reports acc:-1.
            best_acc = acc
            best_lr = lr
            best_reg = reg
            best_model = model
print("the best: lr:{}; reg:{}; acc:{}".format(best_lr, best_reg, best_acc))