# Softmax回归
- 多分类任务的逻辑回归版本

In [11]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split

In [80]:
class SoftmaxRegression(object):
    """Multi-class (softmax) logistic regression fitted by full-batch gradient descent.

    The model keeps one weight row and one bias per class. Labels are
    one-hot encoded with the ``OneHotEncoder`` stored in ``y_one_hot``.
    """

    def __init__(self, learning_rate=0.01, epochs=1000, is_regularization = None, lam=0.1,
                verbose = False):
        """
        :param learning_rate: float, step size applied to every gradient update
        :param epochs: int, number of full-batch iterations
        :param is_regularization: None for no penalty, 'l1' for L1, 'l2' for L2
        :param lam: float, coefficient of the regularization term
        :param verbose: bool, print a progress line periodically while training
        """
        self.alpha = learning_rate
        self.epochs = epochs
        self.is_regularization = is_regularization
        self.lam = lam
        # Parameters are created in train(), once the data dimensions are known.
        self.theta = None
        self.b = None
        self.verbose = verbose
        self.y_one_hot = OneHotEncoder()

    def train(self, train_x, train_y):
        """Fit ``theta`` and ``b`` on the given samples.

        :param train_x: array of shape (m, n), one sample per row
        :param train_y: array of shape (m,) or (m, 1) holding the class labels
        """
        m, n = train_x.shape
        if len(train_y.shape) == 1:
            # The encoder expects a 2-D column of labels.
            train_y = train_y.reshape(-1, 1)
        self.y_one_hot.fit(train_y)
        # toarray() is the portable way to densify the sparse encoder output.
        train_y = self.y_one_hot.transform(train_y).toarray()
        _, nn = train_y.shape
        self.theta = np.random.randn(nn, n)
        self.b = np.random.randn(nn)

        # Progress cadence is derived from the sample count, as before; the
        # floor division and lower bound of 1 keep it a non-zero integer.
        report_every = max(1, m // 10)

        for i in range(self.epochs):
            residual = self.predict(train_x) - train_y
            # NOTE(review): the weight gradient is summed over samples while the
            # bias gradient is averaged; kept as-is so existing learning rates
            # behave the same -- confirm whether this asymmetry is intended.
            d_theta = np.dot(residual.T, train_x)
            d_b = np.mean(residual, axis=0)
            assert d_b.shape == self.b.shape and d_theta.shape == self.theta.shape

            if self.is_regularization == 'l1':
                # Subgradient of |theta|: -1 for negative weights, +1 otherwise.
                d_theta += self.lam * np.where(self.theta < 0, -1.0, 1.0)
            elif self.is_regularization == 'l2':
                d_theta += self.lam * self.theta

            self.theta -= self.alpha * d_theta
            self.b -= self.alpha * d_b
            if self.verbose and (i + 1) % report_every == 0:
                # Single-argument form prints the same text on Python 2 and 3.
                print('the %d train' % (i))

    def fit(self, train_x, train_y):
        """Alias for :meth:`train`."""
        self.train(train_x, train_y)

    def predict(self, test_x):
        """Return the class-probability matrix, one row per sample.

        :param test_x: array of shape (m, n)
        :return: array of shape (m, number of classes); each row sums to 1
        """
        return self.softmax(np.dot(test_x, self.theta.T) + self.b)

    def output(self, test_x):
        """Return, for every sample, the column index of the largest probability.

        NOTE(review): this is an index into the one-hot columns; it presumably
        equals the original label only when labels are 0..k-1 -- confirm.
        """
        pre = self.predict(test_x)
        out = np.argmax(pre, axis=1)
        return out

    def softmax(self, x):
        """Row-wise softmax of a 2-D score matrix.

        The row maximum is subtracted before exponentiating. This leaves the
        result unchanged mathematically but prevents ``exp`` from overflowing
        to ``inf`` (and the quotient from becoming ``nan``) for large scores.
        """
        shifted = x - np.max(x, axis=1, keepdims=True)
        exp_x = np.exp(shifted)
        return exp_x / np.sum(exp_x, axis=1, keepdims=True)

In [56]:
# Load the iris dataset bundled with scikit-learn.
iris_data = load_iris()

In [57]:
# Split the dataset object into the feature matrix and the target vector.
x_data = iris_data.data
y_data = iris_data.target
# Display the feature matrix shape as the cell output.
x_data.shape

(150, 4)

In [58]:
# Display the target vector shape as the cell output.
y_data.shape

(150,)

In [70]:
# Hold out part of the data for evaluation. No random_state is passed, so the
# split (and therefore the scores below) can differ from run to run.
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data)
# Sanity check: both splits must have the same number of feature columns.
assert x_train.shape[1] == x_test.shape[1]

In [86]:
# Unregularized model; lam is ignored because is_regularization is None.
sr = SoftmaxRegression(learning_rate=0.001, epochs=1000, is_regularization = None, 
                       lam=0.1,verbose = False)

In [87]:
# Fit the model on the training split (fit simply delegates to train).
sr.fit(x_train, y_train)

In [88]:
# Predicted class index per held-out sample (argmax over the softmax outputs).
pre_y = sr.output(x_test)

In [89]:
# Fraction of held-out samples whose prediction matches the true label.
accuracy_score(y_test, pre_y)

0.97368421052631582

In [90]:
# Per-class breakdown of the errors: rows are true labels, columns predictions.
confusion_matrix(y_test, pre_y)

array([[10,  0,  0],
       [ 0, 15,  1],
       [ 0,  0, 12]])