# 3.多分类

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat

## 3.1读取数据

In [2]:
data = loadmat('ex3data1.mat')
data

{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'y': array([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)}

In [3]:
type(data)

dict

In [4]:
data['X'].shape,data['y'].shape

((5000, 400), (5000, 1))

In [5]:
np.unique(data['y'])

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=uint8)

data数据中有5000个样本，每个样本的特征值有400个，每幅图像为20$\times$20 pixels

y中的数据为每幅图像的label

## 3.2数据初始化

In [6]:
X = np.insert(data['X'],0,1,axis=1)
Y = data['y']
label_nums = np.unique(data['y'])
allTheta = np.zeros((len(label_nums),X.shape[1]))

In [7]:
X.shape,Y.shape,allTheta.shape,label_nums

((5000, 401),
 (5000, 1),
 (10, 401),
 array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10], dtype=uint8))

## 3.3代价函数

sigmoid函数：
<center>
    $g(z)=\frac{1}{1+e^{-z}}$
</center>
假设函数：
<center>
    $h_{\theta}(X)=$ $\frac{1}{1+e^{-\theta^{T}X}}$
</center>
代价函数：
<center>
    $J(\theta)=-\frac{1}{M}\sum\limits_{i=1}^{M}(1-Y^{(i)})\ln[1-h_\theta(X^{(i)})]-Y^{(i)}\ln[h_\theta(X^{(i)})] + \frac{\lambda}{2M}\sum\limits_{j=2}^{N}(\theta_j^2)$
</center>

In [8]:
def sigmoid(z):
    return 1/(1+np.exp(-z))

In [9]:
#theta(N,),X(M,N),Y(M,1)
def computeCost(theta,X,Y,learningRate):
    theta = theta.reshape((theta.shape[0],1)) #theta:(N,)->(N,1)
    J1 = -1/len(X)*np.sum((1-Y)*np.log(1-sigmoid(X@theta))+Y*np.log(sigmoid(X@theta)))
    theta_1_to_N = theta[:,1:]
    J2 = learningRate/(2*len(X))*np.sum(np.power(theta_1_to_N,2))
    return J1 + J2

## 3.4计算梯度 

正则化梯度：
<center>
$\frac{\partial J(\theta)}{\partial\theta_j}=$$\frac{1}{M}\sum\limits_{i=1}^{M}(h_\theta(X^{(i)})-Y^{(i)})X_j^{(i)}+\frac{\lambda }{M}{{\theta }_{j}}\text{ }\text{             for  j}\ge \text{1}$
</center>

In [10]:
#theta(N,1),X(M,N),Y(M,1)
def gradient(theta,X,Y,learningRate):
    theta = theta.reshape((theta.shape[0],1)) #theta:(n,)->(n,1)
    G1 = 1/len(X)*X.T@(sigmoid(X@theta)-Y) #(N,1)
    theta[0,0] = 0; #theta0不参与正则化
    G2 = learningRate/len(X)*theta #(N,1)
    return G1 + G2

## 3.5训练模型

In [11]:
from scipy.optimize import minimize
def train(allTheta,label_nums,X,Y,learningRate):
    for i in range(1,len(label_nums)+1):
        theta = np.zeros((X.shape[1],))
        y_i = np.array([1 if label == i else 0 for label in Y]) #处理Y，将除当前label的其他标签全部置为0，当前标签置为1
        y_i = np.reshape(y_i,(X.shape[0],1)) #y_i:(M,1)
        res = minimize(fun=computeCost,x0=theta,args=(X,y_i,learningRate),method='TNC',jac=gradient)
        allTheta[i-1,:] = res.x
    return allTheta

In [12]:
res_allTtheta = train(allTheta,label_nums,X,Y,1)

In [13]:
res_allTtheta,res_allTtheta.shape

(array([[-2.38144132e+00,  0.00000000e+00,  0.00000000e+00, ...,
          1.30301970e-03, -4.03530454e-10,  0.00000000e+00],
        [-3.16678713e+00,  0.00000000e+00,  0.00000000e+00, ...,
          4.37134315e-03, -4.98907775e-04,  0.00000000e+00],
        [-4.79115575e+00,  0.00000000e+00,  0.00000000e+00, ...,
         -2.87023610e-05, -2.48486856e-07,  0.00000000e+00],
        ...,
        [-7.99441011e+00,  0.00000000e+00,  0.00000000e+00, ...,
         -8.83764734e-05,  7.14701218e-06,  0.00000000e+00],
        [-4.57790042e+00,  0.00000000e+00,  0.00000000e+00, ...,
         -1.33412640e-03,  9.95192733e-05,  0.00000000e+00],
        [-5.37978818e+00,  0.00000000e+00,  0.00000000e+00, ...,
         -1.19016202e-04,  8.50472182e-06,  0.00000000e+00]]),
 (10, 401))

In [14]:
def predict_all(X,allTheta):
    h = sigmoid(X@allTheta.T)
    h_argmax = np.argmax(h,axis=1) + 1
    return h_argmax

In [15]:
Y_pred = predict_all(X,res_allTtheta)
correct = [1 if a==b else 0 for (a,b) in zip(Y_pred,Y)]
accuray = (sum(map(int,correct)))/float(len(correct))
print('accuracy = {0}%'.format(accuray*100))

accuracy = 94.48%
