# 003使用逻辑回归实现数字识别


In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat

In [12]:
data = loadmat("ex3data1.mat")
data

{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'y': array([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)}

In [21]:
def sigmoid(z):
    return 1 / (1 + np.exp(-z))

In [26]:
def cost(theta,X,y,learningRate):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    first = np.multiply(-y,np.log(sigmoid(X*theta.T)))
    second = np.multiply((1-y),np.log(1-sigmoid(X*theta.T)))
    reg = (learningRate/X.shape[0]*2)*np.sum(np.power(theta[:,1:],2))
    return np.sum(first-second)/X.shape[0] + reg

In [37]:
def gradient(theta,X,y,learningRate):
    theta = np.matrix(theta)
    X = np.matrix(X)
    y = np.matrix(y)
    parameters = int(theta.ravel().shape[1])
    error = sigmoid(X*theta.T) - y
    grad = ((X.T * error)/X.shape[0]).T + ((learningRate/X.shape[0])*theta)
    grad[0,0] = np.sum(np.multiply(error,X[:,0]))/X.shape[0]
    print(grad)
    return np.array(grad).ravel()

In [42]:
from scipy.optimize import minimize

def one_vs_all(X,y,num_label,learning_rate):
    rows = X.shape[0]
    params = X.shape[1]
    
    all_theta = np.zeros((num_label,params+1))
    
    X = np.insert(X,0,values=np.ones(rows),axis=1)
    
    for i in range(1,num_label+1):
        theta = np.zeros(params+1)
        y_i = np.array([1 if label == i else 0 for label in y])
        y_i = np.reshape(y_i,(rows,1))
        
        fmin = minimize(fun=cost,x0 = theta,args=(X,y_i,learning_rate),method='TNC',jac=gradient)
        all_theta[i-1,:] = fmin.x
    return all_theta

In [43]:
all_theta = one_vs_all(data['X'],data['y'],10,1)
all_theta

array([[-1.45298497e+00,  0.00000000e+00,  0.00000000e+00, ...,
         4.19670873e-04,  1.94521356e-07,  0.00000000e+00],
       [-2.68654029e+00,  0.00000000e+00,  0.00000000e+00, ...,
         1.87871158e-03, -2.16956381e-04,  0.00000000e+00],
       [-3.74407791e+00,  0.00000000e+00,  0.00000000e+00, ...,
        -2.16930736e-05,  2.33500231e-08,  0.00000000e+00],
       ...,
       [-7.26956876e+00,  0.00000000e+00,  0.00000000e+00, ...,
        -5.54205881e-05,  4.26809733e-06,  0.00000000e+00],
       [-4.19028790e+00,  0.00000000e+00,  0.00000000e+00, ...,
        -2.37149959e-04,  1.40038556e-05,  0.00000000e+00],
       [-2.65202139e+00,  0.00000000e+00,  0.00000000e+00, ...,
         9.55090059e-05,  7.40278604e-06,  0.00000000e+00]])

In [45]:
def predict_all(X, all_theta):
    rows = X.shape[0]
    params = X.shape[1]
    num_labels = all_theta.shape[0]
    
    # same as before, insert ones to match the shape
    X = np.insert(X, 0, values=np.ones(rows), axis=1)
    
    # convert to matrices
    X = np.matrix(X)
    all_theta = np.matrix(all_theta)
    
    # compute the class probability for each class on each training instance
    h = sigmoid(X * all_theta.T)
    
    # create array of the index with the maximum probability
    h_argmax = np.argmax(h, axis=1)
    
    # because our array was zero-indexed we need to add one for the true label prediction
    h_argmax = h_argmax + 1
    
    return h_argmax

In [46]:
y_pred = predict_all(data['X'], all_theta)
correct = [1 if a == b else 0 for (a, b) in zip(y_pred, data['y'])]
accuracy = (sum(map(int, correct)) / float(len(correct)))
print ('accuracy = {0}%'.format(accuracy * 100))

accuracy = 93.16%


In [51]:
a = np.matrix([1,2,3])
a[0,0] = 2
a

matrix([[2, 2, 3]])

# 神经网络

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.io import loadmat

In [11]:
data = loadmat("ex3weights.mat")
print(data['Theta1'].shape,data['Theta2'].shape)
theta1,theta2 = data["Theta1"],data["Theta2"]
print(theta1,theta2)

(25, 401) (10, 26)
[[-2.25623899e-02 -1.05624163e-08  2.19414684e-09 ... -1.30529929e-05
  -5.04175101e-06  2.80464449e-09]
 [-9.83811294e-02  7.66168682e-09 -9.75873689e-09 ... -5.60134007e-05
   2.00940969e-07  3.54422854e-09]
 [ 1.16156052e-01 -8.77654466e-09  8.16037764e-09 ... -1.20951657e-04
  -2.33669661e-06 -7.50668099e-09]
 ...
 [-1.83220638e-01 -8.89272060e-09 -9.81968100e-09 ...  2.35311186e-05
  -3.25484493e-06  9.02499060e-09]
 [-7.02096331e-01  3.05178374e-10  2.56061008e-09 ... -8.61759744e-04
   9.43449909e-05  3.83761998e-09]
 [-3.50933229e-01  8.85876862e-09 -6.57515140e-10 ... -1.80365926e-06
  -8.14464807e-06  8.79454531e-09]] [[-0.76100352 -1.21244498 -0.10187131 -2.36850085 -1.05778129 -2.20823629
   0.56383834  1.21105294  2.21030997  0.44456156 -1.18244872  1.04289112
  -1.60558756  1.30419943  1.37175046  1.74825095 -0.23365648 -1.52014483
   1.15324176  0.10368082 -0.37207719 -0.61530019 -0.1256836  -2.27193038
  -0.71836208 -1.29690315]
 [-0.61785176  0.61559

In [17]:
data1 = loadmat("ex3data1.mat")
x2 = np.matrix(np.insert(data1['X'],0,values=np.ones(data1['X'].shape[0]),axis=1))
y2 = np.matrix(data1["y"])

matrix([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)

In [20]:
z2 = x2*theta1.T
z2.shape

(5000, 25)

In [22]:
a2 = sigmoid(z2)
a2

matrix([[5.03618685e-02, 7.93957162e-02, 9.93001966e-01, ...,
         9.72517962e-01, 9.43421623e-01, 1.07213787e-01],
        [8.05782163e-03, 5.10486829e-02, 9.33671593e-01, ...,
         8.91385592e-01, 9.90982126e-01, 5.94701645e-02],
        [1.41949887e-02, 2.44354705e-02, 9.97518887e-01, ...,
         8.24334311e-01, 9.56466386e-01, 8.86760824e-02],
        ...,
        [2.96781175e-01, 7.32901746e-01, 1.57724076e-01, ...,
         8.60396828e-01, 3.98474223e-02, 1.51177198e-01],
        [8.51205095e-01, 3.58434539e-01, 1.83675450e-01, ...,
         9.84854863e-01, 2.75399966e-01, 4.35605471e-02],
        [9.72262381e-01, 5.48598771e-06, 9.93379633e-01, ...,
         9.99235749e-01, 8.96120297e-01, 5.00966928e-02]])

In [26]:
x3 = np.insert(z2,0,values=np.ones(z2.shape[0]),axis=1)
z3 = x3*theta2.T
a3 = sigmoid(z3)
a3

matrix([[1.19074037e-22, 2.22770953e-02, 9.99261841e-01, ...,
         1.00000000e+00, 9.99999523e-01, 1.00000000e+00],
        [6.71283142e-16, 9.48599371e-07, 9.99999993e-01, ...,
         1.00000000e+00, 9.99942774e-01, 1.00000000e+00],
        [1.69852311e-23, 2.57677044e-01, 9.99999995e-01, ...,
         1.00000000e+00, 9.99995348e-01, 1.00000000e+00],
        ...,
        [3.05292555e-02, 1.99939993e-03, 9.99999999e-01, ...,
         1.00000000e+00, 1.00000000e+00, 1.37969710e-17],
        [5.94986692e-18, 4.14108668e-15, 2.60844525e-09, ...,
         1.00000000e+00, 1.00000000e+00, 2.37064998e-08],
        [3.31353469e-29, 9.99983989e-01, 2.59657255e-23, ...,
         1.00000000e+00, 1.00000000e+00, 1.00000000e+00]])

In [27]:
y_predict = np.argmax(a3,axis=1)+1
y_predict

matrix([[10],
        [10],
        [ 8],
        ...,
        [ 9],
        [ 8],
        [ 8]], dtype=int64)

In [30]:
from sklearn.metrics import classification_report#这个包是评价报告
print(classification_report(y2, y_predict))

              precision    recall  f1-score   support

           1       1.00      0.60      0.75       500
           2       0.70      0.88      0.78       500
           3       0.68      0.91      0.78       500
           4       0.93      0.85      0.89       500
           5       0.94      0.32      0.48       500
           6       0.90      0.84      0.87       500
           7       0.98      0.75      0.85       500
           8       0.29      0.85      0.43       500
           9       0.82      0.42      0.56       500
          10       0.94      0.36      0.52       500

    accuracy                           0.68      5000
   macro avg       0.82      0.68      0.69      5000
weighted avg       0.82      0.68      0.69      5000

