In [1]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

# Đọc và xử lý dữ liệu từ file csv

In [2]:
Train_data = pd.read_csv('.\\mnist_train.csv')
Test_data = pd.read_csv('.\\mnist_test.csv')

In [3]:
Train_data.head()

Unnamed: 0,5,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.608,0.609,0.610,0.611,0.612,0.613,0.614,0.615,0.616,0.617
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [4]:
print(Test_data.shape)
print(Train_data.shape)

(9999, 785)
(59999, 785)


In [5]:
Train_data = np.array(Train_data)
Test_data = np.array(Test_data)

m_train, n_train = Train_data.shape
m_test, n_test = Test_data.shape
np.random.shuffle(Train_data)

Train_data = Train_data[0: m_train].T
Y_train = Train_data[0]
X_train = Train_data[1: n_train]
X_train = X_train / 255

Test_data = Test_data[0: m_test].T
Y_test = Test_data[0]
X_test = Test_data[1: n_test]
X_test = X_test / 255

In [6]:
X_train[:, 0].shape

(784,)

# Cài đặt các hàm tính toán cần thiết

In [7]:
def init_params():
    W1 = np.random.rand(10, 784) - 0.5
    b1 = np.random.rand(10, 1) - 0.5
    W2 = np.random.rand(10, 10) - 0.5
    b2 = np.random.rand(10, 1) - 0.5
    return W1, b1, W2, b2

def ReLU(Z):
    return np.maximum(0, Z)

def Softmax(Z):
    A = np.exp(Z) / sum(np.exp(Z))
    return A

def one_hot(Y):
    one_hot_Y = np.zeros((Y.size, Y.max() + 1))
    one_hot_Y[np.arange(Y.size), Y] = 1
    one_hot_Y = one_hot_Y.T
    return one_hot_Y

def derivative_relu(Z):
    return Z > 0

def foward_propagation(W1, b1, W2, b2, X):
    Z1 = W1.dot(X) + b1
    A1 = ReLU(Z1)
    Z2 = W2.dot(A1) + b2
    A2 = Softmax(Z2)
    return Z1, A1, Z2, A2

def backpropagation(Z1, A1, Z2, A2, W1, W2, X, Y):
    m = Y.size
    one_hot_Y = one_hot(Y)
    dZ2 = A2 - one_hot_Y
    dW2 = 1 / m * dZ2.dot(A1.T)
    db2 = 1/m * np.sum(dZ2)

    dZ1 = W2.T.dot(dZ2) * derivative_relu(Z1)
    dW1 = 1 / m * dZ1.dot(X.T)
    db1 = 1 / m * np.sum(dZ1)
    return dW1, db1, dW2, db2

def update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha):
    W1 = W1 - alpha * dW1
    b1 = b1 - alpha * db1
    W2 = W2 - alpha * dW2
    b2 = b2 - alpha * db2
    return W1, b1, W2, b2

def get_predictions(A2):
    return np.argmax(A2, 0)

def get_accuracy(predictions, Y):
    #print(predictions)
    return np.sum(predictions == Y) / Y.size

# Cài đặt và thực nghiệm

In [8]:
def gradient_descent(X, Y, epochs, alpha):
    W1, b1, W2, b2 = init_params()

    for i in range(epochs):
        Z1, A1, Z2, A2 = foward_propagation(W1, b1, W2, b2, X)
        dW1, db1, dW2, db2 = backpropagation(Z1, A1, Z2, A2, W1, W2, X, Y)
        W1, b1, W2, b2 = update_params(W1, b1, W2, b2, dW1, db1, dW2, db2, alpha)
        predictions = get_predictions(A2)

        print("Epochs {}: Accuracy: {}".format(i + 1, get_accuracy(predictions, Y)))

    return W1, b1, W2, b2

In [10]:
W1, b1, W2, b2 = gradient_descent(X_train, Y_train, 500, 0.2)

Epochs 1: Accuracy: 0.09306821780363006
Epochs 2: Accuracy: 0.10585176419606994
Epochs 3: Accuracy: 0.12948549142485707
Epochs 4: Accuracy: 0.15586926448774147
Epochs 5: Accuracy: 0.18225303755062586
Epochs 6: Accuracy: 0.20583676394606576
Epochs 7: Accuracy: 0.22885381423023718
Epochs 8: Accuracy: 0.24753745895764928
Epochs 9: Accuracy: 0.2628877147952466
Epochs 10: Accuracy: 0.27612126868781145
Epochs 11: Accuracy: 0.28908815146919115
Epochs 12: Accuracy: 0.3060884348072468
Epochs 13: Accuracy: 0.3326722112035201
Epochs 14: Accuracy: 0.3473391223187053
Epochs 15: Accuracy: 0.3570226170436174
Epochs 16: Accuracy: 0.36607276787946463
Epochs 17: Accuracy: 0.37495624927082116
Epochs 18: Accuracy: 0.3832897214953583
Epochs 19: Accuracy: 0.3916398606643444
Epochs 20: Accuracy: 0.4001400023333722
Epochs 21: Accuracy: 0.4074401240020667
Epochs 22: Accuracy: 0.41527358789313157
Epochs 23: Accuracy: 0.42215703595059917
Epochs 24: Accuracy: 0.4303905065084418
Epochs 25: Accuracy: 0.437807296788

Sau 500 epochs, độ chính xác đạt được vào khoảng  88,3%

In [11]:
def test(X, Y, W1, b1, W2, b2):
    Z1, A1, Z2, A2 = foward_propagation(W1, b1, W2, b2, X)
    predictions = get_predictions(A2)
    print("Accuracy: {}".format(get_accuracy(predictions, Y)))

test(X_test, Y_test, W1, b1, W2, b2)

Accuracy: 0.8861886188618862


Kết quả trên bộ test cũng rơi vào khoảng 88%