In [79]:
import numpy as np
import struct
import os

In [80]:
# Load the MNIST data for this exercise.
# mat_data contains the training or testing images, or their labels.
#   For images, the matrix has size [m, n], where:
#      m is the number of examples.
#      n is the number of pixels in each image.
#   For labels, the matrix has size [m, 1] and holds the corresponding labels (0 to 9), where:
#      m is the number of examples.
def load_mnist(file_dir, is_images=True):
    """Load an MNIST IDX file (images or labels) into a 2-D integer array.

    Args:
        file_dir: Path of the IDX file to read.
        is_images: Truthy to parse an image header of four big-endian 32-bit
            integers (magic, count, rows, cols); falsy to parse a label
            header of two (magic, count).

    Returns:
        A writable integer ndarray of shape [num_images, num_rows * num_cols]
        for images, or [num_images, 1] for labels.

    Raises:
        struct.error: If the file is too short to contain the header.
        ValueError: If the file holds fewer payload bytes than the header
            announces.
    """
    # Read binary data; the context manager closes the file even if read() fails.
    with open(file_dir, 'rb') as bin_file:
        bin_data = bin_file.read()
    # Parse the file header
    if is_images:
        # Read images
        fmt_header = '>iiii'
        magic, num_images, num_rows, num_cols = struct.unpack_from(fmt_header, bin_data, 0)
    else:
        # Read labels
        fmt_header = '>ii'
        magic, num_images = struct.unpack_from(fmt_header, bin_data, 0)
        num_rows, num_cols = 1, 1
    data_size = num_images * num_rows * num_cols
    # View the payload bytes directly instead of unpacking them into a huge
    # Python tuple; frombuffer is read-only, so astype() both widens the values
    # to the default integer type and yields a writable copy.
    raw = np.frombuffer(bin_data, dtype=np.uint8, count=data_size,
                        offset=struct.calcsize(fmt_header))
    mat_data = raw.astype(int).reshape(num_images, num_rows * num_cols)
    print('Load images from %s, number: %d, data shape: %s' % (file_dir, num_images, str(mat_data.shape)))
    return mat_data

# Convert the images from grayscale to binary and build the one-hot style labels
def data_convert(x, y, m, k):
    """Binarize pixel values and build transposed one-hot labels.

    Args:
        x: Array of pixel intensities; values > 40 become 1, all others 0.
        y: Array holding at least m integer class labels (e.g. shape [m, 1]).
        m: Number of examples to encode.
        k: Number of classes.

    Returns:
        (binary_x, one_hot_y): binary_x is a new array with x's shape and
        dtype; one_hot_y has shape [k, m] with one_hot_y[label, t] == 1 for
        example t.
    """
    x = np.asarray(x)
    # Build a fresh array instead of thresholding in place: the in-place
    # version turned every 1 back into 0 when called a second time on the
    # same array (1 <= 40), silently zeroing the data on a cell re-run.
    binary_x = (x > 40).astype(x.dtype)

    # Flatten so both [m] and [m, 1] label layouts index the same way.
    labels = np.asarray(y).reshape(-1)[:m].astype(int)
    one_hot_y = np.zeros((m, k))
    one_hot_y[np.arange(m), labels] = 1
    return binary_x, one_hot_y.T

# call the load_mnist function to get the images and labels of training set and testing set
def load_data(mnist_dir, train_data_dir, train_label_dir, test_data_dir, test_label_dir):
    """Load the four MNIST files found under mnist_dir via load_mnist.

    Returns:
        A tuple (train_images, train_labels, test_images, test_labels).
    """
    print('Loading MNIST data from files...')
    # (file name, is_images) pairs, listed in the order they are read.
    sources = (
        (train_data_dir, True),
        (train_label_dir, False),
        (test_data_dir, True),
        (test_label_dir, False),
    )
    loaded = [load_mnist(os.path.join(mnist_dir, name), flag) for name, flag in sources]
    return tuple(loaded)

In [81]:
def softmax(x):
    """Row-wise softmax of a 2-D score array.

    Args:
        x: Array-like of shape [m, k]; each row holds the k class scores of
            one example.

    Returns:
        A float ndarray of shape [m, k] whose rows are non-negative and sum
        to 1.
    """
    x = np.asarray(x, dtype=float)
    m, k = x.shape
    if x.size == 0:
        return np.zeros([m, k])
    # Subtracting each row's maximum leaves the softmax unchanged but keeps
    # np.exp from overflowing to inf (which produced inf/inf = nan).
    shifted = x - x.max(axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=1, keepdims=True)
def cal_loss(theta, x, y):
    """Mean cross-entropy loss of the softmax model.

    Args:
        theta: Weights of shape [k, n] (k rows of n coefficients).
        x: Inputs of shape [m, n] (m rows, n columns).
        y: One-hot labels; unpacked here as shape [k, m].

    Returns:
        The loss as a 1x1 np.matrix, the type this function has always
        returned (so it prints as [[value]]).
    """
    k, m = y.shape
    probs = softmax(np.dot(x, np.asarray(theta).T))  # [m, k]
    # Clip away exact zeros so log() stays finite; otherwise an entry with
    # y == 0 and p == 0 contributes 0 * -inf = nan to the sum.
    log_probs = np.log(np.clip(probs.T, np.finfo(float).tiny, None))  # [k, m]
    loss = -np.sum(np.asarray(y) * log_probs) / m
    # np.mat was removed in NumPy 2.0; np.asmatrix builds the same 1x1 matrix.
    return np.asmatrix(loss)

def softmax_regression(theta, x, y, iters, alpha):
    """Fit softmax regression weights by batch gradient descent.

    Args:
        theta: Initial weights of shape [k, n].
        x: Inputs of shape [m, n].
        y: One-hot labels of shape [k, m].
        iters: Number of gradient steps to take.
        alpha: Learning rate.

    Returns:
        The updated theta, shape [k, n]. (Only theta is returned; the loss is
        printed every 10th iteration, not collected.)
    """
    m = y.shape[1]
    for i in range(iters):
        p = softmax(np.dot(x, theta.T))  # [m, k] class probabilities
        grad = (-1 / m * np.dot(x.T, (y.T - p))).T  # [k, n]
        # Update theta
        theta = theta - alpha * grad
        # Report the step number and loss every 10th iteration. The loss is a
        # full pass over the data, so it is only evaluated when it is printed.
        if i % 10 == 0:
            loss = cal_loss(theta, x, y)
            print("train iters: ",i)
            print(" loss: ", loss)
            print("\n")
    return theta

In [82]:
def predict(test_images, theta):
    """Return, for each row of test_images, the index of the highest-scoring class.

    The score of class c for an example is the dot product of the example
    with row c of theta.
    """
    class_scores = np.dot(test_images, theta.T)
    return class_scores.argmax(axis=1)

def cal_accuracy(y_pred, y):
    """Compute, print and return the fraction of predictions equal to the labels.

    Args:
        y_pred: Predicted labels, one per example (any shape with m entries).
        y: True labels, one per example (e.g. shape [m] or [m, 1]).

    Returns:
        The accuracy as a float in [0, 1]; 0.0 when there are no labels.

    Raises:
        ValueError: If y_pred and y hold a different number of entries.
    """
    # Flatten both sides so an [m] prediction vector compared with an [m, 1]
    # label column is matched element-wise instead of broadcasting to [m, m].
    truth = np.asarray(y).reshape(-1)
    pred = np.asarray(y_pred).reshape(-1)
    if pred.shape != truth.shape:
        raise ValueError('y_pred has %d entries but y has %d' % (pred.size, truth.size))
    if truth.size == 0:
        # No examples: report 0.0 rather than dividing by zero.
        acc = 0.0
    else:
        acc = int(np.count_nonzero(pred == truth)) / truth.size
    print("accuracy: ",acc)
    return acc

In [83]:
def train(train_images, train_labels, k, iters=5, alpha=0.5, seed=None):
    """Train a softmax regression classifier and return its weights.

    Args:
        train_images: Array of shape [m, n] of pixel intensities.
        train_labels: Integer class labels for the m examples.
        k: Number of classes.
        iters: Number of gradient-descent iterations.
        alpha: Learning rate.
        seed: Optional seed for the random initialization of theta. When None
            (the default) the global NumPy random state is used, exactly as
            before this parameter existed.

    Returns:
        theta, an array of shape [k, n].
    """
    m, n = train_images.shape
    # data processing
    x, y = data_convert(train_images, train_labels, m, k)  # x: [m, n], y: [k, m]

    # Initialize theta: each row holds the n classifier coefficients of one
    # class.
    if seed is None:
        theta = np.random.rand(k, n)  # [k, n]
    else:
        # A private generator gives a reproducible start without touching the
        # global random state.
        theta = np.random.RandomState(seed).rand(k, n)  # [k, n]
    # do the softmax regression
    theta = softmax_regression(theta, x, y, iters, alpha)
    return theta

In [None]:
if __name__ == '__main__':
    # initialize the parameters needed
    mnist_dir = "mnist_data/"
    train_data_dir = "train-images.idx3-ubyte"
    train_label_dir = "train-labels.idx1-ubyte"
    test_data_dir = "t10k-images.idx3-ubyte"
    test_label_dir = "t10k-labels.idx1-ubyte"
    k = 10
    iters = 1000
    alpha = 0.75
    seed = 0

    # Seed the global NumPy generator so the random initialization of theta
    # (and therefore the reported loss and accuracy) is reproducible.
    np.random.seed(seed)

    # get the data
    train_images, train_labels, test_images, test_labels = load_data(mnist_dir, train_data_dir, train_label_dir,
                                                                     test_data_dir, test_label_dir)
    print("Got data. ")

    # train the classifier
    theta = train(train_images, train_labels, k, iters, alpha)
    print("Finished training. ")

    # evaluate on the testset
    # The model is trained on binarized pixels, so the test images must go
    # through the same conversion before scoring; feeding raw 0-255 values
    # would evaluate the classifier on inputs it was never fitted to.
    test_x, _ = data_convert(test_images, test_labels, test_images.shape[0], k)
    y_predict = predict(test_x, theta)
    accuracy = cal_accuracy(y_predict, test_labels)
    print("Finished test. ")

Loading MNIST data from files...
Load images from mnist_data/train-images.idx3-ubyte, number: 60000, data shape: (60000, 784)
Load images from mnist_data/train-labels.idx1-ubyte, number: 60000, data shape: (60000, 1)
Load images from mnist_data/t10k-images.idx3-ubyte, number: 10000, data shape: (10000, 784)
Load images from mnist_data/t10k-labels.idx1-ubyte, number: 10000, data shape: (10000, 1)
Got data. 
train iters:  0
 loss:  [[4.82655122]]


