## Homework 8_1_MNIST_10_classes_classification_using_logistic_oop_to_do

In [1]:
import numpy as np

class LR:
    """Multi-output logistic regression trained with batch gradient descent.

    One independent sigmoid unit is trained per column of ``Y``; a sample's
    predicted label is the index of the unit with the highest activation.

    Attributes:
        lr: Step size applied to every gradient update.
        n_iters: Number of gradient-descent iterations run by ``fit``.
        w: Weight matrix of shape ``(n_features, n_outputs)``; ``None``
            until ``fit`` has been called.
        weights: Alias of ``w``.
    """

    # Probabilities are clipped to [_EPS, 1 - _EPS] before taking logs so a
    # saturated sigmoid cannot turn the loss into nan/inf.
    _EPS = 1e-15

    def __init__(self, learning_rate=0.00001, n_iters=200):
        self.lr = learning_rate
        self.n_iters = n_iters
        # Single storage slot for the parameters; ``weights`` aliases it.
        self.w = None

    @property
    def weights(self):
        """Return the current weight matrix (``None`` before ``fit``)."""
        return self.w

    @weights.setter
    def weights(self, value):
        self.w = value

    def sigmoid(self, z):
        """Return the element-wise logistic function ``1 / (1 + exp(-z))``."""
        # exp(-log(1 + exp(-z))) is algebraically the same value, but
        # logaddexp never forms exp(-z) directly, so large negative z does
        # not overflow.
        return np.exp(-np.logaddexp(0, -z))

    def forward(self, X):
        """Return the sigmoid activations for every row of ``X``.

        Raises:
            RuntimeError: If the model has no weights yet.
        """
        if self.w is None:
            raise RuntimeError("LR has no weights yet; call fit() first")
        weighted_sum = np.matmul(X, self.w)
        return self.sigmoid(weighted_sum)

    def predict(self, X):
        """Return the predicted class index of every row as an (N, 1) array."""
        y_hat = self.forward(X)
        labels = np.argmax(y_hat, axis=1)
        return labels.reshape(-1, 1)

    def loss(self, X, Y):
        """Return the binary cross-entropy summed over outputs, averaged over rows."""
        y_hat = np.clip(self.forward(X), self._EPS, 1 - self._EPS)
        first_term = Y * np.log(y_hat)
        second_term = (1 - Y) * np.log(1 - y_hat)
        return -np.sum(first_term + second_term) / X.shape[0]

    def gradient(self, X, Y):
        """Return the gradient of the loss with respect to the weights."""
        return np.matmul(X.T, (self.forward(X) - Y)) / X.shape[0]

    def report(self, iteration, X, Y):
        """Print the loss and the accuracy (in percent) on ``(X, Y)``."""
        # predict() yields an (N, 1) column of class indices while Y is
        # one-hot with one column per class, so take the arg-max of each
        # row of Y and reshape it to (N, 1) before comparing.
        true_labels = np.argmax(Y, axis=1).reshape(-1, 1)
        matches = np.count_nonzero(self.predict(X) == true_labels)
        n_examples = Y.shape[0]
        accuracy = matches * 100.0 / n_examples
        current_loss = self.loss(X, Y)
        print("%d - Loss: %.20f, %.2f%%" % (iteration, current_loss, accuracy))

    def fit(self, X, Y):
        """Train on ``(X, Y)`` from zero-initialised weights.

        Progress is printed every 20 iterations and once more after the
        final update.

        Args:
            X: Input matrix with one sample per row.
            Y: Target matrix with one row per sample and one column per output.

        Returns:
            The learned weight matrix (the same object as ``self.w``).
        """
        self.w = np.zeros((X.shape[1], Y.shape[1]))
        for i in range(self.n_iters):
            if i % 20 == 0:
                self.report(i, X, Y)
            self.w -= self.gradient(X, Y) * self.lr
        self.report(self.n_iters, X, Y)
        return self.w


In [2]:

# Load the MNIST data and prepare the training and testing data sets
import gzip
import struct
import pandas as pd

def load_images(filename):
    """Read a gzip-compressed image file into a 2-D uint8 matrix.

    The file starts with a 16-byte header of four big-endian unsigned
    32-bit integers: a value that is ignored, the image count, and the two
    image dimensions. Every remaining byte is one pixel.

    Args:
        filename: Path of the gzip file to read.

    Returns:
        An array of shape ``(n_images, dim_a * dim_b)`` with one flattened
        image per row.
    """
    header_size = 16
    with gzip.open(filename, 'rb') as stream:
        header = stream.read(header_size)
        payload = stream.read()
    # Only the product of the two dimensions matters for the flat layout.
    _unused, count, dim_a, dim_b = struct.unpack('>IIII', header)
    pixels = np.frombuffer(payload, dtype=np.uint8)
    return pixels.reshape(count, dim_a * dim_b)

def prepend_bias(X):
    """Return a copy of ``X`` with a leading column filled with 1s.

    The extra column acts as the constant bias input of each sample.
    """
    bias_position = 0
    bias_value = 1
    # axis=1 inserts a column (not a row) in front of the existing ones.
    return np.insert(arr=X, obj=bias_position, values=bias_value, axis=1)

def load_labels(filename):
    """Read a gzip-compressed label file into an ``(N, 1)`` uint8 column.

    The first 8 bytes of the file are a header and are discarded; every
    following byte is one label.

    Args:
        filename: Path of the gzip file to read.

    Returns:
        A one-column array holding one label per row.
    """
    header_size = 8
    with gzip.open(filename, 'rb') as stream:
        stream.read(header_size)  # header is not needed
        raw_labels = stream.read()
    flat = np.frombuffer(raw_labels, dtype=np.uint8)
    return flat.reshape(-1, 1)

def one_hot_encode(Y, n_classes=10):
    """Convert integer class labels into a one-hot matrix.

    Args:
        Y: Integer labels, one per row (for example an ``(N, 1)`` column
            or a flat length-``N`` sequence).
        n_classes: Number of columns of the result. Defaults to 10.

    Returns:
        A float array of shape ``(N, n_classes)`` that is 1 at
        ``[i, Y[i]]`` and 0 everywhere else.

    Raises:
        IndexError: If a label does not index a valid column.
    """
    labels = np.asarray(Y)
    n_labels = labels.shape[0]
    encoded_Y = np.zeros((n_labels, n_classes))
    if n_labels == 0:
        # Nothing to mark; also avoids an ambiguous reshape of an empty array.
        return encoded_Y
    # A single fancy-indexed assignment replaces the per-row Python loop:
    # the row indices broadcast against the labels of each row.
    rows = np.arange(n_labels).reshape(-1, 1)
    encoded_Y[rows, labels.reshape(n_labels, -1)] = 1
    return encoded_Y
    
# --- Training split ---------------------------------------------------
# Raw pixel matrix, one flattened image per row.
x_train = load_images("train-images-idx3-ubyte.gz")
# Same matrix with a leading bias column of 1s; for the standard MNIST
# files this is (60000, 785): 1 bias + 28 * 28 pixels per image.
X_train = prepend_bias(x_train)
# One digit label (0 to 9) per training image.
Y_train_unencoded = load_labels("train-labels-idx1-ubyte.gz")
# The same labels as one-hot rows of 10 elements each.
Y_train = one_hot_encode(Y_train_unencoded)

# --- Testing split ----------------------------------------------------
x_test = load_images("t10k-images-idx3-ubyte.gz")
# Same structure as X_train; (10000, 785) for the standard MNIST files.
X_test = prepend_bias(x_test)
# One digit label (0 to 9) per test image, kept unencoded so it can be
# compared directly with the class indices returned by predict().
Y_test = load_labels("t10k-labels-idx1-ubyte.gz")

In [3]:
# Testing

# Train on the full training split, then score on the held-out test split.
clf = LR(n_iters=200, learning_rate=0.00001)
clf.fit(X_train, Y_train)

# predictions and Y_test are both (N, 1) columns of class indices, so an
# element-wise comparison counts the correctly classified images.
predictions = clf.predict(X_test)
Accuracy = 100.0 * np.count_nonzero(predictions == Y_test) / Y_test.shape[0]
print("Testing accuracy: %.2f%%" % (Accuracy))

0 - Loss: 6.93147180559945397249, 9.87%
20 - Loss: 1.25378277537179583234, 85.45%
40 - Loss: 1.08333180073001211774, 86.98%
60 - Loss: 1.00734656458890392550, 87.72%
80 - Loss: 0.96254470749127318818, 88.24%
100 - Loss: 0.93231667470232026940, 88.59%
120 - Loss: 0.91020541845761471222, 88.84%
140 - Loss: 0.89313863437623119967, 89.08%
160 - Loss: 0.87945224017547751760, 89.22%
180 - Loss: 0.86815894847049102090, 89.36%
200 - Loss: 0.85863196488041304555, 89.49%
Testing accuracy: 90.32%
