In [86]:
# 4 layer
import numpy as np
import time
import matplotlib.pyplot as plt

# def the function for convert label and image 28x28 into 1 + 728 and save as csv file
def convert(imgf, labelf, outf, n):
    f = open(imgf, "rb")
    o = open(outf, "w")
    l = open(labelf, "rb")

    f.read(16)
    l.read(8)
    images = []

    for i in range(n):
        image = [ord(l.read(1))]
        for j in range(28*28):
            image.append(ord(f.read(1)))
        images.append(image)

    for image in images:
        o.write(",".join(str(pix) for pix in image)+"\n")
    f.close()
    o.close()
    l.close()

# convert mnist files in to csv
#convert("train-images-idx3-ubyte", "train-labels-idx1-ubyte", "train_10000.csv", 10000)
#convert("t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte", "test_1000.csv", 1000)

# convert the data from csv to np.array
train_file = open("train_10000.csv", 'r')
train_list = train_file.readlines()
train_file.close()
#len(train_list)
test_file = open("test_1000.csv", 'r')
test_list = test_file.readlines()
test_file.close()

# NN building
class DNN:
  def __init__(self, sizes, epochs, lr):
    self.sizes = sizes
    self.epochs = epochs
    self.lr = lr

    # number of nodes in each layer
    input_layer=self.sizes[0]
    hidden_1=self.sizes[1]
    hidden_2=self.sizes[2]
    output_layer=self.sizes[3]

    self.params = {
        'W1':np.random.randn(hidden_1, input_layer) * np.sqrt(1. / hidden_1),
        'W2':np.random.randn(hidden_2, hidden_1) * np.sqrt(1. / hidden_2),
        'W3':np.random.randn(output_layer, hidden_2) * np.sqrt(1. / output_layer)
    }
  def sigmoid(self, x, derivative=False):
      if derivative:
          return (np.exp(-x))/((np.exp(-x)+1)**2)
      return 1/(1 + np.exp(-x))

  def softmax(self, x, derivative=False):
      # Numerically stable with large exponentials
      exps = np.exp(x - x.max())
      if derivative:
          return exps / np.sum(exps, axis=0) * (1 - exps / np.sum(exps, axis=0))
      return exps / np.sum(exps, axis=0)
  def forward_pass(self, x_train):
      params = self.params

      # input layer activations becomes sample
      params['A0'] = x_train

      # input layer to hidden layer 1
      params['Z1'] = np.dot(params["W1"], params['A0'])
      params['A1'] = self.sigmoid(params['Z1'])

      # hidden layer 1 to hidden layer 2
      params['Z2'] = np.dot(params["W2"], params['A1'])
      params['A2'] = self.sigmoid(params['Z2'])

      # hidden layer 2 to output layer
      params['Z3'] = np.dot(params["W3"], params['A2'])
      params['A3'] = self.softmax(params['Z3'])

      return params['A3']

  def backward_pass(self, y_train, output):
      params = self.params
      change_w = {}

      # Calculate W3 update
      error = 2 * (output - y_train) / output.shape[0] * self.softmax(params['Z3'], derivative=True)
      change_w['W3'] = np.outer(error, params['A2'])

      # Calculate W2 update
      error = np.dot(params['W3'].T, error) * self.sigmoid(params['Z2'], derivative=True)
      change_w['W2'] = np.outer(error, params['A1'])

      # Calculate W1 update
      error = np.dot(params['W2'].T, error) * self.sigmoid(params['Z1'], derivative=True)
      change_w['W1'] = np.outer(error, params['A0'])

      return change_w

  def update_network_parameters(self, changes_to_w):
      for key, value in changes_to_w.items():
          self.params[key] -= self.lr * value

  def compute_accuracy(self, test_data, output_nodes):
      predictions = []
      for x in test_data:
          all_values = x.split(',')
          # scale and shift the inputs
          inputs = (np.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
          # create the target output values (all 0.01, except the desired label which is 0.99)
          targets = np.zeros(output_nodes) + 0.01
          # all_values[0] is the target label for this record
          targets[int(all_values[0])] = 0.99
          output = self.forward_pass(inputs)
          pred = np.argmax(output)
          predictions.append(pred == np.argmax(targets))

      return np.mean(predictions)

  def train(self, train_list, test_list, output_nodes):
      start_time = time.time()
      for iteration in range(self.epochs):
          for x in train_list:
              all_values = x.split(',')
              # scale and shift the inputs
              inputs = (np.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
              # create the target output values (all 0.01, except the desired label which is 0.99)
              targets = np.zeros(output_nodes) + 0.01
              # all_values[0] is the target label for this record
              targets[int(all_values[0])] = 0.99
              output = self.forward_pass(inputs)
              changes_to_w = self.backward_pass(targets, output)
              self.update_network_parameters(changes_to_w)

          accuracy_train = self.compute_accuracy(train_list, output_nodes)
          print('Epoch: {0}, Time Spent: {1:.2f}s, Train Accuracy: {2:.2f}%'.format(
              iteration+1, time.time() - start_time, accuracy_train * 100))

          accuracy_test = self.compute_accuracy(test_list, output_nodes)
          print('Epoch: {0}, Time Spent: {1:.2f}s, Test Accuracy: {2:.2f}%'.format(
              iteration+1, time.time() - start_time, accuracy_test * 100))


# run the NN
dnn = DNN(sizes=[784, 128, 64, 10], epochs=20, lr=0.001)
dnn.train(train_list, test_list, 10)

Epoch: 1, Time Spent: 9.21s, Train Accuracy: 11.02%
Epoch: 1, Time Spent: 9.37s, Test Accuracy: 14.10%
Epoch: 2, Time Spent: 15.58s, Train Accuracy: 12.69%
Epoch: 2, Time Spent: 15.74s, Test Accuracy: 15.80%
Epoch: 3, Time Spent: 21.94s, Train Accuracy: 15.75%
Epoch: 3, Time Spent: 22.11s, Test Accuracy: 18.00%
Epoch: 4, Time Spent: 28.29s, Train Accuracy: 17.75%
Epoch: 4, Time Spent: 28.46s, Test Accuracy: 19.90%
Epoch: 5, Time Spent: 34.68s, Train Accuracy: 20.18%
Epoch: 5, Time Spent: 34.84s, Test Accuracy: 22.10%
Epoch: 6, Time Spent: 41.20s, Train Accuracy: 21.68%
Epoch: 6, Time Spent: 41.36s, Test Accuracy: 23.60%
Epoch: 7, Time Spent: 47.78s, Train Accuracy: 22.37%
Epoch: 7, Time Spent: 47.95s, Test Accuracy: 24.20%
Epoch: 8, Time Spent: 54.17s, Train Accuracy: 23.09%
Epoch: 8, Time Spent: 54.33s, Test Accuracy: 24.20%
Epoch: 9, Time Spent: 61.21s, Train Accuracy: 23.52%
Epoch: 9, Time Spent: 61.39s, Test Accuracy: 24.10%
Epoch: 10, Time Spent: 67.64s, Train Accuracy: 23.94%
Epo

In [87]:
# 5 layer
import numpy as np
import time

# def the function for convert label and image 28x28 into 1 + 728 and save as csv file
def convert(imgf, labelf, outf, n):
    f = open(imgf, "rb")
    o = open(outf, "w")
    l = open(labelf, "rb")

    f.read(16)
    l.read(8)
    images = []

    for i in range(n):
        image = [ord(l.read(1))]
        for j in range(28*28):
            image.append(ord(f.read(1)))
        images.append(image)

    for image in images:
        o.write(",".join(str(pix) for pix in image)+"\n")
    f.close()
    o.close()
    l.close()

# convert mnist files in to csv
#convert("train-images-idx3-ubyte", "train-labels-idx1-ubyte", "train_10000.csv", 10000)
#convert("t10k-images-idx3-ubyte", "t10k-labels-idx1-ubyte", "test_1000.csv", 1000)

train_file = open("train_10000.csv", 'r')
train_list = train_file.readlines()
train_file.close()
#len(train_list)
test_file = open("test_1000.csv", 'r')
test_list = test_file.readlines()
test_file.close()

# NN building
class DNN:
  def __init__(self, sizes, epochs, lr):
    self.sizes = sizes
    self.epochs = epochs
    self.lr = lr

    # number of nodes in each layer
    input_layer = self.sizes[0]
    hidden_1 = self.sizes[1]
    hidden_2 = self.sizes[2]
    hidden_3 = self.sizes[3]
    output_layer = self.sizes[4]

    self.params = {
        'W1': np.random.randn(hidden_1, input_layer) * np.sqrt(1. / hidden_1),
        'W2': np.random.randn(hidden_2, hidden_1) * np.sqrt(1. / hidden_2),
        'W3': np.random.randn(hidden_3, hidden_2) * np.sqrt(1./ hidden_3),
        'W4': np.random.randn(output_layer, hidden_3) * np.sqrt(1. / output_layer)
    }
  def sigmoid(self, x, derivative=False):
      if derivative:
          return (np.exp(-x))/((np.exp(-x)+1)**2)
      return 1/(1 + np.exp(-x))

  def softmax(self, x, derivative=False):
      # Numerically stable with large exponentials
      exps = np.exp(x - x.max())
      if derivative:
          return exps / np.sum(exps, axis=0) * (1 - exps / np.sum(exps, axis=0))
      return exps / np.sum(exps, axis=0)
  def forward_pass(self, x_train):
      params = self.params

      # input layer activations becomes sample
      params['A0'] = x_train

      # input layer to hidden layer 1
      params['Z1'] = np.dot(params["W1"], params['A0'])
      params['A1'] = self.sigmoid(params['Z1'])

      # hidden layer 1 to hidden layer 2
      params['Z2'] = np.dot(params["W2"], params['A1'])
      params['A2'] = self.sigmoid(params['Z2'])

      # hidden layer 2 to hidden layer 3
      params['Z3'] = np.dot(params["W3"], params['A2'])
      params['A3'] = self.sigmoid(params['Z3'])

      # hidden layer 3 to output layer
      params['Z4'] = np.dot(params["W4"], params['A3'])
      params['A4'] = self.softmax(params['Z4'])

      return params['A4']

  def backward_pass(self, y_train, output):
      params = self.params
      change_w = {}

      # Calculate W4 update
      error = 2 * (output - y_train) / output.shape[0] * self.softmax(params['Z4'], derivative=True)
      change_w['W4'] = np.outer(error, params['A3'])

      # Calculate W3 update
      error = np.dot(params['W4'].T, error) * self.sigmoid(params['Z3'], derivative=True)
      change_w['W3'] = np.outer(error, params['A2'])

      # Calculate W2 update
      error = np.dot(params['W3'].T, error) * self.sigmoid(params['Z2'], derivative=True)
      change_w['W2'] = np.outer(error, params['A1'])

      # Calculate W1 update
      error = np.dot(params['W2'].T, error) * self.sigmoid(params['Z1'], derivative=True)
      change_w['W1'] = np.outer(error, params['A0'])

      return change_w

  def update_network_parameters(self, changes_to_w):
      for key, value in changes_to_w.items():
          self.params[key] -= self.lr * value

  def compute_accuracy(self, test_data, output_nodes):
      predictions = []
      for x in test_data:
          all_values = x.split(',')
          # scale and shift the inputs
          inputs = (np.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
          # create the target output values (all 0.01, except the desired label which is 0.99)
          targets = np.zeros(output_nodes) + 0.01
          # all_values[0] is the target label for this record
          targets[int(all_values[0])] = 0.99
          output = self.forward_pass(inputs)
          pred = np.argmax(output)
          predictions.append(pred == np.argmax(targets))

      return np.mean(predictions)

  def train(self, train_list, test_list, output_nodes):
      start_time = time.time()
      for iteration in range(self.epochs):
          for x in train_list:
              all_values = x.split(',')
              # scale and shift the inputs
              inputs = (np.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
              # create the target output values (all 0.01, except the desired label which is 0.99)
              targets = np.zeros(output_nodes) + 0.01
              # all_values[0] is the target label for this record
              targets[int(all_values[0])] = 0.99
              output = self.forward_pass(inputs)
              changes_to_w = self.backward_pass(targets, output)
              self.update_network_parameters(changes_to_w)

          accuracy_train = self.compute_accuracy(train_list, output_nodes)
          print('Epoch: {0}, Time Spent: {1:.2f}s, Train Accuracy: {2:.2f}%'.format(
              iteration+1, time.time() - start_time, accuracy_train * 100))

          accuracy_test = self.compute_accuracy(test_list, output_nodes)
          print('Epoch: {0}, Time Spent: {1:.2f}s, Test Accuracy: {2:.2f}%'.format(
              iteration+1, time.time() - start_time, accuracy_test * 100))


# run the NN
dnn = DNN(sizes=[784, 256, 128, 64, 10], epochs=20, lr=0.001)
dnn.train(train_list, test_list, 10)

Epoch: 1, Time Spent: 12.28s, Train Accuracy: 12.66%
Epoch: 1, Time Spent: 12.48s, Test Accuracy: 13.00%
Epoch: 2, Time Spent: 28.41s, Train Accuracy: 21.60%
Epoch: 2, Time Spent: 28.75s, Test Accuracy: 20.10%
Epoch: 3, Time Spent: 50.72s, Train Accuracy: 19.30%
Epoch: 3, Time Spent: 51.07s, Test Accuracy: 17.10%
Epoch: 4, Time Spent: 76.31s, Train Accuracy: 18.93%
Epoch: 4, Time Spent: 76.72s, Test Accuracy: 16.60%
Epoch: 5, Time Spent: 101.05s, Train Accuracy: 17.82%
Epoch: 5, Time Spent: 101.27s, Test Accuracy: 15.70%
Epoch: 6, Time Spent: 123.98s, Train Accuracy: 16.47%
Epoch: 6, Time Spent: 124.26s, Test Accuracy: 14.60%
Epoch: 7, Time Spent: 147.17s, Train Accuracy: 15.23%
Epoch: 7, Time Spent: 147.45s, Test Accuracy: 13.20%
Epoch: 8, Time Spent: 167.16s, Train Accuracy: 14.38%
Epoch: 8, Time Spent: 167.42s, Test Accuracy: 12.30%
Epoch: 9, Time Spent: 191.05s, Train Accuracy: 13.99%
Epoch: 9, Time Spent: 191.52s, Test Accuracy: 12.00%
Epoch: 10, Time Spent: 211.41s, Train Accurac

In [88]:
# 4 layer with PCA
import numpy as np
import time
from keras.datasets import mnist
from sklearn.decomposition import PCA

TRAIN_NUM = 10000
TEST_NUM = 100
COMPONENT_NUM = 30

def import_data(TRAIN_NUM, TEST_NUM):
    TRAIN_DATA_3D = np.empty((TRAIN_NUM, 28, 28))
    TRAIN_LABEL = np.empty(TRAIN_NUM)
    TEST_DATA_3D = np.empty((TEST_NUM, 28, 28))
    TEST_LABEL = np.empty(TEST_NUM)
    TRAIN_DATA_3D[0: TRAIN_NUM] = X_TRAIN[0: TRAIN_NUM]
    TRAIN_LABEL[0: TRAIN_NUM] = Y_TRAIN[0: TRAIN_NUM]
    TEST_DATA_3D[0: TEST_NUM] = X_TEST[0: TEST_NUM]
    TEST_LABEL[0: TEST_NUM] = Y_TEST[0: TEST_NUM]
    TRAIN_DATA_2D = TRAIN_DATA_3D.reshape(TRAIN_NUM, -1)
    TEST_DATA_2D = TEST_DATA_3D.reshape(TEST_NUM, -1)
    return TRAIN_DATA_2D, TRAIN_LABEL, TEST_DATA_2D, TEST_LABEL

def pca_dimension_reduce(TRAIN_DATA, TEST_DATA, COMPONENT_NUM):
    pca = PCA(n_components = COMPONENT_NUM)
    train_reduce = pca.fit_transform(TRAIN_DATA)
    test_reduce = pca.transform(TEST_DATA)
    return train_reduce, test_reduce

def data_transform(PCA_reduce_list, labelf):
    output = np.zeros((len(PCA_reduce_list), 31))
    for i in range(len(PCA_reduce_list)):
        output[i, 0] = labelf[i]
        output[i, 1:31] = PCA_reduce_list[i]
        farray = output.astype(float)
    return farray

(X_TRAIN, Y_TRAIN), (X_TEST, Y_TEST) = mnist.load_data()
TRAIN_DATA, TRAIN_LABEL, TEST_DATA, TEST_LABEL = import_data(TRAIN_NUM, TEST_NUM)
TRAIN_REDUCE, TEST_REDUCE = pca_dimension_reduce(TRAIN_DATA, TEST_DATA, COMPONENT_NUM)
train_list = data_transform(TRAIN_REDUCE, Y_TRAIN)
test_list = data_transform(TRAIN_REDUCE, Y_TEST)
np.savetxt('train.pca30.csv', train_list, delimiter = ',', fmt='%.0f')
np.savetxt('test.pca30.csv', test_list, delimiter = ',', fmt='%.0f')

train_file = open("train.pca30.csv", 'r')
train_list = train_file.readlines()
train_file.close()
#len(train_list)
test_file = open("test.pca30.csv", 'r')
test_list = test_file.readlines()
test_file.close()

# NN building
class DNN:
  def __init__(self, sizes, epochs, lr):
    self.sizes = sizes
    self.epochs = epochs
    self.lr = lr

    # number of nodes in each layer
    input_layer=self.sizes[0]
    hidden_1=self.sizes[1]
    hidden_2=self.sizes[2]
    output_layer=self.sizes[3]

    self.params = {
        'W1':np.random.randn(hidden_1, input_layer) * np.sqrt(1. / hidden_1),
        'W2':np.random.randn(hidden_2, hidden_1) * np.sqrt(1. / hidden_2),
        'W3':np.random.randn(output_layer, hidden_2) * np.sqrt(1. / output_layer)
    }
  def sigmoid(self, x, derivative=False):
      if derivative:
          return (np.exp(-x))/((np.exp(-x)+1)**2)
      return 1/(1 + np.exp(-x))

  def softmax(self, x, derivative=False):
      # Numerically stable with large exponentials
      exps = np.exp(x - x.max())
      if derivative:
          return exps / np.sum(exps, axis=0) * (1 - exps / np.sum(exps, axis=0))
      return exps / np.sum(exps, axis=0)
  def forward_pass(self, x_train):
      params = self.params

      # input layer activations becomes sample
      params['A0'] = x_train

      # input layer to hidden layer 1
      params['Z1'] = np.dot(params["W1"], params['A0'])
      params['A1'] = self.sigmoid(params['Z1'])

      # hidden layer 1 to hidden layer 2
      params['Z2'] = np.dot(params["W2"], params['A1'])
      params['A2'] = self.sigmoid(params['Z2'])

      # hidden layer 2 to output layer
      params['Z3'] = np.dot(params["W3"], params['A2'])
      params['A3'] = self.softmax(params['Z3'])

      return params['A3']

  def backward_pass(self, y_train, output):
      params = self.params
      change_w = {}

      # Calculate W3 update
      error = 2 * (output - y_train) / output.shape[0] * self.softmax(params['Z3'], derivative=True)
      change_w['W3'] = np.outer(error, params['A2'])

      # Calculate W2 update
      error = np.dot(params['W3'].T, error) * self.sigmoid(params['Z2'], derivative=True)
      change_w['W2'] = np.outer(error, params['A1'])

      # Calculate W1 update
      error = np.dot(params['W2'].T, error) * self.sigmoid(params['Z1'], derivative=True)
      change_w['W1'] = np.outer(error, params['A0'])

      return change_w

  def update_network_parameters(self, changes_to_w):
      for key, value in changes_to_w.items():
          self.params[key] -= self.lr * value

  def compute_accuracy(self, test_data, output_nodes):
      predictions = []
      for x in test_data:
          all_values = x.split(',')
          # scale and shift the inputs
          inputs = (np.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
          # create the target output values (all 0.01, except the desired label which is 0.99)
          targets = np.zeros(output_nodes) + 0.01
          # all_values[0] is the target label for this record
          targets[int(all_values[0])] = 0.99
          output = self.forward_pass(inputs)
          pred = np.argmax(output)
          predictions.append(pred == np.argmax(targets))

      return np.mean(predictions)

  def train(self, train_list, test_list, output_nodes):
      start_time = time.time()
      for iteration in range(self.epochs):
          for x in train_list:
              all_values = x.split(',')
              # scale and shift the inputs
              inputs = (np.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
              # create the target output values (all 0.01, except the desired label which is 0.99)
              targets = np.zeros(output_nodes) + 0.01
              # all_values[0] is the target label for this record
              targets[int(all_values[0])] = 0.99
              output = self.forward_pass(inputs)
              changes_to_w = self.backward_pass(targets, output)
              self.update_network_parameters(changes_to_w)

          accuracy_train = self.compute_accuracy(train_list, output_nodes)
          print('Epoch: {0}, Time Spent: {1:.2f}s, Train Accuracy: {2:.2f}%'.format(
              iteration+1, time.time() - start_time, accuracy_train * 100))

          accuracy_test = self.compute_accuracy(test_list, output_nodes)
          print('Epoch: {0}, Time Spent: {1:.2f}s, Test Accuracy: {2:.2f}%'.format(
              iteration+1, time.time() - start_time, accuracy_test * 100))


# run the NN
dnn = DNN(sizes=[30, 128, 64, 10], epochs=20, lr=0.001)
dnn.train(train_list, test_list, 10)

Epoch: 1, Time Spent: 2.69s, Train Accuracy: 20.98%
Epoch: 1, Time Spent: 3.36s, Test Accuracy: 10.12%
Epoch: 2, Time Spent: 6.79s, Train Accuracy: 23.98%
Epoch: 2, Time Spent: 8.24s, Test Accuracy: 10.32%
Epoch: 3, Time Spent: 11.43s, Train Accuracy: 23.62%
Epoch: 3, Time Spent: 12.16s, Test Accuracy: 10.26%
Epoch: 4, Time Spent: 14.94s, Train Accuracy: 23.56%
Epoch: 4, Time Spent: 15.75s, Test Accuracy: 10.19%
Epoch: 5, Time Spent: 18.21s, Train Accuracy: 23.61%
Epoch: 5, Time Spent: 18.90s, Test Accuracy: 10.39%
Epoch: 6, Time Spent: 21.47s, Train Accuracy: 25.41%
Epoch: 6, Time Spent: 22.23s, Test Accuracy: 10.15%
Epoch: 7, Time Spent: 24.67s, Train Accuracy: 28.52%
Epoch: 7, Time Spent: 25.46s, Test Accuracy: 10.48%
Epoch: 8, Time Spent: 28.03s, Train Accuracy: 31.47%
Epoch: 8, Time Spent: 28.63s, Test Accuracy: 10.54%
Epoch: 9, Time Spent: 30.82s, Train Accuracy: 34.08%
Epoch: 9, Time Spent: 31.45s, Test Accuracy: 10.27%
Epoch: 10, Time Spent: 34.17s, Train Accuracy: 34.97%
Epoch

In [89]:
# 5 layer with PCA
import numpy as np
import time
from keras.datasets import mnist
from sklearn.decomposition import PCA

TRAIN_NUM = 10000
TEST_NUM = 100
COMPONENT_NUM = 30

def import_data(TRAIN_NUM, TEST_NUM):
    TRAIN_DATA_3D = np.empty((TRAIN_NUM, 28, 28))
    TRAIN_LABEL = np.empty(TRAIN_NUM)
    TEST_DATA_3D = np.empty((TEST_NUM, 28, 28))
    TEST_LABEL = np.empty(TEST_NUM)
    TRAIN_DATA_3D[0: TRAIN_NUM] = X_TRAIN[0: TRAIN_NUM]
    TRAIN_LABEL[0: TRAIN_NUM] = Y_TRAIN[0: TRAIN_NUM]
    TEST_DATA_3D[0: TEST_NUM] = X_TEST[0: TEST_NUM]
    TEST_LABEL[0: TEST_NUM] = Y_TEST[0: TEST_NUM]
    TRAIN_DATA_2D = TRAIN_DATA_3D.reshape(TRAIN_NUM, -1)
    TEST_DATA_2D = TEST_DATA_3D.reshape(TEST_NUM, -1)
    return TRAIN_DATA_2D, TRAIN_LABEL, TEST_DATA_2D, TEST_LABEL

def pca_dimension_reduce(TRAIN_DATA, TEST_DATA, COMPONENT_NUM):
    pca = PCA(n_components = COMPONENT_NUM)
    train_reduce = pca.fit_transform(TRAIN_DATA)
    test_reduce = pca.transform(TEST_DATA)
    return train_reduce, test_reduce

def data_transform(PCA_reduce_list, labelf):
    output = np.zeros((len(PCA_reduce_list), 31))
    for i in range(len(PCA_reduce_list)):
        output[i, 0] = labelf[i]
        output[i, 1:31] = PCA_reduce_list[i]
        farray = output.astype(float)
    return farray

(X_TRAIN, Y_TRAIN), (X_TEST, Y_TEST) = mnist.load_data()
TRAIN_DATA, TRAIN_LABEL, TEST_DATA, TEST_LABEL = import_data(TRAIN_NUM, TEST_NUM)
TRAIN_REDUCE, TEST_REDUCE = pca_dimension_reduce(TRAIN_DATA, TEST_DATA, COMPONENT_NUM)
train_list = data_transform(TRAIN_REDUCE, Y_TRAIN)
test_list = data_transform(TRAIN_REDUCE, Y_TEST)
np.savetxt('train.pca30.csv', train_list, delimiter = ',', fmt='%.0f')
np.savetxt('test.pca30.csv', test_list, delimiter = ',', fmt='%.0f')

train_file = open("train.pca30.csv", 'r')
train_list = train_file.readlines()
train_file.close()
#len(train_list)
test_file = open("test.pca30.csv", 'r')
test_list = test_file.readlines()
test_file.close()

# NN building
class DNN:
  def __init__(self, sizes, epochs, lr):
    self.sizes = sizes
    self.epochs = epochs
    self.lr = lr

    # number of nodes in each layer
    input_layer = self.sizes[0]
    hidden_1 = self.sizes[1]
    hidden_2 = self.sizes[2]
    hidden_3 = self.sizes[3]
    output_layer = self.sizes[4]

    self.params = {
        'W1': np.random.randn(hidden_1, input_layer) * np.sqrt(1. / hidden_1),
        'W2': np.random.randn(hidden_2, hidden_1) * np.sqrt(1. / hidden_2),
        'W3': np.random.randn(hidden_3, hidden_2) * np.sqrt(1./ hidden_3),
        'W4': np.random.randn(output_layer, hidden_3) * np.sqrt(1. / output_layer)
    }
  def sigmoid(self, x, derivative=False):
      if derivative:
          return (np.exp(-x))/((np.exp(-x)+1)**2)
      return 1/(1 + np.exp(-x))

  def softmax(self, x, derivative=False):
      # Numerically stable with large exponentials
      exps = np.exp(x - x.max())
      if derivative:
          return exps / np.sum(exps, axis=0) * (1 - exps / np.sum(exps, axis=0))
      return exps / np.sum(exps, axis=0)
  def forward_pass(self, x_train):
      params = self.params

      # input layer activations becomes sample
      params['A0'] = x_train

      # input layer to hidden layer 1
      params['Z1'] = np.dot(params["W1"], params['A0'])
      params['A1'] = self.sigmoid(params['Z1'])

      # hidden layer 1 to hidden layer 2
      params['Z2'] = np.dot(params["W2"], params['A1'])
      params['A2'] = self.sigmoid(params['Z2'])

      # hidden layer 2 to hidden layer 3
      params['Z3'] = np.dot(params["W3"], params['A2'])
      params['A3'] = self.sigmoid(params['Z3'])

      # hidden layer 3 to output layer
      params['Z4'] = np.dot(params["W4"], params['A3'])
      params['A4'] = self.softmax(params['Z4'])

      return params['A4']

  def backward_pass(self, y_train, output):
      params = self.params
      change_w = {}

      # Calculate W4 update
      error = 2 * (output - y_train) / output.shape[0] * self.softmax(params['Z4'], derivative=True)
      change_w['W4'] = np.outer(error, params['A3'])

      # Calculate W3 update
      error = np.dot(params['W4'].T, error) * self.sigmoid(params['Z3'], derivative=True)
      change_w['W3'] = np.outer(error, params['A2'])

      # Calculate W2 update
      error = np.dot(params['W3'].T, error) * self.sigmoid(params['Z2'], derivative=True)
      change_w['W2'] = np.outer(error, params['A1'])

      # Calculate W1 update
      error = np.dot(params['W2'].T, error) * self.sigmoid(params['Z1'], derivative=True)
      change_w['W1'] = np.outer(error, params['A0'])

      return change_w

  def update_network_parameters(self, changes_to_w):
      for key, value in changes_to_w.items():
          self.params[key] -= self.lr * value

  def compute_accuracy(self, test_data, output_nodes):
      predictions = []
      for x in test_data:
          all_values = x.split(',')
          # scale and shift the inputs
          inputs = (np.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
          # create the target output values (all 0.01, except the desired label which is 0.99)
          targets = np.zeros(output_nodes) + 0.01
          # all_values[0] is the target label for this record
          targets[int(all_values[0])] = 0.99
          output = self.forward_pass(inputs)
          pred = np.argmax(output)
          predictions.append(pred == np.argmax(targets))

      return np.mean(predictions)

  def train(self, train_list, test_list, output_nodes):
      start_time = time.time()
      for iteration in range(self.epochs):
          for x in train_list:
              all_values = x.split(',')
              # scale and shift the inputs
              inputs = (np.asfarray(all_values[1:]) / 255.0 * 0.99) + 0.01
              # create the target output values (all 0.01, except the desired label which is 0.99)
              targets = np.zeros(output_nodes) + 0.01
              # all_values[0] is the target label for this record
              targets[int(all_values[0])] = 0.99
              output = self.forward_pass(inputs)
              changes_to_w = self.backward_pass(targets, output)
              self.update_network_parameters(changes_to_w)

          accuracy_train = self.compute_accuracy(train_list, output_nodes)
          print('Epoch: {0}, Time Spent: {1:.2f}s, Train Accuracy: {2:.2f}%'.format(
              iteration+1, time.time() - start_time, accuracy_train * 100))

          accuracy_test = self.compute_accuracy(test_list, output_nodes)
          print('Epoch: {0}, Time Spent: {1:.2f}s, Test Accuracy: {2:.2f}%'.format(
              iteration+1, time.time() - start_time, accuracy_test * 100))


# run the NN
dnn = DNN(sizes=[30, 256, 128, 64, 10], epochs=20, lr=0.001)
dnn.train(train_list, test_list, 10)

Epoch: 1, Time Spent: 5.29s, Train Accuracy: 19.40%
Epoch: 1, Time Spent: 6.26s, Test Accuracy: 10.65%
Epoch: 2, Time Spent: 12.61s, Train Accuracy: 18.85%
Epoch: 2, Time Spent: 17.93s, Test Accuracy: 10.86%
Epoch: 3, Time Spent: 23.03s, Train Accuracy: 18.70%
Epoch: 3, Time Spent: 24.23s, Test Accuracy: 10.94%
Epoch: 4, Time Spent: 29.35s, Train Accuracy: 18.65%
Epoch: 4, Time Spent: 30.70s, Test Accuracy: 10.98%
Epoch: 5, Time Spent: 35.05s, Train Accuracy: 18.57%
Epoch: 5, Time Spent: 36.19s, Test Accuracy: 11.04%
Epoch: 6, Time Spent: 41.00s, Train Accuracy: 18.61%
Epoch: 6, Time Spent: 41.91s, Test Accuracy: 11.07%
Epoch: 7, Time Spent: 47.32s, Train Accuracy: 18.67%
Epoch: 7, Time Spent: 48.33s, Test Accuracy: 11.09%
Epoch: 8, Time Spent: 53.15s, Train Accuracy: 18.84%
Epoch: 8, Time Spent: 54.08s, Test Accuracy: 11.10%
Epoch: 9, Time Spent: 58.91s, Train Accuracy: 19.20%
Epoch: 9, Time Spent: 60.14s, Test Accuracy: 11.07%
Epoch: 10, Time Spent: 65.59s, Train Accuracy: 19.49%
Epo