# Neural Networks

In [1]:
import nltk

# Fetch the NLTK resources this notebook relies on: the Gutenberg corpus,
# the stop-word lists and the "punkt" tokenizer data.
for resource in ("gutenberg", "stopwords"):
    nltk.download(resource)

# Kept as the last bare expression so the cell still displays its return value.
nltk.download("punkt")

[nltk_data] Downloading package gutenberg to
[nltk_data]     C:\Users\zador\AppData\Roaming\nltk_data...
[nltk_data]   Package gutenberg is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\zador\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\zador\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [2]:
import numpy as np


from nltk.corpus import gutenberg, stopwords

import re
import csv

Stop_words = stopwords.words("english")
Sentences = gutenberg.sents("carroll-alice.txt")

# Membership tests against a list are linear scans; a frozenset makes each
# stop-word lookup constant time without changing which tokens are kept.
_stop_word_set = frozenset(Stop_words)

# A token is kept only if it starts with at least two ASCII letters, which
# drops punctuation, numbers and one-letter words.
_two_leading_letters = re.compile(r"^[a-zA-Z]{2}")

# NOTE(review): the stop-word test is an exact, case-sensitive match, so a
# capitalised token is only removed if that exact spelling is in Stop_words.
# Confirm this is intended.
TermsSentences = [
    [
        w
        for w in sentence
        if w not in _stop_word_set and _two_leading_letters.search(w) is not None
    ]
    for sentence in Sentences
]

In [3]:
# Preview the corpus tokens. No file id is passed, so this lists the words of
# the whole Gutenberg corpus (the output below starts with "Emma"), not only
# carroll-alice.txt.
# NOTE(review): pass "carroll-alice.txt" if the intent was to preview Alice.
gutenberg.words()

['[', 'Emma', 'by', 'Jane', 'Austen', '1816', ']', ...]

In [4]:
import os

Filename = "results/Alice-sentences.csv"

# Make sure the output folder exists so the cell also works on a fresh checkout.
os.makedirs(os.path.dirname(Filename), exist_ok=True)

# One CSV row per sentence, one field per remaining term.
# newline="" is what the csv module documentation asks for when writing: it
# stops the text layer from translating the writer's own line terminator.
with open(Filename, "w", newline="") as fout:
    writer = csv.writer(fout, delimiter=",", lineterminator="\n")
    writer.writerows(TermsSentences)

In [5]:
from collections import defaultdict
from itertools import combinations

# Containers filled while reading the sentence file back in.
Transactions_list = []  # one list of item IDs per CSV row
Items_names = {}  # item ID -> term
Items_ids = {}  # term -> item ID

Items = None  # becomes the array of all item IDs once M is known

# Assign IDs in order of first appearance and encode every row as IDs.
with open("results/Alice-sentences.csv", "r") as fin:
    item_id = 0  # next unused ID
    for row in csv.reader(fin, delimiter=","):
        for item in row:
            if item in Items_ids:
                continue
            Items_ids[item] = item_id
            Items_names[item_id] = item
            item_id += 1
        # Every term of the row has an ID by now; repeated terms are kept.
        Transactions_list.append([Items_ids[item] for item in row])

M = len(Items_ids)
N = len(Transactions_list)

Items = np.arange(M)

# Report the sizes as a quick sanity check.
print(f"M={M} items, N={N} transactions")

M=2793 items, N=1703 transactions


In [6]:
# Show the terms behind the first seven item IDs, then the first seven
# encoded transactions, to eyeball that the encoding looks right.
first_item_names = [Items_names[item_no] for item_no in Items[:7]]
print(first_item_names)
print(Transactions_list[:7])

['Alice', 'Adventures', 'Wonderland', 'Lewis', 'Carroll', 'CHAPTER', 'Down']
[[0, 1, 2, 3, 4], [5], [6, 7, 8], [0, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 13, 19, 20, 21, 22, 18, 23, 0, 24, 20, 25], [26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 7, 50, 51, 52, 53], [54, 15, 55, 56, 0, 57, 55, 58, 59, 60, 7, 61, 62, 63], [62, 63]]


In [7]:
# Boolean incidence matrix: row = transaction, column = item ID,
# True where the item occurs in the transaction.
Transactions = np.zeros((N, M), dtype=bool)

for row_no, item_ids in enumerate(Transactions_list):
    # Set all columns of this transaction at once; the explicit integer dtype
    # keeps an empty transaction a valid (empty) index.
    Transactions[row_no, np.asarray(item_ids, dtype=int)] = True

# Print rows 10 and 11 as 0/1 for a quick look.
print(f"{Transactions[10:12].astype(int)}")

[[1 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]]


In [8]:
import os

Filename = "EP_datasets/input_for_weka.csv"

# Make sure the output folder exists before opening the file for writing.
os.makedirs(os.path.dirname(Filename), exist_ok=True)

# Header row holds the item names in ID order; every following row holds one
# transaction with "True" for present items and an empty field otherwise.
# All fields are quoted with single quotes.
# newline="" is what the csv module documentation asks for when writing.
with open(Filename, "w", newline="") as fout:
    writer = csv.writer(
        fout, delimiter=",", quoting=csv.QUOTE_ALL, quotechar="'", lineterminator="\n"
    )
    writer.writerow([Items_names[i] for i in range(M)])
    for transaction_row in Transactions:
        writer.writerow(["True" if present else "" for present in transaction_row])

In [9]:
class NeuralNetMLP(object):
    """Feed-forward classifier with two sigmoid hidden layers and a sigmoid output.

    The network has no bias units: every layer is a matrix product followed by
    a sigmoid. The weight matrices ``w_h1``, ``w_h2`` and ``w_out`` are created
    in ``fit`` and updated there by mini-batch gradient descent.

    Parameters
    ----------
    n_hidden1 : int
        Number of units in the first hidden layer.
    n_hidden2 : int
        Number of units in the second hidden layer.
    epochs : int
        Number of passes over the training data.
    eta : float
        Learning rate.
    minibatch_size : int
        Number of samples per gradient step.
    seed : int or None
        Seed for the random generator used for weight initialisation (and for
        shuffling, when enabled).
    shuffle : bool, default False
        If True, the training samples are visited in a new random order in
        every epoch. The default keeps the fixed original order.
    """

    def __init__(
        self,
        n_hidden1=30,
        n_hidden2=30,
        epochs=100,
        eta=0.001,
        minibatch_size=1,
        seed=None,
        shuffle=False,
    ):
        self.random = np.random.RandomState(seed)  # used to randomize weights
        self.n_hidden1 = n_hidden1  # size of the hidden layer 1
        self.n_hidden2 = n_hidden2  # size of the hidden layer 2
        self.epochs = epochs  # number of iterations
        self.eta = eta  # learning rate
        self.minibatch_size = minibatch_size  # samples per gradient step
        self.shuffle = shuffle  # reshuffle the sample order every epoch

    @staticmethod
    def onehot(y, n_classes):
        """One-hot encode the label vector ``y``.

        Returns a float array of shape ``(len(y), n_classes)`` with a 1.0 in
        column ``y[i]`` of row ``i``. Labels are cast to int and must lie in
        ``0 .. n_classes - 1``.
        """
        encoded = np.zeros((y.shape[0], n_classes))
        encoded[np.arange(y.shape[0]), y.astype(int)] = 1.0
        return encoded

    @staticmethod
    def sigmoid(z):  # Eq 1
        """Logistic function; ``z`` is clipped to [-250, 250] to avoid overflow."""
        return 1.0 / (1.0 + np.exp(-np.clip(z, -250, 250)))

    def _forward(self, X):  # Eq 2
        """Propagate ``X`` through the network.

        Returns the net input and activation of each layer:
        ``(z_h1, a_h1, z_h2, a_h2, z_out, a_out)``.
        """
        # hidden layer 1
        z_h1 = np.dot(X, self.w_h1)
        a_h1 = self.sigmoid(z_h1)

        # hidden layer 2
        z_h2 = np.dot(a_h1, self.w_h2)
        a_h2 = self.sigmoid(z_h2)

        # output
        z_out = np.dot(a_h2, self.w_out)
        a_out = self.sigmoid(z_out)
        return z_h1, a_h1, z_h2, a_h2, z_out, a_out

    @staticmethod
    def compute_cost(y_enc, output):  # Eq 4
        """Summed cross-entropy between one-hot targets and sigmoid outputs.

        The outputs are clipped away from exactly 0 and 1 first: a saturated
        sigmoid would otherwise produce ``log(0)`` and turn the cost into
        inf or NaN.
        """
        eps = 1e-12
        output = np.clip(output, eps, 1.0 - eps)
        term1 = -y_enc * np.log(output)
        term2 = (1.0 - y_enc) * np.log(1.0 - output)
        return np.sum(term1 - term2)

    def predict(self, X):
        """Return the predicted class index for every row of ``X``."""
        z_out = self._forward(X)[4]
        # The sigmoid is monotonic, so the largest net input is also the
        # largest output activation.
        return np.argmax(z_out, axis=1)

    def fit(self, X_train, y_train, X_valid, y_valid):
        """Train the network and report progress after every epoch.

        Parameters
        ----------
        X_train, y_train : arrays
            Training features (one row per sample) and integer class labels.
            The number of output units is the number of distinct labels, so
            labels are expected to be ``0 .. n_classes - 1``.
        X_valid, y_valid : arrays
            Held-out data, used only to report validation accuracy.

        Returns
        -------
        self
        """
        import sys

        n_output = np.unique(y_train).shape[0]  # number of class labels
        n_features = X_train.shape[1]
        n_samples = X_train.shape[0]

        # Draw order (output, hidden 2, hidden 1) is kept fixed so that a
        # given seed always yields the same initial weights.
        self.w_out = self.random.normal(
            loc=0.0, scale=0.1, size=(self.n_hidden2, n_output)
        )
        self.w_h2 = self.random.normal(
            loc=0.0, scale=0.1, size=(self.n_hidden1, self.n_hidden2)
        )
        self.w_h1 = self.random.normal(
            loc=0.0, scale=0.1, size=(n_features, self.n_hidden1)
        )

        y_train_enc = self.onehot(y_train, n_output)  # one-hot encode original y
        for i in range(self.epochs):
            indices = np.arange(n_samples)
            if self.shuffle:
                self.random.shuffle(indices)

            # Step through *all* samples: the last batch may be smaller than
            # minibatch_size, and a batch size larger than the data set still
            # yields one batch instead of silently skipping training.
            for start_idx in range(0, n_samples, self.minibatch_size):
                batch_idx = indices[start_idx : start_idx + self.minibatch_size]
                X_batch = X_train[batch_idx]

                z_h1, a_h1, z_h2, a_h2, z_out, a_out = self._forward(X_batch)

                sigmoid_derivative_h1 = a_h1 * (1.0 - a_h1)  # Eq 3
                sigmoid_derivative_h2 = a_h2 * (1.0 - a_h2)  # Eq 3

                delta_out = a_out - y_train_enc[batch_idx]  # Eq 5

                # Back-propagate the error through both hidden layers.
                delta_h2 = (
                    np.dot(delta_out, self.w_out.T) * sigmoid_derivative_h2
                )  # Eq 6
                delta_h1 = np.dot(delta_h2, self.w_h2.T) * sigmoid_derivative_h1  # Eq 6

                grad_w_out = np.dot(a_h2.T, delta_out)  # Eq 7
                grad_w_h2 = np.dot(a_h1.T, delta_h2)  # Eq 7
                grad_w_h1 = np.dot(X_batch.T, delta_h1)  # Eq 8

                self.w_out -= self.eta * grad_w_out  # Eq 9
                self.w_h2 -= self.eta * grad_w_h2  # Eq 9
                self.w_h1 -= self.eta * grad_w_h1  # Eq 9

            # Evaluation after each epoch during training. One forward pass
            # over the training data serves both the cost and the accuracy.
            z_h1, a_h1, z_h2, a_h2, z_out, a_out = self._forward(X_train)
            cost = self.compute_cost(y_enc=y_train_enc, output=a_out)
            y_train_pred = np.argmax(z_out, axis=1)
            y_valid_pred = self.predict(X_valid)
            train_acc = (np.sum(y_train == y_train_pred)).astype(float) / n_samples
            valid_acc = (np.sum(y_valid == y_valid_pred)).astype(float) / X_valid.shape[
                0
            ]
            # "\r" rewrites the same console line on every epoch.
            sys.stderr.write(
                "\r%d/%d | Cost: %.2f "
                "| Train/Valid Acc.: %.2f%%/%.2f%% "
                % (i + 1, self.epochs, cost, train_acc * 100, valid_acc * 100)
            )
            sys.stderr.flush()
        return self

In [10]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
from numpy.random import ranf
from sklearn import svm


def load_mnist(path, kind="train"):
    """Load an MNIST-style IDX label/image file pair from ``path``.

    Reads ``<kind>-labels-idx1-ubyte`` and ``<kind>-images-idx3-ubyte``.

    Parameters
    ----------
    path : str
        Folder containing the two files.
    kind : str, default "train"
        File-name prefix of the split to load (e.g. "train" or "t10k").

    Returns
    -------
    images : float array of shape (n_images, rows * cols)
        Flattened images with pixel values rescaled from 0..255 to -1..1.
    labels : uint8 array of shape (n_labels,)

    Raises
    ------
    ValueError
        If the number of labels differs from the image count in the header,
        or the pixel data does not match the header dimensions.
    """
    from numpy import fromfile, uint8
    import os
    import struct

    labels_path = os.path.join(path, "%s-labels-idx1-ubyte" % kind)
    images_path = os.path.join(path, "%s-images-idx3-ubyte" % kind)

    with open(labels_path, "rb") as lbpath:
        # 8-byte header: two big-endian unsigned ints, then one byte per label.
        _magic, _n_labels = struct.unpack(">II", lbpath.read(8))
        labels = fromfile(lbpath, dtype=uint8)

    with open(images_path, "rb") as imgpath:
        # 16-byte header: four big-endian unsigned ints, then the pixel bytes.
        _magic, num, rows, cols = struct.unpack(">IIII", imgpath.read(16))
        # Use the dimensions stored in the file rather than assuming 28x28.
        images = fromfile(imgpath, dtype=uint8).reshape(num, rows * cols)

    if len(labels) != num:
        raise ValueError(
            "label/image count mismatch: %d labels, %d images" % (len(labels), num)
        )

    # Rescale pixel values from 0..255 to -1..1.
    images = ((images / 255.0) - 0.5) * 2
    return images, labels


# Both splits are read from the same folder; only the file-name prefix differs.
mnist_dir = "./EP_datasets/mnist/"

X_train, y_train = load_mnist(mnist_dir, kind="train")
n_rows, n_cols = X_train.shape
print(f"Rows= {n_rows}, columns= {n_cols}")

X_test, y_test = load_mnist(mnist_dir, kind="t10k")
n_rows, n_cols = X_test.shape
print(f"Rows= {n_rows}, columns= {n_cols}")

Rows= 60000, columns= 784
Rows= 10000, columns= 784


In [11]:
# Build the two-hidden-layer network with a fixed seed.
nn = NeuralNetMLP(
    n_hidden1=20,
    n_hidden2=20,
    epochs=50,
    eta=0.0005,
    minibatch_size=100,
    seed=1,
)

# The first 55000 training rows are used for fitting; the remaining rows of
# the training set are held out for validation.
n_fit = 55000
X_fit, y_fit = X_train[:n_fit], y_train[:n_fit]
X_holdout, y_holdout = X_train[n_fit:], y_train[n_fit:]

# The trailing ";" keeps the returned object from being echoed by the cell.
nn.fit(X_train=X_fit, y_train=y_fit, X_valid=X_holdout, y_valid=y_holdout);

50/50 | Cost: 17517.23 | Train/Valid Acc.: 95.43%/95.58% 

In [12]:
from sklearn.metrics import confusion_matrix


def get_acc(_y_test, _y_pred):
    """Return the fraction of positions where ``_y_pred`` equals ``_y_test``."""
    matches = _y_test == _y_pred
    n_correct = np.sum(matches)
    return n_correct.astype(float) / _y_test.shape[0]

In [13]:
# Classify the held-out test images, then report the overall accuracy and
# the confusion matrix.
y_pred = nn.predict(X_test)

accuracy_pct = get_acc(y_test, y_pred) * 100
print(f"Accuracy= {accuracy_pct:.2f}%")

test_confusion = confusion_matrix(y_test, y_pred)
print(test_confusion)

Accuracy= 94.51%
[[ 962    0    1    1    1    6    5    1    3    0]
 [   0 1109    3    2    0    2    5    4   10    0]
 [  18    2  958    9    7    3    9   10   16    0]
 [   1    1   20  931    1   24    1    9   17    5]
 [   1    0    5    0  937    1    6    3   10   19]
 [  10    1    4   20    1  820    8    1   22    5]
 [  10    2    3    0    7   16  912    0    8    0]
 [   3    9   15   12    6    2    0  963    3   15]
 [   3    2    0   12    4   15    4    4  927    3]
 [   6    4    0   10   23    8    1   10   15  932]]


On this problem, the network with two hidden layers performs as well as the network with one hidden layer, but it gets there after only 50 epochs instead of the 300 used in the one-hidden-layer example. In other words, the two-hidden-layer network converges in fewer epochs and can therefore be trained faster than the one-hidden-layer network.