In [833]:
# Imports
import math
import random
import numpy as np

In [834]:
# Building the training, validation, and testing sets.

In [835]:
# The encryption key is how the data set is constructed.
originalKey = "abcdefghijklmnopqrstuvwxyz"
encryptionKey = "zyxwvutsrqponmlkjihgfedcba"

# Parameters for the sets.
datasetSize = 50
trainDistribution = 0.8
validationDistribution = 0.15

minDataSize = 10
maxDataSize = 30

In [836]:
# Building the actual set.
X_complete = []
Y_complete = []

for i in range(datasetSize):
    datasize = random.choice(range(minDataSize, maxDataSize + 1))
    X_value = ""
    Y_value = ""

    for j in range(datasize):
        X_value += random.choice(originalKey)
        Y_value += encryptionKey[originalKey.index(X_value[-1])]

    X_complete.append(X_value)
    Y_complete.append(Y_value)

In [837]:
# Splitting the sets
X_train = X_complete[:math.floor(datasetSize * trainDistribution)]
Y_train = Y_complete[:math.floor(datasetSize * trainDistribution)]

X_validation = X_complete[math.floor(datasetSize * trainDistribution):math.floor(datasetSize * (validationDistribution + trainDistribution))]
Y_validation = Y_complete[math.floor(datasetSize * trainDistribution):math.floor(datasetSize * (validationDistribution + trainDistribution))]

X_test = X_complete[math.floor(datasetSize * (validationDistribution + trainDistribution)):]
Y_test = Y_complete[math.floor(datasetSize * (validationDistribution + trainDistribution)):]

In [838]:
# Creating the classes for the MLP.

In [839]:
# The actual input for the MLP will be a 1 x 26 matrix (a 26-dimension vector transposed). Each column is a different letter.

# The first hidden layer will use a weight matrix of 26 x A, resulting in a 1 x A matrix. Each column is a different hidden node. We'll then add a 
# 1 x A bias matrix onto the layer. A is a pre-defined variable, and is the number of hidden nodes per layer.

# Every hidden layer from then on will use a A x A matrix, resulting in a 1 x A matrix. Between each layer, we'll use a tanh function as our
# non-linear function. At the end, we'll use a SoftMax function

# The output layer will use a weight matrix of 32 x 26, resulting in a 1 x 26 matrix. By transposing this, we get a 26 x 1 matrix, and thus we can get a
# valud output.

In [1234]:
class MatrixValue:
    def __init__(self, matrix, previous = [], name = ""):
        self.matrix = matrix
        self.previous = previous
        self.gradient = np.zeros(matrix.shape)
        self.name = name

        self.backward = lambda: None
        
    def __repr__(self):
        if self.name == "":
            return "Value: " + str(self.matrix) + "\n\nGradient: " + str(self.gradient)
        else:
            return self.name + " -\nValue: " + str(self.matrix) + "\n\nGradient: " + str(self.gradient)

    def __str__(self):
        if self.name == "":
            return "Value: " + str(self.matrix) + "\n\nGradient: " + str(self.gradient)
        else:
            return self.name + " -\nValue: " + str(self.matrix) + "\n\nGradient: " + str(self.gradient)

    def __neg__(self):
        self.out = MatrixValue(np.negative(self.matrix),
                               previous = [self],
                               name = "-" + self.name)

        def backward():
            self.gradient += np.negative(self.out.gradient)

        self.out.backward = backward
        return self.out

    def __add__(self, other):
        if isinstance(other, MatrixValue):
            self.out = MatrixValue(self.matrix + other.matrix,
                                   previous = [self, other],
                                   name = self.name + " + " + other.name)

            def backward():
                self.gradient += self.out.gradient
                other.gradient += self.out.gradient

            self.out.backward = backward
            return self.out

        else:
            assert isinstance(other, (int, float))
            self.out = MatrixValue(self.matrix + (np.ones(self.matrix.shape) * other),
                                   previous = [self],
                                   name = self.name + " + " + str(other))

            def backward():
                self.gradient += self.out.gradient

            self.out.backward = backward
            return self.out

    def __sub__(self, other):
        assert isinstance(other, (MatrixValue, int, float))

        result = self + (-other)
        result.name = self.name + " - " + (other.name if isinstance(other, MatrixValue) else str(other))
        return result

    def __mul__(self, other):
        if isinstance(other, MatrixValue):
            self.out = MatrixValue(np.matmul(self.matrix, other.matrix),
                                   previous = [self, other],
                                   name = self.name + " * " + other.name)

            def backward():
                self.gradient += np.matmul(self.out.gradient, np.transpose(other.matrix))
                other.gradient += np.matmul(np.transpose(self.matrix), self.out.gradient)

            self.out.backward = backward
            return self.out

        else:
            assert isinstance(other, (int, float))
            self.out = MatrixValue(self.matrix * other,
                                   previous = [self],
                                   name = self.name + " * " + str(other))

            def backward():
                self.gradient += self.out.gradient * other

            self.out.backward = backward
            return self.out

    def __truediv__(self, other):
        if isinstance(other, MatrixValue):
            self.out = MatrixValue(np.divide(self.matrix, other.matrix),
                                   previous = [self, other],
                                   name = self.name + " / " + other.name)

            def backward():
                self.gradient += np.divide(self.out.gradient, other.matrix)
                other.gradient += np.divide(self.out.gradient, self.matrix)

            self.out.backward = backward
            return self.out

        else:
            assert isinstance(other, (int, float))
            
            result = self * (other**-1)
            result.name = self.name + " / " + str(other)
            return result

    def __pow__(self, other):
        assert isinstance(other, (int, float))

        self.out = MatrixValue(np.power(self.matrix, np.ones(self.matrix.shape) * other),
                               previous = [self],
                               name = self.name + "^" + str(other))

        def backward():
            result = 3 * np.power(self.matrix, np.ones(self.matrix.shape) * (other - 1))
            self.gradient += np.multiply(self.out.gradient, result)

        self.out.backward = backward
        return self.out

    def exp(self):
        self.out = MatrixValue(np.exp(self.matrix),
                               previous = [self],
                               name = "e^(" + self.name + ")")

        def backward():
            self.gradient += np.multiply(self.out.gradient, np.exp(self.matrix))

        self.out.backward = backward
        return self.out

    def tanh(self):
        self.out = (((self * 2).exp() - 1) / ((self * 2).exp() + 1))
        self.out.name = "tanh(" + self.name + ")"
        self.out.previous = [self]

        def backward():
            t = MatrixValue(self.out.matrix)
            final = ((t**2 - 1) * -1).matrix

            self.gradient += np.multiply(self.out.gradient, final)

        self.out.backward = backward
        return self.out

    def relu(self):
        self.out = MatrixValue((self.matrix + np.abs(self.matrix)) / 2,
                               previous = [self],
                               name = "relu(" + self.name + ")")

        def backward():
            initialResult = self.out.matrix

            for i, row in enumerate(initialResult):
                for j, value in enumerate(row):
                    initialResult[i][j] = (0 if value == 0 else 1)

            self.gradient += np.multiply(initialResult, self.out.gradient)

        self.out.backward = backward

        return self.out

    def softmax(self):
        total = np.exp(self.matrix).sum()

        self.out = (self.exp() / total)
        self.out.name = "SoftMax(" + self.name + ")"
        self.out.previous = [self]

        def backward():
            initialResult = (np.exp(self.matrix) * (total**-1)) - (np.exp(self.matrix * 2) * (total**-2))
            self.gradient += np.multiply(self.out.gradient, initialResult)

        self.out.backward = backward
        return self.out

    def abs(self):
        self.out = MatrixValue(np.abs(self.matrix),
                               previous = [self],
                               name = "abs(" + self.name + ")")

        def backward():
            initialResult = np.divide(self.matrix, np.abs(self.matrix))
            self.gradient += np.multiply(self.out.gradient, initialResult)

        self.out.backward = backward
        return self.out
    
    def sum(self):
        self.out = MatrixValue(np.ones((1, 1)) * self.matrix.sum(),
                               previous = [self],
                               name = "sum(" + self.name + ")")

        def backward():
            self.gradient += np.ones(self.matrix.shape) * self.out.gradient.sum()

        self.out.backward = backward
        return self.out

    def buildGradients(self):
        self.gradient = np.ones(self.matrix.shape)
        
        topologicalOrder = []
        visited = []

        def buildTopology(node):
            if not node in visited:
                visited.append(node)

                for child in node.previous:
                    buildTopology(child)

                topologicalOrder.append(node)

        buildTopology(self)
        topologicalOrder.reverse()

        """
        for node in topologicalOrder:
            if node != self:
                node.gradient = np.zeros(node.matrix.shape)
        """

        for node in topologicalOrder:
            node.backward()

In [1235]:
# The encoder and decoder

In [1338]:
def encoder(inp):
    result = np.zeros((1, 26))
    characterList = "abcdefghijklmnopqrstuvwxyz"
    
    result[0][characterList.index(inp)] = 1

    return MatrixValue(result)

In [1339]:
def decoder(givenInp):
    inp = givenInp.matrix
    maxIndex = 0
    maximumValue = -math.inf

    for i in range(26):
        if maximumValue < inp[0][i]:
            maximumValue = inp[0][i]
            maxIndex = i

    return "abcdefghijklmnopqrstuvwxyz"[maxIndex]

In [1340]:
# Auxiliary Functions

In [1341]:
def generateParameter(rows, columns):
    return (np.random.rand(rows, columns) - (np.ones((rows, columns)) * 0.5)) * 2

In [1342]:
# Creating the Layers and MLP Class

In [1343]:
class Layer:
    def __init__(self, nodeCount, prevNodeCount, useBias = True, label = ""):
        self.useBias = True

        self.weights = MatrixValue(generateParameter(prevNodeCount, nodeCount))
        self.bias = MatrixValue(generateParameter(1, nodeCount))
        self.bias = MatrixValue(np.zeros((1, nodeCount)))

        if label != "":
            self.weights.name = label + " (Weights)"
            self.bias.name = label + " (Bias)"

    def calculate(self, inp):
        weightMul = inp * self.weights

        if self.useBias:
            return weightMul + self.bias
        else:
            return weightMul

In [1344]:
class MLP:
    def __init__(self, hiddenLayerCount, nodesPerLayer, evolutionRate = 0.01):
        self.layers = []
        self.parameters = []

        self.evolutionRate = evolutionRate

        for i in range(hiddenLayerCount + 1):
            prevNodeCount = nodesPerLayer if i != 0 else 26
            thisLayerCount = nodesPerLayer if i < (hiddenLayerCount) else 26

            layerLabel = ("Hidden Layer " + str(i + 1)) if i < (hiddenLayerCount) else "Output Layer"

            newLayer = Layer(thisLayerCount, prevNodeCount, (i < hiddenLayerCount), label = layerLabel)

            self.layers.append(newLayer)

            self.parameters.append(newLayer.weights)
            self.parameters.append(newLayer.bias)

    def calculate(self, inp):
        cInput = inp
        for i, layer in enumerate(self.layers):
            cInput = layer.calculate(cInput)

            if i < len(self.layers) - 1:
                cInput = cInput.tanh()
        
        output = (cInput).softmax()

        return output

    def backPropogate(self, loss):
        for parameter in self.parameters:
            parameter.gradient = np.zeros(parameter.matrix.shape)
        
        loss.buildGradients()

        for parameter in self.parameters:
            parameter.matrix += -1 * (self.evolutionRate * parameter.gradient)

In [1345]:
# Back Propogation

In [1363]:
# Initial Parameters
epochCount = 500000
updateInterval = 1
loss = []

In [1364]:
nodesPerHiddenLayer = 30
hiddenLayerCount = 0
agent = MLP(hiddenLayerCount, nodesPerHiddenLayer)

agent.evolutionRate = 0.01

In [1365]:
# Training
for epoch in range(epochCount):
    loss.append(0)
    for j in range(len(X_train)):
        X = X_train[j]
        Y = Y_train[j]
        
        for k, character in enumerate(X):
            agentResult = agent.calculate(encoder(character))
            expectedResult = encoder(Y[k])

            lossMatrix = expectedResult - agentResult
            trueLoss = lossMatrix.abs().sum()

            loss[-1] += trueLoss.matrix[0][0] / len(X)

            agent.backPropogate(trueLoss)

    loss[-1] /= (j + 1)
    
    if (epoch + 1) % updateInterval:
        print("Epoch #" + str(epoch + 1) + ": Loss - " + loss[-1])

KeyboardInterrupt: 

In [1369]:
# Validation
randomIndex = random.choice(range(len(X_validation)))

X_random = X_validation[randomIndex]
Y_random = Y_validation[randomIndex]

agentResult = ""

for character in X_random:
    agentResult += decoder(agent.calculate(encoder(character)))

print("Input value: " + X_random)
print("\nExpected result: " + Y_random)
print("Obtained result: " + agentResult)

Input value: krkoryhjrwrqqiyjiyurv

Expected result: piplibsqidijjrbqrbfie
Obtained result: piplibsqidijjrbqrbfie


In [1370]:
# Testing
totalLoss = 0

for i in range(len(X_test)):
    X = X_test[i]
    Y = Y_test[i]

    dataLoss = 0

    for j, character in enumerate(X):
        agentResult = agent.calculate(encoder(character))
        expectedResult = encoder(Y[j])

        lossMatrix = expectedResult - agentResult
        trueLoss = lossMatrix.abs().sum()

        dataLoss += trueLoss.matrix[0][0] / len(X)

    totalLoss += dataLoss / len(X_test)

print("Average Loss: " + str(totalLoss))

Average Loss: 5.5969460333487575e-05
