In [1]:
import random
import math
import time
import numpy as np
import pandas as pd

In [2]:
# transfer functions
def sigmoid(x):
    """Logistic function 1 / (1 + e^-x), applied element-wise via NumPy."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

# derivative of sigmoid
def dsigmoid(y):
    """Return y * (1 - y): the sigmoid slope written in terms of the sigmoid output y."""
    complement = 1.0 - y
    return y * complement

# using softmax as output layer is recommended for classification where outputs are mutually exclusive
def softmax(w):
    """Row-wise softmax of a 2-D array of scores.

    :param w: array-like of shape (rows, classes); each row is one set of scores
    :return: array of the same shape whose rows are non-negative and sum to 1
    """
    scores = np.asarray(w)
    # Softmax is invariant to adding a constant to a row, so shift every row
    # by its maximum: the largest exponent becomes exp(0) and np.exp can no
    # longer overflow to inf (which would produce inf/inf = nan rows).
    shifted = scores - np.max(scores, axis=1, keepdims=True)
    exp_scores = np.exp(shifted)
    # axis is 1 to sum across rows
    return exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

# using tanh over logistic sigmoid for the hidden layer is recommended   
def tanh(x):
    """Element-wise hyperbolic tangent; a thin wrapper around np.tanh."""
    activated = np.tanh(x)
    return activated
    
# derivative of tanh, expressed in terms of the tanh output y
def dtanh(y):
    """Return 1 - y**2: the tanh slope written in terms of the tanh output y."""
    squared = y * y
    return 1 - squared

# Generate some labels for our data - these serve as the ground-truth labels
def generate_labels(samples, classes):
    """Draw `samples` random class labels as an integer column vector.

    A pool holding each label 0..classes-1 exactly `samples` times is shuffled
    with NumPy's global random state and its first `samples` entries are
    returned.

    :param samples: number of labels to return
    :param classes: number of distinct labels (0 .. classes-1)
    :return: int array of shape (samples, 1)
    :raises ValueError: if labels are requested but there is no class to draw from
    """
    if samples > 0 and classes < 1:
        raise ValueError("classes must be at least 1 to generate labels")

    # np.repeat builds the pool in one allocation; growing an array with
    # np.append inside a loop re-copies it on every iteration.
    pool = np.repeat(np.arange(classes), samples)

    np.random.shuffle(pool)
    # reshape (unlike np.vstack on an empty slice) also works for samples == 0
    return pool[:samples].reshape(-1, 1).astype(int)

# Column normalize with negatives
def normalize_with_negatives(x):
    """Min-max scale each column of `x` into [0, 1].

    Subtracting the column minimum first means negative inputs are handled
    and the smallest value of every column maps to 0.

    :param x: array-like; statistics are taken along axis 0
    :return: float array of the same shape; a constant column maps to all zeros
    """
    x = np.array(x)
    col_min = x.min(0)
    # np.ptp is the "peak-to-peak" range (max - min) along the axis. The
    # function form is used because the ndarray.ptp method no longer exists
    # in NumPy 2.0.
    col_range = np.ptp(x, axis=0)
    # A constant column has range 0; divide by 1 instead so it becomes 0
    # rather than 0/0 = nan.
    safe_range = np.where(col_range == 0, 1, col_range)
    x_normed = (x - col_min) / safe_range
    return x_normed

def normalize(x):
    """Scale each column of `x` by that column's maximum value.

    :param x: array-like; the maximum is taken along axis 0
    :return: array of the same shape with every column divided by its maximum;
             a column whose maximum is 0 is returned unscaled
    """
    x = np.array(x)
    col_max = x.max(axis=0)
    # Dividing by a zero maximum would give nan (0/0) or inf; use 1 for those
    # columns so their values pass through unchanged.
    safe_max = np.where(col_max == 0, 1, col_max)
    x_normed = x / safe_max
    return x_normed

In [3]:
class MLP_NeuralNetwork(object):
    """Fully connected feed-forward classifier trained with gradient descent.

    Every layer except the last applies tanh; the last layer's pre-activations
    are passed through the module-level ``softmax`` helper to obtain class
    probabilities. The training data is stored on the instance and consumed
    by ``build_model`` in consecutive batches.

    Relies on the module-level names ``np``, ``math`` and ``softmax``.
    """

    # Class constructor
    def __init__(self,
                 X,
                 y,
                 input_dim,
                 hidden_dims,
                 output_dim,
                 iterations = 50,
                 learning_rate = 0.001,
                 regularization = True,
                 regularization_rate = 0.01):

        """
        Store the training data and hyper-parameters and draw initial weights.

        Side effect: reseeds NumPy's global random state (first with 0, then
        once per layer), so two networks with the same layout start from
        identical weights.

        :param X: training samples, one row per sample
        :param y: training labels (indexed as a 2-D column by ``build_model``)

        :param input_dim: number of input neurons
        :param hidden_dims: sequence with the number of neurons of each hidden
                            layer; may be empty (then the network is a single
                            softmax layer)
        :param output_dim: number of output neurons

        :param iterations: stored on the instance; not read by any method of
                           this class
        :param learning_rate: initial learning rate (``build_model`` halves it
                              for every batch)
        :param regularization: whether to add the squared-weight penalty to
                               the loss and its gradient
        :param regularization_rate: strength (lambda) of that penalty
        """

        # initialize parameters
        self.regularization = regularization
        self.iterations = iterations
        self.learning_rate = learning_rate
        self.reg_lambda = regularization_rate

        self.input_dim = input_dim
        self.output_dim = output_dim
        self.samples = X.shape[0]

        self.hidden_dims = hidden_dims

        # training input and labels
        self.X = X
        self.y = y

        # n layers of neurons need n - 1 sets of weights and biases, which is
        # the number of hidden layers + 1
        layer_count = len(self.hidden_dims) + 1

        self.weights = [[] for _ in range(layer_count)]
        self.biases = [[] for _ in range(layer_count)]

        # preactivation layer arrays
        self.z = [[] for _ in range(layer_count)]
        # activation layer arrays (the last slot is never filled: the output
        # activation is returned by feedforward instead of being stored)
        self.a = [[] for _ in range(layer_count)]
        # deltas arrays
        self.deltas = [[] for _ in range(layer_count)]
        self.deltaWeights = [[] for _ in range(layer_count)]
        self.deltaBiases = [[] for _ in range(layer_count)]

        # Initialize the parameters to random values. We need to learn these.
        np.random.seed(0)

        # Neuron counts from input to output; weights[i] maps layer i to
        # layer i + 1, so one expression covers first, middle and last layers.
        layer_sizes = [self.input_dim] + list(self.hidden_dims) + [self.output_dim]
        for i in range(layer_count):
            np.random.seed(np.random.randint(99))
            # unscaled standard-normal weights, zero biases
            self.weights[i] = np.random.randn(layer_sizes[i], layer_sizes[i + 1])
            self.biases[i] = np.zeros((1, layer_sizes[i + 1]))

        # Placeholder attribute; the trained parameters are returned by
        # build_model / return_model rather than stored here.
        self.model = {}

    @staticmethod
    def _flat_int_labels(labels):
        """Flatten labels (column vector or 1-D) into a 1-D integer index array."""
        return np.asarray(labels).ravel().astype(int)

    # print some of the network matrices for observation and debugging.
    def print_network(self, weights = False, biases = False):
        """Print each layer's weights and/or biases together with their shapes."""

        if weights:
            for i, layer_weights in enumerate(self.weights):
                # print all the weights and their shapes to see if they match up
                print("weights", i, "of shape", layer_weights.shape)
                print(layer_weights)
        if biases:
            for i, layer_biases in enumerate(self.biases):
                # print all the biases and their shapes to see if they match up
                print("biases", i, "of shape", layer_biases.shape)
                print(layer_biases)

    # calculate the accuracy of your training data
    # :param predictions: your actual predictions (1d array)
    # :param labels: real labels (1d array)
    def calculate_accuracy(self, predictions, labels):
        """Return the fraction of predictions equal to the matching label.

        :param predictions: predicted class per sample
        :param labels: true class per sample (1-D or a single column); entries
                       beyond the number of predictions are ignored
        :return: accuracy in [0, 1]; 0.0 when there are no predictions
        :raises IndexError: if there are fewer labels than predictions
        """
        predicted = np.asarray(predictions).ravel()
        actual = np.asarray(labels).ravel()

        if predicted.size == 0:
            # nothing to score; avoid dividing by zero
            return 0.0
        if actual.size < predicted.size:
            raise IndexError("fewer labels than predictions")

        return float(np.mean(predicted == actual[:predicted.size]))

    # feed forward function since it is being called a lot
    def feedforward(self, X):
        """Run a forward pass and return the softmax class probabilities.

        Side effect: overwrites ``self.z`` for every layer and ``self.a`` for
        every layer except the last with the values computed for this ``X``.

        :param X: input samples, one row per sample
        :return: probabilities with one row per sample, one column per class
        """
        probs = []
        last = len(self.hidden_dims)

        layer_input = X
        for i in range(last + 1):
            self.z[i] = layer_input.dot(self.weights[i]) + self.biases[i]
            if i == last:
                # calculate the probabilities of the output layer
                probs = softmax(self.z[i])
            else:
                self.a[i] = np.tanh(self.z[i])
                layer_input = self.a[i]

        return probs

    # Helpers function to evaluate the total loss on the dataset
    def calculate_loss(self, model):
        """Mean cross-entropy loss over the stored training set.

        Runs ``feedforward`` on ``self.X`` and therefore overwrites the cached
        ``self.z`` / ``self.a`` values.

        :param model: unused; the instance's current weights are evaluated
        :return: (summed negative log-probability of the true classes, plus
                 lambda/2 * sum of squared weights when regularization is on)
                 divided by the number of training samples
        """
        # feed forward to get our predictions
        probs = self.feedforward(self.X)

        true_classes = self._flat_int_labels(self.y)
        correct_logprobs = -np.log(probs[np.arange(self.samples), true_classes])
        data_loss = np.sum(correct_logprobs)

        # Add regularization term to loss (optional)
        if self.regularization:
            squared_weight_total = 0
            for layer_weights in self.weights:
                squared_weight_total += np.sum(np.square(layer_weights))

            data_loss += self.reg_lambda/2 * squared_weight_total

        return 1./self.samples*data_loss

    # Helper function to predict an output class
    def predict(self, X):
        """Return, for each row of ``X``, the index of the most probable class."""
        # feed forward to get our predictions
        probs = self.feedforward(X)

        return np.argmax(probs, axis=1)

    # return the model with the weights and biases
    def return_model(self):
        """Return a dict referencing (not copying) the weight and bias lists."""
        return { 'weights': self.weights, 'biases': self.biases }

    def _backpropagate(self, batch_X, batch_y, probs):
        """Fill deltas, deltaWeights and deltaBiases for the latest forward pass.

        Must follow ``feedforward(batch_X)`` so that ``self.a`` holds the
        activations for this batch. ``probs`` is modified in place.
        """
        last = len(self.hidden_dims)
        true_classes = self._flat_int_labels(batch_y)

        # let's go in the opposite direction this time ;)
        for i in reversed(range(last + 1)):
            if i == last:
                # output error: probabilities minus the one-hot true class
                delta = probs
                delta[np.arange(len(batch_y)), true_classes] -= 1
            else:
                # push the next layer's error back through its weights and
                # scale by the tanh derivative 1 - a^2
                delta = self.deltas[i+1].dot(self.weights[i+1].T) * (1 - np.power(self.a[i], 2))

            # what was fed into layer i: raw input for the first layer,
            # otherwise the previous layer's activation
            layer_input = batch_X if i == 0 else self.a[i-1]

            self.deltas[i] = delta
            self.deltaWeights[i] = (layer_input.T).dot(delta)
            self.deltaBiases[i] = np.sum(delta, axis=0, keepdims=True)

    # This function learns parameters for the neural network and returns the model.
    def build_model(self,
                    num_passes=20000,
                    batch_size=100,
                    print_loss=True,
                    print_loss_iteration=1000,
                    print_delta_shapes=False,
                    print_deltas=False,
                    regularization=True):
        """Train on the stored data and return the learned parameters.

        The training set is split into consecutive batches of ``batch_size``
        rows and ``num_passes`` gradient-descent steps are run on each batch
        before moving on to the next one.

        :param num_passes: gradient steps per batch
        :param batch_size: rows per batch (the last batch may be smaller)
        :param print_loss: print the full-dataset loss during training
        :param print_loss_iteration: print the loss every this many steps
        :param print_delta_shapes: on the second step of each batch, print the
                                   per-layer shapes of the gradient arrays
        :param print_deltas: on the second step of each batch, print the
                             gradient arrays themselves
        :param regularization: unused, kept for backward compatibility; the
                               ``regularization`` flag given to the constructor
                               decides whether the penalty is applied
        :return: dict with keys 'weights' and 'biases' referencing the
                 instance's parameter lists
        """
        # Built up front so a value is returned even when no gradient step
        # runs (num_passes == 0 or an empty training set). It references the
        # parameter lists, so it always reflects the latest in-place updates.
        model = self.return_model()

        # loop through the batches
        for k in range(math.ceil(self.samples/batch_size)):
            print("batch",k)
            start = k * batch_size
            stop = start + batch_size
            batch_X = self.X[start:stop, :]
            batch_y = self.y[start:stop, :]

            # NOTE(review): the rate is halved for every batch (including the
            # first) and never restored, so it shrinks geometrically with the
            # number of batches and keeps shrinking across repeated calls.
            # Confirm this decay schedule is intended.
            self.learning_rate = self.learning_rate/2

            # Gradient descent. For each batch...
            for j in range(0, num_passes):

                # print deltas and shapes if required (j == 1 is the first
                # step at which gradients of this batch exist)
                if j == 1:
                    if print_delta_shapes:
                        # Layers have different shapes, so report one shape
                        # per layer instead of stacking them into one array.
                        print("deltas shape",[np.shape(d) for d in self.deltas])
                        print("delta weights shape",[np.shape(d) for d in self.deltaWeights])
                        print("delta biases shape",[np.shape(d) for d in self.deltaBiases])
                    if print_deltas:
                        print("deltas",self.deltas)
                        print("delta weights",self.deltaWeights)
                        print("delta biases",self.deltaBiases)

                # feed forward to get our predictions
                probs = self.feedforward(batch_X)

                # BACKPROPAGATION
                self._backpropagate(batch_X, batch_y, probs)

                if self.regularization:
                    # gradient of lambda/2 * sum(w^2) is lambda * w
                    for i in range(len(self.deltaWeights)):
                        self.deltaWeights[i] += self.reg_lambda * self.weights[i]

                for i in range(len(self.weights)):
                    self.weights[i] += -self.learning_rate * self.deltaWeights[i]
                    self.biases[i] += -self.learning_rate * self.deltaBiases[i]

                # Optionally print the loss.
                # This is expensive because it uses the whole dataset, so we don't want to do it too often.
                if print_loss and j % print_loss_iteration == 0:
                    print ("Loss after iteration %i: %f" %(j, self.calculate_loss(model)))

        return model

In [4]:
# Scratch check: a comprehension yields independent empty lists, one per slot.
listy = [list() for _ in range(3 + 2)]
listy

[[], [], [], [], []]

In [5]:
# Scratch check: count down from the last index (4) to 0, the order in which
# backpropagation visits the layers.
for i in range(3 + 2 - 1, -1, -1):
    print(i)

4
3
2
1
0


In [6]:
# Load the CSV; the last column is used as the label and every other column
# as a feature.
occupied_data = pd.read_csv("./data/occupied.csv")
formatted_data = np.array(occupied_data)

# Seed before shuffling so the train/test split is the same on every run.
np.random.seed(0)
np.random.shuffle(formatted_data)
y = np.array(formatted_data[:, [-1]])
# NOTE(review): the features are normalised before the split, so the column
# maxima of the test rows influence the training features. Confirm that this
# leakage is acceptable for the experiment.
X = normalize(formatted_data[:, :-1])

# fraction of rows that goes into the training set
test_train_split = 0.7
# X and y have the same number of rows, so one split index serves both
split_index = int(np.rint(y.shape[0]*test_train_split))
y_train = y[:split_index]
X_train = X[:split_index]
y_test = y[split_index:]
X_test = X[split_index:]

# show a quick peek of what the data looks like
print(X[:5])

print(np.around(X, decimals=3)[:5])
print("X train",X_train.shape)
print("y train",y_train.shape)
print("X test",X_test.shape)
print("y test",y_test.shape)

FileNotFoundError: File b'./data/occupied.csv' does not exist

In [None]:
# Build the network on the training split: three hidden layers (6, 20 and 10
# neurons) and one output neuron per distinct label; the weight penalty is off.
nn = MLP_NeuralNetwork(
    X=X_train,
    y=y_train,
    input_dim=X_train.shape[1],
    hidden_dims=[6, 20, 10],
    output_dim=len(np.unique(y)),
    learning_rate=0.01,
    regularization=False,
    regularization_rate=0.1,
)

In [None]:
# Inspect the freshly initialised weight matrices and their shapes.
nn.print_network(weights=True, biases=False)

In [None]:
# Loss of the untrained network; the model argument is ignored by calculate_loss.
nn.calculate_loss(model={})

In [None]:
# Train: 5000 gradient steps on each batch of 200 rows, reporting the loss
# every 1000 steps and keeping the gradient debug output off.
model = nn.build_model(
    num_passes=5000,
    batch_size=200,
    print_loss=True,
    print_loss_iteration=1000,
    print_delta_shapes=False,
    print_deltas=False,
)

In [None]:
# Lift NumPy's print truncation so every test prediction is shown.
np.set_printoptions(threshold=np.inf)
predictions = nn.predict(X_test)
print(np.asarray(predictions))

In [None]:
# Cast the test labels to int and drop the length-1 axes so they line up with
# the 1-D `predictions` array.
labels = np.squeeze(y_test.astype(int))
labels

In [None]:
# Fraction of test samples whose predicted class matches the true label.
nn.calculate_accuracy(predictions=predictions, labels=labels)