In [None]:
'''
CODE IMPORTS
'''
import sklearn
import numpy as np
import pandas as pd
import math
import random

In [None]:
DEBUG_ON=False
'''
NOTES:
  1. ASSUME HIDDEN LAYERS ALL HAVE SAME NUMBER OF NEURONS
  2. ASSUME INPUT LAYER SIZE == DEGREE OF DATA OBJECT
  3. LABEL IS NOT TAKEN AS A FEATURE
  4. TAKE BETA HYPER PARAM AS 1
  5. ASSUME HIDDEN LAYERS IS >= 1
'''

'''
CODE PERCEPTRON CLASS
'''
class Perceptron:
  def __init__(self):
    self.B = 1
    self.bias = None
    self.out_weights = None
    self.in_weights = None
    self.in_data = None
    self.out_data = None
    self.error = None


  def init_out_weights(self, degree):
    '''
    PARAM: DEGREE = NUM_UNITS_IN_NEXT_LAYER
    '''
    if(degree != 0):
      self.out_weights = np.random.rand(degree)
  
    #output layer perceptrons
    if(degree == 0):
      self.out_weights = None

  def set_in_weights(self, in_weights):
    self.in_weights = in_weights
  
  def set_out_weights(self, out_weights):
    self.out_weights = out_weights

  def set_in_data(self, data):
    self.in_data = data

  def set_out_data(self, data):
    self.out_data = data

  def set_bias(self, bias):
    self.bias = bias

  def set_error(self, error):
    self.error = error

  def sigmoid_activation(self, sum):
    Ij = sum + self.bias
    return 1 / (1 + math.e**(-self.B*Ij))


'''
CODE NEURAL NETWORK CLASS
'''
class NeuralNetwork:
  def __init__(self, labels):
    '''
    PROVIDE CLASS LABELS FOR TRAINING
    '''
    self.labels = set(labels)
    self.learning_rate = 0.8


    print("Creating new NN.")
    
    print("Enter input layer size. This should equal number of features.")
    self.input_layer_size = int(input())

    print("Enter num hidden layers.")
    self.num_hidden_layers = int(input())

    print("Enter hidden layer size.")
    self.hidden_layer_size = int(input())

    print("Enter output layer size. This should equal number of classes.")
    self.output_layer_size = int(input())

    print("Enter num iterations.")
    self.num_iterations = int(input())
    

  def place_empty_perceptrons(self):
    '''
    CREATE SPACE FOR LAYERS
    '''
    self.input_layer = []
    self.hidden_layers = []
    for i in range(self.num_hidden_layers):
      self.hidden_layers.insert(i,[])
    self.output_layer = []


    '''
    PUT PERCEPTRONS IN INPUT LAYER
    '''
    for i in range(self.input_layer_size):
      self.input_layer.insert(i, Perceptron())

    '''
    PUT PERCEPTRONS IN HIDDEN LAYER(S)
    '''
    for i in range(self.num_hidden_layers):
      for j in range(self.hidden_layer_size):
        self.hidden_layers[i].insert(j, Perceptron())
    
    '''
    PUT PERCEPTRONS IN OUTPUT LAYER
    '''
    for i in range(self.output_layer_size):
      self.output_layer.insert(i, Perceptron())
    if(DEBUG_ON):print("function place_empty_perceptrons complete")
    if(DEBUG_ON):self.to_string()

  def init_input_layer_weights_biases_and_data(self, data_object):
    '''
    PARAM: DATA_OBJECT = one N dimensional vector containing features of a data object
    The input layer should have N perceptrons. 
    That way, each perceptron is assigned ONE feature from a data object
    '''
    for i in range(self.input_layer_size):
      self.input_layer[i].set_in_data(data_object[i])
      self.input_layer[i].set_in_weights(None)
      self.input_layer[i].init_out_weights(degree=self.hidden_layer_size)
      self.input_layer[i].set_out_data(data_object[i])
      self.input_layer[i].set_bias(random.uniform(0, 1))
      if(DEBUG_ON):print("function init_input_layer_weights_biases_and_data complete")
      if(DEBUG_ON):self.to_string()

  def init_hidden_layer_weights_and_biases(self):
    '''
    here we will only initialize the weights and bias for the HIDDEN layer perceptrons
    '''
    if(self.num_hidden_layers == 1):
      for i in range(self.hidden_layer_size):
        self.hidden_layers[0][i].init_out_weights(degree=self.output_layer_size)
        self.hidden_layers[0][i].set_bias(random.uniform(0, 1))
    
    if(self.num_hidden_layers > 1):
      next_layer = self.hidden_layers[1]
      for i in range(self.num_hidden_layers):
        for j in range(self.hidden_layer_size):
          self.hidden_layers[i][j].init_out_weights(degree=len(next_layer))
          self.hidden_layers[i][j].set_bias(random.uniform(0, 1))
        if((i == self.hidden_layers - 1) or (self.num_hidden_layers == 2)):
          next_layer = self.output_layer
          continue
        if(self.hidden_layers > 2):
          next_layer = self.hidden_layers[i+1]

    if(self.num_hidden_layers == 1):
      for i in range(self.hidden_layer_size):
        in_weights = []
        for j in range(self.input_layer_size):
          in_weights.insert(j,self.input_layer[j].out_weights[i])
        self.hidden_layers[0][i].set_in_weights(in_weights)

    if(self.num_hidden_layers > 1):
      prev_layer = self.input_layer
      for i in range(self.num_hidden_layers):#for every hidden layer
        in_weights = []
        for j in range(self.hidden_layer_size):#for every neuron in the current hidden layer
          for k in range(len(prev_layer)):#apply out weights of prev layer as in weights for current layer
            in_weights.insert(k, prev_layer[k].out_weights[j])
        self.hidden_layers[i].set_in_weights(in_weights)
        prev_layer = self.hidden_layers[i]
    if(DEBUG_ON):print("function init_hidden_layer_weights_and_biases complete")
    if(DEBUG_ON):self.to_string()
    

  def init_output_layer_weights_and_biases(self):
    for i in range(self.output_layer_size):#for each node in the output layer
      in_weights = []
      for j in range(self.hidden_layer_size):#for each node in the hidden layer
        in_weights.insert(j,self.hidden_layers[self.num_hidden_layers-1][j].out_weights[i])
      self.output_layer[i].set_in_weights(in_weights)
      self.output_layer[i].set_out_weights(None)
      self.output_layer[i].set_bias(random.uniform(0, 1))
    if(DEBUG_ON):print("function init_output_layer_weights_and_biases complete")
    if(DEBUG_ON):self.to_string()

  def compute_weighted_sum(self, node, node_ix, prev_layer):
    sum = 0
    for i in range(len(prev_layer)):
        sum += prev_layer[i].out_data * prev_layer[i].out_weights[node_ix]
    return sum

  def forward_propagate(self, data_object):
    for i in range(self.input_layer_size):
      self.input_layer[i].in_data = data_object[i]

    '''
    PROPAGATE DATA FROM INPUT LAYER INTO 1ST HIDDEN LAYER
    '''
    if(DEBUG_ON):print("BEGIN INPUT LAYER PROPAGATION")
    for i in range(self.hidden_layer_size):
      curr_node = self.hidden_layers[0][i]
      curr_node.set_in_data(self.compute_weighted_sum(curr_node, i, self.input_layer))
      curr_node.set_out_data(curr_node.sigmoid_activation(curr_node.in_data))
    if(DEBUG_ON):print("INPUT LAYER PROPAGATION COMPLETE")
    if(DEBUG_ON):self.to_string()

    '''
    PROPAGATE DATA FROM HIDDEN LAYERS FORWARD
    '''
    if(DEBUG_ON):print("BEGIN HIDDEN LAYER PROPAGATION")
    if(self.num_hidden_layers == 1):
      for i in range(self.output_layer_size):
        for j in range(self.hidden_layer_size):
          curr_node = self.output_layer[i]
          last_hidden_layer = self.hidden_layers[0]
          curr_node.set_in_data(self.compute_weighted_sum(curr_node, i, last_hidden_layer))
          curr_node.set_out_data(curr_node.sigmoid_activation(curr_node.in_data))
      

    if(self.num_hidden_layers > 1):
      for i in range(self.num_hidden_layers):#for each hidden layer
        for j in range(self.hidden_layer_size):#for each neuron in the curr HL
          if(i == self.num_hidden_layers-1):
            break
          if(i == 0):
            curr_node = self.hidden_layers[i][j]
            curr_node.set_out_data(curr_node.sigmoid_activation(curr_node.in_data))
          if(i != 0):
            curr_node = self.hidden_layers[i][j]
            prev_layer = self.hidden_layers[i-1]
            curr_node.set_in_data(self.compute_weighted_sum(curr_node, j, prev_layer))
            curr_node.set_out_data(curr_node.sigmoid_activation(curr_node.in_data))


      '''
      time to propagate info from last hidden layer to output layer...
      ...if there is more than 1 hidden layer
      '''
      for i in range(self.output_layer.size):
          prev_layer = self.hidden_layers[self.num_hidden_layers-1]
          curr_node = self.output_layer[i]
          curr_node.set_in_data(curr_node, prev_layer)
          curr_node.set_out_data(self.sigmoid_activation(curr_node.in_data))
    
    if(DEBUG_ON):print("HIDDEN LAYER PROPAGATION COMPLETE")
    if(DEBUG_ON):self.to_string()

    
    '''
    COMPUTE OUTPUT LAYER PROBABILITY SCORES
    '''
    if(DEBUG_ON):print("BEGIN OUTPUT LAYER PROBABILITY CALCULATION")
    self.class_probabilities = dict()
    j = 0
    for i in self.labels:
      self.class_probabilities[i] = self.output_layer[j].out_data
      j += 1 
    if(DEBUG_ON):print("forward prop done")
    if(DEBUG_ON):self.to_string()
    if(DEBUG_ON):print("\n\tclass probabilities are {} ".format(self.class_probabilities))
    return self.class_probabilities



  def backward_propagate(self):
    '''
    BACK PROPAGATE THE ERRORS FROM OUTPUT LAYER
    '''
    for i in range(self.output_layer_size):
      curr_node = self.output_layer[i]
      curr_out_data = curr_node.out_data
      curr_node.set_error(curr_out_data * (1-curr_out_data) * (int(np.array(list(self.labels))[i]) - curr_out_data))

    '''
    BACK PROPAGATE THE ERRORS IN HIDDEN LAYER
    '''
    if(self.num_hidden_layers == 1):
      for i in range(self.hidden_layer_size):
        curr_node = self.hidden_layers[0][i]
        self.find_cumulative_error(curr_node, i, self.output_layer)

    if(self.num_hidden_layers > 1):
      out_layer = self.output_layer
      for i in range(self.num_hidden_layers, 0, -1):#for each hidden layer
        for j in range(self.hidden_layer_size):#for each node in the hidden layer
          curr_node = self.hidden_layers[i][j]
          self.find_cumulative_error(curr_node, j, out_layer)
        out_layer = self.hidden_layers[i]
    
    if(DEBUG_ON):print("\n\n\tBACK PROP COMPLETE\n")
    if(DEBUG_ON):self.to_string()

    '''
    UPDATE WEIGHTS USING ERRORS: OUTPUT TO HIDDEN LAYER
    '''
    for i in range(self.output_layer_size):
      curr_node = self.output_layer[i]
      for j in range(len(curr_node.in_weights)):
        prev_weight = curr_node.in_weights[j]
        delta_weight = self.learning_rate * curr_node.error * curr_node.out_data
        curr_node.in_weights[j] = prev_weight + delta_weight
    
    if(DEBUG_ON):print("\n\n\tUPDATE WEIGHTS USING ERRORS: OUTPUT TO HIDDEN LAYER COMPLETE\n")
    if(DEBUG_ON):self.to_string()


    '''
    UPDATE OUT WEIGHTS FOR LAST HIDDEN LAYER: STEAL OUTPUT LAYER NEW WEIGHTS
    '''
    for i in range(self.output_layer_size):
      for j in range(self.hidden_layer_size):
        self.hidden_layers[self.num_hidden_layers-1][j].out_weights[i] = self.output_layer[i].in_weights[j]

    if(DEBUG_ON):print("\n\n\tUPDATE OUT WEIGHTS FOR LAST HIDDEN LAYER: STEAL OUTPUT LAYER NEW WEIGHTS COMPLETE\n")
    if(DEBUG_ON):self.to_string()


    '''
    UPDATE IN WEIGHTS FOR HIDDEN LAYER
    '''
    if(self.num_hidden_layers == 1):
      for i in range(self.hidden_layer_size):
        curr_node = self.hidden_layers[0][i]
        for j in range(len(curr_node.in_weights)):
          prev_weight = curr_node.in_weights[j]
          delta_weight = self.learning_rate * curr_node.error * curr_node.out_data
          curr_node.in_weights[j] = prev_weight + delta_weight


    if(self.num_hidden_layers > 1):
      for i in range(self.num_hidden_layers, 0, -1):
        curr_layer = self.hidden_layers[i]
        if(i == 0):
          prev_layer = self.input_layer
        if(i != 0):
          prev_layer = self.hidden_layers[i-1]
        for j in range(self.hidden_layer_size):
          curr_node = curr_layer[j]
          for k in range(len(curr_node.in_weights)):
            prev_weight = curr_node.in_weights[k]
            delta_weight = self.learning_rate * curr_node.error * curr_node.out_data
            curr_node.in_weight = prev_weight + delta_weight
    
    if(DEBUG_ON):print("\n\n\tUPDATE IN WEIGHTS FOR HIDDEN LAYER COMPLETE\n")
    if(DEBUG_ON):self.to_string()


    '''
    UPDATE BIASES FOR EACH NEURON IN OUTPUT LAYER
    '''
    for i in range(self.output_layer_size):
      curr_node = self.output_layer[i]
      prev_bias = curr_node.bias
      delta_bias = self.learning_rate * curr_node.error
      curr_node.bias = prev_bias + delta_bias
    
    '''
    UPDATE BIASES FOR EACH NEURON IN HIDDEN LAYER(S)
    '''
    for i in range(self.num_hidden_layers):
      for j in range(self.hidden_layer_size):
        curr_node = self.hidden_layers[i][j]
        prev_bias = curr_node.bias
        delta_bias = self.learning_rate * curr_node.error
        curr_node.bias = prev_bias + delta_bias
      
    if(DEBUG_ON):print("\n\n\tback prop done")
    if(DEBUG_ON):self.to_string()




  def find_cumulative_error(self, curr_node, cn_ix, out_layer):
    cn_error = curr_node.out_data * (1-curr_node.out_data)
    sum = 0
    for i in range(len(curr_node.out_weights)):
      sum += curr_node.out_weights[i]* out_layer[i].out_data
    curr_node.set_error(cn_error * sum)
    return cn_error * sum


  def fit(self, x_train):
    self.place_empty_perceptrons()
    self.init_input_layer_weights_biases_and_data(x_train[0])
    self.init_hidden_layer_weights_and_biases()
    self.init_output_layer_weights_and_biases()
    for i in range(len(x_train)):
      for j in range(self.num_iterations):
        self.forward_propagate(x_train[i])
        self.backward_propagate()

  def predict(self, x_test):
    y_pred = np.zeros(len(x_test))
    for i in range(len(x_test)):
      y_pred[i] = np.argmax(np.array(list(self.forward_propagate(x_test[i]))))
    return y_pred
  
  def accuracy(self, y_pred, y_test):
    num_correct = 0
    for i in range(len(y_pred)):
      if(y_pred[i] == y_test[i]):
        num_correct += 1
    return num_correct / len(y_pred)

  def to_string(self):
    print("\n\nNN\nInput Layer:\n")
    for i in range(self.input_layer_size):
      print("Node: {}\t In Weights: {}\t In Data: {}\t Out Data: {}\t Out Weights: {}\t Bias: {}\t Error: {}".format(i,self.input_layer[i].in_weights, self.input_layer[i].in_data, self.input_layer[i].out_data, self.input_layer[i].out_weights, self.input_layer[i].bias, self.input_layer[i].error))
    
    print("\n\nHidden Layer(s)\n")
    for i in range(self.num_hidden_layers):
      print("Hidden Layer {}".format(i))
      for j in range(self.hidden_layer_size):
        print("Node: {}\t In Weights: {}\t In Data: {}\t Out Data: {}\t Out Weights: {}\t Bias: {}\t Error: {}".format(j,self.hidden_layers[i][j].in_weights, self.hidden_layers[i][j].in_data, self.hidden_layers[i][j].out_data, self.hidden_layers[i][j].out_weights, self.hidden_layers[i][j].bias, self.hidden_layers[i][j].error))
    
    print("\n\nOutput Layer\n")
    for i in range(self.output_layer_size):
      print("Node: {}\t In Weights: {}\t In Data: {}\t Out Data: {}\t Out Weights: {}\t Bias: {}\t Error: {}".format(i,self.output_layer[i].in_weights, self.output_layer[i].in_data, self.output_layer[i].out_data, self.output_layer[i].out_weights, self.output_layer[i].bias, self.output_layer[i].error))

In [None]:
'''
BRING IN DATA
'''
train_dataset_1 = pd.read_csv('dataset1_training.txt', delim_whitespace=True)
train_dataset_2 = pd.read_csv('dataset2_training.txt', delim_whitespace=True)

test_dataset_1 = pd.read_csv('dataset1_testing.txt', delim_whitespace=True)
test_dataset_2 = pd.read_csv('dataset2_testing.txt', delim_whitespace=True)

Y_train_1 = np.array(train_dataset_1.pop('label')).astype(int)
X_train_1 = np.array(train_dataset_1).astype(float)

Y_train_2 = np.array(train_dataset_2.pop('label')).astype(int)
X_train_2 = np.array(train_dataset_2).astype(float)

Y_test_1 = np.array(test_dataset_1.pop('label')).astype(int)
X_test_1 = np.array(test_dataset_1).astype(float)

Y_test_2 = np.array(test_dataset_2.pop('label')).astype(int)
X_test_2 = np.array(test_dataset_2).astype(float)


In [383]:
'''
CREATE NEW 2 NNs AND SHOW THEIR ACCURACY
'''
ffnn1 = NeuralNetwork(Y_train_1)
ffnn1.fit(X_train_1)
Y_pred_1 = ffnn1.predict(X_test_1)
print("ffnn1 accuracy is {}\n\n".format(ffnn1.accuracy(Y_test_1, Y_pred_1)))

ffnn2 = NeuralNetwork(Y_train_2)
ffnn2.fit(X_train_2)
Y_pred_2 = ffnn1.predict(X_test_2)
print("ffnn2 accuracy is {}\n\n".format(ffnn1.accuracy(Y_test_2, Y_pred_2)))

Creating new NN.
Enter input layer size. This should equal number of features.
2
Enter num hidden layers.
1
Enter hidden layer size.
4
Enter output layer size. This should equal number of classes.
2
Enter num iterations.
10
ffnn1 accuracy is 0.4444444444444444


Creating new NN.
Enter input layer size. This should equal number of features.
2
Enter num hidden layers.
1
Enter hidden layer size.
4
Enter output layer size. This should equal number of classes.
2
Enter num iterations.
10
ffnn2 accuracy is 0.5




In [382]:
'''
CREATE TWO SVMs AND SHOW THEIR ACCURACY
'''
from sklearn import svm
from sklearn.metrics import accuracy_score
clf1 = svm.SVC()
clf1.fit(X_train_1, Y_train_1)
y_pred_1_svm = clf1.predict(X_test_1)
print("svm for dataset 1 accuracy is: {}".format(accuracy_score(Y_test_1, y_pred_1_svm)))

clf2 = svm.SVC()
clf2.fit(X_train_2, Y_train_2)
y_pred_2_svm = clf2.predict(X_test_2)
print("svm for dataset 2 accuracy is: {}".format(accuracy_score(Y_test_2, y_pred_2_svm)))

svm for dataset 1 accuracy is: 1.0
svm for dataset 2 accuracy is: 1.0
