In [228]:
import numpy as np
import matplotlib.pyplot as plt

# uncomment and run to upload winde.data to google colab if it is not included in the .ipynb file
# from google.colab import files
# uploaded = files.upload()


def main():
  np.random.seed(55)
  #load data
  data = np.loadtxt('wine.data', delimiter=',')
  names = ['Alcohol', 'Malic Acid', 'Ash', 'Alvalinity of ash', 'Magnesium', 'Total phenols', 'Nonfalvinoid phenols', '{Proanthocyanins', 'Color intensity', 'Hue', 'OD280/OD315 of diluted wines', 'Proline']

  #shuffle data
  r = list(range(data.shape[0]))
  np.random.shuffle(r)
  data = data[r,:]

  inputs = data[:,1:]

  targets = np.zeros((data[:,0].shape[0], 3))
  for i, item in enumerate(data[:,0].astype(int) - 1):
    targets[i] = vectorize(3, item)

  #splits data into 4 parts training and 1 part testing
  cut_point = int(data.shape[0] / 5)
  training_inputs = inputs[cut_point:,]
  training_targets = targets[cut_point:,]

  testing_inputs = inputs[0:cut_point,]
  testing_targets = targets[0:cut_point,]

  #MLP
  net = mlp(training_inputs, training_targets, testing_inputs, testing_targets, 10, momentum=0.25)
  net.train(0.0001, 100)

  testing_inputs = np.concatenate((testing_inputs,-np.ones((testing_inputs.shape[0],1))),axis=1)
  print('Predictions:\t', net.predict(testing_inputs))
  print('Actual Classes:\t', np.argmax(testing_targets, axis=1))


def vectorize(n, i):
  v = np.zeros(n)
  v[i] = 1

  return v




class mlp:


  def __init__(self, training_inputs, training_targets, testing_inputs, testing_targets, hidden_nodes, beta=1, momentum=0.0):
    self.training_inputs = np.concatenate((training_inputs,-np.ones((training_inputs.shape[0],1))),axis=1)
    self.training_targets = training_targets
    self.testing_inputs = np.concatenate((testing_inputs,-np.ones((testing_inputs.shape[0],1))),axis=1)
    self.testing_targets = testing_targets

    self.in_count = training_inputs.shape[1]
    self.out_count = training_targets.shape[1]
    self.training_data_count = training_inputs.shape[0]
    self.testing_data_count = testing_inputs.shape[0]

    self.hidden_count = hidden_nodes
    
    self.beta = beta
    self.momentum = momentum

    #weights from input to hidden layer
    self.weights_IH = (np.random.rand(self.in_count + 1,self.hidden_count) - 0.5) * 2 / np.sqrt(self.in_count)

    #weights from hidden layer to output 
    self.weights_HO = (np.random.rand(self.hidden_count + 1,self.out_count) - 0.5) * 2 / np.sqrt(self.hidden_count)


  def train(self, eta, iterations):
    weights_IH_delta = np.zeros((np.shape(self.weights_IH)))
    weights_HO_delta = np.zeros((np.shape(self.weights_HO)))

    for epoch in range(iterations):
      print('Epoch #', epoch)

      #shuffle data
      self.shuffle_data()

      #Learning algorithm
      for i in range(self.training_inputs.shape[0]):
        row = np.reshape((self.training_inputs[i,]), (1, self.in_count + 1))
        outputs = self.forwardpass(row)

        #calculate sigmas
        sigma_out = (outputs - self.training_targets[i,]) * outputs * (1 - outputs)
        sigma_hidden = self.beta * self.hidden * (1 - self.hidden) * np.dot(sigma_out, self.weights_HO.transpose())
        #remove the biases
        sigma_hidden = sigma_hidden[:,:-1]

        #Calcute weight deltas
        weights_HO_delta = eta * (np.dot(self.hidden.transpose(), sigma_out)) + (self.momentum * weights_HO_delta)
        weights_IH_delta = eta * (np.dot(row.transpose(), sigma_hidden)) + (self.momentum * weights_IH_delta)

        #update weights
        self.weights_HO -= weights_HO_delta
        self.weights_IH -= weights_IH_delta
      #Accuracy Calculation
      predictions = self.predict(self.testing_inputs)
      actual = np.argmax(self.testing_targets, axis=1)
      accuracy = sum(predictions == actual) / len(predictions) * 100
      print(f'\tAccuracy: {accuracy:0.2f}%')
      

  def forwardpass(self, inputs):
    self.hidden = np.dot(inputs,self.weights_IH)
    self.hidden = self.sigmoid(-self.beta * self.hidden)
    self.hidden = np.concatenate((self.hidden,-np.ones((inputs.shape[0],1))),axis=1)

    outputs = np.dot(self.hidden,self.weights_HO)

    return self.softmax(outputs)


  def predict(self, input):
    return np.argmax(self.forwardpass(input), axis=1)


  def shuffle_data(self):
    training_shuffle = list(range(self.training_data_count))
    testing_shuffle = list(range(self.testing_data_count))

    np.random.shuffle(training_shuffle)
    self.training_inputs = self.training_inputs[training_shuffle,:]
    self.training_targets = self.training_targets[training_shuffle,:]
      
    np.random.shuffle(testing_shuffle)
    self.testing_inputs = self.testing_inputs[testing_shuffle,:]
    self.testing_targets = self.testing_targets[testing_shuffle,:]


  def sigmoid(self, x):
    return 1.0 / (1.0 + np.exp(-x))


  def softmax(self, v):
    for row in range(v.shape[0]):
      a = np.exp(v[row])
      v[row] = a / np.sum(a)
     
    return v

  def print_data(self):
    print('Input Nodes:', self.in_count, '\nHidden Nodes:', self.hidden_count, '\nOutput Nodes:', self.out_count, '\n# Data points:', self.training_data_count)
    print('Weights IH shape:', self.weights_IH.shape, '\nWeights HO shape: ', self.weights_HO.shape)




if __name__ == '__main__':
  main()

Epoch # 0
	Accuracy: 34.29%
Epoch # 1
	Accuracy: 34.29%
Epoch # 2
	Accuracy: 34.29%
Epoch # 3
	Accuracy: 34.29%
Epoch # 4
	Accuracy: 34.29%
Epoch # 5
	Accuracy: 34.29%
Epoch # 6
	Accuracy: 34.29%
Epoch # 7
	Accuracy: 34.29%
Epoch # 8
	Accuracy: 34.29%
Epoch # 9
	Accuracy: 34.29%
Epoch # 10
	Accuracy: 34.29%
Epoch # 11
	Accuracy: 34.29%
Epoch # 12
	Accuracy: 34.29%
Epoch # 13
	Accuracy: 34.29%
Epoch # 14
	Accuracy: 34.29%
Epoch # 15
	Accuracy: 34.29%
Epoch # 16
	Accuracy: 34.29%
Epoch # 17
	Accuracy: 34.29%
Epoch # 18
	Accuracy: 34.29%
Epoch # 19
	Accuracy: 34.29%
Epoch # 20
	Accuracy: 34.29%
Epoch # 21
	Accuracy: 34.29%
Epoch # 22
	Accuracy: 34.29%
Epoch # 23
	Accuracy: 34.29%
Epoch # 24
	Accuracy: 34.29%
Epoch # 25
	Accuracy: 34.29%
Epoch # 26
	Accuracy: 34.29%
Epoch # 27
	Accuracy: 34.29%
Epoch # 28
	Accuracy: 34.29%
Epoch # 29
	Accuracy: 34.29%
Epoch # 30
	Accuracy: 34.29%
Epoch # 31
	Accuracy: 34.29%
Epoch # 32
	Accuracy: 34.29%
Epoch # 33
	Accuracy: 34.29%
Epoch # 34
	Accuracy: 34

Report: It doesn't work. The model begins by predicting random classes (expected) but converges to predicting every input as one class. Changing beta, momentum, or hidden nodes, or number of epochs doesn't keep it from predicting just one class for everything. Right now, I dont know where the error(s) I've made are. As I understand it (which could be wrong) the backprop algorithm is working as it should which means I'm giving it bad data but I don't think I am.