In [None]:
import pandas as pd
import numpy as np
import math
from google.colab import files
import io
#seed NumPy's global generator so every later np.random call in this cell
#produces the same sequence on each run
np.random.seed(42)
#separate generator created with the same seed; the visible code never draws from it
rstate = np.random.RandomState(42)

#upload data
#files.upload() returns a mapping from uploaded file name to its raw bytes;
#the entry named 'wine.data' is parsed as a CSV without a header row, so the
#columns are labelled 0, 1, 2, ...
uploaded = files.upload()
data = pd.read_csv(
  io.BytesIO(uploaded['wine.data']),
  header=None,
)

#normalize data
#min-max scale each of the 13 feature columns (labels 1..13) to the range [0, 1];
#column 0 is skipped because it holds the class label
for column in range(1, 14):
  columnMin = data[column].min()
  columnRange = data[column].max() - columnMin
  if columnRange == 0:
    #a constant column would turn every row into 0/0 = NaN; it carries no
    #information, so map it to 0 instead
    data[column] = 0.0
  else:
    data[column] = (data[column] - columnMin) / columnRange



#randomly divide data into subsets
#rows are dealt out in rounds of five: within a round every subset receives
#exactly one row, and the order in which the subsets are served is drawn at random
foldRows = [[], [], [], [], []]
rando = []

for row in range(data.shape[0]):
  #all five subsets have been served, so start a new round
  if not rando:
    rando = [0, 1, 2, 3, 4]

  #pick one of the subsets that is still waiting for a row in this round
  index = np.random.randint(0, len(rando))
  r = rando.pop(index)

  sample = np.array([data.iloc[row]])
  foldRows[r].append(sample)

#stack the collected 1 x 14 rows of each subset; a subset that received no
#rows stays an empty (0, 14) array
cross = [np.concatenate([np.empty((0, 14))] + rows, axis = 0) for rows in foldRows]

#initialize variables

rate = 0.4   #learning rate applied to every weight update
beta = 1.0   #steepness of the hidden-layer sigmoid
M = 3        #number of hidden neurons
bias = 1.0   #constant input that feeds the bias weights

numInputs = 13          #feature columns 1..13 of every row
numClasses = 3          #class labels 1..3 are stored in column 0
numFolds = len(cross)   #one cross-validation round per subset


def _softmax(z):
  """Return the softmax of z along its last axis.

  The largest entry is subtracted before exponentiating. This does not change
  the result mathematically but keeps exp() from overflowing on large values.
  """
  shifted = z - np.max(z, axis=-1, keepdims=True)
  e = np.exp(shifted)
  return e / np.sum(e, axis=-1, keepdims=True)


def _forward(features, inputWeights, inputBiasWeights, hiddenWeights, hiddenBiasWeights):
  """Propagate one sample (13,) or a batch of samples (n, 13) through the network.

  Reads the module-level beta and bias. Returns (hidden, output): the sigmoid
  activations of the hidden layer and the softmax activations of the output layer.
  """
  hiddenIn = np.dot(features, inputWeights) + inputBiasWeights[0] * bias
  hidden = 1 / (1 + np.exp(-beta * hiddenIn))
  outputIn = np.dot(hidden, hiddenWeights) + hiddenBiasWeights[0] * bias
  return hidden, _softmax(outputIn)


totalEpochs = 0
avgAccuracy = 0
avgError = 0
#cross validation
#NOTE(review): the held-out subset is used both to decide when to stop training
#and to report accuracy/error, so the reported figures are optimistic.
for x in range(numFolds):
  print()
  print("Cross Validation Set "+ str(x+1))
  print()

  #initialize weights uniformly in (-1/sqrt(n), 1/sqrt(n)), n = fan-in including the bias
  inputWeights = np.random.uniform(-1/math.sqrt(numInputs+1), 1/math.sqrt(numInputs+1), (numInputs, M))
  hiddenWeights = np.random.uniform(-1/math.sqrt(M+1), 1/math.sqrt(M+1), (M, numClasses))
  inputBiasWeights = np.random.uniform(-1/math.sqrt(numInputs+1), 1/math.sqrt(numInputs+1), (1, M))
  hiddenBiasWeights = np.random.uniform(-1/math.sqrt(M+1), 1/math.sqrt(M+1), (1, numClasses))

  #the held-out subset never changes during this round, so split it up once
  testFold = cross[x]
  samples = testFold.shape[0]
  testFeatures = testFold[:, 1:numInputs+1]
  testLabels = testFold[:, 0].astype(int)
  testDesired = np.zeros((samples, numClasses))
  testDesired[np.arange(samples), testLabels - 1] = 1

  #train until error on the held-out subset stops decreasing or accuracy of 1.0 is reached
  train = True
  currentError = 100
  #counts completed passes over the training subsets; starting at 0 makes the
  #reported figure equal to the number of passes actually run
  epoch = 0
  accuracy = 0
  while train and accuracy != 1.0:

    #loop through every training subset
    for y in range(numFolds):
      if y == x:
        continue

      #loop through every sample in current training subset
      for row in cross[y]:
        #forward propagation
        sample = row[1:numInputs+1]
        hidden, output = _forward(sample, inputWeights, inputBiasWeights, hiddenWeights, hiddenBiasWeights)

        #back propagation
        desired = np.zeros(numClasses)
        desired[int(row[0]) - 1] = 1

        #calculate output error
        #NOTE(review): this keeps only the diagonal term o*(1-o) of the softmax
        #derivative (the sigmoid-style delta); left as in the original.
        outError = (output - desired) * output * (1 - output)

        #calculate hidden error from the weights used in the forward pass,
        #i.e. before the hidden-to-output weights are updated below
        hiddenError = beta * hidden * (1 - hidden) * np.dot(hiddenWeights, outError)

        #update hidden bias weights and hidden weights
        hiddenBiasWeights[0] -= rate * outError * bias
        hiddenWeights -= rate * np.outer(hidden, outError)

        #update input weights and input bias weights
        inputWeights -= rate * np.outer(sample, hiddenError)
        inputBiasWeights[0] -= rate * hiddenError * bias

    #calculate accuracy and error on current testing subset
    _, outputs = _forward(testFeatures, inputWeights, inputBiasWeights, hiddenWeights, hiddenBiasWeights)

    #predicted class = output neuron with the largest activation (first one on ties)
    predictions = np.argmax(outputs, axis=1) + 1
    correctPredictions = int(np.sum(predictions == testLabels))

    #mean over the subset of half the squared error of each sample
    sse = float(0.5 * np.sum((outputs - testDesired) ** 2)) / samples

    #NOTE(review): when the error rises the weights are not rolled back, so the
    #accuracy reported below is from this last pass while the error is the best one seen.
    if sse > currentError:
      train = False
    else:
      currentError = sse

    accuracy = correctPredictions / samples

    epoch += 1
  print("Epochs: " + str(epoch))
  print("Accuracy: " + str(accuracy))
  print("Error: " + str(currentError))
  avgAccuracy += accuracy
  avgError += currentError
  totalEpochs += epoch

print("Avg Accuracy: " + str(avgAccuracy / float(numFolds)))
print("Avg Error: " + str(avgError / float(numFolds)))
print("Epochs: "+ str(totalEpochs))


    



Saving wine.data to wine.data

Cross Validation Set 1

Epochs: 15
Accuracy: 1.0
Error: 0.04680045697078627

Cross Validation Set 2

Epochs: 122
Accuracy: 0.9722222222222222
Error: 0.02383224356571257

Cross Validation Set 3

Epochs: 31
Accuracy: 1.0
Error: 0.0292117670892849

Cross Validation Set 4

Epochs: 16
Accuracy: 1.0
Error: 0.050294907753511475

Cross Validation Set 5

Epochs: 15
Accuracy: 1.0
Error: 0.03530928924669627
Avg Accuracy: 0.9944444444444445
Avg Error: 0.0370897329251983
Epochs: 199


| Hidden Neurons | Learning Rate | Average Accuracy | Average Error | Total Epochs |
| --- | --- | --- | --- | --- |
| 2 | .1 | .994 | .066 | 516 |
| 2 | .25 | .994 | .040 | 262 |
| 2 | .4 | .846 | .092 | 323 |
| 2 | .65 | .840 | .100 | 202 |
| 3 | .1 | .994 | .061 | 576 |
| 3 | .25 | .994 | .043 | 290 |
| 3 | .4 | .994 | .037 | 199 |
| 3 | .65 | .701 | .179 | 114 |
| 4 | .1 | .989 | .091 | 441 |
| 4 | .25 | .989 | .035 | 228 |
| 4 | .4 | .696 | .163 | 126 |
| 4 | .65 | .700 | .191 | 116 |
| 5 | .1 | .994 | .056 | 1229 |
| 5 | .25 | .994 | .036 | 599 |
| 5 | .4 | .563 | .237 | 65 |
| 5 | .65 | .563 | .283 | 63 |





Data is randomly divided into 5 subsets. After a sample has been placed in a subset, no more samples are placed into that subset until every other subset has had a sample added to it. Since the data is already sorted by the class of the sample, this ensures that the subsets have a nearly equal number of samples (their sizes differ by at most one) and a nearly equal distribution of classes within those samples. The model has one hidden layer using the sigmoid activation function and an output layer with three neurons using the softmax activation function. Weights are initialized to random uniform values in the range (-1/sqrt(n), 1/sqrt(n)), with n being the number of inputs (including the bias) to those weights. This keeps the weighted sum reaching each neuron in the next layer at a magnitude of around 1 when the inputs are propagated forward. Each iteration of the 5-fold cross validation ends when an accuracy of 1.0 is reached on the testing set or when the error on that same held-out set, which also serves as the validation set, begins increasing. 

The model performed very well using cross validation. It converged quickly to an accuracy of 1.0 on most randomized testing sets and reached a high accuracy on the others. Three hidden neurons generally performed the best, leading to higher accuracies in fewer epochs. Higher numbers of hidden neurons could reach similar accuracies but required lower learning rates and more iterations. A learning rate of .25 generally led to the best balance of error and number of iterations. Performance of the model declined steeply as the learning rate was increased to .65. The model performed significantly better than the expected .333 accuracy of the random baseline model, with the highest average accuracies reaching .994 on the testing sets. 