<a href="https://colab.research.google.com/github/MitchMathieu/cisc452-a3/blob/master/CISC452_a3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# CISC 452 - Assignment 3
IMPLEMENT A HYBRID COMPETITIVE LEARNING NETWORK

In [0]:
! git clone https://github.com/MitchMathieu/cisc452-a3.git
! ls cisc452-a3

fatal: destination path 'cisc452-a3' already exists and is not an empty directory.
iris_test.txt  iris_train.txt


In [0]:
import numpy as np
import random

In [0]:
def read_txt(file_path):
  x = []
  y = []
  f = open(file_path, 'r')
  for line in f:
    line = line.rstrip().split(',')
    x.append(np.array(line[:-1], dtype=np.float32))
    y.append(line[-1])
  return np.array(x), np.array(y)

In [0]:
train_x, train_y = read_txt("cisc452-a3/iris_train.txt")
test_x, test_y = read_txt("cisc452-a3/iris_test.txt")
# print(train_x)
# print(train_y)

Encoding

In [0]:
def cat_to_vector(label):
  # one hot encode the flower type
  if label == "Iris-setosa":
    return np.array([1, 0, 0], dtype=np.int)
  elif label == "Iris-versicolor":
    return np.array([0, 1, 0], dtype=np.int)
  elif label == "Iris-virginica":
    return np.array([0, 0, 1], dtype=np.int)
    
def vector_to_cat(vec):
  if np.array_equal(vec, [1, 0, 0]):
    return "Iris-setosa"
  elif np.array_equal(vec, [0, 1, 0]):
    return "Iris-versicolor"
  elif np.array_equal(vec, [0, 0, 1]):
    return "Iris-virginica"
  
def onehot_iris(arr):
  arr_enc = []
  for cat in arr:
    vec = cat_to_vector(cat)
    arr_enc.append(vec)
  return np.array(arr_enc)

In [0]:
train_y_enc = onehot_iris(train_y)
test_y_enc = onehot_iris(test_y)
# print(test_y_enc)

# LVQ1
LVQ algorithm
* init random weights
* assign each output node to a class 
* find output node whose weight vector is closest to input vec
* move weight vec closer if correct, further if incorrect
* repeat until convergence

In [0]:
from math import sqrt
import random

In [0]:
class LVQ:
  def __init__(self, num_inputs, num_outputs):
    self.weights = np.random.rand(num_outputs, num_inputs)
#     self.weights = [self.random_codebook(train_x) for i in range(num_outputs)]
  
#   def random_codebook(self, train):
#     n_records = len(train)
#     n_features = len(train[0])
#     codebook = [train[random.randrange(n_records)][i] for i in range(n_features)]
#     return codebook
    
  def weight_to_class(self, cv):
    arr = np.zeros(3)
    for i, w in enumerate(self.weights):
      if np.array_equal(w, cv):
        arr[i] = 1
    return arr
    
  def euclidian_dist(self, vec1, vec2):
    distance = 0.0
    for i in range(len(vec1)):
      distance += (vec1[i] - vec2[i])**2
    return sqrt(distance)
  
  def find_closest_vec(self, pattern):
    distances = []
    for vec in self.weights:
      dist = self.euclidian_dist(pattern, vec)
      distances.append((vec, dist))
    distances.sort(key=lambda tup: tup[1])
#     print(f"dist: {distances[0][1]}")
    return distances[0][0]
  
  def train(self, train_x, train_y, lrate, epochs):
    for epoch in range(epochs):
      rate = lrate * (1.0 - (epoch/float(epochs)))
      sum_err = 0.0
      for pattern, true_label in zip(train_x, train_y):
        closest_vec = self.find_closest_vec(pattern)
        print(f"pattern:{pattern}")
        print(f"closest_vec:{closest_vec}")
        pred_label = self.weight_to_class(closest_vec)
        print(f"pred_label:{pred_label}, true_label:{true_label}")
        for i in range(pattern.size):
          error = pattern[i] - closest_vec[i]
          sum_err += error**2
          if np.array_equal(pred_label, true_label):
            closest_vec[i] += rate * error
          else:
            closest_vec[i] -= rate * error
        print(f"closest_vec adjusted:{closest_vec}\n")
      print(f">epoch={epoch+1}, lrate={rate:.3f}, error={sum_err:.3f}")
      
  def test(self, test_x, test_y):
    correct = 0.0
    for pattern, label in zip(test_x, test_y):
      pred = self.weight_to_class(self.find_closest_vec(pattern))
      if np.array_equal(pred, label):
        correct += 1
      print(f"actual={label} got={pred}")
    print(f"correct={correct}")
    print(f"final accuracy={(correct/len(test_y)):.4f}")

In [0]:
lvq1 = LVQ(4, 3)
print(f"init weights:\n{lvq1.weights}")

init weights:
[[0.68316658 0.27543469 0.41084977 0.63348662]
 [0.65034377 0.07646891 0.19403644 0.20909199]
 [0.05511963 0.3625815  0.03665456 0.05605129]]


In [0]:
#Shuffles x, y and onehot array entirely! Very quick!
def shuffle(x,y,onehot):
    indices = [i for i in range(len(x))]
    np.random.shuffle(indices)
    return x[indices],y[indices],onehot[indices]

In [0]:
train_x, train_y, train_y_enc = shuffle(train_x,
                                       train_y,train_y_enc)

lvq1.train(train_x, train_y_enc, 0.5, 10)
print(f"trained weights:\n{lvq1.weights}")

pattern:[7.1 3.  5.9 2.1]
closest_vec:[0.68316658 0.27543469 0.41084977 0.63348662]
pred_label:[1. 0. 0.], true_label:[0 0 1]
closest_vec adjusted:[-2.52525008 -1.08684797 -2.33372539 -0.09977002]

pattern:[5.4 3.  4.5 1.5]
closest_vec:[0.65034377 0.07646891 0.19403644 0.20909199]
pred_label:[0. 1. 0.], true_label:[0 1 0]
closest_vec adjusted:[3.02517193 1.53823446 2.34701822 0.854546  ]

pattern:[5.7 3.  4.2 1.2]
closest_vec:[3.02517193 1.53823446 2.34701822 0.854546  ]
pred_label:[0. 1. 0.], true_label:[0 1 0]
closest_vec adjusted:[4.36258587 2.26911723 3.27350901 1.02727302]

pattern:[6.9 3.1 5.1 2.3]
closest_vec:[4.36258587 2.26911723 3.27350901 1.02727302]
pred_label:[0. 1. 0.], true_label:[0 0 1]
closest_vec adjusted:[3.09387876 1.85367589 2.36026357 0.39090956]

pattern:[5.5 2.4 3.8 1.1]
closest_vec:[3.09387876 1.85367589 2.36026357 0.39090956]
pred_label:[0. 1. 0.], true_label:[0 1 0]
closest_vec adjusted:[4.29693938 2.12683799 3.08013176 0.74545479]

pattern:[6.4 2.7 5.3 1.9]


In [0]:
lvq1.test(test_x, test_y_enc)

actual=[1 0 0] got=[0. 1. 0.]
actual=[1 0 0] got=[0. 1. 0.]
actual=[1 0 0] got=[0. 1. 0.]
actual=[1 0 0] got=[0. 1. 0.]
actual=[1 0 0] got=[0. 1. 0.]
actual=[1 0 0] got=[0. 1. 0.]
actual=[1 0 0] got=[0. 1. 0.]
actual=[1 0 0] got=[0. 1. 0.]
actual=[1 0 0] got=[0. 1. 0.]
actual=[1 0 0] got=[0. 1. 0.]
actual=[0 1 0] got=[0. 1. 0.]
actual=[0 1 0] got=[0. 1. 0.]
actual=[0 1 0] got=[0. 1. 0.]
actual=[0 1 0] got=[0. 1. 0.]
actual=[0 1 0] got=[0. 1. 0.]
actual=[0 1 0] got=[0. 1. 0.]
actual=[0 1 0] got=[0. 1. 0.]
actual=[0 1 0] got=[0. 1. 0.]
actual=[0 1 0] got=[0. 1. 0.]
actual=[0 1 0] got=[0. 1. 0.]
actual=[0 0 1] got=[0. 1. 0.]
actual=[0 0 1] got=[0. 1. 0.]
actual=[0 0 1] got=[0. 1. 0.]
actual=[0 0 1] got=[0. 1. 0.]
actual=[0 0 1] got=[0. 1. 0.]
actual=[0 0 1] got=[0. 1. 0.]
actual=[0 0 1] got=[0. 1. 0.]
actual=[0 0 1] got=[0. 1. 0.]
actual=[0 0 1] got=[0. 1. 0.]
actual=[0 0 1] got=[0. 1. 0.]
correct=10.0
final accuracy=0.3333
