In [63]:
from matplotlib import image
import math
import matplotlib.pyplot as plt
import requests
import random
import numpy as np
import numpy.linalg as lin
import scipy.special as ss

# Convert binary text into a list
def convert_to_list(raw_img):
  """Parse a text bitmap into a list of rows of 0/1 integers.

  Parameters:
    raw_img: a string or any iterable of single characters. Rows are
             separated by '\\n'.

  Returns:
    A list of rows; in each row the character '0' becomes 0 and every other
    character becomes 1.
  """
  converted_list = []
  temp_row = []
  for ch in raw_img:
    if ch == '\n':
      # End of a row: store it and start a new one
      converted_list.append(temp_row)
      temp_row = []
    elif ch == '\r':
      # Carriage return from CRLF line endings is not a pixel
      continue
    else:
      temp_row.append(0 if ch == '0' else 1)

  # Keep the final row when the text does not end with a newline
  if temp_row:
    converted_list.append(temp_row)

  return converted_list

###############################################
# Displaying listed data as images
###############################################
def display_img(img_data):
  """Draw img_data with matplotlib using the 'Greys' colormap and hide the axes."""
  plt.imshow(img_data, interpolation='nearest', cmap='Greys')
  plt.axis('off')

###############################################
# Adding noise to the system
###############################################
def add_noise(data,noise_prob):
  """Return a copy of `data` with a fixed number of bits flipped in every row.

  Parameters:
    data:       sequence of equal-length rows of 0/1 values.
    noise_prob: fraction of each row to flip, between 0 and 1 inclusive.

  Returns:
    A new list of rows. In every row exactly round(row_length * noise_prob)
    randomly chosen positions are flipped (0 becomes 1, anything else
    becomes 0). Returns None, after printing an error, when noise_prob is
    outside [0, 1]. Returns an empty list for empty input.
  """
  # Sanity checker
  if(noise_prob < 0 or noise_prob > 1):
    print("Error! Noise probability isn't correct")
    return

  # Nothing to corrupt; also avoids indexing data[0] on empty input
  if len(data) == 0:
    return []

  # Row length is taken from the first row
  col_length = len(data[0])

  # Positions whose shuffled rank falls below the cutoff get flipped
  shuffle_list = list(range(col_length))
  cutoff_idx = round(col_length * noise_prob)

  noisy_data = []
  for row in data:
    # A fresh random ranking for every row
    random.shuffle(shuffle_list)
    noisy_data.append([
      (1 if row[i] == 0 else 0) if shuffle_list[i] < cutoff_idx else row[i]
      for i in range(col_length)
    ])

  return noisy_data

###############################################
# This just displays a clean set of letters
###############################################
def show_set(clean_letters):
  """Plot 26 letter bitmaps on a 6x5 grid of subplots.

  Entries 0..24 of clean_letters fill the first five rows in row-major
  order; entry 25 is drawn in the middle of the last row and the other
  cells of that row have their axes hidden. Each entry is reshaped to 7x5
  and inverted (1 - value) before being drawn.
  """
  fig, axs = plt.subplots(6, 5, figsize=(20, 20))

  # First 25 letters: 5x5 grid
  for idx in range(25):
    grid_row, grid_col = divmod(idx, 5)
    glyph = 1 - np.reshape(clean_letters[idx], (7, 5))
    axs[grid_row, grid_col].imshow(glyph, cmap='Greys', interpolation='nearest')

  # Last row: blank, blank, letter 26, blank, blank
  for grid_col in (0, 1):
    axs[5, grid_col].axis('off')

  last_glyph = 1 - np.reshape(clean_letters[25], (7, 5))
  axs[5, 2].imshow(last_glyph, cmap='Greys', interpolation='nearest')

  for grid_col in (3, 4):
    axs[5, grid_col].axis('off')

  plt.show()

###############################################
# Displays a single letter
###############################################
def show_letter(letter):
  """Draw one letter: reshape it to 7x5, invert it (1 - value) and show it in greys."""
  pixels = np.reshape(letter, (7, 5))
  plt.imshow(1 - pixels, cmap='Greys', interpolation='nearest')

###############################################
# Magnitude counter
###############################################
def get_mag(A):
  """Return the sum of all elements of A (the number of ones for a 0/1 array)."""
  total = np.sum(A)
  return total

###############################################
# Importing data
###############################################
# This data set contains all the letters from A to Z.
# Each row is a vectorized (flattened) version of one letter.
# Each letter image is 7x5 pixels, i.e. 35 values per row.
# The data set is ordered so that A is the first row and Z is the last.
# The rows are converted to a NumPy array for simplicity.

# clean_letters = convert_to_list(list(requests.get('https://raw.githubusercontent.com/rgantonio/CoE161---FileDump/main/letters.txt').text))
# Read the whole bitmap file, then parse it into a 2-D array (one row per letter)
with open("char_data.txt") as data_file:
  raw_text = data_file.read()
clean_letters = np.array(convert_to_list(raw_text))


In [64]:
###############################################
# Set parameters
###############################################
D = 512   # Length of every hypervector
M = 10    # Number of ones
density = M / D
print("Density: " + str(density))

Density: 0.01953125


In [65]:
# Generate a random binary HV of length D with a fixed number of ones
def u_gen_rand_hv(D, num_ones=None):
    """Return a random 0/1 integer vector of length D with a fixed number of ones.

    Parameters:
        D:        length of the vector; must be even.
        num_ones: how many entries are set to 1. When omitted, the
                  module-level constant M is used, so the density is M/D.

    Returns:
        A numpy integer array of length D, or the integer 0 (after printing
        an error) when D is odd.
    """
    # Sanity checker
    if (D % 2):
        print("Error - D can't be an odd number")
        return 0

    if num_ones is None:
        num_ones = M

    hv = np.zeros(D, dtype = int)
    indices = np.random.permutation(D)

    # Exactly num_ones positions have a permutation rank below num_ones
    hv[indices < num_ones] = 1

    return hv

In [66]:
###############################################
# Overlap calculation
###############################################
def overlap(A,B,D):
    """Return the number of positions where both A and B are set, divided by D.

    Side effect: appends two lines to "char_after_and.txt": the AND result
    as a string of 0/1 characters, then the count of ones in it.

    Parameters:
        A, B: binary vectors of equal length.
        D:    normalization constant (the callers pass the HV length).
    """
    and_out = np.logical_and(A,B)
    ones = np.sum(and_out)
    hv = "".join(str(int(x)) for x in and_out)

    # The context manager closes the log file even if the write fails
    with open("char_after_and.txt", "a") as f:
        f.write(hv + "\n" + str(ones) + "\n")

    return ones/D

In [67]:
def perm(A,N):
  """Return A cyclically shifted by N positions (numpy.roll)."""
  shifted = np.roll(A, shift=N)
  return shifted

In [68]:
def superimpose(block):
  """Apply an element-wise OR across all HVs in `block`.

  Parameters:
    block: non-empty sequence of equal-length binary (0/1 or boolean) vectors.

  Returns:
    An integer numpy array holding 1 wherever at least one vector in the
    block is set, and 0 elsewhere.

  Raises:
    ValueError: if `block` is empty, since there is nothing to combine.
  """
  stacked = np.asarray(list(block))
  if stacked.shape[0] == 0:
    raise ValueError("superimpose() needs at least one hypervector")

  # Count set bits per position, then clip to 0/1
  return (stacked.sum(axis=0) >= 1).astype(int)

In [69]:
def countones(Z):
  """Return the sum of the elements of Z, starting from 0 (the number of ones in a 0/1 vector)."""
  return sum(Z, 0)

In [70]:
def HGN(block):
  """Superimpose `block`, then thin the result against shifted copies of itself.

  Z is the OR of all HVs in `block`. For every shift k in 1..K (K is fixed
  to 1 here) Z is AND-ed with Z rolled by k; the AND results are OR-ed
  together and returned as a 0/1 vector.
  """
  # implementation as in the paper (additive CDT)
  K = 1
  Z = superimpose(block)
  thinned_list = [np.logical_and(Z, perm(Z, shift)) for shift in range(1, K + 1)]
  return superimpose(thinned_list)

# **Encoding**

In [71]:
def create_item_mem(N,D):
  """Return a dict mapping each index 0..N-1 to a fresh random HV from u_gen_rand_hv(D)."""
  return {n: u_gen_rand_hv(D) for n in range(N)}

In [72]:
def create_im(D):
  """Create the item memory for the letters: one random HV per pixel index.

  Also formats every HV as a text line of the form
  "6'd<pixel>: im_man_out = <D>'b<bits>;". Writing that text to
  "char_im_man.txt" is currently disabled.

  Returns:
    dict mapping pixel index (0..34) to its HV.
  """
  # 7x5=35 pixels per letter -> 35 HV's necessary
  letter_im = create_item_mem(35,D)

  # One formatted line per pixel HV
  hw_lines = []
  for pixel, vec in letter_im.items():
    bits = "".join(str(x) for x in vec)
    hw_lines.append("6'd" + str(pixel) + ": " + "im_man_out = " + str(D) + "'b" + bits + ";\n")
  im_hw = "".join(hw_lines)

  # f = open("char_im_man.txt", "w")
  # f.write(im_hw)
  # f.close()
  # print("Item memory written in: char_im_man.txt")
  return letter_im

In [73]:
# HDC encoder
def hdc_encode(letter, letter_im, D):
  """Encode a flattened letter bitmap into a single HV.

  For pixel i the item-memory HV letter_im[i] is used as is when the pixel
  is not 1, and rolled by one position when the pixel is 1. The per-pixel
  HVs are then combined by HGN. The D argument is not used here.
  """
  letter_block = [
    perm(letter_im[i], 1) if letter[i] == 1 else letter_im[i]
    for i in range(len(letter))
  ]
  return HGN(letter_block)

In [74]:
# Create associative memory
def create_am(D, letter_im):
  """Build the associative memory: one encoded HV per letter 'a'..'z'.

  Row i of the module-level `clean_letters` array is encoded with
  hdc_encode and stored under the i-th letter of the alphabet.

  Side effects:
    Writes the 26 HVs to "char_am.txt", one string of 0/1 characters per
    line with no trailing newline after the last line, and prints a
    confirmation message.

  Parameters:
    D:         passed through to hdc_encode.
    letter_im: item memory mapping pixel index to HV.

  Returns:
    dict mapping each letter to its encoded HV.
  """
  keys = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']

  # Encode every letter exactly once
  letter_am = dict()
  for i in range(len(keys)):
    letter_am[keys[i]] = hdc_encode(clean_letters[i],letter_im,D)

  # One line of bits per letter, joined in a single pass
  am_hw = "\n".join(
    "".join(str(x) for x in letter_am[key]) for key in keys
  )

  # The context manager closes the file even if the write fails
  with open("char_am.txt", "w") as f:
    f.write(am_hw)
  print("Associative memory written in: char_am.txt")

  return letter_am


In [75]:
def import_im(D):
  """Load the item memory from "char_im_man.txt".

  Reads 35 lines. On each line the trailing newline and ';' are stripped
  and the last D characters are taken as the bits of the HV.

  Parameters:
    D: number of bits per HV.

  Returns:
    dict mapping pixel index (0..34) to a numpy array of its bits.
  """
  letter_im = dict()
  # The context manager closes the file even if a line fails to parse
  with open("char_im_man.txt","r") as f:
    for n in range(35):
      line = f.readline()
      # Strip the line terminator explicitly so a final line without a
      # newline is parsed the same way as the others
      bits = line.rstrip("\n").rstrip(";")[-D:]
      letter_im[n] = np.array([int(x) for x in bits])

  return letter_im

In [76]:
def import_am(D, keys):
  """Load the associative memory from "char_am.txt".

  Reads one line per entry of `keys`; the first D characters of each line
  (line terminator excluded) are taken as the bits of that key's HV.

  Parameters:
    D:    number of bits per HV.
    keys: labels, in the same order as the lines of the file.

  Returns:
    dict mapping each key to a numpy array of its bits.
  """
  letter_am = dict()
  # The context manager closes the file even if a line fails to parse
  with open("char_am.txt","r") as f:
    for n in range(len(keys)):
      line = f.readline()
      # Use D rather than a fixed width, and drop the newline so it is
      # never passed to int()
      bits = line.rstrip("\n")[:D]
      letter_am[keys[n]] = np.array([int(x) for x in bits])

  return letter_am

In [77]:
def export_chars(keys):
  """Write the index of every key as a 5-digit binary string to "char_data_out.txt".

  One line per key, in order, with no trailing newline after the last line.
  Any previous content of the file is replaced.
  """
  text = "\n".join(format(k, "05b") for k in range(len(keys)))

  # A single open in "w" mode truncates and writes; the context manager
  # closes the file even if the write fails
  with open("char_data_out.txt","w") as f:
    f.write(text)
  return

In [78]:
# Labels for the 26 letters, in data-set order
keys = list("abcdefghijklmnopqrstuvwxyz")

# Build fresh item and associative memories.
# To reuse previously exported memories instead, call
# import_im(D) / import_am(D, keys) here.
letter_im = create_im(D)
letter_am = create_am(D, letter_im)

# Export the letter indices as 5-bit binary strings
export_chars(keys)

Associative memory written in: char_am.txt


In [79]:
def similarity_search(letter,letter_im,letter_am,D):
  """Find the stored letter whose HV overlaps most with the encoded input.

  The input is encoded with hdc_encode and compared against every entry of
  letter_am with overlap(). On ties the first entry with the highest score
  wins.

  Returns:
    (label, score) of the best match, or ('0', 0) when no entry has an
    overlap greater than zero.
  """
  query_hv = hdc_encode(letter,letter_im,D)
  best_label, best_score = '0', 0
  for label, stored_hv in letter_am.items():
    score = overlap(query_hv, stored_hv, D)
    if score > best_score:
      best_label, best_score = label, score
  return best_label, best_score

# Testing

In [80]:
def test_model(test_data,correct_values,letter_im,letter_am,D,print_flag):
  """Classify every sample in test_data and print the accuracy.

  Parameters:
    test_data:      sequence of flattened letter bitmaps.
    correct_values: expected label for each sample, same order as test_data.
    letter_im:      item memory used for encoding.
    letter_am:      associative memory searched for the best match.
    D:              passed through to similarity_search.
    print_flag:     when true, print one line per sample with the prediction.

  Side effects:
    Truncates "char_after_and.txt" before testing, prints the overall
    accuracy in percent and a dict of per-letter accuracy. The per-letter
    value is the number of correct predictions divided by the number of
    samples carrying that label (0.0 when a letter has no samples).
  """
  keys = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
  test_len = len(test_data)

  # Start every run with an empty overlap log
  open('char_after_and.txt', 'w').close()

  # Avoid dividing by zero when there is nothing to evaluate
  if test_len == 0:
    print("No test data supplied - nothing to evaluate")
    return

  score = 0
  correct_per_letter = {k: 0 for k in keys}
  samples_per_letter = {k: 0 for k in keys}

  for i in range(test_len):
    expected = correct_values[i]
    if expected in samples_per_letter:
      samples_per_letter[expected] += 1

    sim_letter, sim_score = similarity_search(test_data[i],letter_im,letter_am,D)

    if sim_letter == expected:
      score += 1
      if expected in correct_per_letter:
        correct_per_letter[expected] += 1
      if(print_flag):
        print("CORRECT prediction! sim_letter: " + sim_letter + " sim_score: " + str(sim_score))
    else:
      if(print_flag):
        print("WRONG prediction! sim_letter: " + sim_letter + " sim_score: " + str(sim_score))

  print("Final accuracy is: %f" % (score/test_len*100))

  # Normalize by the real sample count of each letter, so the result stays
  # correct when the letters are not equally represented
  score_dict = dict()
  for k in keys:
    if samples_per_letter[k]:
      score_dict[k] = correct_per_letter[k]/samples_per_letter[k]
    else:
      score_dict[k] = 0.0
  print(score_dict)


In [81]:
# Run the classifier on the undistorted letters, printing every prediction
test_model(clean_letters, keys, letter_im, letter_am, D, print_flag=True)

CORRECT prediction! sim_letter: a sim_score: 0.248046875
CORRECT prediction! sim_letter: b sim_score: 0.234375
CORRECT prediction! sim_letter: c sim_score: 0.25
CORRECT prediction! sim_letter: d sim_score: 0.25
CORRECT prediction! sim_letter: e sim_score: 0.240234375
CORRECT prediction! sim_letter: f sim_score: 0.216796875
CORRECT prediction! sim_letter: g sim_score: 0.232421875
CORRECT prediction! sim_letter: h sim_score: 0.248046875
CORRECT prediction! sim_letter: i sim_score: 0.25390625
CORRECT prediction! sim_letter: j sim_score: 0.234375
CORRECT prediction! sim_letter: k sim_score: 0.234375
CORRECT prediction! sim_letter: l sim_score: 0.236328125
CORRECT prediction! sim_letter: m sim_score: 0.265625
CORRECT prediction! sim_letter: n sim_score: 0.2734375
CORRECT prediction! sim_letter: o sim_score: 0.244140625
CORRECT prediction! sim_letter: p sim_score: 0.216796875
CORRECT prediction! sim_letter: q sim_score: 0.24609375
CORRECT prediction! sim_letter: r sim_score: 0.208984375
CORR

# Distorted testing

In [82]:
def distort_img(image,N):
  """Return a copy of `image` with N randomly chosen pixels flipped.

  Parameters:
    image: flattened bitmap of any length (the letters in this notebook
           have 35 pixels).
    N:     number of pixels to flip. Values of len(image) or more flip
           every pixel.

  Returns:
    A new float numpy array of the same length as `image`. A flipped pixel
    becomes 1 if it was 0 and 0 otherwise; the other pixels are copied
    unchanged.
  """
  n_pixels = len(image)

  # Random ranking of the pixel positions; ranks below N mark the flips
  rand_idx = list(range(n_pixels))
  random.shuffle(rand_idx)
  flip_mask = np.array(rand_idx) < N

  # np.where builds a new array, so the input image is never modified
  src = np.asarray(image)
  flipped = np.where(src == 0, 1, 0)
  ret_img = np.where(flip_mask, flipped, src).astype(float)

  return ret_img

Keep `M_repetitions` at 100 or below, because the simulation takes a long time to run. 50 repetitions is usually enough.

In [83]:
###############################################
# Variations and testing
###############################################

def create_test_set(clean_letters, M, N):
  """Build a labelled test set of distorted letters.

  For every row of clean_letters, M distorted copies are produced with
  distort_img(row, N). Row i is labelled with the i-th letter of the
  alphabet.

  Returns:
    (test_data, test_answers): the distorted bitmaps and their labels, in
    matching order.
  """
  labels = list("abcdefghijklmnopqrstuvwxyz")

  test_data, test_answers = [], []

  for letter_idx, clean_letter in enumerate(clean_letters):
    for _ in range(M):
      test_data.append(distort_img(clean_letter, N))
      test_answers.append(labels[letter_idx])

  return test_data, test_answers

In [84]:
###############################################
# Testing out distortions
###############################################
test_distortions = True

# Tunable parameters
N_distortions = 2      # passed to distort_img as the number of pixels to flip
M_repetitions = 100    # distorted copies generated per letter
display_log   = False  # print one line per sample when True

if test_distortions:
  # Build the distorted test set, then evaluate it with the memories built earlier
  test_data, test_answers = create_test_set(clean_letters, M_repetitions, N_distortions)
  test_model(test_data, test_answers, letter_im, letter_am, D, display_log)

Final accuracy is: 96.769231
{'a': 1.0, 'b': 0.99, 'c': 0.99, 'd': 0.96, 'e': 1.0, 'f': 0.94, 'g': 0.82, 'h': 0.87, 'i': 1.0, 'j': 0.98, 'k': 1.0, 'l': 1.0, 'm': 0.86, 'n': 0.96, 'o': 0.89, 'p': 0.98, 'q': 1.0, 'r': 0.96, 's': 1.0, 't': 1.0, 'u': 1.0, 'v': 1.0, 'w': 0.96, 'x': 1.0, 'y': 1.0, 'z': 1.0}
