# Neural Networks: List 2

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import hopfield

# Problem 3

Find the maximum number $p$ of patterns which a network can store and find the ratio $p/N$. 

The first task is ill-defined. As we can see below, the number of randomly generated patterns that are remembered grows with the size of the training set. For the $N=5$ network there are $2^5=32$ different possible patterns, and the network is able to remember up to $26$ of them when trained on $32$ randomly drawn patterns (duplicates are removed before training, so the effective training set is usually smaller than $32$).

In [2]:
def old_p_to_N_ratio(p_max, N, iterations):
  """Count perfectly recalled training patterns for every requested set size.

  For each requested size ``p`` in ``1..p_max`` and for each of ``iterations``
  trials, a fresh network is trained on ``p`` randomly drawn +/-1 patterns of
  length ``N`` and then asked to reproduce each training pattern.

  Note: duplicate draws are removed before training, so the set actually
  used may contain fewer than ``p`` patterns; the recorded size is still the
  requested ``p``.

  Args:
    p_max: largest requested training-set size (inclusive).
    N: network size, i.e. the length of every pattern.
    iterations: number of independent trials per requested size.

  Returns:
    A list of ``(N, p, remembered)`` tuples, one per trial, where
    ``remembered`` is the number of training patterns reproduced exactly.
  """
  records = []
  for p in range(1, p_max + 1):
    for _ in range(iterations):
      net = hopfield.Hopfield(N, print_format=(N,1), synchronous=False, zero_self_interaction=True)
      # Draw p patterns with entries in {-1, +1}, then drop repeated rows.
      drawn = np.random.randint(2, size=(p, N)) * 2 - 1
      patterns = np.array(list(set(map(tuple, drawn))))
      net.train(patterns)
      recalled = np.apply_along_axis(func1d=net.update, axis=1, arr=patterns, print_form=False)
      # A pattern counts as remembered only when all N entries agree.
      agreeing_entries = (patterns == recalled).sum(axis=1)
      remembered = (agreeing_entries == N).sum()
      records.append((N, p, remembered))
  return records

In [3]:
# Run 100 trials for every requested training-set size from 1 to 32 on a network of size 5.
max_ps = old_p_to_N_ratio(p_max=32, N=5, iterations=100)

In [6]:
# Tabulate the (network size, requested training-set size, remembered count) records.
max_ps_df = pd.DataFrame(
  data=max_ps,
  columns=['Net_size', 'Train_size', 'Remem'],
)
max_ps_df.describe()

Unnamed: 0,Net_size,Train_size,Remem
count,3200.0,3200.0,3200.0
mean,5.0,16.5,12.114375
std,0.0,9.234536,5.948779
min,5.0,1.0,1.0
25%,5.0,8.75,7.0
50%,5.0,16.5,13.0
75%,5.0,24.25,17.0
max,5.0,32.0,26.0


In [8]:
# For each requested training-set size, how often each remembered count occurred.
max_ps_df.groupby('Train_size')['Remem'].value_counts()

Train_size  Remem
1           1        100
2           2         99
            1          1
3           3         82
            2         18
4           4         75
            3         19
            2          6
5           5         59
            4         35
            3          5
            2          1
6           6         47
            5         37
            4          9
            3          6
            2          1
7           7         43
            6         27
            5         18
            3          7
            4          5
8           7         42
            8         35
            6         12
            5          8
            4          2
            1          1
9           8         45
            9         28
                    ... 
30          19        32
            20        19
            21        18
            18        11
            17         7
            22         6
            23         3
            16         2
       

Below, we incrementally increase the size of the training set until the network fails to recall at least one of the patterns, and we record the size of the last fully remembered training set. This is repeated for a given number of iterations.

In [10]:
def p_to_N_ratio(N, iterations):
  """Find the largest training-set size a network of size N recalls perfectly.

  In every trial the training set grows one pattern at a time. At each step a
  fresh network is trained on a fresh random set of exactly ``p`` distinct
  +/-1 patterns of length ``N`` and asked to reproduce each of them. The trial
  stops at the first size for which at least one pattern is not reproduced,
  or once all ``2**N`` possible patterns have been used.

  Args:
    N: network size, i.e. the length of every pattern.
    iterations: number of independent trials.

  Returns:
    A list with one entry per trial: the size of the last training set that
    was reproduced without error (``2**N`` when even the complete set of
    possible patterns was reproduced).
  """
  total_patterns = 2 ** N  # number of distinct +/-1 patterns of length N
  ps = []
  for _ in range(iterations):
    p = 1
    while True:
      network = hopfield.Hopfield(N, print_format=(N,1), synchronous=False, zero_self_interaction=True)
      # Oversample (2 * p draws) and drop duplicates; retry until at least
      # p distinct patterns are available, then keep exactly p of them.
      while True:
        train_set = np.random.randint(2, size=(2 * p, N)) * 2 - 1
        train_set = np.array(list(set(tuple(x) for x in train_set)))
        if train_set.shape[0] >= p:
          train_set = train_set[:p]
          break
      network.train(train_set)
      response = np.apply_along_axis(func1d=network.update, axis=1, arr=train_set, print_form=False)
      # A pattern is known only when every one of its N entries is reproduced.
      known = (train_set == response).all(axis=1).sum()
      if known < p:
        # This size failed, so the previous size was the last full success.
        ps.append(p - 1)
        break
      if p >= total_patterns:
        # Every possible pattern was stored and recalled; p itself succeeded
        # and no larger set of distinct patterns exists.
        ps.append(p)
        break
      p += 1
  return ps

In [11]:
# For every network size, average the p_to_N_ratio results of 100 trials.
pNs = [
  (pd.Series(p_to_N_ratio(size, 100)).mean(), size)
  for size in (5, 10, 15, 20, 25, 50, 100)
]

We can see that for small networks the ratio is well above the theoretical bound $p/N<0.138$; it decreases as $N$ grows and falls below the bound ($0.111$) for $N=100$.

In [13]:
# Tabulate the averages and add the ratio pN = Avg_Rem / Network_size.
pNs_df = (
  pd.DataFrame(pNs, columns=['Avg_Rem', 'Network_size'])
  .assign(pN=lambda frame: frame['Avg_Rem'] / frame['Network_size'])
)
pNs_df

Unnamed: 0,Avg_Rem,Network_size,pN
0,13.57,5,2.714
1,5.06,10,0.506
2,5.25,15,0.35
3,5.68,20,0.284
4,6.14,25,0.2456
5,7.88,50,0.1576
6,11.1,100,0.111


In [14]:
# Summary statistics of the averages and ratios across the tested network sizes.
pNs_df.describe()

Unnamed: 0,Avg_Rem,Network_size,pN
count,7.0,7.0,7.0
mean,7.811429,32.142857,0.624029
std,3.302738,33.273756,0.930644
min,5.06,5.0,0.111
25%,5.465,12.5,0.2016
50%,6.14,20.0,0.284
75%,9.49,37.5,0.428
max,13.57,100.0,2.714
