# Neural Networks: List 2

In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import pandas as pd
import sys
sys.path.insert(0, '../')
import hopfield

# Problem 3

Find the maximum number $p$ of patterns which a network can store and find the ratio $p/N$. 

The first task is ill-defined. As we can see below, the number of randomly generated patterns that the network remembers grows with the size of the training set. For the $N=5$ network there are $2^5=32$ different possible patterns, and the network is even able to remember up to $26$ of them when fed the full $32$-pattern set.

In [2]:
def old_p_to_N_ratio(p_max, N, iterations):
  """Count exactly recalled patterns for every training-set size up to p_max.

  For each requested size p in 1..p_max the experiment is repeated
  `iterations` times: p random +/-1 patterns of length N are drawn,
  duplicates are dropped, a fresh `hopfield.Hopfield` network is trained on
  the remaining patterns, and every training pattern is passed through
  `network.update`.

  Note that p is the *requested* size: after duplicates are removed the
  network may have been trained on fewer than p patterns.

  Returns:
    A list of `(N, p, remembered)` tuples, one per trial, where `remembered`
    is the number of training patterns that came back unchanged.
  """
  records = []
  for p in range(1, p_max + 1):
    for _ in range(iterations):
      # The network is built before the patterns are drawn, once per trial.
      net = hopfield.Hopfield(N, print_format=(N,1), synchronous=False, zero_self_interaction=True)
      drawn = np.random.randint(2, size=(p, N)) * 2 - 1
      # Rows become tuples so that a set can discard repeated patterns.
      patterns = np.array(list(set(tuple(row) for row in drawn)))
      net.train(patterns)
      recalled = np.apply_along_axis(net.update, 1, patterns, print_form=False)
      # A pattern counts as remembered only if every component matches.
      remembered = (patterns == recalled).all(axis=1).sum()
      records.append((N, p, remembered))
  return records

In [3]:
# Sweep training-set sizes 1..32 (32 = 2**5, the number of distinct
# patterns of a 5-neuron network), with 100 trials per size.
max_ps = old_p_to_N_ratio(p_max=2**5, N=5, iterations=100)

In [4]:
# One row per trial: network size, requested training-set size and the
# number of patterns recalled exactly.
max_ps_df = pd.DataFrame(
    data=max_ps,
    columns=['Net_size', 'Train_size', 'Remem'],
)
max_ps_df.describe()

Unnamed: 0,Net_size,Train_size,Remem
count,3200.0,3200.0,3200.0
mean,5.0,16.5,2.28875
std,0.0,9.234536,1.042275
min,5.0,1.0,0.0
25%,5.0,8.75,2.0
50%,5.0,16.5,2.0
75%,5.0,24.25,3.0
max,5.0,32.0,8.0


In [5]:
# Distribution of the remembered-pattern count for each training-set size.
max_ps_df.groupby('Train_size')['Remem'].value_counts()

Train_size  Remem
1           1        100
2           2         73
            1         20
            0          7
3           2         43
            1         39
            3         11
            0          7
4           1         42
            2         38
            0         13
            3          6
            4          1
5           1         42
            2         35
            0         15
            3          5
            4          2
            5          1
6           1         45
            2         41
            3          6
            0          5
            4          3
7           2         57
            1         22
            0          9
            3          8
            4          4
8           2         49
                    ... 
27          6          1
            8          1
28          2         41
            3         30
            4         26
            1          1
            5          1
            6          1
29     

Below, we incrementally increase the size of the training set until the network fails to recall at least one of the training patterns, and return the size of the last fully remembered training set. This is repeated for a given number of iterations.

In [6]:
def p_to_N_ratio(N, iterations):
  """Estimate how many random patterns a network of size N recalls exactly.

  Each of the `iterations` trials starts with p = 1 and, for growing p:
    1. builds a fresh `hopfield.Hopfield` network,
    2. draws p distinct random +/-1 patterns of length N,
    3. trains the network on them and passes every pattern through
       `network.update`,
    4. stops as soon as at least one pattern is not returned unchanged.

  Args:
    N: number of neurons, i.e. the length of every pattern.
    iterations: number of independent trials.

  Returns:
    A list with one integer per trial: the size of the largest training set
    that was recalled without any error. This is 0 if even a single pattern
    failed and 2**N if all possible patterns were recalled.
  """
  ps = []
  for _ in range(iterations):
    p = 1
    while True:
      network = hopfield.Hopfield(N, print_format=(N,1), synchronous=False, zero_self_interaction=True)
      # Oversample (2 * p draws) and retry until at least p distinct patterns
      # are available, then keep exactly p of them.
      while True:
        train_set = np.random.randint(2, size=(2 * p, N)) * 2 - 1
        train_set = np.array(list(set([tuple(x) for x in train_set])))
        if train_set.shape[0] >= p:
          train_set = train_set[:p]
          break
      network.train(train_set)
      response = np.apply_along_axis(func1d=network.update, axis=1, arr=train_set, print_form=False)
      # Number of training patterns reproduced component-for-component.
      known = (train_set == response).all(axis=1).sum()
      if known < p:
        # This set was not fully recalled; the previous size was the last success.
        ps.append(p - 1)
        break
      if p >= 2**N:
        # All 2**N distinct patterns were recalled and p cannot grow further,
        # so the current size itself is the last fully remembered one.
        ps.append(p)
        break
      p += 1
  return ps

In [7]:
# Mean capacity over 100 trials for each network size, stored as
# (average remembered, network size) pairs.
pNs = [
    (pd.Series(p_to_N_ratio(size, 100)).mean(), size)
    for size in [5, 10, 15, 20, 25, 50, 100]
]

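We can see that for small networks the ratio is actually higher than the theoretical $p/N<0.138$, but it decreases as $N$ grows and falls below this bound for $N=50$ and $N=100$.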

In [8]:
# Build the summary table and derive the capacity ratio p/N in one chain.
pNs_df = (
    pd.DataFrame(pNs, columns=['Avg_Rem', 'Network_size'])
    .assign(pN=lambda frame: frame['Avg_Rem'] / frame['Network_size'])
)
pNs_df

Unnamed: 0,Avg_Rem,Network_size,pN
0,1.72,5,0.344
1,2.91,10,0.291
2,3.61,15,0.240667
3,4.05,20,0.2025
4,4.53,25,0.1812
5,6.69,50,0.1338
6,9.9,100,0.099


In [9]:
# Summary statistics across the tested network sizes (columns: Avg_Rem, Network_size, pN).
pNs_df.describe()

Unnamed: 0,Avg_Rem,Network_size,pN
count,7.0,7.0,7.0
mean,4.772857,32.142857,0.213167
std,2.72907,33.273756,0.086024
min,1.72,5.0,0.099
25%,3.26,12.5,0.1575
50%,4.05,20.0,0.2025
75%,5.61,37.5,0.265833
max,9.9,100.0,0.344
