In [17]:
import numpy as np
import matplotlib.pyplot as plt

In [18]:
class rosenblatt_perceptron:
    def __init__(self, N, threshold):
        self.weights = np.zeros(N)
        self.theta = threshold
        
    def forward(self, x):
        self.x = x
        self.z = np.dot(x, self.weights)
        return np.sign(self.z - self.theta)
    
    def update(self, y):
        E = self.z * y
        if E <= 0:
            self.weights += (self.x * y) / len(self.weights)
            return 1
        
        return 0

In [19]:
n_d = 50 # nr of independently generated datasets with different P
alpha = np.arange(0, 5, step = 0.25) # number of samples in the dataset
N = [5, 20, 40, 60, 100] # number of data dimensions
epochs = 100
embed_str_iter = 10 

In [20]:
def create_dataset(P, N):
    D = []
    print("P is ", str(P))
    for idx in range(P):
        x = np.random.normal(loc=0, scale=1, size=N)
        y = np.random.choice([-1, 1], p=[0.5, 0.5])
        D.append([x, y])
        
    return D

In [21]:
def theoretical_P_ls(P, N):
    # calculate the probability for a set of 
    # randomly assigned labels to be linearly seperable
    if P <= N:
        p_ls = 1
    else:

        p_ls = pow(2, 1 - P)
        sum = 0
        for i in range (0, N):
            sum += np.math.factorial(P-1)/( np.math.factorial(P-i-1) *  np.math.factorial(i))
        p_ls = p_ls * sum
        return p_ls

In [22]:
def calculate_P_ls(converged, total):
    P_ls = converged / total
    return P_ls


In [23]:
def plot(Q_ls, alpha):
    plt.plot(alpha, Q_ls)

In [24]:
for n_dim in N:
    histogram_plotted_check = 0

    P = alpha * n_dim
    print("P ", str(P))
    points_x = []
    points_y = []
    points_y_theory = []
    for iter in P:
        converged = 0
        # n_d trials with same value of P but regenerated datasets with P
        for trial in range(0, n_d):
            iter = int(iter)
            print(iter)
            D = create_dataset(iter, n_dim)
            # print(" dataset ", D)
            percep = rosenblatt_perceptron(n_dim, 0)

            # expose every data point from the training set a number of times (=embed_str_iter) to the forward update 
            # print(" dataset ", D)
            histogram_embed_str = {}
            for index, value in enumerate(D):
                # save histogram by index of datapoint, not by value of datapoint
                # TODO: save by datapoint value?
                histogram_embed_str[index] = 0
                # print(f"hsitogram value of {histogram_embed_str[index]}  at key {index}")
            # print("initial histogram ", histogram_embed_str)
            
            #histogram_embed_str = key: 0 for key in D
            for exposure in range(embed_str_iter):
                for _ in range(epochs):
                    converge_check = 0
                    for idx in range(iter):
                        x = D[idx][0]
                        y = D[idx][1]
                        update = percep.forward(x)
                        histogram_embed_str[idx] = histogram_embed_str[idx]+1 if update == 1 else histogram_embed_str[idx]
                        converge_check += percep.update(y)
                    if converge_check == 0:
                        print("converged after " + str(_ + 1) + " epoch")
                        converged += 1
                        if _ > 100 and histogram_plotted_check == 0:
                            print(f"HISTOGRAM: {histogram_embed_str}")
                            names = list(histogram_embed_str.keys())
                            values = list(histogram_embed_str.values())

                            plt.bar(range(len(histogram_embed_str)), values, tick_label=names)
                            plt.xlabel("Input point index")
                            plt.ylabel("Embedded Strength of input point")
                            plt.legend(loc="upper right")
                            plt.show()
                            histogram_plotted_check = 1
                        
                        break
            #plot histogram
            # print("hsitogram ")
            # print(histogram_embed_str)
            # plt.hist(histogram_embed_str)
            # plt.show()
        Q_ls = calculate_P_ls(converged, n_d)
        theo_Q_ls = theoretical_P_ls( iter , n_dim)
        print("Q_ls is ", str(Q_ls))
        # now plot
        print("plot ", str(iter/n_dim), str(Q_ls))
        print("theoretical Qls ", str(theo_Q_ls))
        points_x.append(iter/n_dim)
        points_y.append(Q_ls)
        points_y_theory.append(theo_Q_ls)
    plt.plot(points_x , points_y, label=f"empirical N = {n_dim}", marker='v')
    plt.plot(points_x , points_y_theory, label=f"theoretical N = {n_dim}", linestyle='dashed', marker='o')
plt.xlabel("apha = P / N")
plt.ylabel("Q_ls")
plt.legend(loc="upper right")
plt.show()

    

P  [ 0.    1.25  2.5   3.75  5.    6.25  7.5   8.75 10.   11.25 12.5  13.75
 15.   16.25 17.5  18.75 20.   21.25 22.5  23.75]
0
P is  0
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
0
P is  0
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
0
P is  0
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
0
P is  0
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 epoch
converged after 1 

KeyboardInterrupt: 