In [210]:
import numpy as np
import time
import threading
import os

In [177]:
# Small example
# train_data_filename = 'dtrain123.dat'
# test_data_filename = 'dtest123.dat'

# train_data = np.loadtxt(train_data_filename)
# test_data = np.loadtxt(test_data_filename)

# train_labels = train_data[:, 0] - 1; train_samples = train_data[:, 1:]
# test_labels = test_data[:,0] - 1; test_samples = test_data[:,1:]
def traingen2(train_labels, mistake_counters, num_classes, kernel_mat_train):
    
    mistakes = 0
    for i in range(train_labels.shape[0]):
        
        pred_arg = (kernel_mat_train[i,:].T @ mistake_counters).squeeze()
        
        pred_class = np.argmax(pred_arg)
        label = int(train_labels[i])
        
        label_vect = np.ones(num_classes) * -1
        label_vect[label] = 1
        
        class_mistakes = (pred_arg * label_vect) <= 0
        
        mistake_counters[i, class_mistakes] -= (2*(pred_arg[class_mistakes] > 0) - 1)
        
        if pred_class != label:
            mistakes += 1
            
    return mistake_counters, mistakes

In [178]:
# Load full data
path_real_data = "./zipcombo.dat"
data = np.loadtxt(path_real_data)

In [180]:
data_samples = data[:, 1:]; data_labels = data[:, 0]

In [184]:
data_samples.shape

(9298, 256)

In [236]:
# Auxiliary Functions
def polynomial_kernel(train_dat, test_pt, p):
    """
    Computes the polynomial kernel for a given order
    Input:
    train_dat = m x dim array
    test_pt = n x dim vector
    p = polynomial order, scalar
    
    Output:
    result = m x n array, where result[i,j] = K(x_train[i,:], test_pt[j,:])
    
    Note: To allow bulk evaluation for efficiency
    """
    return ((train_dat @ test_pt.T)**p)

def init_mistake_counters(train_data, num_class):
    """
    To use kernels, need only to keep track of the number of times during training (for each perceptron)
    how many times a sample is predicted wrongly (since this many times would its featurised vector be added
    to the 'weight'). This avoids explicitly computing the weight vectors because it needs the feature map computation
    which is not feasible if e.g. gaussian kernel. Furthermore, the predictions later depends on the polarity of the label
    hence our 'counters' is also multiplied by the sample label
    
    Input:
    train_data: only to get number of points
    
    Output:
    matrix of zeros, size = m x num_class. Because keep track for every perceptron
    """
    m = train_data.shape[0]
    return np.zeros((m, num_class))

def class_predict(mistake_counters, kernel_mat):
    """
    Note: y_hat will be vector of -1s and only one 1 at the corresponding class.
    
    Input:
    data_pt = 1 x dim vector to predict
    mistake_counters as above
    kernel_mat = m x 1 array where K[i] = K(xi, data_pt)
    
    Output:
    predictions: vector of -1s and one 1 at the predicted class
    """
    num_class = mistake_counters.shape[1]
    
    inner_term = mistake_counters * kernel_mat
    inner_term = np.sum(inner_term, axis = 0).reshape(-1)
    
    print(inner_term)
    
    pred_class = np.argmax(inner_term)
    
    predictions = -1* np.ones((num_class))
    predictions[pred_class] = 1
    
    return inner_term, predictions

def class_predict2(mistake_counters, kernel_mat):
    """
    Note: y_hat will be vector of -1s and only one 1 at the corresponding class.
    
    Input:
    data_pt = 1 x dim vector to predict
    mistake_counters as above
    kernel_mat = m x n_test array where K[i,j] = K(xi, test_pt[j])
    
    Output:
    predictions: vector of arguments
    """
    inner_term = kernel_mat.T @ mistake_counters
    
    return inner_term

def split_train_test(data_samples, data_labels, train_prop, test_prop, seed = 88):
    """
    Split to 80% train and 20% test as required, randomly by first shuffling
    """
    n_tot = data_samples.shape[0]; n_train = round(train_prop * n_tot)
    permute = np.random.permutation(n_tot)
    
    data_shuffled = data_samples[permute,:]
    data_shuffled_labels = data_labels[permute]
    
    train_samples = data_shuffled[:n_train, :]; train_labels = data_shuffled_labels[:n_train]
    test_samples = data_shuffled[n_train:, :]; test_labels = data_shuffled_labels[n_train:]
    
    return train_samples, train_labels, test_samples, test_labels

def split_train_test2(data_samples, data_labels, train_prop, test_prop, seed = 88):
    """
    Split to 80% train and 20% test as required, randomly by first shuffling
    """
    n_tot = data_samples.shape[0]; n_train = round(train_prop * n_tot)
    permute = np.random.permutation(n_tot)
    
    data_shuffled = data_samples[permute,:]
    data_shuffled_labels = data_labels[permute]
    
    train_samples = data_shuffled[:n_train, :]; train_labels = data_shuffled_labels[:n_train]
    test_samples = data_shuffled[n_train:, :]; test_labels = data_shuffled_labels[n_train:]
    
    return train_samples, train_labels, test_samples, test_labels, data[:n_train, :]

In [234]:
# Training

def traingen(train_data, train_labels, mistake_counters, num_classes, kernel_mat_train):
    """
    Performs the num_classes perceptron training for ONE epoch
    
    Returns:
    the updated mistake counters and the number of mistakes in that epoch
    """
    # To reduce recomputation, compute the kernel mat for all pairs of training data outside. Since this don't change
    # every epoch
    
    # initialize mistakes
    mistakes = 0
    
    # Iterate through each data point in an 'online' fashion
    m, _ = train_data.shape
    
    for i in range(m):
        kernel_mat = kernel_mat_train[:,i].reshape(-1,1)
        label = int(train_labels[i]); label_vect = -1 * np.ones((num_classes)); label_vect[label] = 1
        
        # Prediction vector
#         pred_arg = class_predict2(mistake_counters, kernel_mat)
        pred_arg = kernel_mat.T @ mistake_counters
        
        # Since training processes data one at a time, we can squeeze dimension
        pred_arg = pred_arg.reshape(-1)
        
        pred_class = np.argmax(pred_arg)
        class_mistakes = (pred_arg * label_vect) <= 0
        
        # If mistake for that perceptron, update the corresponding mistake counter. Check each perceptron independently
        # So if mistake on a perceptron, update only that perceptron
        
        # Subtlety: at 0,0,0..0 case, we treat the perceptron as predicting -1 for ALL (this case should never happen
        # in subsequent iterations). i.e. sign(0) is treated as -1. Furthermore, the 0,0,..0 case will always be treated
        # as all perceptrons making a mistake (even though if all -1, then all but one should be correct). Finally, if all
        # 3 perceptrons outputs are equal, we treat it as predicting the first class
        
        mistake_counters[i,class_mistakes] -=  (2*(pred_arg[class_mistakes] > 0) - 1)
        
        if pred_class != label:
            mistakes += 1
        
    return mistake_counters, mistakes

def testclassifiers(train_samples, test_samples, test_labels, mistake_counters, num_classes, kernel_mat):
        """
        Predict on the whole test sample batch
        """
        # Get shapes
        n_test = test_samples.shape[0]
        
        # Compute kernel matrix outside, again don't change per epoch
        
        # Bulk predict on the whole test labels
        inner_term = class_predict2(mistake_counters, kernel_mat)
        
        # Class predictions
        class_pred = np.argmax(inner_term, axis = 1)
        
        # Count mistakes
        mistakes = np.sum(test_labels != class_pred)
        
        # Return average mistakes
        return mistakes / n_test

In [239]:
def traingen23(data, classifiers,numb_class , d,kernel_result):
    mistakes = 0
    preds = np.zeros((numb_class))
    maxi = 0
    
    for i in range(data.shape[0]):

     
        preds = (classifiers@kernel_result[i].reshape(-1,1)).T[0]

        #print("preds",preds.shape)

        y_pred = np.argmax(preds)
        y_true = int(data[i,0])

        Y_true = np.ones(numb_class)*-1
        Y_true[y_true] = 1

        y_mistakes = ((preds*Y_true) <= 0)
        #print(y_mistakes.shape)

        classifiers[y_mistakes, i] = classifiers[y_mistakes, i] - mysign2(preds[y_mistakes])  

        if y_pred != y_true:
            mistakes+=1
        
            
    return mistakes, classifiers

In [242]:
num_iter = 1
start = time.time()
d = 3
for it in range(num_iter):
    print(f"Iteration {it+1}/{num_iter}")
    train_samples, train_labels, test_samples, test_labels, train = split_train_test2(data_samples, data_labels, 0.8, 0.2)
    num_classes = 10
    mistake_counters = init_mistake_counters(train_samples, num_classes).T
    
    # Pre-compute outside for speed
    kernel_mat_train = polynomial_kernel(train_samples, train_samples, d)
    kernel_mat_test = polynomial_kernel(train_samples, test_samples, d)
    
    for epoch in range(10):
        mistakes, mistake_counters = traingen23(train, mistake_counters,10,d,kernel_mat_train)
#         test_rate = testclassifiers(train_samples, test_samples, test_labels, mistake_counters, num_classes, kernel_mat_test)
        print(f"Epoch {epoch}, made {mistakes} mistakes on training data and mistake rate of {test_rate} on test data")

print(time.time() - start)

Iteration 1/1
Epoch 0, made 6645 mistakes on training data and mistake rate of 0.02795698924731183 on test data
Epoch 1, made 5754 mistakes on training data and mistake rate of 0.02795698924731183 on test data
Epoch 2, made 4919 mistakes on training data and mistake rate of 0.02795698924731183 on test data
Epoch 3, made 4323 mistakes on training data and mistake rate of 0.02795698924731183 on test data
Epoch 4, made 3839 mistakes on training data and mistake rate of 0.02795698924731183 on test data
Epoch 5, made 3504 mistakes on training data and mistake rate of 0.02795698924731183 on test data
Epoch 6, made 3158 mistakes on training data and mistake rate of 0.02795698924731183 on test data
Epoch 7, made 2950 mistakes on training data and mistake rate of 0.02795698924731183 on test data
Epoch 8, made 2697 mistakes on training data and mistake rate of 0.02795698924731183 on test data
Epoch 9, made 2542 mistakes on training data and mistake rate of 0.02795698924731183 on test data
5.3951

In [243]:
num_iter = 1
d = 3

start = time.time()
for it in range(num_iter):
    print(f"Iteration {it+1}/{num_iter}")
    train_samples, train_labels, test_samples, test_labels = split_train_test(data_samples, data_labels, 0.8, 0.2)
    num_classes = 10
    mistake_counters = init_mistake_counters(train_samples, num_classes)
    
    # Pre-compute outside for speed
    kernel_mat_train = polynomial_kernel(train_samples, train_samples, d)
    kernel_mat_test = polynomial_kernel(train_samples, test_samples, d)
    
    for epoch in range(10):
        mistake_counters, mistakes = traingen(train_samples, train_labels, mistake_counters, num_classes, kernel_mat_train)
        test_rate = testclassifiers(train_samples, test_samples, test_labels, mistake_counters, num_classes, kernel_mat_test)
        print(f"Epoch {epoch}, made {mistakes} mistakes on training data and mistake rate of {test_rate} on test data")

print(time.time() - start)

Iteration 1/1
Epoch 0, made 632 mistakes on training data and mistake rate of 0.04516129032258064 on test data
Epoch 1, made 132 mistakes on training data and mistake rate of 0.04032258064516129 on test data
Epoch 2, made 64 mistakes on training data and mistake rate of 0.03387096774193549 on test data
Epoch 3, made 30 mistakes on training data and mistake rate of 0.03333333333333333 on test data
Epoch 4, made 20 mistakes on training data and mistake rate of 0.032795698924731186 on test data
Epoch 5, made 15 mistakes on training data and mistake rate of 0.031720430107526884 on test data
Epoch 6, made 10 mistakes on training data and mistake rate of 0.02795698924731183 on test data
Epoch 7, made 11 mistakes on training data and mistake rate of 0.025806451612903226 on test data
Epoch 8, made 5 mistakes on training data and mistake rate of 0.024193548387096774 on test data
Epoch 9, made 7 mistakes on training data and mistake rate of 0.026344086021505377 on test data
5.8558189868927


In [213]:
num_iter = 1
start = time.time()

for it in range(num_iter):
    print(f"Iteration {it+1}/{num_iter}")
    train, test = train_test_split(data)
    train_samples = train[:, 1:]; train_labels = train[:,0]
    test_samples = test[:, 1:]; test_labels = test[:, 0]
    num_classes = 10
    mistake_counters = init_mistake_counters(train_samples, num_classes)
    # Pre-compute outside for speed
    kernel_mat_train = polynomial_kernel(train_samples, train_samples, 3)
    kernel_mat_test = polynomial_kernel(train_samples, test_samples, 3)
    
    
    for epoch in range(10):
        mistake_counters, mistakes = traingen(train_samples, train_labels, mistake_counters, num_classes, kernel_mat_train, d = 3)
        test_rate = testclassifiers(train_samples, test_samples, test_labels, mistake_counters, num_classes, kernel_mat_test, d = 3)
        print(f"Epoch {epoch}, made {mistakes} mistakes on training data and mistake rate of {test_rate} on test data")

print(time.time() - start)

Iteration 1/1
Epoch 0, made 590 mistakes on training data and mistake rate of 0.0553763440860215 on test data
Epoch 1, made 122 mistakes on training data and mistake rate of 0.053763440860215055 on test data
Epoch 2, made 63 mistakes on training data and mistake rate of 0.043548387096774194 on test data
Epoch 3, made 35 mistakes on training data and mistake rate of 0.043010752688172046 on test data
Epoch 4, made 19 mistakes on training data and mistake rate of 0.04032258064516129 on test data
Epoch 5, made 16 mistakes on training data and mistake rate of 0.038172043010752686 on test data
Epoch 6, made 5 mistakes on training data and mistake rate of 0.03333333333333333 on test data
Epoch 7, made 6 mistakes on training data and mistake rate of 0.03602150537634408 on test data
Epoch 8, made 5 mistakes on training data and mistake rate of 0.03870967741935484 on test data
Epoch 9, made 7 mistakes on training data and mistake rate of 0.03494623655913978 on test data
5.765453815460205
