# Supervised Learning CW2

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from scipy.spatial.distance import cdist

### 5. Repeat 1 and 2.

Load files.

In [2]:
# load zipcombo
zipcombo_data = np.loadtxt('zipcombo.dat')
digits_data = zipcombo_data[:, 0]  # First column contains the digit labels
pixels_data = zipcombo_data[:, 1:]  # Remaining columns contain the pixel values

We now implement the One VS All Gaussian Kernel Perceptron which we will use to classify data into one of k classes.

In [3]:
class OneVSAllGaussianKernelPerceptron:
    def __init__(self, classes, width):
        self.classes = classes
        self.width = width
        self.alphas = None
        self.XTrain = None

    def _getKMatrixGaussian(self, mat1, mat2, c):
        return np.exp(-c * cdist(mat1, mat2) ** 2)
    
    def _computeScoreForEachClassifier(self, alphas, matrixK, sampleIdx):
        return np.dot(alphas, matrixK[:, sampleIdx])
    
    def _sign(self, x):
        return -1 if x < 0 else 1
    
    def train(self, XTrain, yTrain, max_epochs=10, error_change_threshold=1e-4):
        num_samples, num_features = XTrain.shape
        # each row in alpha represents the coefficients for a separate classifer
        # k * m
        self.alphas = np.zeros((self.classes, num_samples))
        self.XTrain = XTrain

        matrixK = self._getKMatrixGaussian(self.XTrain, self.XTrain, self.width)
        trainErrorPerEpochList = []

        for epoch in range(max_epochs):
            mistakes = 0
            for i in range(num_samples):
                actualClassLabel = yTrain[i]

                preds = self._computeScoreForEachClassifier(self.alphas, matrixK, i)

                maxConfidence = float('-inf')
                maxConfidenceClass = -1

                for j in range(self.classes):
                    yTrue = 1 if actualClassLabel == j else -1

                    # update alphas in case of misclassification for class j
                    if yTrue * preds[j] <= 0:
                        self.alphas[j, i] = self.alphas[j, i] - self._sign(preds[j])

                    # update max confidence
                    if preds[j] > maxConfidence:
                        maxConfidence = preds[j]
                        maxConfidenceClass = j

                if maxConfidenceClass != actualClassLabel:
                    mistakes += 1

            trainErrorPerEpoch = mistakes / num_samples
            trainErrorPerEpochList.append(trainErrorPerEpoch)
            print("Epoch: {}, Train Error: {}".format(epoch, trainErrorPerEpoch))
            
            # early stopping condition based on error change threshold
            if epoch > 0 and abs(trainErrorPerEpoch - trainErrorPerEpochList[epoch - 1]) < error_change_threshold:
                print(f"Stopping early as convergence achieved at epoch {epoch}.")
                break

        return trainErrorPerEpochList[-1]

    def test(self, Xtest, yTest):
        num_samples, num_features = Xtest.shape
        matrixK = self._getKMatrixGaussian(self.XTrain, Xtest, self.width)
        mistakes = 0

        for i in range(num_samples):
            actualClassLabel = yTest[i]
            preds = self._computeScoreForEachClassifier(self.alphas, matrixK, i)
            maxConfidenceClass = np.argmax(preds)

            if maxConfidenceClass != actualClassLabel:
                mistakes += 1

        testError = mistakes / num_samples

        return testError

1. Basic Results.

In [4]:
numClasses = 10
widths = [10 ** i for i in range(-6, 1)]

allTrainErrorsForEachWidth = []
allTestErrorsForEachWidth = []

for width in widths:
    curTrainErrors = []
    curTestErrors = []

    for run in range(20):
        XTrain, XTest, yTrain, yTest = train_test_split(pixels_data, digits_data, test_size=0.2)

        model = OneVSAllGaussianKernelPerceptron(numClasses, width)
        trainError = model.train(XTrain, yTrain)
        testError = model.test(XTest, yTest)

        curTrainErrors.append(trainError)
        curTestErrors.append(testError)
    
    allTrainErrorsForEachWidth.append(curTrainErrors)
    allTestErrorsForEachWidth.append(curTestErrors)

Epoch: 0, Train Error: 0.7285560634579188
Epoch: 1, Train Error: 0.7237160527023393
Epoch: 2, Train Error: 0.7221027157838128
Epoch: 3, Train Error: 0.7231782737294972
Epoch: 4, Train Error: 0.722909384243076
Epoch: 5, Train Error: 0.7242538316751815
Epoch: 6, Train Error: 0.7238504974455499
Epoch: 7, Train Error: 0.722909384243076
Epoch: 8, Train Error: 0.6867437483194407
Epoch: 9, Train Error: 0.6581070180155956
Epoch: 0, Train Error: 0.7200860446356547
Epoch: 1, Train Error: 0.7167249260553913
Epoch: 2, Train Error: 0.7157838128529175
Epoch: 3, Train Error: 0.7163215918257596
Epoch: 4, Train Error: 0.7160527023393385
Epoch: 5, Train Error: 0.716187147082549
Epoch: 6, Train Error: 0.7165904813121807
Epoch: 7, Train Error: 0.7079860177467061
Epoch: 8, Train Error: 0.6630814735143856
Epoch: 9, Train Error: 0.6399569776821726
Epoch: 0, Train Error: 0.7268082817961817
Epoch: 1, Train Error: 0.7211616025813391
Epoch: 2, Train Error: 0.7221027157838128
Epoch: 3, Train Error: 0.721699381554

In [6]:
# calculate mean and standard deviation for train and test errors for each width
# display results
meanTrainErrorsForEachWidth = [np.mean(errors) for errors in allTrainErrorsForEachWidth]
stdTrainErrorsForEachWidth = [np.std(errors) for errors in allTrainErrorsForEachWidth]
meanTestErrorsForEachWidth = [np.mean(errors) for errors in allTestErrorsForEachWidth]
stdTestErrorsForEachWidth = [np.std(errors) for errors in allTestErrorsForEachWidth]

for idx, width in enumerate(widths):
    print(f"Width: {width}, Mean Train Error: {meanTrainErrorsForEachWidth[idx]}, Std Train Error: {stdTrainErrorsForEachWidth[idx]}, Mean Test Error: {meanTestErrorsForEachWidth[idx]}, Std Test Error: {stdTestErrorsForEachWidth[idx]}")

Width: 1e-06, Mean Train Error: 0.6695012100026889, Std Train Error: 0.030226272347960367, Mean Test Error: 0.6661290322580645, Std Test Error: 0.08938776355996025
Width: 1e-05, Mean Train Error: 0.2484942188760419, Std Train Error: 0.0044570206685134225, Mean Test Error: 0.22868279569892472, Std Test Error: 0.06612362837436161
Width: 0.0001, Mean Train Error: 0.11972304382898631, Std Train Error: 0.003693072535512434, Mean Test Error: 0.12379032258064515, Std Test Error: 0.03129423681315228
Width: 0.001, Mean Train Error: 0.04158375907502017, Std Train Error: 0.002667669656107041, Mean Test Error: 0.07096774193548389, Std Test Error: 0.024921544138323558
Width: 0.01, Mean Train Error: 0.0004436676525947836, Std Train Error: 0.000350846688644794, Mean Test Error: 0.027096774193548383, Std Test Error: 0.002764507555308052
Width: 0.1, Mean Train Error: 0.00012100026888948643, Std Train Error: 0.0001526998748534626, Mean Test Error: 0.0521505376344086, Std Test Error: 0.005008952558646071

2. Cross-Validation over a certain range. As we see the best test performance is for the width 0.1 so we will focus around that range. In particular, we choose to cross validate 0.0085, 0.009, 0.0095, 0.01, 0.0105, 0.011, 0.0115 (i.e., + or - 0.0005 from 0.01). Note this time we run the code for 2 epochs due to computational limitations of our device (it would otherwise take too long to run).

In [6]:
def createFolds(XTrain, numFolds):
    num_samples, num_features = XTrain.shape
    foldSize = num_samples // numFolds
    indices = np.arange(num_samples)

    foldsRes = []
    for i in range(numFolds):
        start = i * foldSize
        end = (i+1) * foldSize
        validationIndices = indices[start:end]
        trainIndices = np.concatenate((indices[:start], indices[end:]))
        foldsRes.append((trainIndices, validationIndices))

    return foldsRes

In [7]:
numClasses = 10
widths = [0.0085, 0.009, 0.0095, 0.01, 0.0105, 0.011, 0.0115]

cStarsData = [] # list of tuples (cStar, testError)

for run in range(20):

    meanErrorsPerWidth = []

    XTrain, XTest, yTrain, yTest = train_test_split(pixels_data, digits_data, test_size=0.2)

    # note we get 20 c*s
    for widthC in widths:

        curTrainErrorsListForC = [] # contains train errors for each fold iteration (specific to each width)
        curValidationErrorsListForC = [] # contains validation errors for each fold iteration (specific to each width)

        folds = createFolds(XTrain, 5)
        for trainIndices, validationIndices in folds:
            XTrainFold = XTrain[trainIndices]
            yTrainFold = yTrain[trainIndices]
            XValidationFold = XTrain[validationIndices]
            yValidationFold = yTrain[validationIndices]

            model = OneVSAllGaussianKernelPerceptron(numClasses, widthC)
            trainError = model.train(XTrainFold, yTrainFold, max_epochs=2)
            validationError = model.test(XValidationFold, yValidationFold)

            curTrainErrorsListForC.append(trainError)
            curValidationErrorsListForC.append(validationError)

        meanErrorsPerWidth.append(np.mean(curValidationErrorsListForC))
        print(f"Run: {run}, Width: {widthC}, Mean Validation Error: {np.mean(curValidationErrorsListForC)}")

    cStar = widths[np.argmin(meanErrorsPerWidth)]

    model = OneVSAllGaussianKernelPerceptron(numClasses, cStar)

    trainError = model.train(XTrain, yTrain, max_epochs=2)
    testError = model.test(XTest, yTest)
    cStarsData.append((cStar, testError))

Epoch: 0, Train Error: 0.08485968744748781
Epoch: 1, Train Error: 0.013107040833473365
Epoch: 0, Train Error: 0.08838850613342296
Epoch: 1, Train Error: 0.013779196773651487
Epoch: 0, Train Error: 0.09006889598386826
Epoch: 1, Train Error: 0.014787430683918669
Epoch: 0, Train Error: 0.08385145353722064
Epoch: 1, Train Error: 0.011426650983028062
Epoch: 0, Train Error: 0.08889262308855654
Epoch: 1, Train Error: 0.014787430683918669
Run: 0, Width: 0.0085, Mean Validation Error: 0.032817753866846
Epoch: 0, Train Error: 0.08401949252226516
Epoch: 1, Train Error: 0.013947235758696018
Epoch: 0, Train Error: 0.08788438917828936
Epoch: 1, Train Error: 0.01529154763905226
Epoch: 0, Train Error: 0.08670811628297766
Epoch: 1, Train Error: 0.012266845908250714
Epoch: 0, Train Error: 0.0820030247017308
Epoch: 1, Train Error: 0.011258611997983533
Epoch: 0, Train Error: 0.08469164846244329
Epoch: 1, Train Error: 0.012770962863384305
Run: 0, Width: 0.009, Mean Validation Error: 0.038601210490921314
Ep

Let's look at the results for cross-validation.

In [8]:
# We print all the width*
print("Widths* Data")
curIdx = 1
for cWidth, error in cStarsData:
    print(f"Run: {curIdx}, Width* (best width parameter): {cWidth}, Test Error: {error}")
    curIdx += 1

Widths* Data
Run: 1, Width* (best width parameter): 0.0085, Test Error: 0.023655913978494623
Run: 2, Width* (best width parameter): 0.0105, Test Error: 0.03924731182795699
Run: 3, Width* (best width parameter): 0.0115, Test Error: 0.03333333333333333
Run: 4, Width* (best width parameter): 0.011, Test Error: 0.03494623655913978
Run: 5, Width* (best width parameter): 0.0115, Test Error: 0.026344086021505377
Run: 6, Width* (best width parameter): 0.0095, Test Error: 0.03333333333333333
Run: 7, Width* (best width parameter): 0.011, Test Error: 0.02956989247311828
Run: 8, Width* (best width parameter): 0.011, Test Error: 0.027419354838709678
Run: 9, Width* (best width parameter): 0.0105, Test Error: 0.03870967741935484
Run: 10, Width* (best width parameter): 0.0115, Test Error: 0.03333333333333333
Run: 11, Width* (best width parameter): 0.0115, Test Error: 0.031720430107526884
Run: 12, Width* (best width parameter): 0.0115, Test Error: 0.03924731182795699
Run: 13, Width* (best width paramet

In [9]:
# compute mean c* with std and mean test error with std
allWidthStars = [res[0] for res in cStarsData]
meanCStar = np.mean(allWidthStars)
stdCStar = np.std(allWidthStars)

testErrorsForCStar = [res[1] for res in cStarsData]
meanTestErrorForCStar = np.mean(testErrorsForCStar)
stdTestErrorForCStar = np.std(testErrorsForCStar)

print(f"Mean c*: {meanCStar}, Std c*: {stdCStar}, Mean Test Error: {meanTestErrorForCStar}, Std Test Error: {stdTestErrorForCStar}")

Mean c*: 0.010750000000000001, Std c*: 0.0009552486587271399, Mean Test Error: 0.032983870967741934, Std Test Error: 0.004328791744783696
