# Supervised Learning CW2

In [2]:
import numpy as np
from sklearn.model_selection import train_test_split
from itertools import combinations

### 6. Repeat parts 1 and 2 with an alternative approach (one-vs-one).

Load files.

In [3]:
# Read the whole table once, then slice it column-wise.
zipcombo_data = np.loadtxt('zipcombo.dat')
# Column 0 holds the digit label; every remaining column holds a pixel value.
digits_data, pixels_data = zipcombo_data[:, 0], zipcombo_data[:, 1:]

We now implement the one-vs-one kernel perceptron, which we will use to classify each sample into one of k classes.

In [9]:
class OneVSOnePolynomialKernelPerceptron:
    """Multi-class kernel perceptron built from one-vs-one binary classifiers.

    One binary perceptron is kept for every pair of classes ``(a, b)`` with
    ``a < b``.  A non-negative score is a vote for ``a`` (the "positive"
    class of the pair) and a negative score is a vote for ``b``.  The
    predicted class is the one collecting the most votes; ties are broken in
    favour of the smallest class label.

    Class labels must be the values ``0 .. classes - 1`` (float-valued labels
    such as ``3.0`` compare equal to the integer label and work as well).

    Attributes set by ``train``:
        alphas: array of shape ``(classes * (classes - 1) // 2, m)``; row
            ``r`` holds the dual coefficients of the r-th pairwise classifier
            over the ``m`` training samples.
        XTrain: the training samples, kept because prediction needs the
            kernel between training and test samples.
    """

    def __init__(self, classes, polynomialDegree):
        """Store the number of classes and the polynomial kernel degree.

        Raises:
            ValueError: if fewer than two classes are requested (no pair of
                classes exists, so no classifier could vote).
        """
        if classes < 2:
            raise ValueError("classes must be at least 2 for one-vs-one classification")
        self.classes = classes
        self.polynomialDegree = polynomialDegree
        self.alphas = None
        self.XTrain = None

    def _polynomialKernel(self, p, q, d):
        """Return the polynomial kernel ``(p . q) ** d``."""
        return (np.dot(p, q)) ** d

    def _getKMatrixPolynomial(self, XTrain, degree):
        """Return the ``(m, m)`` kernel matrix of the training set with itself."""
        return self._getKMatrixPolynomialTest(XTrain, XTrain, degree)

    def _computeScoreForEachClassifier(self, alphas, matrixK, sampleIdx):
        """Return one score per pairwise classifier for column ``sampleIdx`` of ``matrixK``."""
        return np.dot(alphas, matrixK[:, sampleIdx])

    def _getKMatrixPolynomialTest(self, XTrain, XTest, degree):
        """Return the ``(m_train, m_test)`` kernel matrix between two sample sets.

        Entry ``[i, j]`` is the kernel between training sample ``i`` and test
        sample ``j``.  The whole matrix comes from a single matrix product
        instead of one kernel call per entry.
        """
        # Cast to float first so integer inputs cannot overflow in ``** degree``.
        XTrain = np.asarray(XTrain, dtype=float)
        XTest = np.asarray(XTest, dtype=float)
        return self._polynomialKernel(XTrain, XTest.T, degree)

    def _sign(self, x):
        """Return -1 for negative ``x`` and 1 otherwise (zero counts as positive)."""
        return -1 if x < 0 else 1

    def _pairClassArrays(self):
        """Return the positive and the negative class of every pair as two integer arrays.

        The order matches ``itertools.combinations(range(classes), 2)``, which
        is also the row order of ``self.alphas``.
        """
        binaryPairs = list(combinations(range(self.classes), 2))
        posClasses = np.array([pair[0] for pair in binaryPairs], dtype=int)
        negClasses = np.array([pair[1] for pair in binaryPairs], dtype=int)
        return posClasses, negClasses

    def _voteFromScores(self, scores, posClasses, negClasses):
        """Turn pairwise scores into per-classifier labels and a majority-vote label.

        Returns:
            ``(pairLabels, predictedClassLabel)`` where ``pairLabels[r]`` is
            the class chosen by the r-th pairwise classifier.
        """
        # Same convention as _sign: only strictly negative scores vote for the second class.
        pairLabels = np.where(scores < 0, negClasses, posClasses)
        votes = np.bincount(pairLabels, minlength=self.classes)
        # argmax returns the first maximum, i.e. the smallest label among tied classes.
        return pairLabels, int(np.argmax(votes))

    def train(self, XTrain, yTrain, max_epochs=10, error_change_threshold=1e-4):
        """Fit all pairwise perceptrons with online updates.

        Args:
            XTrain: array of shape ``(m, n_features)``.
            yTrain: sequence of ``m`` class labels.
            max_epochs: maximum number of passes over the training set.
            error_change_threshold: training stops early once the training
                error changes by less than this between consecutive epochs.

        Returns:
            The training error (fraction of misclassified samples, measured
            online during the pass) of the last epoch that was run.

        Raises:
            ValueError: if ``max_epochs < 1``, the training set is empty, or
                ``XTrain`` and ``yTrain`` disagree in length.
        """
        if max_epochs < 1:
            raise ValueError("max_epochs must be at least 1")
        XTrain = np.asarray(XTrain)
        num_samples = XTrain.shape[0]
        if num_samples == 0:
            raise ValueError("XTrain must contain at least one sample")
        if len(yTrain) != num_samples:
            raise ValueError("XTrain and yTrain must contain the same number of samples")

        posClasses, negClasses = self._pairClassArrays()
        # each row in alpha represents the coefficients for a separate classifier
        # k*(k-1)/2 * m
        self.alphas = np.zeros((len(posClasses), num_samples))
        self.XTrain = XTrain

        matrixK = self._getKMatrixPolynomial(XTrain, self.polynomialDegree)
        trainErrorPerEpochList = []

        for epoch in range(max_epochs):
            mistakes = 0
            for i in range(num_samples):
                actualClassLabel = yTrain[i]

                scores = self._computeScoreForEachClassifier(self.alphas, matrixK, i)
                pairLabels, predictedClassLabel = self._voteFromScores(scores, posClasses, negClasses)

                if predictedClassLabel != actualClassLabel:
                    mistakes += 1

                # Only classifiers whose pair contains the true class are updated,
                # and only when they voted for the other class of the pair.
                # The first class of a pair is the +1 side, the second the -1 side.
                wrong = pairLabels != actualClassLabel
                self.alphas[(posClasses == actualClassLabel) & wrong, i] += 1
                self.alphas[(negClasses == actualClassLabel) & wrong, i] -= 1

            trainErrorPerEpoch = mistakes / num_samples
            trainErrorPerEpochList.append(trainErrorPerEpoch)
            print("Epoch: {}, Train Error: {}".format(epoch, trainErrorPerEpoch))

            # early stopping condition based on error change threshold
            if epoch > 0 and abs(trainErrorPerEpoch - trainErrorPerEpochList[epoch - 1]) < error_change_threshold:
                print(f"Stopping early as convergence achieved at epoch {epoch}.")
                break

        return trainErrorPerEpochList[-1]

    def test(self, XTest, yTest):
        """Return the error rate of the trained model on ``XTest`` / ``yTest``.

        Raises:
            RuntimeError: if called before ``train``.
            ValueError: if the test set is empty or ``XTest`` and ``yTest``
                disagree in length.
        """
        if self.alphas is None or self.XTrain is None:
            raise RuntimeError("train() must be called before test()")
        XTest = np.asarray(XTest)
        num_samples = XTest.shape[0]
        if num_samples == 0:
            raise ValueError("XTest must contain at least one sample")
        if len(yTest) != num_samples:
            raise ValueError("XTest and yTest must contain the same number of samples")

        posClasses, negClasses = self._pairClassArrays()
        matrixK = self._getKMatrixPolynomialTest(self.XTrain, XTest, self.polynomialDegree)
        mistakes = 0

        for i in range(num_samples):
            scores = self._computeScoreForEachClassifier(self.alphas, matrixK, i)
            _, predictedClassLabel = self._voteFromScores(scores, posClasses, negClasses)

            if predictedClassLabel != yTest[i]:
                mistakes += 1

        return mistakes / num_samples

1. Basic Results.

In [10]:
# Basic results: for every polynomial degree, train and evaluate the model on
# several random 80/20 splits and record the final train and test errors.
numClasses = 10
degrees = list(range(1, 8))
numRuns = 20

allTrainErrorsForEachDegree = []  # one list of per-run train errors per degree
allTestErrorsForEachDegree = []   # one list of per-run test errors per degree

for deg in degrees:
    curTrainErrors = []
    curTestErrors = []

    for run in range(numRuns):
        # Seeding the split with the run index makes the experiment reproducible
        # and gives every degree the same sequence of splits, so the degrees
        # are compared on identical data.
        XTrain, XTest, yTrain, yTest = train_test_split(
            pixels_data, digits_data, test_size=0.2, random_state=run
        )

        model = OneVSOnePolynomialKernelPerceptron(numClasses, deg)
        trainError = model.train(XTrain, yTrain)
        testError = model.test(XTest, yTest)

        curTrainErrors.append(trainError)
        curTestErrors.append(testError)

    allTrainErrorsForEachDegree.append(curTrainErrors)
    allTestErrorsForEachDegree.append(curTestErrors)

Epoch: 0, Train Error: 0.13041140091422426
Epoch: 1, Train Error: 0.07394460876579725
Epoch: 2, Train Error: 0.06520570045711213
Epoch: 3, Train Error: 0.05324011831137403
Epoch: 4, Train Error: 0.04840010755579457
Epoch: 5, Train Error: 0.046786770637268084
Epoch: 6, Train Error: 0.04167787039526755
Epoch: 7, Train Error: 0.03562785695079322
Epoch: 8, Train Error: 0.0345522990051089
Epoch: 9, Train Error: 0.03267007260016133
Epoch: 0, Train Error: 0.13807475127722507
Epoch: 1, Train Error: 0.0790535090077978
Epoch: 2, Train Error: 0.06628125840279644
Epoch: 3, Train Error: 0.054450121000268886
Epoch: 4, Train Error: 0.050551223447163215
Epoch: 5, Train Error: 0.043694541543425655
Epoch: 6, Train Error: 0.039257865017477814
Epoch: 7, Train Error: 0.03818230707179349
Epoch: 8, Train Error: 0.03858564130142511
Epoch: 9, Train Error: 0.033342296316214035
Epoch: 0, Train Error: 0.13538585641301426
Epoch: 1, Train Error: 0.07878461952137672
Epoch: 2, Train Error: 0.0634579187953751
Epoch: 3

In [11]:
# Mean and standard deviation of the train and test errors for each degree.
meanTrainErrorsForEachDegree = [np.mean(errors) for errors in allTrainErrorsForEachDegree]
stdTrainErrorsForEachDegree = [np.std(errors) for errors in allTrainErrorsForEachDegree]
meanTestErrorsForEachDegree = [np.mean(errors) for errors in allTestErrorsForEachDegree]
stdTestErrorsForEachDegree = [np.std(errors) for errors in allTestErrorsForEachDegree]

# note the values displayed are not percentages (should you want the percentage, multiply by 100)
# Pair each degree with its statistics by position instead of indexing with
# `deg - 1`, which is only correct when `degrees` starts at 1 with step 1.
for deg, meanTrain, stdTrain, meanTest, stdTest in zip(
    degrees,
    meanTrainErrorsForEachDegree,
    stdTrainErrorsForEachDegree,
    meanTestErrorsForEachDegree,
    stdTestErrorsForEachDegree,
):
    print(f"Degree: {deg}, Mean Train Error: {meanTrain}, Std Train Error: {stdTrain}, Mean Test Error: {meanTest}, Std Test Error: {stdTest}")

Degree: 1, Mean Train Error: 0.03154745899435333, Std Train Error: 0.002768150006868341, Mean Test Error: 0.0628225806451613, Std Test Error: 0.006458945206153225
Degree: 2, Mean Train Error: 0.002278838397418661, Std Train Error: 0.0015670023624889552, Mean Test Error: 0.03456989247311828, Std Test Error: 0.004485624060992721
Degree: 3, Mean Train Error: 0.0007663350363000807, Std Train Error: 0.0005226080055277443, Mean Test Error: 0.03411290322580644, Std Test Error: 0.004046480294637213
Degree: 4, Mean Train Error: 0.0006318902930895402, Std Train Error: 0.00046978771738790396, Mean Test Error: 0.031478494623655914, Std Test Error: 0.00379584741858244
Degree: 5, Mean Train Error: 0.0005108900242000539, Std Train Error: 0.00027749082334700584, Mean Test Error: 0.03206989247311828, Std Test Error: 0.003881686920893103
Degree: 6, Mean Train Error: 0.0004503898897553106, Std Train Error: 0.0002860704871061201, Mean Test Error: 0.03266129032258065, Std Test Error: 0.0035899931255773563


2. Cross-validation.

In [12]:
def createFolds(XTrain, numFolds):
    """Split the row indices of ``XTrain`` into contiguous cross-validation folds.

    Every sample appears in exactly one validation fold.  When the number of
    samples is not divisible by ``numFolds`` the first folds are one sample
    larger, so no trailing samples are left out of validation.

    Args:
        XTrain: 2-D array; only its number of rows is used.
        numFolds: number of folds to create.

    Returns:
        A list of ``numFolds`` tuples ``(trainIndices, validationIndices)`` of
        integer index arrays.

    Raises:
        ValueError: if ``numFolds`` is smaller than 1 or larger than the
            number of samples (which would produce empty validation folds).
    """
    num_samples = XTrain.shape[0]
    if numFolds < 1 or numFolds > num_samples:
        raise ValueError("numFolds must be between 1 and the number of samples")

    indices = np.arange(num_samples)

    foldsRes = []
    for validationIndices in np.array_split(indices, numFolds):
        # Folds are contiguous, so the training part is everything before and after.
        start = validationIndices[0]
        end = validationIndices[-1] + 1
        trainIndices = np.concatenate((indices[:start], indices[end:]))
        foldsRes.append((trainIndices, validationIndices))

    return foldsRes

In [14]:
# Cross-validation: in every run, pick the degree d* with the lowest mean
# 5-fold validation error on the training split, retrain on the full training
# split with d*, and record the test error.
numClasses = 10
degrees = list(range(1, 8))
numRuns = 20
numFolds = 5
maxEpochs = 2  # epochs used for every fit in this experiment

degreeStarsData = [] # list of tuples (degreeStar, testError)

for run in range(numRuns):

    meanErrorsPerDegree = [] # we will use this to find d* i.e., look for best mean validation error

    # Seed the split with the run index so the experiment is reproducible.
    XTrain, XTest, yTrain, yTest = train_test_split(
        pixels_data, digits_data, test_size=0.2, random_state=run
    )

    # The folds depend only on the size of the training split, not on the
    # degree, so they are built once per run.
    folds = createFolds(XTrain, numFolds)

    # note we get one d* per run
    for deg in degrees:

        curTrainErrorsListForDeg = [] # contains train errors for each fold iteration (specific to each degree)
        curValidationErrorsListForDeg = [] # contains validation errors for each fold iteration (specific to each degree)

        for trainIndices, validationIndices in folds:
            XTrainFold = XTrain[trainIndices]
            yTrainFold = yTrain[trainIndices]
            XValidationFold = XTrain[validationIndices]
            yValidationFold = yTrain[validationIndices]

            model = OneVSOnePolynomialKernelPerceptron(numClasses, deg)
            trainError = model.train(XTrainFold, yTrainFold, max_epochs=maxEpochs)
            validationError = model.test(XValidationFold, yValidationFold)

            curTrainErrorsListForDeg.append(trainError)
            curValidationErrorsListForDeg.append(validationError)

        meanValidationError = np.mean(curValidationErrorsListForDeg)
        meanErrorsPerDegree.append(meanValidationError)
        print(f"Run: {run}, Degree: {deg}, Mean Validation Error: {meanValidationError}")

    # Look the best degree up in `degrees` rather than assuming the list starts at 1.
    degStar = degrees[int(np.argmin(meanErrorsPerDegree))] # get best d* for this run

    model = OneVSOnePolynomialKernelPerceptron(numClasses, degStar)
    trainError = model.train(XTrain, yTrain, max_epochs=maxEpochs)
    testError = model.test(XTest, yTest)
    degreeStarsData.append((degStar, testError))

Epoch: 0, Train Error: 0.1340951100655352
Epoch: 1, Train Error: 0.07998655688119644
Epoch: 0, Train Error: 0.14098470845236094
Epoch: 1, Train Error: 0.08015459586624096
Epoch: 0, Train Error: 0.13711981179633675
Epoch: 1, Train Error: 0.07712989413543943
Epoch: 0, Train Error: 0.13997647454209378
Epoch: 1, Train Error: 0.07931440094101831
Epoch: 0, Train Error: 0.138464123676693
Epoch: 1, Train Error: 0.07444127037472693
Run: 0, Degree: 1, Mean Validation Error: 0.09145931405514458
Epoch: 0, Train Error: 0.11040161317425642
Epoch: 1, Train Error: 0.04251386321626617
Epoch: 0, Train Error: 0.10737691144345488
Epoch: 1, Train Error: 0.0410015123508654
Epoch: 0, Train Error: 0.10452024869769787
Epoch: 1, Train Error: 0.04150562930599899
Epoch: 0, Train Error: 0.10334397580238615
Epoch: 1, Train Error: 0.04553856494706772
Epoch: 0, Train Error: 0.10384809275751974
Epoch: 1, Train Error: 0.03680053772475214
Run: 0, Degree: 2, Mean Validation Error: 0.053665097511768664
Epoch: 0, Train Err

In [15]:
# Report, for every run, the selected degree d* and the test error it achieved.
print("Degrees* Data")
for runNumber, (bestDegree, runTestError) in enumerate(degreeStarsData, start=1):
    print(f"Run: {runNumber}, Degree* (best degree parameter): {bestDegree}, Test Error: {runTestError}")

Degrees* Data
Run: 1, Degree* (best degree parameter): 7, Test Error: 0.04408602150537634
Run: 2, Degree* (best degree parameter): 6, Test Error: 0.03387096774193549
Run: 3, Degree* (best degree parameter): 7, Test Error: 0.038172043010752686
Run: 4, Degree* (best degree parameter): 5, Test Error: 0.03924731182795699
Run: 5, Degree* (best degree parameter): 5, Test Error: 0.044623655913978495
Run: 6, Degree* (best degree parameter): 6, Test Error: 0.037096774193548385
Run: 7, Degree* (best degree parameter): 6, Test Error: 0.03387096774193549
Run: 8, Degree* (best degree parameter): 4, Test Error: 0.046236559139784944
Run: 9, Degree* (best degree parameter): 6, Test Error: 0.02956989247311828
Run: 10, Degree* (best degree parameter): 6, Test Error: 0.04247311827956989
Run: 11, Degree* (best degree parameter): 6, Test Error: 0.044623655913978495
Run: 12, Degree* (best degree parameter): 7, Test Error: 0.03333333333333333
Run: 13, Degree* (best degree parameter): 5, Test Error: 0.0295698

In [16]:
# Summarise the cross-validation runs: mean/std of d* and of the test error.
allDegreeStars = [degStarOfRun for degStarOfRun, _ in degreeStarsData]
testErrorsForDegStar = [testErrorOfRun for _, testErrorOfRun in degreeStarsData]

meanDStar, stdDStar = np.mean(allDegreeStars), np.std(allDegreeStars)
meanTestErrorForDegStar, stdTestErrorForDegStar = (
    np.mean(testErrorsForDegStar),
    np.std(testErrorsForDegStar),
)

print(f"Mean d*: {meanDStar}, Std d*: {stdDStar}, Mean Test Error: {meanTestErrorForDegStar}, Std Test Error: {stdTestErrorForDegStar}")

Mean d*: 5.7, Std d*: 1.0049875621120892, Mean Test Error: 0.039112903225806454, Std Test Error: 0.005198329796607522
