In [1]:
import numpy as np
from liblinearutil import *
import csv

In [2]:
def loadData(filename):
    """Load a whitespace-separated numeric table into a float32 array.

    Every non-blank line of the file becomes one row; the fields of a line
    are separated by any run of whitespace.

    Parameters
    ----------
    filename : str or path-like
        Path of the text file to read.

    Returns
    -------
    numpy.ndarray
        Array of dtype float32 with one row per non-blank line.

    Raises
    ------
    ValueError
        If a field cannot be converted to a number or the rows do not all
        have the same number of fields.
    """
    with open(filename, 'r') as file:
        # str.split() without an argument collapses repeated whitespace and
        # ignores leading/trailing whitespace, so doubled spaces or a padded
        # line no longer yield empty fields; blank lines (e.g. a trailing
        # newline at the end of the file) are skipped instead of crashing.
        dataSet = [line.split() for line in file if line.strip()]
    return np.array(dataSet, dtype="float32")

def nonlinearExpansion(data):
    """Build the second-order polynomial feature vector for one data row.

    The last element of ``data`` is dropped (the notebook uses it as the
    label).  The result is a plain list laid out as: the constant 1, the
    remaining raw values, then every product ``x[i]*x[j]`` with ``i <= j``
    ordered by ``i`` first and ``j`` second.

    Parameters
    ----------
    data : object with a ``tolist()`` method (e.g. a 1-D numpy array)
        One row of the data set, features first and label last.

    Returns
    -------
    list
        The expanded feature vector.
    """
    features = data.tolist()[:-1]
    expanded = [1]
    expanded.extend(features)
    # Slicing from the current position pairs each value with itself and
    # with every later value, i.e. exactly the index pairs with i <= j.
    for pos, left in enumerate(features):
        for right in features[pos:]:
            expanded.append(left * right)
    return expanded

In [3]:
# Read both data files, then split every row into its expanded feature
# vector and its label (the last column of the row).
trainData  = loadData('hw4_train.dat')
testData   = loadData('hw4_test.dat')
inputTrain = list(map(nonlinearExpansion, trainData))
labelTrain = [row[-1] for row in trainData]
inputTest  = list(map(nonlinearExpansion, testData))
labelTest  = [row[-1] for row in testData]

In [4]:
def getW(Lambda, labels=labelTrain, inputs=inputTrain):
    """Train a LIBLINEAR model for regularization strength ``Lambda``.

    The LIBLINEAR cost parameter is set to ``1/(2*Lambda)`` and the model is
    trained with solver ``-s 0`` (L2-regularized logistic regression
    according to the LIBLINEAR README) and stopping tolerance ``-e 0.000001``.

    Parameters
    ----------
    Lambda : number
        Regularization strength; must be non-zero because it is inverted.
    labels : sequence
        Training labels, one per row of ``inputs``.
    inputs : sequence of sequences
        Training feature vectors.

    Returns
    -------
    The first element of ``model.get_decfun()`` (the weight vector).
    """
    cost = 1/(2*Lambda)
    options = ' '.join(['-s 0 -c', str(cost), '-e 0.000001'])
    trained = train(problem(labels, inputs), parameter(options))
    weights = trained.get_decfun()[0]
    return weights

def Ein(w, labels=labelTrain, inputs=inputTrain):
    """Return the fraction of samples that ``w`` misclassifies.

    A sample ``(x, y)`` counts as an error when ``dot(w, x) * y <= 0``, so a
    score of exactly zero is treated as a mistake.  Despite the name, the
    notebook also calls this with the test set to measure out-of-sample
    error.

    Parameters
    ----------
    w : sequence of numbers
        Weight vector.
    labels : sequence
        Labels, paired positionally with ``inputs``.
    inputs : sequence of sequences
        Feature vectors with the same length as ``w``.

    Returns
    -------
    The mean of the per-sample error flags, as computed by ``np.average``.
    """
    mistakes = []
    for features, target in zip(inputs, labels):
        score = np.dot(w, features)
        mistakes.append(score * target <= 0)
    return np.average(mistakes)

In [5]:
# question 16: error on the test data for each regularization strength
lambdaList = [1e-4, 1e-2, 1, 1e2, 1e4]
wList = list(map(getW, lambdaList))
outcomeList = [
    Ein(weights, labels=labelTest, inputs=inputTest)
    for weights in wList
]
print('outcome:', [round(err, 4) for err in outcomeList])

outcome: [0.1333, 0.13, 0.1933, 0.2567, 0.4833]


In [6]:
# question 17: error on the training data for each regularization strength
lambdaList = [1e-4, 1e-2, 1, 1e2, 1e4]
wList = list(map(getW, lambdaList))
outcomeList = list(map(Ein, wList))
print('outcome:', [round(err, 4) for err in outcomeList])

outcome: [0.09, 0.1, 0.13, 0.195, 0.535]


In [7]:
# question 18: train on the first 120 training samples and measure the
# error on the remaining ones
lambdaList = [1e-4, 1e-2, 1, 1e2, 1e4]
wList = [
    getW(strength, labels=labelTrain[:120], inputs=inputTrain[:120])
    for strength in lambdaList
]
outcomeList = [
    Ein(weights, labels=labelTrain[120:], inputs=inputTrain[120:])
    for weights in wList
]
print('outcome:', [round(err, 4) for err in outcomeList])

outcome: [0.2, 0.1375, 0.2375, 0.2625, 0.575]


In [8]:
# question 19: retrain on the full training data with Lambda = 1e-2 and
# report the error on the test data
w_opt = getW(1e-2)
print('ans:', Ein(w_opt, inputs=inputTest, labels=labelTest))

ans: 0.13


In [9]:
# question 20
def foldSeg(i, foldSize=40, inputs=None, labels=None):
    """Split the data into the ``i``-th validation fold and the remainder.

    Fold ``i`` covers the sample positions ``i*foldSize`` up to (but not
    including) ``(i+1)*foldSize``.  Positions beyond the end of the data are
    simply absent, so a fold index past the data yields an empty held-out
    part.

    Parameters
    ----------
    i : int
        Zero-based fold index.
    foldSize : int, optional
        Number of consecutive samples per fold (default 40).
    inputs : sequence, optional
        Feature vectors to split; defaults to the notebook's ``inputTrain``.
    labels : sequence, optional
        Labels to split; defaults to the notebook's ``labelTrain``.

    Returns
    -------
    tuple of four lists
        ``(remInputs, dropInputs, remLabels, dropLabels)`` where the "drop"
        lists hold the held-out fold and the "rem" lists everything else,
        all in their original order.

    Raises
    ------
    ValueError
        If ``inputs`` and ``labels`` differ in length.
    """
    # Look the globals up at call time (not as default values) so that a
    # re-run of the data-loading cell is picked up without redefining this
    # function.
    if inputs is None:
        inputs = inputTrain
    if labels is None:
        labels = labelTrain
    if len(inputs) != len(labels):
        raise ValueError('inputs and labels must have the same length')

    start, stop = i*foldSize, (i+1)*foldSize
    remInputs, dropInputs, remLabels, dropLabels = [], [], [], []
    # A single pass keeps features and labels aligned and replaces the four
    # separate filtered scans over the data.
    for idx, (x, y) in enumerate(zip(inputs, labels)):
        if start <= idx < stop:
            dropInputs.append(x)
            dropLabels.append(y)
        else:
            remInputs.append(x)
            remLabels.append(y)
    return remInputs, dropInputs, remLabels, dropLabels

def Ecv(Lambda):
    """Return the 5-fold cross-validation error for ``Lambda``.

    For each of the five folds produced by ``foldSeg``, a model is trained
    on the remaining samples with ``getW`` and scored on the held-out fold
    with ``Ein``; the five fold errors are averaged.

    Parameters
    ----------
    Lambda : number
        Regularization strength passed through to ``getW``.

    Returns
    -------
    The average of the five held-out fold errors.
    """
    def foldError(fold):
        # Train on everything except this fold, evaluate on the fold itself.
        trainX, heldOutX, trainY, heldOutY = foldSeg(fold)
        weights = getW(Lambda, labels=trainY, inputs=trainX)
        return Ein(weights, labels=heldOutY, inputs=heldOutX)

    # Each fold contributes one fifth of its error, accumulated in fold order.
    return sum(foldError(fold)/5. for fold in range(5))

# Cross-validation error for each regularization strength.
lambdaList = [1e-4, 1e-2, 1, 1e2, 1e4]
outcomeList = list(map(Ecv, lambdaList))
print('outcome:', [round(err, 4) for err in outcomeList])

outcome: [0.145, 0.12, 0.155, 0.18, 0.52]
