In [8]:
#Load Data
import numpy as np
def load_data():
    data = np.loadtxt('in.dta')
    X, Y = np.split(data,[2], axis=-1)
    r,c = X.shape
    Y = Y.reshape((1,r))[0]
    data = np.loadtxt('out.dta')
    X_t, Y_t = np.split(data,[2], axis=-1)
    r,c = X_t.shape
    Y_t = Y_t.reshape((1,r))[0]
    return X, Y, X_t, Y_t

def convertData(X,Y):
    sz = len(Y)
    X = np.concatenate((np.ones((sz,1)),
                       X,
                       (X[:,0].reshape((sz,1)))**2,
                       (X[:,1].reshape((sz,1)))**2,
                       (X[:,0] * X[:,1]).reshape((sz,1)),
                       np.absolute(X[:,0]-X[:,1]).reshape((sz,1)),
                       np.absolute(X[:,0]+X[:,1]).reshape((sz,1))
                       ),axis=1)
    return X,Y

In [9]:
#Linear Regression
from math import sqrt
from math import exp
from math import log
from math import pow
from scipy.optimize import minimize
# Lecture 9 slide 11
def gZ(z):
    return np.exp(z)/(1 + np.exp(z))

def metrics(X,Y,W):
    r,c = X.shape
    y_pr = gZ(np.dot(X,W))
    g = np.where(y_pr > 0.5, 1.0,-1.0)
    Error = g - Y
    Ein = np.count_nonzero(Error)/r
    return Ein

# Lecture 9 slide 18
def cost(W,X,Y,lambd):
    m,c = X.shape
    theta = np.zeros(m)
    J = 0
    for i in range(0,m):
        J += log( 1 + exp(-Y[i] * np.dot(X[i,:],W.T)))
    return J/m + (lambd/m) * np.dot(W,W.T)

# Lecture 9 slide 23
def gradient(W,X,Y,lambd):
    m,c = X.shape
    theta = np.zeros(c)
    for i in range(0,m):
        theta+=(Y[i] * X[i,:]) / (1 + np.exp(Y[i] * np.dot(X[i,:],W.T)))
    theta = (-(1/m) * theta )
    return  theta * 0.01

def LinerRegression(X, Y,lambd):
    X,Y = convertData(X, Y)
    r,c = X.shape
    W = np.zeros(c)
    res = minimize(cost, W,args=(X,Y,lambd), method='BFGS', jac=gradient,options={'disp': False,'maxiter':100000})
    print(res.success)
    return res.x

In [10]:
from math import sqrt
import numpy as np

def findNearest(arrEIN,arrEOUT,Ein,Eout):
    Min = 100
    arrayIndex = ['a','b','c','d','e']
    minIdx = '-'
    for ein,eout,idR in zip(arrEIN,arrEOUT,arrayIndex):
        tm = sqrt((ein-Ein)**2 + (eout-Eout)**2)
        if tm < Min:
            Min = tm
            minIdx = idR
    print('Response value = ' + minIdx)

def printError(X_train, Y_train, X_test, Y_test,W):
    X_train,Y_train = convertData(X_train, Y_train)
    EIN = metrics(X_train,Y_train,W)
    print('EIN='+str(EIN))

    X_test,Y_test = convertData(X_test,Y_test)
    EOUT = metrics(X_test,Y_test,W)
    print('EOUT='+str(EOUT))
    return EIN,EOUT

def exercise(arrEIN,arrEOUT,k = None):
    if k is None:
        lambd = 0
    else:
        lambd = pow(10,k)
    print('---')
    print('lambda='+str(lambd))
    X_train, Y_train, X_test, Y_test = load_data()
    W = LinerRegression(X_train, Y_train,lambd)
    EIN,EOUT=printError(X_train, Y_train, X_test, Y_test,W)
    findNearest(arrEIN,arrEOUT,EIN,EOUT)
    print('---')

#Ex 2
arrEIN = np.array([0.03,0.03,0.04,0.04,0.05])
arrEOUT = np.array([0.08,0.1,0.09,0.11,0.1])
exercise(arrEIN,arrEOUT)
#Ex 3
arrEIN = np.array([0.01,0.02,0.02,0.03,0.03])
arrEOUT = np.array([0.02,0.04,0.06,0.08,0.1])
exercise(arrEIN,arrEOUT,-3)
#Ex 4
arrEIN = np.array([0.2,0.2,0.3,0.3,0.4])
arrEOUT = np.array([0.2,0.3,0.3,0.4,0.4])
exercise(arrEIN,arrEOUT,3)


---
lambda=0
True
EIN=0.0
EOUT=0.092
Response value = b
---
---
lambda=0.001
False
EIN=0.05714285714285714
EOUT=0.136
Response value = e
---
---
lambda=1000.0
False
EIN=0.5714285714285714
EOUT=0.528
Response value = e
---
