In [21]:
import numpy as np
import math

In [32]:
def LR_SGD(x, y, T, r0, d, schdule_func, prior_v, select_type='MAP',epsilon=0.00001):
    
    N = x.shape[0]
    w = np.zeros(x.shape[1], dtype='float64')
    initial_grad = w

    for t in range(T):
        # shuffleing data
        idx = np.random.permutation(N)
        x = x[idx]
        y = y[idx]

        for i in range(N):
            x_new = x[i]
            y_new = y[i]

            # calculate gradient: MAP and MLE
            if select_type == 'MAP':
                grad = MAP_gradient(x_new, y_new, w, prior_v)
            else:
                grad = MLE_gradient(x_new, y_new, w)
            # update weights
            w = w - (schdule_func(r0, t,d) * grad)
            
            # check convergence
            if np.linalg.norm(initial_grad - grad) < epsilon * t:
                    return w
            initial_grad = grad

    return w

In [33]:
# initialize parameters
def schdule_func(r0,t,d):
    return r0 / (1 + (r0 / d) * t)

In [34]:
def MAP_gradient(x, y, w, v):
    top = -x * y * v
    sigmoid = 1 + np.exp(y * w.T @ x)
    prior = w / (2 * v)
    map_grad=top / sigmoid + prior
    
    return map_grad

def MLE_gradient(x, y, w):
    top = -x* y
    sigmoid = 1 + np.exp(y * w.T @ x)
    mle_grad=top/sigmoid
    
    return mle_grad

In [35]:
def test_error(x, y, w):
    error = 0.0
    num=x.shape[0]

    for i in range(num):
        x_new = x[i]
        y_new = y[i]

        if y_new == 0:
            y_new = -1

        if (y_new * np.dot(w, x_new)) <= 0:
            error += 1

    return error/num

In [36]:
train_data = []
with open('train.csv', 'r') as f:
    for term in f:
        train_data.append(term.strip().split(','))
        
test_data= []
with open('test.csv', 'r') as f:
    for term in f:
        test_data.append(term.strip().split(','))

In [37]:
train_data = np.array(train_data, dtype='float64')
test_data = np.array(test_data, dtype='float64')

In [38]:
train_x= train_data[:, :-1]
train_y = train_data[:, -1].astype(int)
# convert y label with -1,1
train_y[train_y == 0] = -1  

In [39]:
test_x= test_data[:, :-1]
test_y = test_data[:, -1].astype(int)
# convert y label with -1,1
test_y[test_y == 0] = -1 

In [40]:
T = 100
r0 = 0.01
d=0.02
var_list = [0.01, 0.1, 0.5, 1, 3, 5, 10, 100]

# MAP
for prior_v in var_list:
    w = LR_SGD(train_x, train_y, T, r0, d, schdule_func, prior_v, select_type='MAP')
    train_err = test_error(train_x,train_y, w)
    test_err = test_error(test_x,test_y, w)
    print("prior_v:",prior_v, 'train_error:', train_err, 'test_error:', test_err)


converged on iter 57
prior_v: 0.01 train_error: 0.26490825688073394 test_error: 0.272
converged on iter 47
prior_v: 0.1 train_error: 0.2855504587155963 test_error: 0.286
converged on iter 29
prior_v: 0.5 train_error: 0.09403669724770643 test_error: 0.1
converged on iter 89
prior_v: 1 train_error: 0.05733944954128441 test_error: 0.074
converged on iter 89
prior_v: 3 train_error: 0.04128440366972477 test_error: 0.048
converged on iter 29
prior_v: 5 train_error: 0.04243119266055046 test_error: 0.05
converged on iter 2
prior_v: 10 train_error: 0.04128440366972477 test_error: 0.058
converged on iter 1
prior_v: 100 train_error: 0.0481651376146789 test_error: 0.06


In [41]:
# MLE
for prior_v in var_list:
    w = LR_SGD(train_x, train_y, T, r0, d, schdule_func, prior_v, select_type='MLE')
    train_err = test_error(train_x,train_y, w)
    test_err = test_error(test_x,test_y, w)
    print("prior_v:",prior_v, 'train_error:', train_err, 'test_error:', test_err)

converged on iter 4
prior_v: 0.01 train_error: 0.04128440366972477 test_error: 0.048
converged on iter 7
prior_v: 0.1 train_error: 0.040137614678899085 test_error: 0.046
converged on iter 4
prior_v: 0.5 train_error: 0.040137614678899085 test_error: 0.05
converged on iter 5
prior_v: 1 train_error: 0.04128440366972477 test_error: 0.046
converged on iter 2
prior_v: 3 train_error: 0.04128440366972477 test_error: 0.046
converged on iter 4
prior_v: 5 train_error: 0.04243119266055046 test_error: 0.048
converged on iter 3
prior_v: 10 train_error: 0.04128440366972477 test_error: 0.05
converged on iter 6
prior_v: 100 train_error: 0.04243119266055046 test_error: 0.05
