In [1]:
import math
import numpy as np
import random

import sys


def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)) as a float.

    The two branches are algebraically identical. Each one only ever
    exponentiates a non-positive number, so math.exp cannot overflow; the
    single-formula version raised OverflowError for strongly negative x.
    """
    if x >= 0:
        return 1.0 / (1.0 + math.exp(-x))
    # x < 0: rewrite as exp(x) / (1 + exp(x)) so the exponent stays negative.
    z = math.exp(x)
    return z / (1.0 + z)


def logit(w, x, y, lambd):
    """Regularised logistic loss of a single sample.

    Computes log(1 + exp(-y * <w, x>)) + lambd * ||w||_2.

    w     -- weight vector
    x     -- feature vector of the sample (same length as w)
    y     -- label of the sample
    lambd -- weight of the L2-norm penalty
    Returns the loss as a float.
    """
    margin = y * np.dot(w, x)
    # logaddexp(0, -m) == log(1 + exp(-m)) but does not overflow when the
    # sample is badly misclassified (large negative margin).
    data_term = np.logaddexp(0.0, -margin)
    # A distinct loop name keeps the feature vector `x` from being shadowed.
    penalty = lambd * math.sqrt(sum(w_i ** 2 for w_i in w))
    return float(data_term + penalty)


def logit_loss(w, samples, lambd):
    """Mean of the per-sample regularised logistic loss over ``samples``.

    Every sample is a sequence whose last element is the label and whose
    preceding elements are the features.
    """
    per_sample = []
    for row in samples:
        features, label = row[:-1], row[-1]
        per_sample.append(logit(w, features, label, lambd))
    return np.average(per_sample)
                                                                         
                                                                         
def logit_loss_partial_deriv(w, samples, j, lambd):
    """Partial derivative of the mean regularised loss with respect to w[j].

    The data part averages y * x_j * (sigmoid(y * <w, x>) - 1) over the
    samples; the penalty part is lambd * w[j] / ||w||, taken as 0 when w is
    the zero vector (where the norm is not differentiable).
    """
    w_norm = math.sqrt(sum(component ** 2 for component in w))
    if w_norm > 0:
        penalty_term = lambd * w[j] / w_norm
    else:
        penalty_term = 0

    data_terms = []
    for row in samples:
        label = row[-1]
        margin = label * np.dot(w, row[:-1])
        data_terms.append(label * row[j] * (sigmoid(margin) - 1))
    return np.average(data_terms) + penalty_term


def logit_loss_gradient(w, samples, lambd):
    """Gradient of the mean regularised logistic loss with respect to w.

    Component j equals what logit_loss_partial_deriv(w, samples, j, lambd)
    computes, but the margins and the weight norm are evaluated once for the
    whole vector instead of once per coordinate.

    w       -- weight vector of length d
    samples -- rows of d features followed by the label
    lambd   -- weight of the L2-norm penalty
    Returns a list of d floats.
    """
    data = np.asarray(samples, dtype=float)
    features, labels = data[:, :-1], data[:, -1]
    weights = np.asarray(w, dtype=float)

    margins = labels * features.dot(weights)
    # y * (sigmoid(m) - 1) == -y / (1 + exp(m)); writing the fraction as
    # exp(-logaddexp(0, m)) keeps it finite for margins of any size.
    coeffs = -labels * np.exp(-np.logaddexp(0.0, margins))
    data_grad = features.T.dot(coeffs) / len(data)

    norm = np.linalg.norm(weights)
    # The norm is not differentiable at w == 0; use 0 there.
    penalty_grad = lambd * weights / norm if norm > 0 else np.zeros_like(weights)

    return list(data_grad + penalty_grad)


def logit_loss_one_var_gradient(w, samples, lambd, j):
    """Gradient of the regularised loss evaluated on the single sample samples[j].

    Note that ``j`` indexes a sample here, not a coordinate of ``w``.
    """
    return logit_loss_gradient(w, [samples[j]], lambd)


In [2]:
# Numeric targets used for one-vs-rest training: the class being learnt
# versus every other class.
YES_LABEL, NO_LABEL = 1, -1

def read_data_from_file(filename):
    """Read a comma-separated data file with the class label in the last column.

    Every non-blank line becomes a ``(features, label)`` pair, where
    ``features`` is the list of all but the last field converted to float and
    ``label`` is the last field as a string.

    Blank lines are skipped; without that, an empty line would yield a sample
    with no features and add the empty string to the set of classes.

    Returns ``(samples, classes)``: the list of pairs and the set of labels seen.
    """
    samples = []
    classes = set()
    with open(filename) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            fields = line.split(",")
            label = fields[-1]
            samples.append(([float(value) for value in fields[:-1]], label))
            classes.add(label)
    return samples, classes


def mark_set(samples, class_label):
    """Turn ``(features, label)`` pairs into flat one-vs-rest training rows.

    Each output row is ``[1] + features + [target]``: the leading 1 is the
    constant (bias) feature, and ``target`` is YES_LABEL when the sample's
    label equals ``class_label`` and NO_LABEL otherwise.
    """
    def to_row(sample):
        features, label = sample[0], sample[1]
        target = YES_LABEL if label == class_label else NO_LABEL
        return [1] + list(features) + [target]

    return [to_row(sample) for sample in samples]


In [3]:
def batch_gd(samples, T, nu, lambd):
    """Run T steps of full-batch gradient descent on the regularised logistic loss.

    samples -- rows of d features followed by the label
    T       -- number of gradient steps
    nu      -- step size
    lambd   -- regularisation weight passed on to the gradient
    Returns the weight vector after the last step (a zero vector of length d
    when T is 0).

    The iterate history and its running sum were never used for the returned
    value, so they are no longer built.
    """
    d = len(samples[0]) - 1
    w = np.zeros(d)

    # range() rather than xrange() so the function runs on Python 2 and 3.
    for _ in range(T):
        vt = logit_loss_gradient(w, samples, lambd)
        w = np.subtract(w, np.dot(nu, vt))

    return w

def stochastic_gd(samples, T, nu, lambd):
    """Run T steps of stochastic gradient descent on the regularised logistic loss.

    Each step draws one sample index uniformly at random (via the ``random``
    module) and moves against the gradient computed on that sample alone.

    samples -- rows of d features followed by the label
    T       -- number of gradient steps
    nu      -- step size
    lambd   -- regularisation weight passed on to the gradient
    Returns the weight vector after the last step (a zero vector of length d
    when T is 0).

    The iterate history and its running sum were never used for the returned
    value, so they are no longer built.
    """
    d = len(samples[0]) - 1
    w = np.zeros(d)
    last_index = len(samples) - 1

    # range() rather than xrange() so the function runs on Python 2 and 3.
    for _ in range(T):
        picked = random.randint(0, last_index)
        vt = logit_loss_one_var_gradient(w, samples, lambd, picked)
        w = np.subtract(w, np.dot(nu, vt))

    return w


def test_sample(x, y, w):
    """Return 1 if weights ``w`` classify sample ``x`` as its label ``y``, else 0.

    The sample counts as correct when sigmoid(<w, x>) is at least 0.5 and
    ``y`` is YES_LABEL, or when it is below 0.5 and ``y`` is NO_LABEL.
    """
    score = sigmoid(np.dot(w, x))
    hit_positive = y == YES_LABEL and score >= 0.5
    hit_negative = y == NO_LABEL and score < 0.5
    return 1 if (hit_positive or hit_negative) else 0


def test_samples(samples_for_test, w):
    """Fraction of ``samples_for_test`` that weights ``w`` classify correctly.

    Each row holds the features followed by the label in the last position.
    """
    hits = 0
    for row in samples_for_test:
        hits += test_sample(row[:-1], row[-1], w)
    return float(hits) / len(samples_for_test)
    

In [4]:
# Hyper-parameter grids searched by k_fold.
# range() replaces xrange() so this cell runs on Python 2 and 3 alike.

# Grid used with stochastic gradient descent.
T_RANGE = range(25, 210, 50)  # step counts: 25, 75, 125, 175
LAMBDA_RANGE = np.arange(0.0001, 0.01, 0.004)  # regularisation weights: 0.0001, 0.0041, 0.0081
NU_RANGE = np.arange(0.0001, 0.105, 0.005)  # step sizes from 0.0001 upwards, 0.005 apart

# Grid used with batch gradient descent.
T_RANGE_LAMBDA = range(25, 110, 25)  # step counts (despite the name): 25, 50, 75, 100
LAMBDA_RANGE_BATCH = np.arange(0.0001, 0.01, 0.004)  # same values as LAMBDA_RANGE
NU_RANGE_BATCH = np.arange(0.0001, 0.2001, 0.01)  # step sizes from 0.0001 upwards, 0.01 apart

K = 10  # number of folds requested from k_fold


def k_fold(samples, k, t_range, lambda_range, nu_range, gd_func):
    """Grid-search (T, lambda, nu) by k-fold cross-validation.

    For every combination in ``t_range`` x ``lambda_range`` x ``nu_range`` the
    model is trained with ``gd_func(train, T, nu, lambd)`` on each fold's
    training part and scored with ``logit_loss`` on its held-out part; the
    combination with the lowest mean held-out loss wins. One progress line
    per combination is written to standard output.

    samples -- list of training rows (features followed by the label)
    k       -- number of folds, 2 <= k <= len(samples)
    gd_func -- trainer such as batch_gd or stochastic_gd
    Returns the winning ``(T, lambda, nu)``.
    Raises ValueError when ``k`` is outside the allowed range.

    Folds are cut by position, so there are exactly k of them even when
    len(samples) is not a multiple of k. Training rows that merely compare
    equal to a held-out row are kept (a membership test on values used to
    drop them).
    """
    n = len(samples)
    if k < 2 or n < k:
        raise ValueError(
            "k_fold needs 2 <= k <= len(samples), got k=%d for %d samples" % (k, n))

    # The split does not depend on the hyper-parameters, so build it once.
    bounds = [(i * n) // k for i in range(k + 1)]
    folds = []
    for start, stop in zip(bounds[:-1], bounds[1:]):
        held_out = samples[start:stop]
        training = list(samples[:start]) + list(samples[stop:])
        folds.append((training, held_out))

    all_logit_loss_history = []

    for T in t_range:
        for l in lambda_range:
            for nu in nu_range:
                fold_losses = []
                for training, held_out in folds:
                    w_fold_trained = gd_func(training, T, nu, l)
                    fold_losses.append(logit_loss(w_fold_trained, held_out, l))

                avg_loss = np.average(fold_losses)
                all_logit_loss_history.append([T, l, nu, avg_loss])

                # A single pre-joined string prints identically under the
                # Python 2 print statement and the Python 3 print function.
                print(" ".join(str(part) for part in
                               ("T=", T, ",l=", l, ",nu=", nu, "loss: ", avg_loss)))

    best = np.argmin([entry[-1] for entry in all_logit_loss_history])
    params = all_logit_loss_history[best][:-1]
    return params[0], params[1], params[2]
    

In [5]:
# Driver: for every class, train a one-vs-rest classifier with batch and with
# stochastic gradient descent, choosing hyper-parameters by k-fold
# cross-validation on 90% of the shuffled data and scoring on the other 10%.
# NOTE(review): no random seed is set, so the shuffle and the stochastic
# runs differ between executions.
data, classes = read_data_from_file("iris.data")

# Remember the stream that is active right now and restore *that* afterwards.
# Inside a notebook sys.stdout is the kernel's stream, which is not the same
# object as sys.__stdout__; resetting to sys.__stdout__ would divert all later
# output away from the notebook.
console = sys.stdout

with open('stochastic-k-fold.log', 'w') as stochastic_log_file, \
        open('batch-k-fold.log', 'w') as batch_log_file:
    try:
        for c in list(classes):
            marked_data = mark_set(data, c)

            np.random.shuffle(marked_data)
            train_length = int(math.floor(len(marked_data) * 0.9))
            train_data = marked_data[:train_length]
            test_data = marked_data[train_length:]

            # k_fold reports its progress on sys.stdout; send it to the log.
            sys.stdout = batch_log_file
            print("batch k-fold: ")

            T, lambd, nu = k_fold(train_data, K, T_RANGE_LAMBDA, LAMBDA_RANGE_BATCH, NU_RANGE_BATCH, batch_gd)
            w_trained = batch_gd(train_data, T, nu, lambd)
            predict = test_samples(test_data, w_trained)

            # Pre-formatted lines print identically on Python 2 and 3.
            batch_params = "batch: c= %s , T= %s , l= %s , nu= %s" % (c, T, lambd, nu)
            batch_result = "batch:  %s %s" % (w_trained, predict)
            print(batch_params)
            print(batch_result)
            sys.stdout = console
            print(batch_params)
            print(batch_result)

            sys.stdout = stochastic_log_file
            print("stochastic k-fold: ")

            T_st, lambd_st, nu_st = k_fold(train_data, K, T_RANGE, LAMBDA_RANGE, NU_RANGE, stochastic_gd)
            w_trained_st = stochastic_gd(train_data, T_st, nu_st, lambd_st)
            predict_st = test_samples(test_data, w_trained_st)

            stochastic_params = "stochastic: c= %s , T= %s , l= %s , nu= %s" % (c, T_st, lambd_st, nu_st)
            stochastic_result = "stochastic:  %s %s" % (w_trained_st, predict_st)
            print(stochastic_params)
            print(stochastic_result)
            sys.stdout = console
            print(stochastic_params)
            print(stochastic_result)
    finally:
        # Put the stream back even if training fails, so later output is not
        # written to a log file that the with-block is about to close.
        sys.stdout = console

print("FINISHED")

