In [1]:
import random
import math
import copy
import numpy
import matplotlib.pyplot as plt

In [2]:


def randvec(mu, sigma, size):
    x = [[0,0,0,0,1] for i in range(size)]
    y = [0 for i in range(size)]
    
    for j in range(size):
        q = random.randint(0,1)
        y[j] = (1-2*q)
        for i in range(4):
            x[j][i] = random.gauss((1-2*q)*mu,sigma)

    return x,y


In [3]:
def dot(x,y):
    r = 0.0
    for i in range(len(x)):
        r = r + x[i]*y[i]
    return r
    
def grad(x,y,w):
    z = math.exp(-y*(dot(w,x)))
    scalar = -y*z/(1+z)
    c = [t*scalar for t in x]
    return numpy.array(c)
    

In [4]:
def proj_1(w):
    ans = [0.0 for i in range(5)]
    ans = numpy.array(ans)
    for i in range(5):
        if(w[i] > 1):
            ans[i] = 1
        elif (w[i] <-1):
            ans[i] = -1
        else:
            ans[i] = w[i]
    return ans

def proj_2(w):
    t = 0.0
    for i in range(5):
        t  = t + w[i]*w[i];
    t = math.sqrt(t)
    if(t>1):
        z = w/t
    else:
        z = w
    return z

In [5]:
def log_error(w, T, L):
    ans = 0.0
    for i in range(len(T)):
        ans = ans + math.log(1+math.exp(-L[i]*dot(w,T[i])))
    return ans/len(T)

def bin_error(w, T, L):
    ans = 0.0
    for i in range(len(T)):
        if (L[i]*dot(w,T[i]) < 0):
            ans = ans + 1
    return ans*1.0/len(T)

In [6]:
# Test set
sig = 0.05
Te, Le = randvec(0.25, sig, 400) 
#print Te, Le


In [7]:
#scenario 1

M = math.sqrt(5)
rho = math.sqrt(5)

In [8]:
#scenario 2

M = 1
rho = math.sqrt(2)

In [9]:
means_1_log = numpy.array([0.0 for i in range(4)])
means_1_bin = numpy.array([0.0 for i in range(4)])
std_1_log = numpy.array([0.0 for i in range(4)])
std_1_bin = numpy.array([0.0 for i in range(4)])
n_vals = [50,100,500,1000]

In [10]:
iter = 30

for k in range(4):
    l_errors = numpy.array([0.0 for i in range(iter)])
    b_errors = numpy.array([0.0 for i in range(iter)])

    n = n_vals[k] # training set sizes = 50,100, 500, 1000
    T = n
    alpha = M*1.0/(rho*math.sqrt(T))

    for count in range(iter):
        Tr, Lr = randvec(0.25, sig, n) 
        w = [0.0 for i in range(5)]
        w = numpy.array(w)
        w_hat = w/2.0
        for i in range(n):
            wn = w - alpha*(grad(Tr[i],Lr[i],w))
            wn = proj_1(wn)
            w_hat = w_hat + wn/T
            w = wn
        l_errors[count] = log_error(w_hat,Te,Le)
        b_errors[count] = bin_error(w_hat,Te,Le)
    means_1_log[k] = numpy.mean(l_errors)
    means_1_bin[k] = numpy.mean(b_errors)
    std_1_log[k] = numpy.std(l_errors)
    std_1_bin[k] = numpy.std(b_errors)

In [11]:
print means_1_log
print means_1_bin
print std_1_log
print std_1_bin

[ 0.56162029  0.51934711  0.40741382  0.37817893]
[ 0.01975  0.       0.       0.     ]
[ 0.00700851  0.00226586  0.0008901   0.00039981]
[ 0.08960178  0.          0.          0.        ]


In [12]:
print w

[ 1.          1.          1.          1.          0.03845816]


In [13]:
Le[100]*dot(w,Te[100])

0.86850439931367596

In [14]:
Te[100]

[-0.2194902131180448,
 -0.21909859981902646,
 -0.239396207829466,
 -0.2289775407167684,
 1]

In [15]:
(a, b) = ((0,1),(2,3))
print a[1]


1


In [16]:
def run(config, sig):

    iter = 30
    
    M = config[0]
    rho = config[1]
    f = config[2]
    
    means_1_log = numpy.array([0.0 for i in range(4)])
    means_1_bin = numpy.array([0.0 for i in range(4)])
    std_1_log = numpy.array([0.0 for i in range(4)])
    std_1_bin = numpy.array([0.0 for i in range(4)])
    n_vals = [50,100,500,1000]
    
    Te, Le = randvec(0.25, sig, 400) 
    
    for k in range(4):
        l_errors = numpy.array([0.0 for i in range(iter)])
        b_errors = numpy.array([0.0 for i in range(iter)])

        n = n_vals[k] # training set sizes = 50,100, 500, 1000
        T = n
        alpha = M*1.0/(rho*math.sqrt(T))

        for count in range(iter):
            Tr, Lr = randvec(0.25, sig, n) 
            w = [0.0 for i in range(5)]
            w = numpy.array(w)
            w_hat = w/2.0
            for i in range(n):
                wn = w - alpha*(grad(Tr[i],Lr[i],w))
                #wn = proj_1(wn)
                wn = f(wn)
                w_hat = w_hat + wn/T
                w = wn
            l_errors[count] = log_error(w_hat,Te,Le)
            b_errors[count] = bin_error(w_hat,Te,Le)
        means_1_log[k] = numpy.mean(l_errors)
        means_1_bin[k] = numpy.mean(b_errors)
        std_1_log[k] = numpy.std(l_errors)
        std_1_bin[k] = numpy.std(b_errors)
    return ((means_1_log, std_1_log),(means_1_bin,std_1_bin))
    

In [17]:
# Running for different scenarios and sigmas
# config = (M, rho ,f)
config_1 = (math.sqrt(5), math.sqrt(5), proj_1)
config_2 = (1, math.sqrt(2), proj_2)

configs = [config_1, config_2]
sigmas = [0.05, 0.25]

out_logs = []
out_bins = []

for config in configs:
    for sig in sigmas:
        (out_log, out_bin) = run(config, sig)
        out_logs.append(out_log)
        out_bins.append(out_bin)


In [19]:
print out_logs

[(array([ 0.51883367,  0.46875119,  0.37732521,  0.35780151]), array([ 0.00746269,  0.00272275,  0.00147451,  0.00057682])), (array([ 0.52830584,  0.48697443,  0.40113382,  0.38178233]), array([ 0.01012218,  0.00882581,  0.00227069,  0.00117306])), (array([ 0.56178347,  0.53715662,  0.50193947,  0.49384851]), array([ 0.00224057,  0.00339046,  0.0009422 ,  0.00032785])), (array([ 0.56885974,  0.54421176,  0.50969495,  0.50160426]), array([ 0.01058604,  0.00494836,  0.0017543 ,  0.00089371]))]


In [22]:
n_vals = [50,100,500,1000]

# logistic error, all scenarios

p1 = plt.errorbar(n_vals, out_logs[0][0], yerr=out_logs[0][1], xerr=None, ls = 'dashed', marker = 'o')
p2 = plt.errorbar(n_vals, out_logs[1][0], yerr=out_logs[1][1], xerr=None, ls = 'solid', marker = 's')
p3 = plt.errorbar(n_vals, out_logs[2][0], yerr=out_logs[2][1], xerr=None, ls = 'dashdot', marker = '^')
p4 = plt.errorbar(n_vals, out_logs[3][0], yerr=out_logs[3][1], xerr=None, ls = 'dotted', marker = 'D')

plt.figlegend((p1,p2,p3,p4),("Scenario 1, sigma=0.05","Scenario 1, sigma=0.25","Scenario 2, sigma=0.05","Scenario 2, sigma=0.25"),'upper right')

#p1 = plt.errorbar(n_vals, means_1_log, yerr=std_1_log, xerr=None, ls = 'dashed', label = 'log error', marker = 'o')
#p2 = plt.errorbar(n_vals, means_1_bin, yerr=std_1_bin, xerr=None, ls = 'solid',label = 'bin class error', marker = 'x')
#plt.figlegend((p1,p2),("log error","bin class error"),'upper right')
plt.xlim([0,1100])
plt.xlabel('Number of training examples')
plt.ylabel('Logistic Error')
plt.suptitle('Expected Risk \n All Plots', fontsize=14)
plt.show()

# plt.errorbar(n_vals, means_1_bin, yerr=std_1_bin, xerr=None, ls = 'solid',label = 'bin class error', marker = 'x')



In [24]:
# logistic error, scenario 1
p1 = plt.errorbar(n_vals, out_logs[0][0], yerr=out_logs[0][1], xerr=None, ls = 'dashed', marker = 'o')
p2 = plt.errorbar(n_vals, out_logs[1][0], yerr=out_logs[1][1], xerr=None, ls = 'solid', marker = 's')
plt.figlegend((p1,p2),("Scenario 1, sigma = 0.05","Scenario 1, sigma = 0.25","Scenario 2, sigma = 0.05"),'upper right')

plt.xlim([0,1100])
plt.xlabel('Number of training examples')
plt.ylabel('Logistic Error')
plt.suptitle('Expected Risk \n Scenario 1', fontsize=14)
plt.show()

In [None]:
# logistic error, scenario 2

p3 = plt.errorbar(n_vals, out_logs[2][0], yerr=out_logs[2][1], xerr=None, ls = 'dashdot', marker = '^')
p4 = plt.errorbar(n_vals, out_logs[3][0], yerr=out_logs[3][1], xerr=None, ls = 'dotted', marker = 'D')
plt.figlegend((p3,p4),("Scenario 2, sigma = 0.05","Scenario 2, sigma = 0.25"),'upper right')

plt.xlim([0,1100])
plt.xlabel('Number of training examples')
plt.ylabel('Logistic Error')
plt.suptitle('Expected Risk \n Scenario 2', fontsize=14)
plt.show()

In [None]:
n_vals = [50,100,500,1000]

# binary error, all scenarios

p1 = plt.errorbar(n_vals, out_bins[0][0], yerr=out_bins[0][1], xerr=None, ls = 'dashed', marker = 'o')
p2 = plt.errorbar(n_vals, out_bins[1][0], yerr=out_bins[1][1], xerr=None, ls = 'solid', marker = 's')
p3 = plt.errorbar(n_vals, out_bins[2][0], yerr=out_bins[2][1], xerr=None, ls = 'dashdot', marker = '^')
p4 = plt.errorbar(n_vals, out_bins[3][0], yerr=out_bins[3][1], xerr=None, ls = 'dotted', marker = 'D')

plt.figlegend((p1,p2,p3,p4),("Scenario 1, sigma=0.05","Scenario 1, sigma=0.25","Scenario 2, sigma=0.05","Scenario 2, sigma=0.25"),'upper right')

#p1 = plt.errorbar(n_vals, means_1_log, yerr=std_1_log, xerr=None, ls = 'dashed', label = 'log error', marker = 'o')
#p2 = plt.errorbar(n_vals, means_1_bin, yerr=std_1_bin, xerr=None, ls = 'solid',label = 'bin class error', marker = 'x')
#plt.figlegend((p1,p2),("log error","bin class error"),'upper right')
plt.xlim([0,1100])
plt.xlabel('Number of training examples')
plt.ylabel('Binary Error')
plt.suptitle('Expected Classification Error \n All Plots', fontsize=14)
plt.show()

# plt.errorbar(n_vals, means_1_bin, yerr=std_1_bin, xerr=None, ls = 'solid',label = 'bin class error', marker = 'x')

In [None]:
# logistic error, scenario 1
p1 = plt.errorbar(n_vals, out_bins[0][0], yerr=out_bins[0][1], xerr=None, ls = 'dashed', marker = 'o')
p2 = plt.errorbar(n_vals, out_bins[1][0], yerr=out_bins[1][1], xerr=None, ls = 'solid', marker = 's')
plt.figlegend((p1,p2),("Scenario 1, sigma = 0.05","Scenario 1, sigma = 0.25","Scenario 2, sigma = 0.05"),'upper right')

plt.xlim([0,1100])
plt.xlabel('Number of training examples')
plt.ylabel('Binary Error')
plt.suptitle('Expected Classification Error \n Scenario 1', fontsize=14)
plt.show()

In [None]:
# logistic error, scenario 2

p3 = plt.errorbar(n_vals, out_bins[2][0], yerr=out_bins[2][1], xerr=None, ls = 'dashdot', marker = '^')
p4 = plt.errorbar(n_vals, out_bins[3][0], yerr=out_bins[3][1], xerr=None, ls = 'dotted', marker = 'D')
plt.figlegend((p3,p4),("Scenario 2, sigma = 0.05","Scenario 2, sigma = 0.25"),'upper right')

plt.xlim([0,1100])
plt.xlabel('Number of training examples')
plt.ylabel('Binary Error')
plt.suptitle('Expected Classification Error \n Scenario 2', fontsize=14)
plt.show()