# Logitboost Assignment
Implement Logitboost using 1D linear regressors as weak learners. At each boosting
iteration choose the weak learner that obtains the largest reduction in the loss function
on the training set $D = {(x_i, y_i), i = 1, ..., N}, \text{ with } y_i ∈ {0, 1}$:

#### Import dependencies

In [118]:
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score

#### Data loading functions

In [119]:
def get_gisette():
    path = "data/gisette/"   
    
    train_x = np.loadtxt(path+"gisette_train.data")
    train_y = np.loadtxt(path+"gisette_train.labels")
    
    valid_x = np.loadtxt(path+"gisette_valid.data")
    valid_y = np.loadtxt(path+"gisette_valid.labels")
    
    test_x = np.loadtxt(path+"gisette_test.data")

    return train_x, train_y, valid_x, valid_y, test_x



def get_dexter():
    path = "data/dexter/"

    train_x = np.loadtxt(path+"dexter_train.csv", delimiter=',')
    train_y = np.loadtxt(path+"dexter_train.labels")

    valid_x = np.loadtxt(path+"dexter_valid.csv", delimiter=',')
    valid_y = np.loadtxt(path+"dexter_valid.labels")

    return train_x, train_y, valid_x, valid_y

def get_madelon():
    path = "data/MADELON/"

    train_x = np.loadtxt(path + "madelon_train.data")
    train_y = np.loadtxt(path + "madelon_train.labels")
    test_x = np.loadtxt(path + "madelon_valid.data")
    test_y = np.loadtxt(path + "madelon_valid.labels")

    return train_x, train_y, test_x, test_y

#### Normalization function

In [120]:
def normalize(train, *args):
    mean = np.average(train, axis=0)
    standard_deviation = np.std(train, axis=0)
    columns = train, *args
    return tuple(np.divide(column-mean, standard_deviation, where=standard_deviation!=0)
                 for column in columns)

def hofx(train_x, train_y):
    h=0

    p=1/(1+np.exp(-h)) # h doesn't make sense
    w_i=(p)*(1-p)
    #mean_x = np.average(train_x, axis=0)
    #mean_y = np.average(train_y, axis=0)
    #beta_1= np.sum(w_i*(train_y-mean_y)*(train_x*mean_x))
    #beta_1 /=(np.sum(w_i*np.square(train_x-mean_x)))
    #beta_0= mean_y + beta_1 * mean_x
    beta_0, beta_1 = linear_regressor_for_each_feature(train_x, train_y, w_i)


def beta_selection(x, beta0, beta1, y):
    h_xi= x.T * beta1 + beta0
    ytilde=2*y-1
    loss=np.sum(np.ln(1+np.exp(-ytilde*h_xi)), axis=1)




#def h()
# I separated linear regression to its own function and transposed to vectors so they
# broadcast correctly
def linear_regressor_for_each_feature(x, y, w):
    mean_x = np.average(x, axis=1)[np.newaxis] # add an axis so we can tranpose this
    mean_y = np.average(y)
    beta_1= np.sum(w*(y-mean_y)[np.newaxis].T*(x-mean_x.T), axis=1) /(np.sum(w*np.square(x-mean_x.T), axis=1))
    beta_0= mean_y + beta_1 * mean_x
    beta_0 = np.ravel(beta_0) # remove extra axis
    return beta_0, beta_1

In [121]:
train_x, train_y, test_x, test_y = get_dexter()

In [122]:
# test function
linear_regressor_for_each_feature(train_x, train_y, np.ones(train_x.shape[1]))

(array([-3.51599140e-19,  1.02433401e-18,  1.76554219e-19, -3.08411675e-19,
        -7.42227191e-19,  2.11918050e-19, -3.33559739e-19,  6.79842884e-19,
         7.21325689e-19, -2.64384000e-19, -3.62693350e-19, -1.57049555e-19,
         1.99033377e-19, -1.19225897e-18,  1.08690497e-18, -2.36332291e-19,
         1.43584607e-18,  5.39691024e-19, -3.54418658e-19,  5.80869193e-19,
        -9.18237070e-19,  4.14323738e-19, -2.01680862e-19, -8.77342080e-19,
        -4.73846510e-19, -5.88507893e-19, -6.19230684e-19, -1.65439232e-19,
        -4.14983261e-19, -4.46487952e-19,  5.14764899e-19, -2.36661229e-19,
        -2.59508034e-19, -2.73579505e-19, -1.27011311e-18,  3.91194366e-19,
        -2.67816915e-19, -1.16874428e-18, -4.74069236e-19,  2.18401385e-18,
        -6.41593582e-19, -7.30968979e-19, -7.98537667e-19, -9.77616664e-19,
        -4.53719212e-19,  7.43631644e-19,  4.46661762e-19, -1.46355823e-18,
         4.34365726e-19,  5.46992896e-19, -1.28629766e-18,  4.63258915e-19,
         8.4

#### Find k for 10, 30, 100, 300, 500 features

#### Plot the error vs iteration and display final error in a table

## a) Gisette

Using the Gisette data, train a FSA classifier on the training set, starting with β(0) = 0 to select k ∈ {10, 30, 100, 300, 500} features. Plot the training loss vs iteration number for k = 30. Report in a table the misclassification errors on the training and test set for the models obtained for all these k. Plot the misclassification error on the training and test set vs k.

In [123]:
#train_x, train_y, test_x, test_y, _ = get_gisette()

In [124]:
#calibrate_k(train_x, train_y, test_x, test_y)

In [125]:
#plot_fsa(train_x, train_y, test_x, test_y, [10, 30, 100, 300, 500]);

## b) Dexter
Repeat point a) on the dexter dataset

In [126]:
#train_x, train_y, test_x, test_y = get_dexter()

In [127]:
#linear_regressor_for_each_feature(train_x, train_y, np.ones(train_x.shape[1]))
#calibrate_k(train_x, train_y, test_x, test_y)

In [128]:
#plot_fsa(train_x, train_y, test_x, test_y, [10, 30, 100, 300, 500]);

## c) Madelon
Repeat point a) on the madelon dataset

In [129]:
#train_x, train_y, test_x, test_y = get_madelon()

In [None]:
#calibrate_k(train_x, train_y, test_x, test_y)

In [131]:
#plot_fsa(train_x, train_y, test_x, test_y, [10, 30, 100, 300, 500]);
