# Logitboost Assignment
Implement Logitboost using 1D linear regressors as weak learners. At each boosting
iteration choose the weak learner that obtains the largest reduction in the loss function
on the training set $D = {(x_i, y_i), i = 1, ..., N}, \text{ with } y_i ∈ {0, 1}$:

#### Import dependencies

In [1]:
import numpy as np
from matplotlib import pyplot as plt
from sklearn.metrics import accuracy_score

#### Data loading functions

In [2]:
def get_gisette():
    path = "data/gisette/"   
    
    train_x = np.loadtxt(path+"gisette_train.data")
    train_y = np.loadtxt(path+"gisette_train.labels")
    
    valid_x = np.loadtxt(path+"gisette_valid.data")
    valid_y = np.loadtxt(path+"gisette_valid.labels")
    
    test_x = np.loadtxt(path+"gisette_test.data")

    return train_x, train_y, valid_x, valid_y, test_x



def get_dexter():
    path = "data/dexter/"

    train_x = np.loadtxt(path+"dexter_train.csv", delimiter=',')
    train_y = np.loadtxt(path+"dexter_train.labels")

    valid_x = np.loadtxt(path+"dexter_valid.csv", delimiter=',')
    valid_y = np.loadtxt(path+"dexter_valid.labels")

    return train_x, train_y, valid_x, valid_y

def get_madelon():
    path = "data/MADELON/"

    train_x = np.loadtxt(path + "madelon_train.data")
    train_y = np.loadtxt(path + "madelon_train.labels")
    test_x = np.loadtxt(path + "madelon_valid.data")
    test_y = np.loadtxt(path + "madelon_valid.labels")

    return train_x, train_y, test_x, test_y

#### Normalization function

In [3]:
def normalize(train, *args):
    mean = np.average(train, axis=0)
    standard_deviation = np.std(train, axis=0)
    columns = train, *args
    return tuple(np.divide(column-mean, standard_deviation, where=standard_deviation!=0)
                 for column in columns)


def beta_selection(x, beta0, beta1, y):
    h_xi= x * beta1 + beta0
    ytilde=2*y-1
    loss=np.sum(np.log(1+np.exp(-ytilde*h_xi.T)), axis=1)
    beta_index = np.argmin(loss)
    filter = np.ones_like(beta0) # array of ones of same length as beta0
    filter[beta_index] = 0 # set the spot at beta_index to 0
    beta0[filter==1] = 0 # all indices at which filter==1 are set to 0, to beta 0 has zeroes everywhere except beta_index
    beta1[filter==1] = 0
    return beta0, beta1



#def h()
# I separated linear regression to its own function and transposed to vectors so they
# broadcast correctly
def linear_regressor_for_each_feature(x, y, w):
    mean_x = np.average(x, axis=0) # add an axis so we can tranpose this
    mean_y = np.average(y)
    denominators = np.sum(w*np.square(x-mean_x.T), axis=0)
    beta_1= np.divide(np.sum(w*(y-mean_y)[np.newaxis].T*(x-mean_x.T), axis=0), denominators, where=denominators!=0)
    #print(np.sum(np.isnan(beta_1)))
    beta_0= mean_y + beta_1 * mean_x
    beta_0 = np.ravel(beta_0) # remove extra axis
    #print(np.sum(np.isnan(beta_0)))
    #print(np.sum(np.isnan(beta_1)))
    return beta_0, beta_1

In [4]:
train_x, train_y, test_x, test_y = get_madelon()

In [5]:
# test function
beta_selection(train_x, *linear_regressor_for_each_feature(train_x, train_y, np.ones(train_x.shape[1])), train_y)

(array([ 0.,  0., nan, ...,  0.,  0.,  0.]),
 array([ 0.,  0., nan, ...,  0.,  0.,  0.]))

In [5]:
def train_logit(x_train, y_train, x_test, y_test,):
    interations=500
    x_train = np.hstack((np.ones((x_train.shape[0], 1)), x_train))
    x_test = np.hstack((np.ones((x_test.shape[0], 1)), x_test))
    y_train[y_train==-1] = 0
    y_test[y_test==-1] = 0

    h=0
    for x in range(500):
        p=1/(1+np.exp(-h)) # h doesn't make sense
        w_i=(p)*(1-p)
        Beta0, Beta1 = linear_regressor_for_each_feature(x_train, y_test, w_i)
        newbetas=beta_selection(x_train, Beta0, Beta1, y_train)
        h=h+newbetas


#### Plot the error vs iteration and display final error in a table

## a) Gisette

Using the Gisette data, train a FSA classifier on the training set, starting with β(0) = 0 to select k ∈ {10, 30, 100, 300, 500} features. Plot the training loss vs iteration number for k = 30. Report in a table the misclassification errors on the training and test set for the models obtained for all these k. Plot the misclassification error on the training and test set vs k.

In [6]:
#train_x, train_y, test_x, test_y, _ = get_gisette()

In [7]:
#calibrate_k(train_x, train_y, test_x, test_y)

In [8]:
#plot_fsa(train_x, train_y, test_x, test_y, [10, 30, 100, 300, 500]);

## b) Dexter
Repeat point a) on the dexter dataset

In [9]:
#train_x, train_y, test_x, test_y = get_dexter()

In [10]:
#linear_regressor_for_each_feature(train_x, train_y, np.ones(train_x.shape[1]))
#calibrate_k(train_x, train_y, test_x, test_y)

In [11]:
#plot_fsa(train_x, train_y, test_x, test_y, [10, 30, 100, 300, 500]);

## c) Madelon
Repeat point a) on the madelon dataset

In [12]:
#train_x, train_y, test_x, test_y = get_madelon()

In [13]:
#calibrate_k(train_x, train_y, test_x, test_y)