# Project 1 - Functions tests

In [1]:
from preprocessing_functions import *
%matplotlib inline 
import numpy as np   # generic stuff
import matplotlib.pyplot as plt

#### REMOVE THIS LINE BEFORE SUBMISSION
import pandas as pd
#######################################################################

from lib.helpers import * #the helper provided for the project
# choose which implementations you would like
from lib.implementations import *

%load_ext autoreload
%autoreload 2
%reload_ext autoreload

In [2]:
DATA_FOLDER = 'data/' 

y_train, tx_train, ids_train = load_csv_data(DATA_FOLDER+'train.csv',sub_sample=False)
size_train = int(3*len(y_train)/4)
y_test = y_train[:size_train]
tx_test = tx_train[:size_train,:]
ids_test = ids_train[:size_train]

y_train = y_train[size_train:]
tx_train = tx_train[size_train:,:]
ids_train = ids_train[size_train:]

#y_test, tx_test, ids_test = load_csv_data(DATA_FOLDER+'test.csv',sub_sample=False)

In [3]:
def build_k_indices(y, k_fold, seed):
    """Build k indices for k-fold."""
    num_row = y.shape[0]
    interval = int(num_row / k_fold)
    np.random.seed(seed)
    indices = np.random.permutation(num_row)
    k_indices = [indices[k * interval: (k + 1) * interval]
                 for k in range(k_fold)]
    return np.array(k_indices)

In [4]:
def cross_validation_one_fold_LS(y_cross_val_train, y_cross_val_test, tx_cross_val_train, tx_cross_val_test, \
                                 degrees, len_kept_data, stdize=False):
    
    accuracies_train_by_deg = np.zeros(len(degrees))
    accuracies_test_by_deg = np.zeros(len(degrees))
    
    # For each degree, compute the least squares weights, the predictions and the accuracies
    previous_deg = 1
    for deg_id, deg in enumerate(degrees):
        print('++ Degree', deg, '++')
        
        # Add powers of the chosen columns
        len_data = tx_cross_val_train.shape[1]
        tx_cross_val_train = add_powers(tx_cross_val_train, range(previous_deg+1, deg+1), range(len_kept_data))
        tx_cross_val_test = add_powers(tx_cross_val_test, range(previous_deg+1, deg+1), range(len_kept_data))
        if stdize: 
            tx_cross_val_train[:,len_data:] = standardize(tx_cross_val_train[:,len_data:])[0]
            tx_cross_val_test[:,len_data:] = standardize(tx_cross_val_test[:,len_data:])[0]
    
        # Compute the best weights on the training set
        weights, loss = least_squares(y_cross_val_train, tx_cross_val_train, 'mse') 

        # Compute the predictions
        y_predicted_cross_val_train = predict_labels(weights, tx_cross_val_train)
        y_predicted_cross_val_test = predict_labels(weights, tx_cross_val_test)

        # Compute the accuracies for each degree
        accuracies_train_by_deg[deg_id] = \
            np.sum(y_predicted_cross_val_train == y_cross_val_train)/len(y_cross_val_train)
        accuracies_test_by_deg[deg_id] = \
            np.sum(y_predicted_cross_val_test == y_cross_val_test)/len(y_cross_val_test)
        
        # Update the previous degree to the actual degree
        previous_deg = deg
        
    return accuracies_train_by_deg, accuracies_test_by_deg

In [23]:
def cross_validation_least_squares(y_single_jet_train, tx_single_jet_train, degrees, k_fold, seed):
    
    # Get the indices so that we get the k'th subgroup in test, others in train, for each k
    k_indices = build_k_indices(y_single_jet_train, k_fold, seed)
    
    # Initialize matrix of computed accuracies for each degree and each fold
    accuracies_train_by_fold = np.zeros([len(degrees), k_fold])
    accuracies_test_by_fold = np.zeros([len(degrees), k_fold])
    
    # Preprocess training dataset
    tx_single_jet_train_preprocessed, len_kept_data, unique_cols = \
        preprocess_data(tx_single_jet_train, [], 'none')
    
    for k in range(k_fold):
        print('--- Fold', k, '---')
        # Create the testing set for this fold number
        k_index = k_indices[k] # Indices of the testing set for fold k
        y_cross_val_test = y_single_jet_train[k_index]
        tx_cross_val_test = tx_single_jet_train_preprocessed[k_index,:]
        
        # Create the training set for this fold number
        mask = np.ones(len(y_single_jet_train), dtype=bool) # set all elements to True
        mask[k_index] = False # set test elements to False
        y_cross_val_train = y_single_jet_train[mask] # select only True elements (ie train elements)
        tx_cross_val_train = tx_single_jet_train_preprocessed[mask,:]
        
        # Compute the accuracies for each degree
        accuracies_train_by_fold[:,k], accuracies_test_by_fold[:,k] = cross_validation_one_fold_LS\
            (y_cross_val_train, y_cross_val_test, tx_cross_val_train, tx_cross_val_test, degrees, len_kept_data, \
             False)
    
    # Compute the mean accuracies over the folds, for each degree
    mean_accuracies_train_by_deg = np.mean(accuracies_train_by_fold, axis=1)
    mean_accuracies_test_by_deg = np.mean(accuracies_test_by_fold, axis=1)
    
    # Get the index of the best accuracy in the testing set
    max_id_deg_test = np.unravel_index(mean_accuracies_test_by_deg.argmax(), mean_accuracies_test_by_deg.shape)
    
    # Find the optimal degree and the corresponding accuracies in the training and testing sets
    best_deg = degrees[max_id_deg_test[0]]
    best_accuracy_test = mean_accuracies_test_by_deg[max_id_deg_test[0]]
    corresponding_accuracy_train = mean_accuracies_train_by_deg[max_id_deg_test[0]]
    
    print('Best accuracy test =', best_accuracy_test, 'with degree =', best_deg)
    print('Corresponding accuracy train =', corresponding_accuracy_train)
    
    return best_deg, best_accuracy_test, corresponding_accuracy_train

In [24]:
degrees = range(6,11)
k_fold = 5
seed = 1

In [25]:
mask_jets_train = split_jets_mask(tx_train)
mask_jets_test = split_jets_mask(tx_test)
len_mask = len(mask_jets_train)

y_predicted_train = np.zeros(len(y_train))
y_predicted_test = np.zeros(tx_test.shape[0])
best_degrees = np.zeros(len_mask)

In [None]:
for jet_id in range(len_mask):
    print('***** Jet ', jet_id, '*****')
    # SEPARATE THE WHOLE DATA SET TO GET ONLY THE PART THAT HAVE THE RIGHT NUMBER OF JETS
    tx_single_jet_train = tx_train[mask_jets_train[jet_id]]
    tx_single_jet_test = tx_test[mask_jets_test[jet_id]]
    y_single_jet_train = y_train[mask_jets_train[jet_id]]
    
    # CALL CROSS VALIDATION FOR A SINGLE JET ON TRAIN PART, FIND BEST DEG, BEST ACCURACY ON TESTING CROSS VAL
    best_deg, best_accuracy, corresponding_accuracy_train = cross_validation_least_squares(y_single_jet_train, \
                                                                                           tx_single_jet_train, \
                                                                                           degrees, k_fold, seed)
    
    # KEEP IN MEMORY THE BEST DEGREE FOR THIS JET
    best_degrees[jet_id] = best_deg
    
    # PREPROCESS FULL TRAINING AND TESTING DATA
    tx_single_jet_train_preprocessed, len_kept_data, unique_cols = \
        preprocess_data(tx_single_jet_train, [], 'none')
    tx_single_jet_test_preprocessed = preprocess_data(tx_single_jet_test, unique_cols, 'none')[0]
    
    # ADD POWERS TO THE CHOSEN COLUMNS
    tx_single_jet_train_preprocessed = add_powers(tx_single_jet_train_preprocessed, range(2,best_deg+1), \
                                                  range(len_kept_data))
    tx_single_jet_test_preprocessed = add_powers(tx_single_jet_test_preprocessed, range(2,best_deg+1), \
                                                 range(len_kept_data))
    
    # COMPUTE THE BEST WEIGHTS AND FULL ACCURACY ON TRAINING FULL SET - ONE JET
    weights, loss = least_squares(y_single_jet_train, tx_single_jet_train_preprocessed, 'mse')
    
    # COMPUTE THE PREDICTIONS ON THE FULL TESTING SET - SINGLE JET
    y_predicted_single_jet_train = predict_labels(weights, tx_single_jet_train_preprocessed)
    y_predicted_single_jet_test = predict_labels(weights, tx_single_jet_test_preprocessed)
    
    # ADD THE PREDICTIONS TO y_predicted_test AND y_predicted_train
    y_predicted_train[mask_jets_train[jet_id]] = y_predicted_single_jet_train
    y_predicted_test[mask_jets_test[jet_id]] = y_predicted_single_jet_test
    
    # COMPUTE THE ACCURACY train ON JET
    accuracy_train_single_jet = np.sum(y_predicted_single_jet_train == y_single_jet_train)/len(y_single_jet_train)
    
    # PRINT ACCURACY train ON JET
    print('Accuracy full train on jet', jet_id, '=', accuracy_train_single_jet)

***** Jet  0 *****
--- Fold 0 ---
++ Degree 6 ++
++ Degree 7 ++
++ Degree 8 ++
++ Degree 9 ++
++ Degree 10 ++
--- Fold 1 ---
++ Degree 6 ++
++ Degree 7 ++
++ Degree 8 ++
++ Degree 9 ++
++ Degree 10 ++
--- Fold 2 ---
++ Degree 6 ++
++ Degree 7 ++
++ Degree 8 ++
++ Degree 9 ++
++ Degree 10 ++
--- Fold 3 ---
++ Degree 6 ++
++ Degree 7 ++
++ Degree 8 ++
++ Degree 9 ++
++ Degree 10 ++
--- Fold 4 ---
++ Degree 6 ++
++ Degree 7 ++
++ Degree 8 ++
++ Degree 9 ++
++ Degree 10 ++
Best accuracy test = 0.843811434303 with degree = 7
Corresponding accuracy train = 0.847024018453
Accuracy full train on jet 0 = 0.846076700899
***** Jet  1 *****
--- Fold 0 ---
++ Degree 6 ++
++ Degree 7 ++
++ Degree 8 ++
++ Degree 9 ++
++ Degree 10 ++
--- Fold 1 ---
++ Degree 6 ++
++ Degree 7 ++
++ Degree 8 ++
++ Degree 9 ++
++ Degree 10 ++
--- Fold 2 ---
++ Degree 6 ++
++ Degree 7 ++
++ Degree 8 ++
++ Degree 9 ++
++ Degree 10 ++
--- Fold 3 ---
++ Degree 6 ++
++ Degree 7 ++
++ Degree 8 ++
++ Degree 9 ++
++ Degree 10 ++

In [14]:
# CREATE CSV SUBMISSION
#create_csv_submission(ids_test, y_predicted_test, 'output/trial.csv')

# COMPUTE ACCURACY ON FULL train
total_accuracy_train = np.sum(y_predicted_train == y_train)/len(y_train)*100
print('Total accuracy train =', total_accuracy_train, 'with degrees =', best_degrees)

Total accuracy train = 97.84 with degrees = [ 6.  7.  6.  6.]


In [21]:
total_accuracy_test = np.sum(y_predicted_test == y_test)/len(y_test)*100
print('Total accuracy test =', total_accuracy_test, 'with degrees =', best_degrees)

Total accuracy test = 65.2533333333 with degrees = [ 6.  7.  6.  6.]


## Gradient descent 

In [22]:
def cross_validation_one_fold_GD(y_cross_val_train, y_cross_val_test, tx_cross_val_train, tx_cross_val_test, \
                                 degrees, gammas, len_kept_data, max_iters, stdize=False):
    
    accuracies_train_by_deg = np.zeros([len(degrees), len(gammas)])
    accuracies_test_by_deg = np.zeros([len(degrees), len(gammas)])
    
    # For each degree, compute the least squares weights, the predictions and the accuracies
    previous_deg = 1
    for deg_id, deg in enumerate(degrees):
        print('++ Degree', deg, '++')
                
        # Add powers of the chosen columns
        len_data = tx_cross_val_train.shape[1]
        tx_cross_val_train = add_powers(tx_cross_val_train, range(previous_deg+1, deg+1), range(len_kept_data))
        tx_cross_val_test = add_powers(tx_cross_val_test, range(previous_deg+1, deg+1), range(len_kept_data))
        if stdize: 
            tx_cross_val_train[:,len_data:] = standardize(tx_cross_val_train[:,len_data:])[0]
            tx_cross_val_test[:,len_data:] = standardize(tx_cross_val_test[:,len_data:])[0]
                
        for gamma_id, single_gamma in enumerate(gammas):
            print('>> Gamma', single_gamma, '<<')
            
            # Compute the best weights on the training set
            initial_w = np.zeros(tx_cross_val_train.shape[1])
            weights, loss = least_squares_GD(y_cross_val_train, tx_cross_val_train, initial_w, max_iters, \
                                             single_gamma, fct='mse');

            # Compute the predictions
            y_predicted_cross_val_train = predict_labels(weights, tx_cross_val_train)
            y_predicted_cross_val_test = predict_labels(weights, tx_cross_val_test)

            # Compute the accuracies for each degree
            accuracies_train_by_deg[deg_id, gamma_id] = \
                np.sum(y_predicted_cross_val_train == y_cross_val_train)/len(y_cross_val_train)
            accuracies_test_by_deg[deg_id, gamma_id] = \
                np.sum(y_predicted_cross_val_test == y_cross_val_test)/len(y_cross_val_test)

        # Update the previous degree to the actual degree
        previous_deg = deg
        
    return accuracies_train_by_deg, accuracies_test_by_deg

In [23]:
def cross_validation_GD(y_single_jet_train, tx_single_jet_train, degrees, gammas, k_fold, seed, max_iters):
    
    # Get the indices so that we get the k'th subgroup in test, others in train, for each k
    k_indices = build_k_indices(y_single_jet_train, k_fold, seed)
    
    # Initialize matrix of computed accuracies for each degree and each fold
    accuracies_train_by_fold = np.zeros([len(degrees), len(gammas), k_fold])
    accuracies_test_by_fold = np.zeros([len(degrees), len(gammas), k_fold])
    
    # Preprocess training dataset
    tx_single_jet_train_preprocessed, len_kept_data, unique_cols = \
        preprocess_data(tx_single_jet_train, [], 'after')
    
    for k in range(k_fold):
        print('--- Fold', k, '---')
        # Create the testing set for this fold number
        k_index = k_indices[k] # Indices of the testing set for fold k
        y_cross_val_test = y_single_jet_train[k_index]
        tx_cross_val_test = tx_single_jet_train_preprocessed[k_index,:]
        
        # Create the training set for this fold number
        mask = np.ones(len(y_single_jet_train), dtype=bool) # set all elements to True
        mask[k_index] = False # set test elements to False
        y_cross_val_train = y_single_jet_train[mask] # select only True elements (ie train elements)
        tx_cross_val_train = tx_single_jet_train_preprocessed[mask,:]
        
        # Compute the accuracies for each degree
        accuracies_train_by_fold[:,:,k], accuracies_test_by_fold[:,:,k] = cross_validation_one_fold_GD\
            (y_cross_val_train, y_cross_val_test, tx_cross_val_train, tx_cross_val_test, \
                                 degrees, gammas, len_kept_data, max_iters, True)
    
    # Compute the mean accuracies over the folds, for each degree
    mean_accuracies_train_by_deg = np.mean(accuracies_train_by_fold, axis=2)
    mean_accuracies_test_by_deg = np.mean(accuracies_test_by_fold, axis=2)
    
    # Get the index of the best accuracy in the testing set
    max_id_deg_test, max_id_gamma_test = \
        np.unravel_index(mean_accuracies_test_by_deg.argmax(), mean_accuracies_test_by_deg.shape)
    
    # Find the optimal degree and the corresponding accuracies in the training and testing sets
    best_deg = degrees[max_id_deg_test]
    best_gamma = gammas[max_id_gamma_test]
    best_accuracy_test = mean_accuracies_test_by_deg[max_id_deg_test, max_id_gamma_test]
    corresponding_accuracy_train = mean_accuracies_train_by_deg[max_id_deg_test, max_id_gamma_test]
    
    print('Best accuracy test =', best_accuracy_test, 'with degree =', best_deg, 'and gamma =', best_gamma)
    print('Corresponding accuracy train =', corresponding_accuracy_train)
    
    return best_deg, best_gamma, best_accuracy_test, corresponding_accuracy_train

In [24]:
degrees = range(6,11)
gammas = np.logspace(-5,-1,5)
k_fold = 5
seed = 1
max_iters = 300

In [25]:
mask_jets_train = split_jets_mask(tx_train)
mask_jets_test = split_jets_mask(tx_test)
len_mask = len(mask_jets_train)

y_predicted_train = np.zeros(len(y_train))
y_predicted_test = np.zeros(tx_test.shape[0])
best_degrees = np.zeros(len_mask)
best_gammas = np.zeros(len_mask)

In [26]:
for jet_id in range(len_mask):
    print('***** Jet ', jet_id, '*****')
    # SEPARATE THE WHOLE DATA SET TO GET ONLY THE PART THAT HAVE THE RIGHT NUMBER OF JETS
    tx_single_jet_train = tx_train[mask_jets_train[jet_id]]
    tx_single_jet_test = tx_test[mask_jets_test[jet_id]]
    y_single_jet_train = y_train[mask_jets_train[jet_id]]
    
    # CALL CROSS VALIDATION FOR A SINGLE JET ON TRAIN PART, FIND BEST DEG, BEST ACCURACY ON TESTING CROSS VAL
    best_deg, best_gamma, best_accuracy_test, corresponding_accuracy_train = \
        cross_validation_GD(y_single_jet_train, tx_single_jet_train, degrees, gammas, k_fold, seed, max_iters)
    
    # KEEP IN MEMORY THE BEST DEGREE FOR THIS JET
    best_degrees[jet_id] = best_deg
    best_gammas[jet_id] = best_gamma
    
    # PREPROCESS FULL TRAINING AND TESTING DATA
    tx_single_jet_train_preprocessed, len_kept_data, unique_cols = \
        preprocess_data(tx_single_jet_train, [], 'after')
    tx_single_jet_test_preprocessed = preprocess_data(tx_single_jet_test, unique_cols, 'after')[0]
    
    # ADD POWERS TO THE CHOSEN COLUMNS
    len_data = tx_single_jet_train_preprocessed.shape[1]
    tx_single_jet_train_preprocessed = add_powers(tx_single_jet_train_preprocessed, range(2,best_deg+1), \
                                                  range(len_kept_data))
    tx_single_jet_test_preprocessed = add_powers(tx_single_jet_test_preprocessed, range(2,best_deg+1), \
                                                 range(len_kept_data))
    tx_single_jet_train_preprocessed[:,len_data:] = standardize(tx_single_jet_train_preprocessed[:,len_data:])[0]
    tx_single_jet_test_preprocessed[:,len_data:] = standardize(tx_single_jet_test_preprocessed[:,len_data:])[0]
    
    # COMPUTE THE BEST WEIGHTS AND FULL ACCURACY ON TRAINING FULL SET - ONE JET
    initial_w = np.zeros(tx_single_jet_train_preprocessed.shape[1])
    weights, loss = least_squares_GD(y_single_jet_train, tx_single_jet_train_preprocessed, initial_w, max_iters, \
                                             best_gamma, fct='mse');
    
    # COMPUTE THE PREDICTIONS ON THE FULL TESTING SET - SINGLE JET
    y_predicted_single_jet_train = predict_labels(weights, tx_single_jet_train_preprocessed)
    y_predicted_single_jet_test = predict_labels(weights, tx_single_jet_test_preprocessed)
    
    # ADD THE PREDICTIONS TO y_predicted_test AND y_predicted_train
    y_predicted_train[mask_jets_train[jet_id]] = y_predicted_single_jet_train
    y_predicted_test[mask_jets_test[jet_id]] = y_predicted_single_jet_test
    
    # COMPUTE THE ACCURACY train ON JET
    accuracy_train_single_jet = np.sum(y_predicted_single_jet_train == y_single_jet_train)/len(y_single_jet_train)
    
    # PRINT ACCURACY train ON JET
    print('Accuracy full train on jet', jet_id, '=', accuracy_train_single_jet)

***** Jet  0 *****
--- Fold 0 ---
++ Degree 6 ++
>> Gamma 1e-05 <<
Gradient Descent(0/299): loss=0.4999999999999999, w0=-4.468678085971062e-07, w1=-2.8951081650510586e-06
Gradient Descent(1/299): loss=0.49998009864094545, w0=-8.935129252750068e-07, w1=-5.7897363107493205e-06
Gradient Descent(2/299): loss=0.4999602032022705, w0=-1.3399354445383042e-06, w1=-8.683884561513866e-06
Gradient Descent(3/299): loss=0.49994031368090047, w0=-1.7861354608539977e-06, w1=-1.1577553041724759e-05
Gradient Descent(4/299): loss=0.49992043007376225, w0=-2.2321130686514964e-06, w1=-1.4470741875723064e-05
Gradient Descent(5/299): loss=0.49990055237778475, w0=-2.677868362322635e-06, w1=-1.7363451187810854e-05
Gradient Descent(6/299): loss=0.4998806805898989, w0=-3.1234014362216873e-06, w1=-2.025568110225123e-05
Gradient Descent(7/299): loss=0.49986081470703747, w0=-3.568712384665381e-06, w1=-2.314743174326833e-05
Gradient Descent(8/299): loss=0.4998409547261352, w0=-4.013801301932913e-06, w1=-2.603870323504

Gradient Descent(133/299): loss=0.4777631061550427, w0=-0.0004330348509917416, w1=-0.0034958890125139637
Gradient Descent(134/299): loss=0.4776218997905834, w0=-0.0004352310869277964, w1=-0.003519379331533181
Gradient Descent(135/299): loss=0.4774810049985013, w0=-0.00043741487111861996, w1=-0.0035428353146701253
Gradient Descent(136/299): loss=0.47734042040639946, w0=-0.00043958625869325074, w1=-0.0035662570443541013
Gradient Descent(137/299): loss=0.47720014464972327, w0=-0.00044174530455723333, w1=-0.003589644602767453
Gradient Descent(138/299): loss=0.4770601763717084, w0=-0.00044389206339348814, w1=-0.0036129980718463935
Gradient Descent(139/299): loss=0.4769205142233307, w0=-0.0004460265896631774, w1=-0.003636317533281831
Gradient Descent(140/299): loss=0.476781156863255, w0=-0.00044814893760656855, w1=-0.0036596030685201956
Gradient Descent(141/299): loss=0.4766421029577852, w0=-0.0004502591612438934, w1=-0.0036828547587642595
Gradient Descent(142/299): loss=0.476503351180814, w

Gradient Descent(202/299): loss=0.19602303633387985, w0=0.012080450258335264, w1=-0.047759994643453665
Gradient Descent(203/299): loss=0.1958584399655177, w0=0.012146984395371568, w1=-0.04778048034918043
Gradient Descent(204/299): loss=0.19569562994252132, w0=0.01221336493463504, w1=-0.047800568836137654
Gradient Descent(205/299): loss=0.1955345771312722, w0=0.012279592589329955, w1=-0.04782026363112197
Gradient Descent(206/299): loss=0.1953752529377108, w0=0.012345668079959984, w1=-0.047839568216281825
Gradient Descent(207/299): loss=0.19521762929670197, w0=0.012411592133919597, w1=-0.0478584860300604
Gradient Descent(208/299): loss=0.19506167866162044, w0=0.012477365485098089, w1=-0.04787702046811211
Gradient Descent(209/299): loss=0.19490737399415275, w0=0.012542988873495893, w1=-0.04789517488419371
Gradient Descent(210/299): loss=0.1947546887543094, w0=0.012608463044852794, w1=-0.047912952591030546
Gradient Descent(211/299): loss=0.1946035968906437, w0=0.01267378875028773, w1=-0.04

Gradient Descent(56/299): loss=0.4895724627209279, w0=-0.00022109186595251765, w1=-0.001576593612049372
Gradient Descent(57/299): loss=0.4894007237453892, w0=-0.00022440873767821847, w1=-0.001602980315958779
Gradient Descent(58/299): loss=0.48922944373669, w0=-0.00022770787946159443, w1=-0.0016293253376602067
Gradient Descent(59/299): loss=0.48905862040816644, w0=-0.00023098936992794737, w1=-0.001655628784127119
Gradient Descent(60/299): loss=0.48888825148743137, w0=-0.0002342532873775746, w1=-0.001681890761993048
Gradient Descent(61/299): loss=0.4887183347162742, w0=-0.00023749970978707022, w1=-0.0017081113775527942
Gradient Descent(62/299): loss=0.4885488678505619, w0=-0.00024072871481062118, w1=-0.0017342907367636252
Gradient Descent(63/299): loss=0.4883798486601411, w0=-0.00024394037978129842, w1=-0.0017604289452464686
Gradient Descent(64/299): loss=0.48821127492874017, w0=-0.0002471347817123427, w1=-0.0017865261082871
Gradient Descent(65/299): loss=0.48804314445387253, w0=-0.00025

Gradient Descent(153/299): loss=2.0596638122967546e+140, w0=1.0836569803569184e+69, w1=8.963701402023777e+68
Gradient Descent(154/299): loss=1.762445636947949e+141, w0=-3.1699435505696777e+69, w1=-2.622086874687867e+69
Gradient Descent(155/299): loss=1.5081172979065384e+142, w0=9.272807074512334e+69, w1=7.67020148268006e+69
Gradient Descent(156/299): loss=1.2904896108929407e+143, w0=-2.7125073260586443e+70, w1=-2.2437086792523044e+70
Gradient Descent(157/299): loss=1.1042665170238104e+144, w0=7.934701902884956e+70, w1=6.563359057411768e+70
Gradient Descent(158/299): loss=9.449162010503466e+144, w0=-2.321081078115585e+71, w1=-1.9199320533387727e+71
Gradient Descent(159/299): loss=8.085608077784059e+145, w0=6.789690951373241e+71, w1=5.616238662541265e+71
Gradient Descent(160/299): loss=6.918820728743479e+146, w0=-1.9861392887053635e+72, w1=-1.6428777601671562e+72
Gradient Descent(161/299): loss=5.920405715436277e+147, w0=5.8099099095831604e+72, w1=4.8057917354077904e+72
Gradient Descent(

Gradient Descent(264/299): loss=0.4607652918882375, w0=-0.000610087714331797, w1=-0.006295766548122365
Gradient Descent(265/299): loss=0.4606545236871765, w0=-0.0006109389975258811, w1=-0.006315329134119397
Gradient Descent(266/299): loss=0.46054394412122823, w0=-0.0006117835064619456, w1=-0.006334866525518219
Gradient Descent(267/299): loss=0.460433552472979, w0=-0.0006126212749155066, w1=-0.006354378779544638
Gradient Descent(268/299): loss=0.46032334802879266, w0=-0.0006134523365134581, w1=-0.00637386595325278
Gradient Descent(269/299): loss=0.46021333007878695, w0=-0.0006142767247346922, w1=-0.006393328103525689
Gradient Descent(270/299): loss=0.4601034979168093, w0=-0.0006150944729107181, w1=-0.006412765287075924
Gradient Descent(271/299): loss=0.4599938508404138, w0=-0.0006159056142262784, w1=-0.006432177560446149
Gradient Descent(272/299): loss=0.45988438815083643, w0=-0.000616710181719962, w1=-0.00645156498000973
Gradient Descent(273/299): loss=0.45977510915297287, w0=-0.000617

Gradient Descent(239/299): loss=3.2079896175823038e+230, w0=1.3556795942266818e+114, w1=1.1141613170124682e+114
Gradient Descent(240/299): loss=3.0236221721754938e+231, w0=-4.1620217240801137e+114, w1=-3.4205454041526506e+114
Gradient Descent(241/299): loss=2.8498505699533174e+232, w0=1.2777668783600756e+115, w1=1.0501289789204627e+115
Gradient Descent(242/299): loss=2.686065853664424e+233, w0=-3.922824780053031e+115, w1=-3.2239620939682152e+115
Gradient Descent(243/299): loss=2.5316940636435456e+234, w0=1.2043319102736687e+116, w1=9.897766647701628e+115
Gradient Descent(244/299): loss=2.3861942264535105e+235, w0=-3.697374803683727e+116, w1=-3.0386766890231467e+116
Gradient Descent(245/299): loss=2.2490564591226834e+236, w0=1.1351173478255523e+117, w1=9.328928786734747e+116
Gradient Descent(246/299): loss=2.1198001823344067e+237, w0=-3.484881738391196e+117, w1=-2.8640398836226876e+117
Gradient Descent(247/299): loss=1.9979724363068408e+238, w0=1.0698806386702173e+118, w1=8.792782796933

Gradient Descent(70/299): loss=0.4237140586297597, w0=-0.0006252606203721327, w1=-0.013171245814291504
Gradient Descent(71/299): loss=0.4230516956944954, w0=-0.0006224131344985909, w1=-0.01329314026576646
Gradient Descent(72/299): loss=0.4223946020681337, w0=-0.0006195417690955639, w1=-0.013413988978456064
Gradient Descent(73/299): loss=0.4217426661777172, w0=-0.0006166499955300273, w1=-0.01353380879054979
Gradient Descent(74/299): loss=0.4210957800470137, w0=-0.0006137410833472386, w1=-0.013652616149496278
Gradient Descent(75/299): loss=0.42045383914986934, w0=-0.0006108181096350726, w1=-0.013770427123584682
Gradient Descent(76/299): loss=0.41981674227083565, w0=-0.0006078839679821027, w1=-0.013887257413123457
Gradient Descent(77/299): loss=0.41918439137264085, w0=-0.0006049413770466718, w1=-0.014003122361231937
Gradient Descent(78/299): loss=0.41855669147010677, w0=-0.0006019928887534648, w1=-0.014118036964259452
Gradient Descent(79/299): loss=0.4179335505101411, w0=-0.00059904089613

Gradient Descent(272/299): loss=7.951733879008248e+273, w0=-6.755428360297914e+135, w1=-5.500410510990637e+135
Gradient Descent(273/299): loss=8.244895009374536e+274, w0=2.175276855482839e+136, w1=1.771155734628322e+136
Gradient Descent(274/299): loss=8.548864279156118e+275, w0=-7.004484609456544e+136, w1=-5.703197297799164e+136
Gradient Descent(275/299): loss=8.864040158223384e+276, w0=2.2554740340499485e+137, w1=1.8364539482153121e+137
Gradient Descent(276/299): loss=9.190835807063815e+277, w0=-7.262722958096765e+137, w1=-5.913460341301989e+137
Gradient Descent(277/299): loss=9.529679618389385e+278, w0=2.338627887963426e+138, w1=1.904159548467564e+138
Gradient Descent(278/299): loss=9.881015778711699e+279, w0=-7.530481927942777e+138, w1=-6.131475272939583e+138
Gradient Descent(279/299): loss=1.0245304850621072e+281, w0=2.4248474226678487e+139, w1=1.9743612898890386e+139
Gradient Descent(280/299): loss=1.0623024376532811e+282, w0=-7.808112521193209e+139, w1=-6.357527886014717e+139
Gra

Gradient Descent(70/299): loss=0.42330938140001534, w0=-0.0006008199129222994, w1=-0.013160824916725182
Gradient Descent(71/299): loss=0.4226463883125681, w0=-0.0005979181230311994, w1=-0.013282797143228301
Gradient Descent(72/299): loss=0.4219886701088193, w0=-0.0005950006640423847, w1=-0.013403727969235849
Gradient Descent(73/299): loss=0.42133611493435724, w0=-0.0005920708255421886, w1=-0.0135236340447876
Gradient Descent(74/299): loss=0.420688614576115, w0=-0.0005891316974090628, w1=-0.013642531631906929
Gradient Descent(75/299): loss=0.42004606431202135, w0=-0.0005861861793492534, w1=-0.013760436616288592
Gradient Descent(76/299): loss=0.41940836276817794, w0=-0.0005832369900067099, w1=-0.013877364518570145
Gradient Descent(77/299): loss=0.41877541178311706, w0=-0.0005802866756658557, w1=-0.013993330505203357
Gradient Descent(78/299): loss=0.4181471162787238, w0=-0.0005773376185650394, w1=-0.014108349398941308
Gradient Descent(79/299): loss=0.41752338413743717, w0=-0.0005743920448

Gradient Descent(171/299): loss=1.9245868699304003e+178, w0=1.0495158282754281e+88, w1=8.49285765387416e+87
Gradient Descent(172/299): loss=2.192587911308464e+179, w0=-3.542409314528551e+88, w1=-2.8665768775955587e+88
Gradient Descent(173/299): loss=2.4979084207250487e+180, w0=1.1956621723636774e+89, w1=9.675498318775018e+88
Gradient Descent(174/299): loss=2.8457451790863663e+181, w0=-4.0356940813083073e+89, w1=-3.2657511629390247e+89
Gradient Descent(175/299): loss=3.242018625303594e+182, w0=1.3621595710191143e+90, w1=1.1022823121722036e+90
Gradient Descent(176/299): loss=3.693473626542306e+183, w0=-4.597669346427424e+90, w1=-3.7205109486487374e+90
Gradient Descent(177/299): loss=4.207794280850595e+184, w0=1.5518419331197237e+91, w1=1.2557764527434998e+91
Gradient Descent(178/299): loss=4.79373470619154e+185, w0=-5.237900344573589e+91, w1=-4.238596582648935e+91
Gradient Descent(179/299): loss=5.461268042006075e+186, w0=1.767937792770515e+92, w1=1.430644837398685e+92
Gradient Descent(1

Gradient Descent(59/299): loss=0.42952892491827566, w0=-0.0006708561717394469, w1=-0.012622899209406135
Gradient Descent(60/299): loss=0.428793458277119, w0=-0.0006662161868897389, w1=-0.012769896001986211
Gradient Descent(61/299): loss=0.42806501418620235, w0=-0.0006614631063210475, w1=-0.012915578420204788
Gradient Descent(62/299): loss=0.42734341819629784, w0=-0.0006566053604654444, w1=-0.013059969129692811
Gradient Descent(63/299): loss=0.42662850215526127, w0=-0.0006516509967902899, w1=-0.013203090235187375
Gradient Descent(64/299): loss=0.4259201039173696, w0=-0.0006466076953024107, w1=-0.013344963297837095
Gradient Descent(65/299): loss=0.42521806706899246, w0=-0.00064148278344369, w1=-0.013485609351906187
Gradient Descent(66/299): loss=0.42452224066954725, w0=-0.0006362832504017684, w1=-0.01362504892089942
Gradient Descent(67/299): loss=0.4238324790067597, w0=-0.0006310157608586308, w1=-0.013763302033129218
Gradient Descent(68/299): loss=0.42314864136532154, w0=-0.0006256866681

>> Gamma 1e-05 <<
Gradient Descent(0/299): loss=0.4999999999999999, w0=-4.89337814920366e-07, w1=-3.062178596808333e-06
Gradient Descent(1/299): loss=0.4999780024429258, w0=-9.784161825040893e-07, w1=-6.123854110474375e-06
Gradient Descent(2/299): loss=0.49995601226245245, w0=-1.467235215246718e-06, w1=-9.185026679546804e-06
Gradient Descent(3/299): loss=0.4999340294543642, w0=-1.955795025597646e-06, w1=-1.2245696442527116e-05
Gradient Descent(4/299): loss=0.499912054014448, w0=-2.444095725960131e-06, w1=-1.530586353786964e-05
Gradient Descent(5/299): loss=0.4998900859384937, w0=-2.932137428691313e-06, w1=-1.8365528103981564e-05
Gradient Descent(6/299): loss=0.499868125222294, w0=-3.419920246102234e-06, w1=-2.142469027922294e-05
Gradient Descent(7/299): loss=0.49984617186164454, w0=-3.907444290457856e-06, w1=-2.4483350201906722e-05
Gradient Descent(8/299): loss=0.4998242258523439, w0=-4.394709673977079e-06, w1=-2.7541508010298754e-05
Gradient Descent(9/299): loss=0.4998022871901932, w0

Gradient Descent(264/299): loss=0.33730128772011164, w0=-4.377660680776885e-05, w1=-0.029630135819362163
Gradient Descent(265/299): loss=0.33698944975909073, w0=-4.19821310930627e-05, w1=-0.029681298949748063
Gradient Descent(266/299): loss=0.33667838531851974, w0=-4.018464216054214e-05, w1=-0.02973230427529553
Gradient Descent(267/299): loss=0.33636809115314814, w0=-3.838401192000666e-05, w1=-0.029783152854862077
Gradient Descent(268/299): loss=0.3360585640472232, w0=-3.658011432390689e-05, w1=-0.02983384573688499
Gradient Descent(269/299): loss=0.3357498008140254, w0=-3.477282534420122e-05, w1=-0.02988438395950856
Gradient Descent(270/299): loss=0.33544179829541465, w0=-3.296202294936917e-05, w1=-0.029934768550709437
Gradient Descent(271/299): loss=0.33513455336138465, w0=-3.1147587081584395e-05, w1=-0.029985000528420198
Gradient Descent(272/299): loss=0.3348280629096265, w0=-2.9329399634050102e-05, w1=-0.03003508090065111
Gradient Descent(273/299): loss=0.3345223238651015, w0=-2.750

Gradient Descent(168/299): loss=0.49634447935466325, w0=-7.903591099793505e-05, w1=-0.0005103963336138907
Gradient Descent(169/299): loss=0.4963233316401969, w0=-7.948219390576869e-05, w1=-0.0005133745155518361
Gradient Descent(170/299): loss=0.49630219092555583, w0=-7.992823156568796e-05, w1=-0.0005163522122123071
Gradient Descent(171/299): loss=0.49628105720671684, w0=-8.037402408695975e-05, w1=-0.0005193294237307307
Gradient Descent(172/299): loss=0.49625993047965944, w0=-8.081957157880492e-05, w1=-0.0005223061502424871
Gradient Descent(173/299): loss=0.49623881074036597, w0=-8.126487415039828e-05, w1=-0.0005252823918829088
Gradient Descent(174/299): loss=0.4962176979848214, w0=-8.170993191086864e-05, w1=-0.0005282581487872812
Gradient Descent(175/299): loss=0.49619659220901385, w0=-8.215474496929879e-05, w1=-0.0005312334210908425
Gradient Descent(176/299): loss=0.4961754934089339, w0=-8.259931343472558e-05, w1=-0.0005342082089287837
Gradient Descent(177/299): loss=0.496154401580575

Gradient Descent(2/299): loss=0.49563653763996324, w0=-0.00013898693056109822, w1=-0.0009035314965489413
Gradient Descent(3/299): loss=0.4935629399850715, w0=-0.00018033602005498053, w1=-0.001194909594957058
Gradient Descent(4/299): loss=0.4915560019604319, w0=-0.0002193785725269842, w1=-0.0014816152138076638
Gradient Descent(5/299): loss=0.48961199725418764, w0=-0.00025621770570538794, w1=-0.0017637774770125564
Gradient Descent(6/299): loss=0.4877274514898854, w0=-0.0002909521874934823, w1=-0.0020415210238002002
Gradient Descent(7/299): loss=0.485899123548157, w0=-0.0003236766166315822, w1=-0.002314966180677796
Gradient Descent(8/299): loss=0.48412398833862036, w0=-0.0003544815958732354, w1=-0.0025842291265096473
Gradient Descent(9/299): loss=0.4823992209067597, w0=-0.0003834538979860426, w1=-0.0028494220509928214
Gradient Descent(10/299): loss=0.480722181769808, w0=-0.0004106766248746166, w1=-0.0031106533067995165
Gradient Descent(11/299): loss=0.4790904033841725, w0=-0.0004362293601

Gradient Descent(194/299): loss=0.1852528758917948, w0=0.00782223741264297, w1=-0.060214592315709234
Gradient Descent(195/299): loss=0.1850848469925432, w0=0.007873848426494088, w1=-0.060281625411386014
Gradient Descent(196/299): loss=0.18491896119359447, w0=0.007925389562424955, w1=-0.06034804087885278
Gradient Descent(197/299): loss=0.18475518113493372, w0=0.007976860512009363, w1=-0.060413843930157224
Gradient Descent(198/299): loss=0.18459347016851035, w0=0.008028260980078685, w1=-0.0604790397212105
Gradient Descent(199/299): loss=0.18443379234407847, w0=0.00807959068440217, w1=-0.06054363335266069
Gradient Descent(200/299): loss=0.18427611239532937, w0=0.00813084935537467, w1=-0.06060762987074685
Gradient Descent(201/299): loss=0.1841203957263084, w0=0.00818203673571166, w1=-0.06067103426813416
Gradient Descent(202/299): loss=0.18396660839811102, w0=0.008233152580151325, w1=-0.060733851484730754
Gradient Descent(203/299): loss=0.18381471711585154, w0=0.008284196655163544, w1=-0.06

Gradient Descent(204/299): loss=0.4955395261827168, w0=-9.489797013774428e-05, w1=-0.0006172328343120543
Gradient Descent(205/299): loss=0.49551841549403297, w0=-9.533502403598762e-05, w1=-0.0006201929390815964
Gradient Descent(206/299): loss=0.4954973118703671, w0=-9.577183466809056e-05, w1=-0.000623152560418965
Gradient Descent(207/299): loss=0.49547621530758357, w0=-9.620840214500352e-05, w1=-0.000626111698461363
Gradient Descent(208/299): loss=0.4954551258015493, w0=-9.664472657762885e-05, w1=-0.0006290703533459441
Gradient Descent(209/299): loss=0.49543404334813457, w0=-9.708080807682084e-05, w1=-0.0006320285252098129
Gradient Descent(210/299): loss=0.4954129679432122, w0=-9.751664675338574e-05, w1=-0.0006349862141900247
Gradient Descent(211/299): loss=0.49539189958265817, w0=-9.795224271808179e-05, w1=-0.0006379434204235858
Gradient Descent(212/299): loss=0.4953708382623513, w0=-9.83875960816192e-05, w1=-0.0006409001440474532
Gradient Descent(213/299): loss=0.49534978397817336, w

Gradient Descent(82/299): loss=0.23471336141404048, w0=0.0020260081573465834, w1=-0.046862907220044624
Gradient Descent(83/299): loss=0.2337176597647132, w0=0.0020771325093343553, w1=-0.04706130057300719
Gradient Descent(84/299): loss=0.23274015838502382, w0=0.0021284635441367827, w1=-0.04725756154851309
Gradient Descent(85/299): loss=0.23178048407516735, w0=0.0021799924875400686, w1=-0.04745172485483811
Gradient Descent(86/299): loss=0.23083827216351577, w0=0.002231710839362423, w1=-0.04764382413473549
Gradient Descent(87/299): loss=0.22991316626959277, w0=0.002283610364637433, w1=-0.0478338920168091
Gradient Descent(88/299): loss=0.22900481807608108, w0=0.002335683085032948, w1=-0.04802196016367446
Gradient Descent(89/299): loss=0.2281128871093776, w0=0.002387921270509107, w1=-0.048208059317143774
Gradient Descent(90/299): loss=0.22723704052824328, w0=0.0024403174312174, w1=-0.04839221934065184
Gradient Descent(91/299): loss=0.22637695292012838, w0=0.0024928643096411886, w1=-0.048574

Gradient Descent(210/299): loss=0.4953431416970542, w0=-9.742193356650516e-05, w1=-0.0006348770493442571
Gradient Descent(211/299): loss=0.4953217740900443, w0=-9.785666151507498e-05, w1=-0.0006378332513833324
Gradient Descent(212/299): loss=0.4953004138176499, w0=-9.829114321999212e-05, w1=-0.000640788966545558
Gradient Descent(213/299): loss=0.4952790608754724, w0=-9.872537879613807e-05, w1=-0.000643744194972257
Gradient Descent(214/299): loss=0.49525771525911666, w0=-9.915936835834322e-05, w1=-0.0006466989368047005
Gradient Descent(215/299): loss=0.49523637696419054, w0=-9.959311202138698e-05, w1=-0.0006496531921841072
Gradient Descent(216/299): loss=0.49521504598630517, w0=-0.00010002660989999771, w1=-0.0006526069612516438
Gradient Descent(217/299): loss=0.49519372232107484, w0=-0.00010045986210885283, w1=-0.000655560244148425
Gradient Descent(218/299): loss=0.49517240596411705, w0=-0.00010089286876257874, w1=-0.000658513041015513
Gradient Descent(219/299): loss=0.4951510969110525,

Gradient Descent(127/299): loss=0.20302641923492104, w0=0.0042631526823471684, w1=-0.054242622734640394
Gradient Descent(128/299): loss=0.2025738625102041, w0=0.004318287764839239, w1=-0.0543708205341357
Gradient Descent(129/299): loss=0.20212877634766263, w0=0.004373439687136409, w1=-0.05449780490465339
Gradient Descent(130/299): loss=0.20169101784664437, w0=0.004428605810666467, w1=-0.054623588511464115
Gradient Descent(131/299): loss=0.20126044699699883, w0=0.004483783565295741, w1=-0.05474818382160365
Gradient Descent(132/299): loss=0.20083692661726948, w0=0.004538970447586681, w1=-0.054871603108952755
Gradient Descent(133/299): loss=0.2004203222943404, w0=0.004594164019107507, w1=-0.05499385845912846
Gradient Descent(134/299): loss=0.2000105023244959, w0=0.00464936190479198, w1=-0.05511496177419581
Gradient Descent(135/299): loss=0.19960733765585387, w0=0.004704561791347442, w1=-0.05523492477720848
Gradient Descent(136/299): loss=0.19921070183213463, w0=0.0047597614257093275, w1=-

Gradient Descent(263/299): loss=0.49493886298781564, w0=-3.215230942400009e-05, w1=-0.0007449190196379779
Gradient Descent(264/299): loss=0.49492020769081774, w0=-3.225222678158949e-05, w1=-0.0007476865953490924
Gradient Descent(265/299): loss=0.49490155667179403, w0=-3.235198521960954e-05, w1=-0.0007504537715734696
Gradient Descent(266/299): loss=0.49488290992877854, w0=-3.245158480558996e-05, w1=-0.0007532205484074224
Gradient Descent(267/299): loss=0.4948642674598067, w0=-3.2551025607033625e-05, w1=-0.0007559869259472338
Gradient Descent(268/299): loss=0.494845629262915, w0=-3.26503076914166e-05, w1=-0.0007587529042891571
Gradient Descent(269/299): loss=0.49482699533614105, w0=-3.274943112618812e-05, w1=-0.0007615184835294161
Gradient Descent(270/299): loss=0.4948083656775238, w0=-3.284839597877065e-05, w1=-0.0007642836637642048
Gradient Descent(271/299): loss=0.4947897402851034, w0=-3.294720231655982e-05, w1=-0.0007670484450896873
Gradient Descent(272/299): loss=0.49477111915692107

Gradient Descent(155/299): loss=0.36559109530240397, w0=0.002853903831460103, w1=-0.022095161041971817
Gradient Descent(156/299): loss=0.36509812559606647, w0=0.0028746923644052515, w1=-0.02217026301169822
Gradient Descent(157/299): loss=0.3646071173902941, w0=0.0028954437086162054, w1=-0.02224496607361514
Gradient Descent(158/299): loss=0.3641180540532177, w0=0.0029161580081978754, w1=-0.022319273945703905
Gradient Descent(159/299): loss=0.36363091922562685, w0=0.002936835408398455, w1=-0.022393190295654453
Gradient Descent(160/299): loss=0.36314569681473824, w0=0.0029574760554466365, w1=-0.022466718741739628
Gradient Descent(161/299): loss=0.3626623709881286, w0=0.002978080096397297, w1=-0.022539862853670448
Gradient Descent(162/299): loss=0.36218092616782865, w0=0.0029986476789852835, w1=-0.022612626153432815
Gradient Descent(163/299): loss=0.361701347024573, w0=0.003019178951486955, w1=-0.022685012116106205
Gradient Descent(164/299): loss=0.3612236184722005, w0=0.003039674062589145

Gradient Descent(81/299): loss=3.589312592691689e+70, w0=1.2887052692982747e+34, w1=1.163249994557546e+34
Gradient Descent(82/299): loss=2.8600344309121866e+71, w0=-3.63775623458827e+34, w1=-3.283621182359785e+34
Gradient Descent(83/299): loss=2.278931337063909e+72, w0=1.0268655477362645e+35, w1=9.269003326617656e+34
Gradient Descent(84/299): loss=1.8158970335876194e+73, w0=-2.8986352716595544e+35, w1=-2.616453540085483e+35
Gradient Descent(85/299): loss=1.44694225006387e+74, w0=8.182265396508159e+35, w1=7.385723023495712e+35
Gradient Descent(86/299): loss=1.1529518669258146e+75, w0=-2.3096892414672224e+36, w1=-2.084841322197243e+36
Gradient Descent(87/299): loss=9.186945832765918e+75, w0=6.519788999298458e+36, w1=5.885088467186915e+36
Gradient Descent(88/299): loss=7.320338008490837e+76, w0=-1.8404055330131923e+37, w1=-1.6612423160394207e+37
Gradient Descent(89/299): loss=5.832988409209114e+77, w0=5.195095311075297e+37, w1=4.689353521170054e+37
Gradient Descent(90/299): loss=4.6478391

Gradient Descent(70/299): loss=0.48708765388179676, w0=-6.140660752072757e-05, w1=-0.0019418036865621294
Gradient Descent(71/299): loss=0.4869172569380411, w0=-6.173487717117737e-05, w1=-0.0019677791394419586
Gradient Descent(72/299): loss=0.4867472141437835, w0=-6.204978457280495e-05, w1=-0.0019937186460633207
Gradient Descent(73/299): loss=0.4865775239375312, w0=-6.235138841402077e-05, w1=-0.0020196222922942665
Gradient Descent(74/299): loss=0.4864081847671706, w0=-6.263974714190945e-05, w1=-0.002045490163733583
Gradient Descent(75/299): loss=0.48623919508990215, w0=-6.291491896320336e-05, w1=-0.002071322345711756
Gradient Descent(76/299): loss=0.48607055337217475, w0=-6.317696184525226e-05, w1=-0.0020971189232919254
Gradient Descent(77/299): loss=0.48590225808962184, w0=-6.342593351698916e-05, w1=-0.0021228799812708404
Gradient Descent(78/299): loss=0.4857343077269969, w0=-6.36618914698923e-05, w1=-0.0021486056041798117
Gradient Descent(79/299): loss=0.48556670077811015, w0=-6.38848

Gradient Descent(19/299): loss=3388990926345418.0, w0=3967785.397158653, w1=3552490.5965803936
Gradient Descent(20/299): loss=3.0087648793604656e+16, w0=-11822439.340088986, w1=-10585024.491247268
Gradient Descent(21/299): loss=2.6711980928892938e+17, w0=35226217.92432658, w1=31539208.268129297
Gradient Descent(22/299): loss=2.371504433730314e+18, w0=-104960270.10259312, w1=-93974432.19820812
Gradient Descent(23/299): loss=2.1054347463685546e+19, w0=312740309.90692717, w1=280006834.915063
Gradient Descent(24/299): loss=1.8692166070477282e+20, w0=-931843080.4300697, w1=-834310203.7304229
Gradient Descent(25/299): loss=1.6595008371023641e+21, w0=2776525759.987952, w1=2485916160.025291
Gradient Descent(26/299): loss=1.4733140171983983e+22, w0=-8272954382.073937, w1=-7407052110.478922
Gradient Descent(27/299): loss=1.3080163292134496e+23, w0=24650149188.274208, w1=22070101094.98703
Gradient Descent(28/299): loss=1.161264128025082e+24, w0=-73447746347.8815, w1=-65760218111.57204
Gradient De

Gradient Descent(294/299): loss=0.45512664899804717, w0=0.00012891567618537747, w1=-0.0069970835694753725
Gradient Descent(295/299): loss=0.45500764479419065, w0=0.00013056671611746793, w1=-0.007016751477111792
Gradient Descent(296/299): loss=0.4548887960101132, w0=0.00013222261165376663, w1=-0.0070363975572867495
Gradient Descent(297/299): loss=0.45477010216866076, w0=0.00013388333897508488, w1=-0.007056021854113466
Gradient Descent(298/299): loss=0.4546515627949216, w0=0.00013554887436436657, w1=-0.007075624411578093
Gradient Descent(299/299): loss=0.45453317741621246, w0=0.00013721919420626059, w1=-0.007095205273540159
>> Gamma 0.001 <<
Gradient Descent(0/299): loss=0.4999999999999999, w0=-1.4431974266238262e-05, w1=-0.00028765035373453306
Gradient Descent(1/299): loss=0.49798938451648644, w0=-2.6970143461260562e-05, w1=-0.0005710000600934762
Gradient Descent(2/299): loss=0.49602808648563956, w0=-3.769789313502429e-05, w1=-0.0008501601633347786
Gradient Descent(3/299): loss=0.494113

Gradient Descent(8/299): loss=0.4998372748981013, w0=-1.2919066012583472e-06, w1=-2.5872996178635447e-05
Gradient Descent(9/299): loss=0.4998169573798979, w0=-1.434484880491554e-06, w1=-2.874561751058072e-05
Gradient Descent(10/299): loss=0.49979664499039966, w0=-1.5768700991913078e-06, w1=-3.161780805566466e-05
Gradient Descent(11/299): loss=0.49977633772692065, w0=-1.7190623443000421e-06, w1=-3.448956792760151e-05
Gradient Descent(12/299): loss=0.49975603558677734, w0=-1.8610617027225511e-06, w1=-3.7360897240066087e-05
Gradient Descent(13/299): loss=0.4997357385672877, w0=-2.002868261326005e-06, w1=-4.023179610669386e-05
Gradient Descent(14/299): loss=0.49971544666577167, w0=-2.1444821069399674e-06, w1=-4.310226464108092e-05
Gradient Descent(15/299): loss=0.499695159879551, w0=-2.2859033263564094e-06, w1=-4.5972302956784015e-05
Gradient Descent(16/299): loss=0.49967487820594936, w0=-2.427132006329728e-06, w1=-4.884191116732055e-05
Gradient Descent(17/299): loss=0.4996546016422922, w0

Gradient Descent(161/299): loss=0.3588692446720873, w0=0.0032371006697902, w1=-0.022746879154829917
Gradient Descent(162/299): loss=0.35837873312358637, w0=0.0032591288106347403, w1=-0.02282097238922786
Gradient Descent(163/299): loss=0.35789025670465613, w0=0.0032811137359885795, w1=-0.02289467451468306
Gradient Descent(164/299): loss=0.3574037989984169, w0=0.003303055515176189, w1=-0.02296798891845335
Gradient Descent(165/299): loss=0.3569193438191445, w0=0.0033249542190832287, w1=-0.023040918945336344
Gradient Descent(166/299): loss=0.35643687520747735, w0=0.003346809920049553, w1=-0.023113467898374967
Gradient Descent(167/299): loss=0.35595637742575054, w0=0.0033686226917675715, w1=-0.02318563903954806
Gradient Descent(168/299): loss=0.3554778349534504, w0=0.0033903926091857243, w1=-0.023257435590446535
Gradient Descent(169/299): loss=0.35500123248278936, w0=0.0034121197484168367, w1=-0.023328860732935355
Gradient Descent(170/299): loss=0.35452655491439444, w0=0.0034338041866511337

Gradient Descent(101/299): loss=0.4979467964909128, w0=-1.3704585996187816e-05, w1=-0.00029118773947919624
Gradient Descent(102/299): loss=0.49792672830004836, w0=-1.3828944510035898e-05, w1=-0.00029402057137290047
Gradient Descent(103/299): loss=0.49790666512485293, w0=-1.3953111940782466e-05, w1=-0.00029685298101903154
Gradient Descent(104/299): loss=0.49788660696265963, w0=-1.4077088376665131e-05, w1=-0.0002996849685308475
Gradient Descent(105/299): loss=0.4978665538108033, w0=-1.4200873905882204e-05, w1=-0.00030251653402156613
Gradient Descent(106/299): loss=0.49784650566662075, w0=-1.4324468616592715e-05, w1=-0.0003053476776043645
Gradient Descent(107/299): loss=0.49782646252745066, w0=-1.4447872596916437e-05, w1=-0.0003081783993923795
Gradient Descent(108/299): loss=0.4978064243906337, w0=-1.4571085934933894e-05, w1=-0.0003110086994987075
Gradient Descent(109/299): loss=0.49778639125351243, w0=-1.4694108718686386e-05, w1=-0.0003138385780364044
Gradient Descent(110/299): loss=0.49

Gradient Descent(285/299): loss=0.30904771292662464, w0=0.005743842979533724, w1=-0.029772986678758273
Gradient Descent(286/299): loss=0.30873043213235585, w0=0.00576101577957287, w1=-0.029815549669194794
Gradient Descent(287/299): loss=0.30841409821793253, w0=0.005778153013526717, w1=-0.029857949185913276
Gradient Descent(288/299): loss=0.308098706435328, w0=0.005795254761824016, w1=-0.029900186204092384
Gradient Descent(289/299): loss=0.30778425207440774, w0=0.005812321104751865, w1=-0.029942261690916506
Gradient Descent(290/299): loss=0.30747073046251594, w0=0.005829352122456685, w1=-0.02998417660566246
Gradient Descent(291/299): loss=0.3071581369640677, w0=0.005846347894945174, w1=-0.03002593189978502
Gradient Descent(292/299): loss=0.306846466980147, w0=0.005863308502085245, w1=-0.03006752851700128
Gradient Descent(293/299): loss=0.3065357159481108, w0=0.005880234023606938, w1=-0.03010896739337389
Gradient Descent(294/299): loss=0.30622587934119794, w0=0.005897124539103321, w1=-0.

Gradient Descent(208/299): loss=6.296354543536608e+221, w0=-5.377737127740927e+109, w1=-4.7246508486694015e+109
Gradient Descent(209/299): loss=7.547032990161764e+222, w0=1.8618432138953817e+110, w1=1.6357361677727496e+110
Gradient Descent(210/299): loss=9.04614035959248e+223, w0=-6.445945703159485e+110, w1=-5.6631334171781636e+110
Gradient Descent(211/299): loss=1.0843023412263359e+225, w0=2.2316710503860387e+111, w1=1.9606511571134738e+111
Gradient Descent(212/299): loss=1.2996830918527534e+226, w0=-7.72633823876301e+111, w1=-6.788031778007932e+111
Gradient Descent(213/299): loss=1.5578460684106708e+227, w0=2.6749597602856845e+112, w1=2.3501057417619306e+112
Gradient Descent(214/299): loss=1.8672893323579038e+228, w0=-9.261061964966788e+112, w1=-8.136374693112037e+112
Gradient Descent(215/299): loss=2.2381989603728064e+229, w0=3.206301268240183e+113, w1=2.816919765366886e+113
Gradient Descent(216/299): loss=2.6827843438103743e+230, w0=-1.110063604110166e+114, w1=-9.752546144700239e+1

Gradient Descent(123/299): loss=0.47976364140514766, w0=0.00012102279627677171, w1=-0.003003189579739897
Gradient Descent(124/299): loss=0.4796198115990597, w0=0.00012244346492957514, w1=-0.003025346480772379
Gradient Descent(125/299): loss=0.47947626481730615, w0=0.00012386973794220092, w1=-0.003047473342691001
Gradient Descent(126/299): loss=0.4793330000244722, w0=0.000125301583830706, w1=-0.003069570229834668
Gradient Descent(127/299): loss=0.4791900161900997, w0=0.00012673897123905833, w1=-0.003091637206365291
Gradient Descent(128/299): loss=0.4790473122886584, w0=0.00012818186893866099, w1=-0.0031136743362683472
Gradient Descent(129/299): loss=0.47890488729951797, w0=0.00012963024582787777, w1=-0.0031356816833534354
Gradient Descent(130/299): loss=0.47876274020692, w0=0.00013108407093156076, w1=-0.0031576593112548315
Gradient Descent(131/299): loss=0.4786208699999499, w0=0.0001325433134005794, w1=-0.003179607283432042
Gradient Descent(132/299): loss=0.47847927567250964, w0=0.00013

Gradient Descent(9/299): loss=62482.42468035159, w0=15.417305099650795, w1=16.04333831334216
Gradient Descent(10/299): loss=403959.38255782565, w0=-39.16464677125071, w1=-40.936392818359955
Gradient Descent(11/299): loss=2611672.139519464, w0=99.62205790077707, w1=103.94104136352124
Gradient Descent(12/299): loss=16884950.09042407, w0=-253.2633639315154, w1=-264.4377736477677
Gradient Descent(13/299): loss=109164374.74525377, w0=644.0115002676098, w1=672.2266475741372
Gradient Descent(14/299): loss=705768199.143696, w0=-1637.4618181908522, w1=-1709.4070681873368
Gradient Descent(15/299): loss=4562924055.989837, w0=4163.581970391346, w1=4346.307623389803
Gradient Descent(16/299): loss=29500161625.5486, w0=-10586.57431979559, w1=-11051.398321536042
Gradient Descent(17/299): loss=190724089483.28885, w0=26918.257601548532, w1=28099.93729659167
Gradient Descent(18/299): loss=1233067085228.9724, w0=-68444.27722975894, w1=-71449.12264650356
Gradient Descent(19/299): loss=7972010461790.017, w0

Gradient Descent(297/299): loss=0.4579903130488575, w0=0.0004423273069235697, w1=-0.006454751202430075
Gradient Descent(298/299): loss=0.45788330203462096, w0=0.00044440674019206086, w1=-0.006472520296226913
Gradient Descent(299/299): loss=0.45777645092836816, w0=0.00044648790935011346, w1=-0.006490268405457239
>> Gamma 0.001 <<
Gradient Descent(0/299): loss=0.4999999999999999, w0=4.346813614025415e-06, w1=-0.0002644430043794572
Gradient Descent(1/299): loss=0.4981039384219629, w0=9.81353984198294e-06, w1=-0.0005249085536562358
Gradient Descent(2/299): loss=0.49625459319462095, w0=1.6344987677463878e-05, w1=-0.0007814904267903421
Gradient Descent(3/299): loss=0.49444997266798274, w0=2.3888192766091066e-05, w1=-0.001034279603920257
Gradient Descent(4/299): loss=0.4926881927841662, w0=3.239233262878333e-05, w1=-0.0012833643605411685
Gradient Descent(5/299): loss=0.4909674703734497, w0=4.180864505000777e-05, w1=-0.0015288303583381856
Gradient Descent(6/299): loss=0.489286116902111, w0=5.2

Gradient Descent(280/299): loss=4.4879902919593525e+238, w0=-1.3201169260584026e+118, w1=-1.351906760053973e+118
Gradient Descent(281/299): loss=3.261686947260793e+239, w0=3.5588327859121693e+118, w1=3.6445333032291753e+118
Gradient Descent(282/299): loss=2.370460061152865e+240, w0=-9.594067425450944e+118, w1=-9.825102877521132e+118
Gradient Descent(283/299): loss=1.722752977945913e+241, w0=2.5864134479278874e+119, w1=2.6486970627581576e+119
Gradient Descent(284/299): loss=1.2520260820501205e+242, w0=-6.972574015767664e+119, w1=-7.1404810898364e+119
Gradient Descent(285/299): loss=9.099211147513626e+242, w0=1.8796990266311806e+120, w1=1.9249641988585066e+120
Gradient Descent(286/299): loss=6.612932804999013e+243, w0=-5.06738031425428e+120, w1=-5.189408276931463e+120
Gradient Descent(287/299): loss=4.806007858756156e+244, w0=1.3660880218315005e+121, w1=1.3989848892075055e+121
Gradient Descent(288/299): loss=3.492809048500437e+245, w0=-3.6827638102117327e+121, w1=-3.771448719753109e+121


Gradient Descent(235/299): loss=0.347302450960008, w0=0.003651205859666052, w1=-0.023860935317953594
Gradient Descent(236/299): loss=0.3469601210772059, w0=0.003660441629798324, w1=-0.02390447456064151
Gradient Descent(237/299): loss=0.3466186718488397, w0=0.003669654944781967, w1=-0.02394784448506662
Gradient Descent(238/299): loss=0.34627809892105055, w0=0.0036788461519110895, w1=-0.0239910464854485
Gradient Descent(239/299): loss=0.34593839799064235, w0=0.0036880155943124094, w1=-0.0240340819403043
Gradient Descent(240/299): loss=0.3455995648041559, w0=0.003697163610990338, w1=-0.024076952212653883
Gradient Descent(241/299): loss=0.3452615951569637, w0=0.0037062905368717286, w1=-0.024119658650221882
Gradient Descent(242/299): loss=0.34492448489238475, w0=0.003715396702850282, w1=-0.02416220258563675
Gradient Descent(243/299): loss=0.34458822990081917, w0=0.0037244824358306098, w1=-0.024204585336626856
Gradient Descent(244/299): loss=0.3442528261189009, w0=0.0037335480587719423, w1=-

Gradient Descent(185/299): loss=0.4964554313482436, w0=1.0174496966675343e-05, w1=-0.00048504027420023164
Gradient Descent(186/299): loss=0.4964367271507639, w0=1.0240195982356702e-05, w1=-0.0004876114945055517
Gradient Descent(187/299): loss=0.49641802771413146, w0=1.0306008829589979e-05, w1=-0.0004901823301549
Gradient Descent(188/299): loss=0.4963993330361862, w0=1.0371935449145987e-05, w1=-0.0004927527812420419
Gradient Descent(189/299): loss=0.4963806431147692, w0=1.0437975781820879e-05, w1=-0.0004953228478607132
Gradient Descent(190/299): loss=0.49636195794772286, w0=1.0504129768436138e-05, w1=-0.0004978925301046196
Gradient Descent(191/299): loss=0.49634327753289076, w0=1.0570397349838564e-05, w1=-0.0005004618280674371
Gradient Descent(192/299): loss=0.4963246018681178, w0=1.063677846690027e-05, w1=-0.0005030307418428119
Gradient Descent(193/299): loss=0.4963059309512501, w0=1.0703273060518665e-05, w1=-0.0005055992715243606
Gradient Descent(194/299): loss=0.49628726478013496, w0

Gradient Descent(33/299): loss=0.3169812642097108, w0=0.004708759872686894, w1=-0.0278931795106405
Gradient Descent(34/299): loss=0.31426807737608364, w0=0.004792941979931823, w1=-0.028198930000216737
Gradient Descent(35/299): loss=0.31161472098903753, w0=0.004877010242413798, w1=-0.02849701658884077
Gradient Descent(36/299): loss=0.30901941958962564, w0=0.00496103584841492, w1=-0.028787846878374334
Gradient Descent(37/299): loss=0.30648049228777063, w0=0.005045079985444096, w1=-0.02907179289908975
Gradient Descent(38/299): loss=0.3039963431086515, w0=0.0051291949486117665, w1=-0.029349194855391333
Gradient Descent(39/299): loss=0.3015654527352794, w0=0.005213425123013724, w1=-0.029620364432684936
Gradient Descent(40/299): loss=0.2991863714139597, w0=0.005297807855887912, w1=-0.029885587721959213
Gradient Descent(41/299): loss=0.2968577128316, w0=0.005382374232161657, w1=-0.030145127810657146
Gradient Descent(42/299): loss=0.29457814880788513, w0=0.005467149765161767, w1=-0.03039922708

Gradient Descent(159/299): loss=0.4968966296612628, w0=8.592079387538672e-06, w1=-0.00041801766989274974
Gradient Descent(160/299): loss=0.496877524386135, w0=8.655853494058161e-06, w1=-0.000420598490204106
Gradient Descent(161/299): loss=0.4968584241403131, w0=8.719749186557971e-06, w1=-0.0004231789209403931
Gradient Descent(162/299): loss=0.49683932892142657, w0=8.78376640043428e-06, w1=-0.0004257589621990496
Gradient Descent(163/299): loss=0.49682023872710634, w0=8.84790507111169e-06, w1=-0.00042833861407748174
Gradient Descent(164/299): loss=0.4968011535549848, w0=8.912165134043223e-06, w1=-0.00043091787667306374
Gradient Descent(165/299): loss=0.4967820734026957, w0=8.976546524710304e-06, w1=-0.00043349675008313765
Gradient Descent(166/299): loss=0.49676299826787434, w0=9.04104917862275e-06, w1=-0.00043607523440501337
Gradient Descent(167/299): loss=0.4967439281481575, w0=9.105673031318755e-06, w1=-0.00043865332973596873
Gradient Descent(168/299): loss=0.49672486304118324, w0=9.17

Gradient Descent(271/299): loss=0.33410306658604794, w0=0.0041754541045048044, w1=-0.025461715062154142
Gradient Descent(272/299): loss=0.33378860049233805, w0=0.004184476838067864, w1=-0.025500298613040775
Gradient Descent(273/299): loss=0.33347489658402873, w0=0.00419348586447952, w1=-0.025538751168976054
Gradient Descent(274/299): loss=0.3331619517509156, w0=0.004202481413111516, w1=-0.025577073670993444
Gradient Descent(275/299): loss=0.33284976291110463, w0=0.004211463710519235, w1=-0.025615267050330983
Gradient Descent(276/299): loss=0.3325383270105776, w0=0.004220432980473467, w1=-0.025653332228551522
Gradient Descent(277/299): loss=0.3322276410227669, w0=0.004229389443991819, w1=-0.025691270117661282
Gradient Descent(278/299): loss=0.3319177019481365, w0=0.004238333319369796, w1=-0.02572908162022677
Gradient Descent(279/299): loss=0.3316085068137732, w0=0.004247264822211535, w1=-0.02576676762949008
Gradient Descent(280/299): loss=0.3313000526729838, w0=0.004256184165460214, w1=

Gradient Descent(32/299): loss=0.49942315729473036, w0=6.3928042278688375e-06, w1=-8.209894535662142e-05
Gradient Descent(33/299): loss=0.49940518316243016, w0=6.586348932476842e-06, w1=-8.458186925980083e-05
Gradient Descent(34/299): loss=0.4993872121843159, w0=6.779883234618137e-06, w1=-8.706450408315523e-05
Gradient Descent(35/299): loss=0.4993692443595442, w0=6.973407133260165e-06, w1=-8.954684986982402e-05
Gradient Descent(36/299): loss=0.49935127968727155, w0=7.166920627371217e-06, w1=-9.202890666293948e-05
Gradient Descent(37/299): loss=0.4993333181666548, w0=7.360423715920426e-06, w1=-9.451067450562667e-05
Gradient Descent(38/299): loss=0.4993153597968513, w0=7.553916397877773e-06, w1=-9.699215344100355e-05
Gradient Descent(39/299): loss=0.49929740457701854, w0=7.74739867221408e-06, w1=-9.947334351218087e-05
Gradient Descent(40/299): loss=0.49927945250631384, w0=7.940870537901018e-06, w1=-0.00010195424476226225
Gradient Descent(41/299): loss=0.49926150358389554, w0=8.1343319939

Gradient Descent(238/299): loss=0.3388499461677275, w0=0.002475989423861719, w1=-0.024031610076383027
Gradient Descent(239/299): loss=0.33850834813233327, w0=0.0024820732392406147, w1=-0.024073022330498437
Gradient Descent(240/299): loss=0.3381677526287734, w0=0.0024881486147855557, w1=-0.024114258639109647
Gradient Descent(241/299): loss=0.3378281539223589, w0=0.0024942157189365784, w1=-0.02415532044184112
Gradient Descent(242/299): loss=0.33748954634150996, w0=0.0025002747184130467, w1=-0.024196209161686685
Gradient Descent(243/299): loss=0.3371519242767262, w0=0.0025063257782318815, w1=-0.024236926205235487
Gradient Descent(244/299): loss=0.3368152821795777, w0=0.0025123690617255843, w1=-0.02427747296289459
Gradient Descent(245/299): loss=0.3364796145617166, w0=0.002518404730560056, w1=-0.02431785080910825
Gradient Descent(246/299): loss=0.3361449159939094, w0=0.0025244329447522175, w1=-0.024358061102573977
Gradient Descent(247/299): loss=0.3358111811050881, w0=0.002530453862687426,

Gradient Descent(252/299): loss=0.49548753570019277, w0=4.880528025154874e-05, w1=-0.000621397491670482
Gradient Descent(253/299): loss=0.4954700270657033, w0=4.899717599580412e-05, w1=-0.0006238174147386431
Gradient Descent(254/299): loss=0.4954525215049097, w0=4.9189063618351885e-05, w1=-0.000626237056480011
Gradient Descent(255/299): loss=0.495435019016975, w0=4.9380943117321894e-05, w1=-0.000628656416937192
Gradient Descent(256/299): loss=0.4954175196010623, w0=4.957281449084509e-05, w1=-0.0006310754961527855
Gradient Descent(257/299): loss=0.4954000232563347, w0=4.97646777370535e-05, w1=-0.0006334942941693833
Gradient Descent(258/299): loss=0.4953825299819559, w0=4.995653285408024e-05, w1=-0.0006359128110295702
Gradient Descent(259/299): loss=0.4953650397770899, w0=5.014837984005952e-05, w1=-0.0006383310467759237
Gradient Descent(260/299): loss=0.4953475526409006, w0=5.034021869312664e-05, w1=-0.000640749001451014
Gradient Descent(261/299): loss=0.49533006857255246, w0=5.053204941

Gradient Descent(166/299): loss=0.1839254688700641, w0=0.014642356337808147, w1=-0.04062794632998614
Gradient Descent(167/299): loss=0.1836682765685867, w0=0.014737628743187297, w1=-0.040652967473472794
Gradient Descent(168/299): loss=0.1834145588825956, w0=0.014832841070253587, w1=-0.040677542365691995
Gradient Descent(169/299): loss=0.18316425368830852, w0=0.01492799201659823, w1=-0.04070167691173766
Gradient Descent(170/299): loss=0.18291730007918056, w0=0.015023080316274282, w1=-0.04072537692513126
Gradient Descent(171/299): loss=0.1826736383407091, w0=0.015118104738761198, w1=-0.04074864812960387
Gradient Descent(172/299): loss=0.18243320992579054, w0=0.015213064087959721, w1=-0.040771496160832824
Gradient Descent(173/299): loss=0.18219595743061523, w0=0.015307957201216233, w1=-0.040793926568134624
Gradient Descent(174/299): loss=0.18196182457108878, w0=0.015402782948375767, w1=-0.040815944816115377
Gradient Descent(175/299): loss=0.18173075615976642, w0=0.015497540230862885, w1=-

Gradient Descent(19/299): loss=0.4965070657377114, w0=3.873724166933635e-05, w1=-0.0004929756540885484
Gradient Descent(20/299): loss=0.4963265568946741, w0=4.067158882536298e-05, w1=-0.0005173223890430422
Gradient Descent(21/299): loss=0.4961463745235256, w0=4.260566663190808e-05, w1=-0.0005416406803904975
Gradient Descent(22/299): loss=0.49596651771091366, w0=4.453947112036261e-05, w1=-0.0005659305717155261
Gradient Descent(23/299): loss=0.49578698554642175, w0=4.647299833914105e-05, w1=-0.0005901921065269597
Gradient Descent(24/299): loss=0.4956077771225601, w0=4.840624435362464e-05, w1=-0.0006144253282579949
Gradient Descent(25/299): loss=0.4954288915347545, w0=5.033920524610492e-05, w1=-0.0006386302802663372
Gradient Descent(26/299): loss=0.4952503278813368, w0=5.22718771157274e-05, w1=-0.0006628070058343446
Gradient Descent(27/299): loss=0.4950720852635349, w0=5.420425607843539e-05, w1=-0.0006869555481691709
Gradient Descent(28/299): loss=0.49489416278546217, w0=5.613633826691404

Gradient Descent(90/299): loss=0.22036670581607037, w0=0.007524356235581593, w1=-0.0366523238610354
Gradient Descent(91/299): loss=0.2195092854292972, w0=0.007614746954043126, w1=-0.036739579448391846
Gradient Descent(92/299): loss=0.21866697120958772, w0=0.007705408980153952, w1=-0.036825407089029226
Gradient Descent(93/299): loss=0.21783945661640172, w0=0.007796331881308979, w1=-0.03690983351202161
Gradient Descent(94/299): loss=0.2170264419713227, w0=0.007887505499487253, w1=-0.0369928847141152
Gradient Descent(95/299): loss=0.2162276342777164, w0=0.007978919947288424, w1=-0.03707458598923022
Gradient Descent(96/299): loss=0.2154427470467648, w0=0.008070565603690105, w1=-0.037154961956378026
Gradient Descent(97/299): loss=0.2146715001295388, w0=0.008162433109567208, w1=-0.037234036586096815
Gradient Descent(98/299): loss=0.2139136195547992, w0=0.008254513363010606, w1=-0.03731183322550148
Gradient Descent(99/299): loss=0.21316883737223735, w0=0.008346797514478971, w1=-0.037388374622

Gradient Descent(227/299): loss=0.49579428303088025, w0=4.422672112104536e-05, w1=-0.0005607609288013962
Gradient Descent(228/299): loss=0.49577614592903146, w0=4.442074031131361e-05, w1=-0.0005631874969539522
Gradient Descent(229/299): loss=0.49575801217564885, w0=4.4614759459283544e-05, w1=-0.0005656137810985026
Gradient Descent(230/299): loss=0.4957398817697676, w0=4.480877855970705e-05, w1=-0.0005680397812790976
Gradient Descent(231/299): loss=0.49572175471042285, w0=4.50027976073382e-05, w1=-0.0005704654975397797
Gradient Descent(232/299): loss=0.49570363099665055, w0=4.519681659693325e-05, w1=-0.0005728909299245832
Gradient Descent(233/299): loss=0.49568551062748667, w0=4.5390835523250586e-05, w1=-0.0005753160784775351
Gradient Descent(234/299): loss=0.49566739360196754, w0=4.5584854381050794e-05, w1=-0.000577740943242654
Gradient Descent(235/299): loss=0.4956492799191298, w0=4.577887316509662e-05, w1=-0.0005801655242639512
Gradient Descent(236/299): loss=0.49563116957801057, w0=

Gradient Descent(30/299): loss=0.31739398233472677, w0=0.0031455766914769887, w1=-0.026823199309940003
Gradient Descent(31/299): loss=0.3145490434082041, w0=0.0032018481468374872, w1=-0.027130532635130905
Gradient Descent(32/299): loss=0.3117725258976509, w0=0.0032583992183784637, w1=-0.027427889310698964
Gradient Descent(33/299): loss=0.3090617055733796, w0=0.00331532449176617, w1=-0.02771586047422923
Gradient Descent(34/299): loss=0.30641406825847345, w0=0.003372706520364095, w1=-0.027994985536780797
Gradient Descent(35/299): loss=0.303827285332033, w0=0.0034306169768397073, w1=-0.0282657578101649
Gradient Descent(36/299): loss=0.3012991926914246, w0=0.00348911770159444, w1=-0.028528629422191306
Gradient Descent(37/299): loss=0.29882777261926075, w0=0.003548261657099535, w1=-0.028784015621154308
Gradient Descent(38/299): loss=0.29641113810049513, w0=0.0036080937965029965, w1=-0.029032298555061734
Gradient Descent(39/299): loss=0.294047519214662, w0=0.0036686518541694853, w1=-0.029273

Gradient Descent(178/299): loss=0.4966297128180974, w0=3.47968283402917e-05, w1=-0.0004414930355566694
Gradient Descent(179/299): loss=0.49661110251392904, w0=3.499169522606948e-05, w1=-0.00044393338433397765
Gradient Descent(180/299): loss=0.4965924957639578, w0=3.518656687500613e-05, w1=-0.0004463734459346127
Gradient Descent(181/299): loss=0.4965738925671219, w0=3.538144327941375e-05, w1=-0.00044881322040366244
Gradient Descent(182/299): loss=0.4965552929223602, w0=3.557632443160753e-05, w1=-0.0004512527077862065
Gradient Descent(183/299): loss=0.4965366968286114, w0=3.5771210323905705e-05, w1=-0.00045369190812731634
Gradient Descent(184/299): loss=0.4965181042848151, w0=3.59661009486296e-05, w1=-0.0004561308214720552
Gradient Descent(185/299): loss=0.4964995152899108, w0=3.616099629810357e-05, w1=-0.0004585694478654783
Gradient Descent(186/299): loss=0.49648092984283876, w0=3.6355896364655074e-05, w1=-0.00046100778735263257
Gradient Descent(187/299): loss=0.4964623479425394, w0=3.6

Gradient Descent(63/299): loss=0.24948503305140632, w0=0.005326553274875439, w1=-0.03360387637970552
Gradient Descent(64/299): loss=0.24805999159763767, w0=0.005405610306575933, w1=-0.03374193352802368
Gradient Descent(65/299): loss=0.24666216473547659, w0=0.005485309242853694, w1=-0.03387740569933381
Gradient Descent(66/299): loss=0.24529095601876125, w0=0.005565632125200309, w1=-0.03401036146119956
Gradient Descent(67/299): loss=0.24394578475249992, w0=0.005646561172573864, w1=-0.03414086638699613
Gradient Descent(68/299): loss=0.2426260853801053, w0=0.005728078816059133, w1=-0.03426898324350981
Gradient Descent(69/299): loss=0.2413313069110843, w0=0.005810167728574747, w1=-0.03439477216403654
Gradient Descent(70/299): loss=0.24006091238529859, w0=0.005892810850129893, w1=-0.03451829080825958
Gradient Descent(71/299): loss=0.23881437837035902, w0=0.0059759914090872146, w1=-0.03463959451006154
Gradient Descent(72/299): loss=0.23759119448910346, w0=0.006059692939846684, w1=-0.034758736

KeyboardInterrupt: 

In [None]:
# CREATE CSV SUBMISSION
#create_csv_submission(ids_test, y_predicted_test, 'output/trial.csv')

# COMPUTE ACCURACY ON FULL train
total_accuracy_train = np.sum(y_predicted_train == y_train)/len(y_train)*100
print('Total accuracy train =', total_accuracy_train, 'with degrees =', best_degrees, 'and gammas =', best_gammas)

In [None]:
total_accuracy_test = np.sum(y_predicted_test == y_test)/len(y_test)*100
print('Total accuracy test =', total_accuracy_test, 'with degrees =', best_degrees)

In [None]:
np.sum(y_predicted_test==-1)

In [None]:
np.sum(y_predicted_test==1)

In [None]:
y_predicted_test[:200]

## Ridge regression   

In [26]:
def cross_validation_one_fold_ridge(y_cross_val_train, y_cross_val_test, tx_cross_val_train, tx_cross_val_test, \
                                    degrees, lambdas, len_kept_data, stdize=False):
    
    accuracies_train_by_deg = np.zeros([len(degrees), len(lambdas)])
    accuracies_test_by_deg = np.zeros([len(degrees), len(lambdas)])
    
    # For each degree, compute the least squares weights, the predictions and the accuracies
    previous_deg = 1
    for deg_id, deg in enumerate(degrees):
        print('++ Degree', deg, '++')
                
        # Add powers of the chosen columns
        len_data = tx_cross_val_train.shape[1]
        tx_cross_val_train = add_powers(tx_cross_val_train, range(previous_deg+1, deg+1), range(len_kept_data))
        tx_cross_val_test = add_powers(tx_cross_val_test, range(previous_deg+1, deg+1), range(len_kept_data))
        if stdize: 
            tx_cross_val_train[:,len_data:] = standardize(tx_cross_val_train[:,len_data:])[0]
            tx_cross_val_test[:,len_data:] = standardize(tx_cross_val_test[:,len_data:])[0]
                
        for lambda_id, single_lambda in enumerate(lambdas):
            print('>> Lambda', single_lambda, '<<')
            
            # Compute the best weights on the training set
            weights, loss = ridge_regression(y_cross_val_train, tx_cross_val_train, single_lambda, 'mse');

            # Compute the predictions
            y_predicted_cross_val_train = predict_labels(weights, tx_cross_val_train)
            y_predicted_cross_val_test = predict_labels(weights, tx_cross_val_test)

            # Compute the accuracies for each degree
            accuracies_train_by_deg[deg_id, lambda_id] = \
                np.sum(y_predicted_cross_val_train == y_cross_val_train)/len(y_cross_val_train)
            accuracies_test_by_deg[deg_id, lambda_id] = \
                np.sum(y_predicted_cross_val_test == y_cross_val_test)/len(y_cross_val_test)

        # Update the previous degree to the actual degree
        previous_deg = deg
        
    return accuracies_train_by_deg, accuracies_test_by_deg

In [27]:
def cross_validation_ridge(y_single_jet_train, tx_single_jet_train, degrees, lambdas, k_fold, seed):
    
    # Get the indices so that we get the k'th subgroup in test, others in train, for each k
    k_indices = build_k_indices(y_single_jet_train, k_fold, seed)
    
    # Initialize matrix of computed accuracies for each degree and each fold
    accuracies_train_by_fold = np.zeros([len(degrees), len(lambdas), k_fold])
    accuracies_test_by_fold = np.zeros([len(degrees), len(lambdas), k_fold])
    
    # Preprocess training dataset
    tx_single_jet_train_preprocessed, len_kept_data, unique_cols = \
        preprocess_data(tx_single_jet_train, [], 'before')
    
    for k in range(k_fold):
        print('--- Fold', k, '---')
        # Create the testing set for this fold number
        k_index = k_indices[k] # Indices of the testing set for fold k
        y_cross_val_test = y_single_jet_train[k_index]
        tx_cross_val_test = tx_single_jet_train_preprocessed[k_index,:]
        
        # Create the training set for this fold number
        mask = np.ones(len(y_single_jet_train), dtype=bool) # set all elements to True
        mask[k_index] = False # set test elements to False
        y_cross_val_train = y_single_jet_train[mask] # select only True elements (ie train elements)
        tx_cross_val_train = tx_single_jet_train_preprocessed[mask,:]
        
        # Compute the accuracies for each degree
        accuracies_train_by_fold[:,:,k], accuracies_test_by_fold[:,:,k] = \
            cross_validation_one_fold_ridge(y_cross_val_train, y_cross_val_test, tx_cross_val_train, \
                                            tx_cross_val_test, degrees, lambdas, len_kept_data, False)
    
    # Compute the mean accuracies over the folds, for each degree
    mean_accuracies_train_by_deg = np.mean(accuracies_train_by_fold, axis=2)
    mean_accuracies_test_by_deg = np.mean(accuracies_test_by_fold, axis=2)
    
    # Get the index of the best accuracy in the testing set
    max_id_deg_test, max_id_lambda_test = \
        np.unravel_index(mean_accuracies_test_by_deg.argmax(), mean_accuracies_test_by_deg.shape)
    
    # Find the optimal degree and the corresponding accuracies in the training and testing sets
    best_deg = degrees[max_id_deg_test]
    best_lambda = lambdas[max_id_lambda_test]
    best_accuracy_test = mean_accuracies_test_by_deg[max_id_deg_test, max_id_lambda_test]
    corresponding_accuracy_train = mean_accuracies_train_by_deg[max_id_deg_test, max_id_lambda_test]
    
    print('Best accuracy test =', best_accuracy_test, 'with degree =', best_deg, 'and lambda =', best_lambda)
    print('Corresponding accuracy train =', corresponding_accuracy_train)
    
    return best_deg, best_lambda, best_accuracy_test, corresponding_accuracy_train

In [28]:
degrees = range(6,11)
lambdas = np.logspace(-9,-2,7)
k_fold = 5
seed = 1

In [29]:
mask_jets_train = split_jets_mask(tx_train)
mask_jets_test = split_jets_mask(tx_test)
len_mask = len(mask_jets_train)

y_predicted_train = np.zeros(len(y_train))
y_predicted_test = np.zeros(tx_test.shape[0])
best_degrees = np.zeros(len_mask)
best_lambdas = np.zeros(len_mask)

In [30]:
for jet_id in range(len_mask):
    print('***** Jet ', jet_id, '*****')
    # SEPARATE THE WHOLE DATA SET TO GET ONLY THE PART THAT HAVE THE RIGHT NUMBER OF JETS
    tx_single_jet_train = tx_train[mask_jets_train[jet_id]]
    tx_single_jet_test = tx_test[mask_jets_test[jet_id]]
    y_single_jet_train = y_train[mask_jets_train[jet_id]]
    
    # CALL CROSS VALIDATION FOR A SINGLE JET ON TRAIN PART, FIND BEST DEG, BEST ACCURACY ON TESTING CROSS VAL
    best_deg, best_lambda, best_accuracy_test, corresponding_accuracy_train = \
        cross_validation_ridge(y_single_jet_train, tx_single_jet_train, degrees, lambdas, k_fold, seed)
    
    # KEEP IN MEMORY THE BEST DEGREE FOR THIS JET
    best_degrees[jet_id] = best_deg
    best_lambdas[jet_id] = best_lambda
    
    # PREPROCESS FULL TRAINING AND TESTING DATA
    tx_single_jet_train_preprocessed, len_kept_data, unique_cols = \
        preprocess_data(tx_single_jet_train, [], 'before')
    tx_single_jet_test_preprocessed = preprocess_data(tx_single_jet_test, unique_cols, 'before')[0]
    
    # ADD POWERS TO THE CHOSEN COLUMNS
    len_data = tx_single_jet_train_preprocessed.shape[1]
    tx_single_jet_train_preprocessed = add_powers(tx_single_jet_train_preprocessed, range(2,best_deg+1), \
                                                  range(len_kept_data))
    tx_single_jet_test_preprocessed = add_powers(tx_single_jet_test_preprocessed, range(2,best_deg+1), \
                                                 range(len_kept_data))
    tx_single_jet_train_preprocessed[:,len_data:] = standardize(tx_single_jet_train_preprocessed[:,len_data:])[0]
    tx_single_jet_test_preprocessed[:,len_data:] = standardize(tx_single_jet_test_preprocessed[:,len_data:])[0]
    
    # COMPUTE THE BEST WEIGHTS AND FULL ACCURACY ON TRAINING FULL SET - ONE JET
    initial_w = np.zeros(tx_single_jet_train_preprocessed.shape[1])
    weights, loss = ridge_regression(y_single_jet_train, tx_single_jet_train_preprocessed, \
                                             best_lambda, fct='mse');
    
    # COMPUTE THE PREDICTIONS ON THE FULL TESTING SET - SINGLE JET
    y_predicted_single_jet_train = predict_labels(weights, tx_single_jet_train_preprocessed)
    y_predicted_single_jet_test = predict_labels(weights, tx_single_jet_test_preprocessed)
    
    # ADD THE PREDICTIONS TO y_predicted_test AND y_predicted_train
    y_predicted_train[mask_jets_train[jet_id]] = y_predicted_single_jet_train
    y_predicted_test[mask_jets_test[jet_id]] = y_predicted_single_jet_test
    
    # COMPUTE THE ACCURACY train ON JET
    accuracy_train_single_jet = np.sum(y_predicted_single_jet_train == y_single_jet_train)/len(y_single_jet_train)
    
    # PRINT ACCURACY train ON JET
    print('Accuracy full train on jet', jet_id, '=', accuracy_train_single_jet)

***** Jet  0 *****
--- Fold 0 ---
++ Degree 6 ++
>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.000681292069058 <<
>> Lambda 0.01 <<
++ Degree 7 ++
>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.000681292069058 <<
>> Lambda 0.01 <<
++ Degree 8 ++
>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.000681292069058 <<
>> Lambda 0.01 <<
++ Degree 9 ++
>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.000681292069058 <<
>> Lambda 0.01 <<
++ Degree 10 ++
>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 

>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.000681292069058 <<
>> Lambda 0.01 <<
++ Degree 10 ++
>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.000681292069058 <<
>> Lambda 0.01 <<
--- Fold 3 ---
++ Degree 6 ++
>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.000681292069058 <<
>> Lambda 0.01 <<
++ Degree 7 ++
>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.000681292069058 <<
>> Lambda 0.01 <<
++ Degree 8 ++
>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.0

>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.000681292069058 <<
>> Lambda 0.01 <<
++ Degree 8 ++
>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.000681292069058 <<
>> Lambda 0.01 <<
++ Degree 9 ++
>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.000681292069058 <<
>> Lambda 0.01 <<
++ Degree 10 ++
>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.000681292069058 <<
>> Lambda 0.01 <<
--- Fold 1 ---
++ Degree 6 ++
>> Lambda 1e-09 <<
>> Lambda 1.46779926762e-08 <<
>> Lambda 2.15443469003e-07 <<
>> Lambda 3.16227766017e-06 <<
>> Lambda 4.64158883361e-05 <<
>> Lambda 0.0

In [31]:
# CREATE CSV SUBMISSION
#create_csv_submission(ids_test, y_predicted_test, 'output/trial.csv')

# COMPUTE ACCURACY ON FULL train
total_accuracy_train = np.sum(y_predicted_train == y_train)/len(y_train)*100
print('Total accuracy train =', total_accuracy_train, 'with degrees =', best_degrees, 'and lambda =', best_lambdas)

Total accuracy train = 82.1136 with degrees = [ 10.  10.   9.  10.] and lambda = [  4.64158883e-05   1.00000000e-02   6.81292069e-04   1.00000000e-02]


In [32]:
total_accuracy_test = np.sum(y_predicted_test == y_test)/len(y_test)*100
print('Total accuracy test =', total_accuracy_test, 'with degrees =', best_degrees, 'and lambda =', best_lambdas)

Total accuracy test = 79.9717333333 with degrees = [ 10.  10.   9.  10.] and lambda = [  4.64158883e-05   1.00000000e-02   6.81292069e-04   1.00000000e-02]


In [33]:
np.sum(y_predicted_test==-1)

130484

In [34]:
np.sum(y_predicted_test==1)

57016

In [35]:
y_predicted_test[:200]

array([-1., -1., -1., -1., -1., -1.,  1.,  1., -1.,  1., -1., -1.,  1.,
       -1., -1., -1.,  1., -1., -1., -1., -1., -1., -1.,  1., -1., -1.,
       -1.,  1.,  1.,  1., -1., -1.,  1.,  1., -1., -1.,  1.,  1., -1.,
       -1., -1., -1.,  1., -1., -1., -1.,  1., -1.,  1.,  1., -1.,  1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1.,  1., -1.,  1., -1.,  1.,  1., -1.,
        1.,  1., -1., -1., -1.,  1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1.,  1.,  1., -1., -1., -1., -1., -1., -1., -1., -1.,  1.,
       -1., -1.,  1.,  1., -1., -1.,  1., -1.,  1., -1., -1.,  1., -1.,
       -1., -1.,  1.,  1., -1., -1., -1., -1.,  1., -1., -1.,  1., -1.,
       -1., -1., -1.,  1., -1.,  1., -1., -1., -1., -1., -1., -1.,  1.,
       -1.,  1., -1., -1.,  1.,  1.,  1.,  1., -1., -1., -1., -1., -1.,
       -1., -1.,  1., -1., -1., -1., -1., -1.,  1., -1.,  1.,  1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1

##  SGD

In [36]:
def cross_validation_one_fold_SGD(y_cross_val_train, y_cross_val_test, tx_cross_val_train, tx_cross_val_test, \
                                 degrees, gammas, len_kept_data, max_iters, batch_size, stdize=False):
    
    accuracies_train_by_deg = np.zeros([len(degrees), len(gammas)])
    accuracies_test_by_deg = np.zeros([len(degrees), len(gammas)])
    
    # For each degree, compute the least squares weights, the predictions and the accuracies
    previous_deg = 1
    for deg_id, deg in enumerate(degrees):
        print('++ Degree', deg, '++')
                
        # Add powers of the chosen columns
        len_data = tx_cross_val_train.shape[1]
        tx_cross_val_train = add_powers(tx_cross_val_train, range(previous_deg+1, deg+1), range(len_kept_data))
        tx_cross_val_test = add_powers(tx_cross_val_test, range(previous_deg+1, deg+1), range(len_kept_data))
        if stdize: 
            tx_cross_val_train[:,len_data:] = standardize(tx_cross_val_train[:,len_data:])[0]
            tx_cross_val_test[:,len_data:] = standardize(tx_cross_val_test[:,len_data:])[0]
                
        for gamma_id, single_gamma in enumerate(gammas):
            print('>> Gamma', single_gamma, '<<')
            
            # Compute the best weights on the training set
            initial_w = np.zeros(tx_cross_val_train.shape[1])
            weights, loss = least_squares_SGD(y_cross_val_train, tx_cross_val_train, initial_w, max_iters, \
                                             single_gamma, batch_size);

            # Compute the predictions
            y_predicted_cross_val_train = predict_labels(weights, tx_cross_val_train)
            y_predicted_cross_val_test = predict_labels(weights, tx_cross_val_test)

            # Compute the accuracies for each degree
            accuracies_train_by_deg[deg_id, gamma_id] = \
                np.sum(y_predicted_cross_val_train == y_cross_val_train)/len(y_cross_val_train)
            accuracies_test_by_deg[deg_id, gamma_id] = \
                np.sum(y_predicted_cross_val_test == y_cross_val_test)/len(y_cross_val_test)

        # Update the previous degree to the actual degree
        previous_deg = deg
        
    return accuracies_train_by_deg, accuracies_test_by_deg

In [37]:
def cross_validation_SGD(y_single_jet_train, tx_single_jet_train, degrees, gammas, k_fold, seed, max_iters, \
                        batch_size):
    
    # Get the indices so that we get the k'th subgroup in test, others in train, for each k
    k_indices = build_k_indices(y_single_jet_train, k_fold, seed)
    
    # Initialize matrix of computed accuracies for each degree and each fold
    accuracies_train_by_fold = np.zeros([len(degrees), len(gammas), k_fold])
    accuracies_test_by_fold = np.zeros([len(degrees), len(gammas), k_fold])
    
    # Preprocess training dataset
    tx_single_jet_train_preprocessed, len_kept_data, unique_cols = \
        preprocess_data(tx_single_jet_train, [], 'after')
    
    for k in range(k_fold):
        print('--- Fold', k, '---')
        # Create the testing set for this fold number
        k_index = k_indices[k] # Indices of the testing set for fold k
        y_cross_val_test = y_single_jet_train[k_index]
        tx_cross_val_test = tx_single_jet_train_preprocessed[k_index,:]
        
        # Create the training set for this fold number
        mask = np.ones(len(y_single_jet_train), dtype=bool) # set all elements to True
        mask[k_index] = False # set test elements to False
        y_cross_val_train = y_single_jet_train[mask] # select only True elements (ie train elements)
        tx_cross_val_train = tx_single_jet_train_preprocessed[mask,:]
        
        # Compute the accuracies for each degree
        accuracies_train_by_fold[:,:,k], accuracies_test_by_fold[:,:,k] = \
            cross_validation_one_fold_SGD(y_cross_val_train, y_cross_val_test, tx_cross_val_train, \
                                          tx_cross_val_test, degrees, gammas, len_kept_data, max_iters, \
                                          batch_size, True)
    
    # Compute the mean accuracies over the folds, for each degree
    mean_accuracies_train_by_deg = np.mean(accuracies_train_by_fold, axis=2)
    mean_accuracies_test_by_deg = np.mean(accuracies_test_by_fold, axis=2)
    
    # Get the index of the best accuracy in the testing set
    max_id_deg_test, max_id_gamma_test = \
        np.unravel_index(mean_accuracies_test_by_deg.argmax(), mean_accuracies_test_by_deg.shape)
    
    # Find the optimal degree and the corresponding accuracies in the training and testing sets
    best_deg = degrees[max_id_deg_test]
    best_gamma = gammas[max_id_gamma_test]
    best_accuracy_test = mean_accuracies_test_by_deg[max_id_deg_test, max_id_gamma_test]
    corresponding_accuracy_train = mean_accuracies_train_by_deg[max_id_deg_test, max_id_gamma_test]
    
    print('Best accuracy test =', best_accuracy_test, 'with degree =', best_deg, 'and gamma =', best_gamma)
    print('Corresponding accuracy train =', corresponding_accuracy_train)
    
    return best_deg, best_gamma, best_accuracy_test, corresponding_accuracy_train

In [38]:
degrees = range(3,10)
gammas = np.logspace(-6,-1,5)
k_fold = 5
seed = 1
max_iters = 300
batch_size = 1

In [39]:
mask_jets_train = split_jets_mask(tx_train)
mask_jets_test = split_jets_mask(tx_test)
len_mask = len(mask_jets_train)

y_predicted_train = np.zeros(len(y_train))
y_predicted_test = np.zeros(tx_test.shape[0])
best_degrees = np.zeros(len_mask)
best_gammas = np.zeros(len_mask)

In [40]:
for jet_id in range(len_mask):
    print('***** Jet ', jet_id, '*****')
    # SEPARATE THE WHOLE DATA SET TO GET ONLY THE PART THAT HAVE THE RIGHT NUMBER OF JETS
    tx_single_jet_train = tx_train[mask_jets_train[jet_id]]
    tx_single_jet_test = tx_test[mask_jets_test[jet_id]]
    y_single_jet_train = y_train[mask_jets_train[jet_id]]
    
    # CALL CROSS VALIDATION FOR A SINGLE JET ON TRAIN PART, FIND BEST DEG, BEST ACCURACY ON TESTING CROSS VAL
    best_deg, best_gamma, best_accuracy_test, corresponding_accuracy_train = \
        cross_validation_SGD(y_single_jet_train, tx_single_jet_train, degrees, gammas, k_fold, seed, max_iters, \
                            batch_size)
    
    # KEEP IN MEMORY THE BEST DEGREE FOR THIS JET
    best_degrees[jet_id] = best_deg
    best_gammas[jet_id] = best_gamma
    
    # PREPROCESS FULL TRAINING AND TESTING DATA
    tx_single_jet_train_preprocessed, len_kept_data, unique_cols = \
        preprocess_data(tx_single_jet_train, [], 'after')
    tx_single_jet_test_preprocessed = preprocess_data(tx_single_jet_test, unique_cols, 'after')[0]
    
    # ADD POWERS TO THE CHOSEN COLUMNS
    len_data = tx_single_jet_train_preprocessed.shape[1]
    tx_single_jet_train_preprocessed = add_powers(tx_single_jet_train_preprocessed, range(2,best_deg+1), \
                                                  range(len_kept_data))
    tx_single_jet_test_preprocessed = add_powers(tx_single_jet_test_preprocessed, range(2,best_deg+1), \
                                                 range(len_kept_data))
    tx_single_jet_train_preprocessed[:,len_data:] = standardize(tx_single_jet_train_preprocessed[:,len_data:])[0]
    tx_single_jet_test_preprocessed[:,len_data:] = standardize(tx_single_jet_test_preprocessed[:,len_data:])[0]
    
    # COMPUTE THE BEST WEIGHTS AND FULL ACCURACY ON TRAINING FULL SET - ONE JET
    initial_w = np.zeros(tx_single_jet_train_preprocessed.shape[1])
    weights, loss = least_squares_SGD(y_single_jet_train, tx_single_jet_train_preprocessed, initial_w, max_iters, \
                                             best_gamma, batch_size);
    
    # COMPUTE THE PREDICTIONS ON THE FULL TESTING SET - SINGLE JET
    y_predicted_single_jet_train = predict_labels(weights, tx_single_jet_train_preprocessed)
    y_predicted_single_jet_test = predict_labels(weights, tx_single_jet_test_preprocessed)
    
    # ADD THE PREDICTIONS TO y_predicted_test AND y_predicted_train
    y_predicted_train[mask_jets_train[jet_id]] = y_predicted_single_jet_train
    y_predicted_test[mask_jets_test[jet_id]] = y_predicted_single_jet_test
    
    # COMPUTE THE ACCURACY train ON JET
    accuracy_train_single_jet = np.sum(y_predicted_single_jet_train == y_single_jet_train)/len(y_single_jet_train)
    
    # PRINT ACCURACY train ON JET
    print('Accuracy full train on jet', jet_id, '=', accuracy_train_single_jet)

***** Jet  0 *****
--- Fold 0 ---
++ Degree 3 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 4 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 5 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 6 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 7 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 8 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 9 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
--- Fol

>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 5 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 6 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 7 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 8 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 9 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
--- Fold 4 ---
++ Degree 3 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 4 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017

>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 5 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 6 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 7 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 8 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 9 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
--- Fold 2 ---
++ Degree 3 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <<
>> Gamma 0.1 <<
++ Degree 4 ++
>> Gamma 1e-06 <<
>> Gamma 1.77827941004e-05 <<
>> Gamma 0.000316227766017 <<
>> Gamma 0.0056234132519 <

In [41]:
# CREATE CSV SUBMISSION
#create_csv_submission(ids_test, y_predicted_test, 'output/trial.csv')

# COMPUTE ACCURACY ON FULL train
total_accuracy_train = np.sum(y_predicted_train == y_train)/len(y_train)*100
print('Total accuracy train =', total_accuracy_train, 'with degrees =', best_degrees, 'and gammas =', best_gammas)

Total accuracy train = 67.7168 with degrees = [ 8.  9.  3.  4.] and gammas = [ 0.00031623  0.00031623  0.00031623  0.00031623]


In [42]:
total_accuracy_test = np.sum(y_predicted_test == y_test)/len(y_test)*100
print('Total accuracy test =', total_accuracy_test, 'with degrees =', best_degrees)

Total accuracy test = 67.8229333333 with degrees = [ 8.  9.  3.  4.]


In [43]:
np.sum(y_predicted_test==-1)

100295

In [44]:
np.sum(y_predicted_test==1)

87205

In [45]:
y_predicted_test[:200]

array([-1., -1., -1.,  1.,  1.,  1., -1.,  1., -1.,  1., -1.,  1.,  1.,
        1., -1.,  1., -1.,  1., -1., -1., -1.,  1., -1., -1., -1.,  1.,
        1.,  1.,  1.,  1., -1.,  1.,  1., -1.,  1.,  1.,  1.,  1., -1.,
       -1.,  1., -1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1., -1.,  1.,
       -1., -1., -1.,  1., -1., -1.,  1., -1.,  1., -1., -1.,  1.,  1.,
       -1.,  1., -1.,  1., -1., -1., -1., -1.,  1., -1.,  1., -1., -1.,
        1.,  1., -1., -1.,  1.,  1.,  1., -1., -1.,  1., -1.,  1., -1.,
        1.,  1.,  1.,  1., -1.,  1., -1., -1.,  1., -1., -1., -1.,  1.,
        1.,  1.,  1.,  1., -1., -1., -1.,  1., -1., -1., -1.,  1., -1.,
       -1., -1.,  1.,  1.,  1., -1., -1., -1.,  1.,  1.,  1., -1., -1.,
        1., -1., -1., -1., -1.,  1., -1.,  1., -1.,  1.,  1., -1., -1.,
        1.,  1., -1., -1.,  1., -1.,  1., -1., -1., -1., -1.,  1., -1.,
       -1., -1., -1.,  1., -1., -1., -1., -1., -1., -1.,  1.,  1.,  1.,
        1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,  1., -1

## LOGISTIC REGRESSION

In [46]:
def cross_validation_one_fold_logreg(y_cross_val_train, y_cross_val_test, tx_cross_val_train, tx_cross_val_test, \
                                    degrees, gammas, len_kept_data, max_iters, stdize=False):
    
    accuracies_train_by_deg = np.zeros([len(degrees), len(gammas)])
    accuracies_test_by_deg = np.zeros([len(degrees), len(gammas)])
    
    # For each degree, compute the least squares weights, the predictions and the accuracies
    previous_deg = 1
    for deg_id, deg in enumerate(degrees):
        print('++ Degree', deg, '++')
                
        # Add powers of the chosen columns
        len_data = tx_cross_val_train.shape[1]
        tx_cross_val_train = add_powers(tx_cross_val_train, range(previous_deg+1, deg+1), range(len_kept_data))
        tx_cross_val_test = add_powers(tx_cross_val_test, range(previous_deg+1, deg+1), range(len_kept_data))
        if stdize: 
            tx_cross_val_train[:,len_data:] = standardize(tx_cross_val_train[:,len_data:])[0]
            tx_cross_val_test[:,len_data:] = standardize(tx_cross_val_test[:,len_data:])[0]
                
        for gamma_id, single_gamma in enumerate(gammas):
            print('>> Gamma', single_gamma, '<<')
            
            # Compute the best weights on the training set
            initial_w = np.zeros(tx_cross_val_train.shape[1])
            weights, loss = logistic_regression(y_cross_val_train, tx_cross_val_train, initial_w, max_iters, \
                                                single_gamma);

            # Compute the predictions
            y_predicted_cross_val_train = predict_labels(weights, tx_cross_val_train)
            y_predicted_cross_val_test = predict_labels(weights, tx_cross_val_test)

            # Compute the accuracies for each degree
            accuracies_train_by_deg[deg_id, gamma_id] = \
                np.sum(y_predicted_cross_val_train == y_cross_val_train)/len(y_cross_val_train)
            accuracies_test_by_deg[deg_id, gamma_id] = \
                np.sum(y_predicted_cross_val_test == y_cross_val_test)/len(y_cross_val_test)

        # Update the previous degree to the actual degree
        previous_deg = deg
        
    return accuracies_train_by_deg, accuracies_test_by_deg

In [47]:
def cross_validation_logreg(y_single_jet_train, tx_single_jet_train, degrees, gammas, k_fold, seed, max_iters):
    
    # Get the indices so that we get the k'th subgroup in test, others in train, for each k
    k_indices = build_k_indices(y_single_jet_train, k_fold, seed)
    
    # Initialize matrix of computed accuracies for each degree and each fold
    accuracies_train_by_fold = np.zeros([len(degrees), len(gammas), k_fold])
    accuracies_test_by_fold = np.zeros([len(degrees), len(gammas), k_fold])
    
    # Preprocess training dataset
    tx_single_jet_train_preprocessed, len_kept_data, unique_cols = \
        preprocess_data(tx_single_jet_train, [], 'before')
    
    for k in range(k_fold):
        print('--- Fold', k, '---')
        # Create the testing set for this fold number
        k_index = k_indices[k] # Indices of the testing set for fold k
        y_cross_val_test = y_single_jet_train[k_index]
        tx_cross_val_test = tx_single_jet_train_preprocessed[k_index,:]
        
        # Create the training set for this fold number
        mask = np.ones(len(y_single_jet_train), dtype=bool) # set all elements to True
        mask[k_index] = False # set test elements to False
        y_cross_val_train = y_single_jet_train[mask] # select only True elements (ie train elements)
        tx_cross_val_train = tx_single_jet_train_preprocessed[mask,:]
        
        # Compute the accuracies for each degree
        accuracies_train_by_fold[:,:,k], accuracies_test_by_fold[:,:,k] = \
            cross_validation_one_fold_logreg(y_cross_val_train, y_cross_val_test, tx_cross_val_train, \
                                            tx_cross_val_test, degrees, gammas, len_kept_data, max_iters, False)
    
    # Compute the mean accuracies over the folds, for each degree
    mean_accuracies_train_by_deg = np.mean(accuracies_train_by_fold, axis=2)
    mean_accuracies_test_by_deg = np.mean(accuracies_test_by_fold, axis=2)
    
    # Get the index of the best accuracy in the testing set
    max_id_deg_test, max_id_gamma_test = \
        np.unravel_index(mean_accuracies_test_by_deg.argmax(), mean_accuracies_test_by_deg.shape)
    
    # Find the optimal degree and the corresponding accuracies in the training and testing sets
    best_deg = degrees[max_id_deg_test]
    best_gamma = gammas[max_id_gamma_test]
    best_accuracy_test = mean_accuracies_test_by_deg[max_id_deg_test, max_id_gamma_test]
    corresponding_accuracy_train = mean_accuracies_train_by_deg[max_id_deg_test, max_id_gamma_test]
    
    print('Best accuracy test =', best_accuracy_test, 'with degree =', best_deg, 'and gamma =', best_gamma)
    print('Corresponding accuracy train =', corresponding_accuracy_train)
    
    return best_deg, best_gamma, best_accuracy_test, corresponding_accuracy_train

In [48]:
degrees = range(6,10)
gammas = np.logspace(-9,-2,7)
k_fold = 5
seed = 1
max_iters = 300

In [49]:
mask_jets_train = split_jets_mask(tx_train)
mask_jets_test = split_jets_mask(tx_test)
len_mask = len(mask_jets_train)

y_predicted_train = np.zeros(len(y_train))
y_predicted_test = np.zeros(tx_test.shape[0])
best_degrees = np.zeros(len_mask)
best_lambdas = np.zeros(len_mask)

In [50]:
for jet_id in range(len_mask):
    print('***** Jet ', jet_id, '*****')
    # SEPARATE THE WHOLE DATA SET TO GET ONLY THE PART THAT HAVE THE RIGHT NUMBER OF JETS
    tx_single_jet_train = tx_train[mask_jets_train[jet_id]]
    tx_single_jet_test = tx_test[mask_jets_test[jet_id]]
    y_single_jet_train = y_train[mask_jets_train[jet_id]]
    
    # CALL CROSS VALIDATION FOR A SINGLE JET ON TRAIN PART, FIND BEST DEG, BEST ACCURACY ON TESTING CROSS VAL
    best_deg, best_gamma, best_accuracy_test, corresponding_accuracy_train = \
        cross_validation_logreg(y_single_jet_train, tx_single_jet_train, degrees, gammas, k_fold, seed, max_iters)
    
    # KEEP IN MEMORY THE BEST DEGREE FOR THIS JET
    best_degrees[jet_id] = best_deg
    best_gammas[jet_id] = best_gamma
    
    # PREPROCESS FULL TRAINING AND TESTING DATA
    tx_single_jet_train_preprocessed, len_kept_data, unique_cols = \
        preprocess_data(tx_single_jet_train, [], 'before')
    tx_single_jet_test_preprocessed = preprocess_data(tx_single_jet_test, unique_cols, 'before')[0]
    
    # ADD POWERS TO THE CHOSEN COLUMNS
    len_data = tx_single_jet_train_preprocessed.shape[1]
    tx_single_jet_train_preprocessed = add_powers(tx_single_jet_train_preprocessed, range(2,best_deg+1), \
                                                  range(len_kept_data))
    tx_single_jet_test_preprocessed = add_powers(tx_single_jet_test_preprocessed, range(2,best_deg+1), \
                                                 range(len_kept_data))
    tx_single_jet_train_preprocessed[:,len_data:] = standardize(tx_single_jet_train_preprocessed[:,len_data:])[0]
    tx_single_jet_test_preprocessed[:,len_data:] = standardize(tx_single_jet_test_preprocessed[:,len_data:])[0]
    
    # COMPUTE THE BEST WEIGHTS AND FULL ACCURACY ON TRAINING FULL SET - ONE JET
    initial_w = np.zeros(tx_single_jet_train_preprocessed.shape[1])
    weights, loss = logistic_regression(y_single_jet_train, tx_single_jet_train_preprocessed, initial_w,\
                                        max_iters, best_gamma);
    
    # COMPUTE THE PREDICTIONS ON THE FULL TESTING SET - SINGLE JET
    y_predicted_single_jet_train = predict_labels(weights, tx_single_jet_train_preprocessed)
    y_predicted_single_jet_test = predict_labels(weights, tx_single_jet_test_preprocessed)
    
    # ADD THE PREDICTIONS TO y_predicted_test AND y_predicted_train
    y_predicted_train[mask_jets_train[jet_id]] = y_predicted_single_jet_train
    y_predicted_test[mask_jets_test[jet_id]] = y_predicted_single_jet_test
    
    # COMPUTE THE ACCURACY train ON JET
    accuracy_train_single_jet = np.sum(y_predicted_single_jet_train == y_single_jet_train)/len(y_single_jet_train)
    
    # PRINT ACCURACY train ON JET
    print('Accuracy full train on jet', jet_id, '=', accuracy_train_single_jet)

***** Jet  0 *****
--- Fold 0 ---
++ Degree 6 ++
>> Gamma 1e-09 <<
>> Gamma 1.46779926762e-08 <<
>> Gamma 2.15443469003e-07 <<
>> Gamma 3.16227766017e-06 <<
>> Gamma 4.64158883361e-05 <<
>> Gamma 0.000681292069058 <<
>> Gamma 0.01 <<
++ Degree 7 ++
>> Gamma 1e-09 <<
>> Gamma 1.46779926762e-08 <<
>> Gamma 2.15443469003e-07 <<
>> Gamma 3.16227766017e-06 <<
>> Gamma 4.64158883361e-05 <<
>> Gamma 0.000681292069058 <<
>> Gamma 0.01 <<
++ Degree 8 ++
>> Gamma 1e-09 <<
>> Gamma 1.46779926762e-08 <<
>> Gamma 2.15443469003e-07 <<
>> Gamma 3.16227766017e-06 <<
>> Gamma 4.64158883361e-05 <<
>> Gamma 0.000681292069058 <<
>> Gamma 0.01 <<
++ Degree 9 ++
>> Gamma 1e-09 <<
>> Gamma 1.46779926762e-08 <<
>> Gamma 2.15443469003e-07 <<
>> Gamma 3.16227766017e-06 <<
>> Gamma 4.64158883361e-05 <<
>> Gamma 0.000681292069058 <<
>> Gamma 0.01 <<
--- Fold 1 ---
++ Degree 6 ++
>> Gamma 1e-09 <<
>> Gamma 1.46779926762e-08 <<
>> Gamma 2.15443469003e-07 <<
>> Gamma 3.16227766017e-06 <<
>> Gamma 4.64158883361e-05 <

>> Gamma 2.15443469003e-07 <<
>> Gamma 3.16227766017e-06 <<
>> Gamma 4.64158883361e-05 <<
>> Gamma 0.000681292069058 <<
>> Gamma 0.01 <<
Best accuracy test = 0.644868488912 with degree = 6 and gamma = 2.15443469003e-07
Corresponding accuracy train = 0.643638004383
Accuracy full train on jet 1 = 0.66692450495
***** Jet  2 *****
--- Fold 0 ---
++ Degree 6 ++
>> Gamma 1e-09 <<
>> Gamma 1.46779926762e-08 <<
>> Gamma 2.15443469003e-07 <<
>> Gamma 3.16227766017e-06 <<
>> Gamma 4.64158883361e-05 <<
>> Gamma 0.000681292069058 <<
>> Gamma 0.01 <<
++ Degree 7 ++
>> Gamma 1e-09 <<
>> Gamma 1.46779926762e-08 <<
>> Gamma 2.15443469003e-07 <<
>> Gamma 3.16227766017e-06 <<
>> Gamma 4.64158883361e-05 <<
>> Gamma 0.000681292069058 <<
>> Gamma 0.01 <<
++ Degree 8 ++
>> Gamma 1e-09 <<
>> Gamma 1.46779926762e-08 <<
>> Gamma 2.15443469003e-07 <<
>> Gamma 3.16227766017e-06 <<
>> Gamma 4.64158883361e-05 <<
>> Gamma 0.000681292069058 <<
>> Gamma 0.01 <<
++ Degree 9 ++
>> Gamma 1e-09 <<
>> Gamma 1.46779926762e

>> Gamma 4.64158883361e-05 <<
>> Gamma 0.000681292069058 <<
>> Gamma 0.01 <<
++ Degree 8 ++
>> Gamma 1e-09 <<
>> Gamma 1.46779926762e-08 <<
>> Gamma 2.15443469003e-07 <<
>> Gamma 3.16227766017e-06 <<
>> Gamma 4.64158883361e-05 <<
>> Gamma 0.000681292069058 <<
>> Gamma 0.01 <<
++ Degree 9 ++
>> Gamma 1e-09 <<
>> Gamma 1.46779926762e-08 <<
>> Gamma 2.15443469003e-07 <<
>> Gamma 3.16227766017e-06 <<
>> Gamma 4.64158883361e-05 <<
>> Gamma 0.000681292069058 <<
>> Gamma 0.01 <<
Best accuracy test = 0.710431654676 with degree = 8 and gamma = 1e-09
Corresponding accuracy train = 0.709238031018
Accuracy full train on jet 3 = 0.701672361086


In [51]:
# CREATE CSV SUBMISSION
#create_csv_submission(ids_test, y_predicted_test, 'output/trial.csv')

# COMPUTE ACCURACY ON FULL train
total_accuracy_train = np.sum(y_predicted_train == y_train)/len(y_train)*100
print('Total accuracy train =', total_accuracy_train, 'with degrees =', best_degrees, 'and gamma =', best_gammas)

Total accuracy train = 69.496 with degrees = [ 6.  6.  9.  8.] and gamma = [  1.00000000e-09   2.15443469e-07   1.46779927e-08   1.00000000e-09]


In [52]:
total_accuracy_test = np.sum(y_predicted_test == y_test)/len(y_test)*100
print('Total accuracy test =', total_accuracy_test, 'with degrees =', best_degrees, 'and gamma =', best_gammas)

Total accuracy test = 69.4848 with degrees = [ 6.  6.  9.  8.] and gamma = [  1.00000000e-09   2.15443469e-07   1.46779927e-08   1.00000000e-09]


In [53]:
np.sum(y_predicted_test==-1)

177881

In [54]:
np.sum(y_predicted_test==1)

9619

In [55]:
y_predicted_test[:200]

array([-1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1.,  1., -1.,  1., -1., -1.,  1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1.,  1., -1., -1., -1., -1., -1., -1., -1., -1.,  1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
        1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1.,  1., -1., -1., -1., -1., -1.,  1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1.,  1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1.,  1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1.,  1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,  1., -1., -1.,
       -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1

## LOGISTIC REGULARIZED

In [56]:
def cross_validation_one_fold_logistic_regularized(y_cross_val_train, y_cross_val_test, tx_cross_val_train, tx_cross_val_test, \
                                 degrees, gammas, lambdas, len_kept_data, max_iters, stdize=False):
    
    accuracies_train_by_deg = np.zeros([len(degrees), len(gammas),len(lambdas)])
    accuracies_test_by_deg = np.zeros([len(degrees), len(gammas),len(lambdas)])
    
    # For each degree, compute the least squares weights, the predictions and the accuracies
    previous_deg = 1
    for deg_id, deg in enumerate(degrees):
        print('++ Degree', deg, '++')
                
        # Add powers of the chosen columns
        len_data = tx_cross_val_train.shape[1]
        tx_cross_val_train = add_powers(tx_cross_val_train, range(previous_deg+1, deg+1), range(len_kept_data))
        tx_cross_val_test = add_powers(tx_cross_val_test, range(previous_deg+1, deg+1), range(len_kept_data))
        if stdize: 
            tx_cross_val_train[:,len_data:] = standardize(tx_cross_val_train[:,len_data:])[0]
            tx_cross_val_test[:,len_data:] = standardize(tx_cross_val_test[:,len_data:])[0]
                
        for gamma_id, single_gamma in enumerate(gammas):
            print('>> Gamma', single_gamma, '<<')
            
            
            for lambda_id, single_lambda in enumerate(lambdas):
                
                print('>> Lambda', single_lambda, '<<')
            # Compute the best weights on the training set
                initial_w = np.zeros(tx_cross_val_train.shape[1])
                weights, loss = reg_logistic_regression(y_cross_val_train, tx_cross_val_train, single_lambda,initial_w, max_iters, \
                                                 single_gamma);

                # Compute the predictions
                y_predicted_cross_val_train = predict_labels(weights, tx_cross_val_train)
                y_predicted_cross_val_test = predict_labels(weights, tx_cross_val_test)

                # Compute the accuracies for each degree
                accuracies_train_by_deg[deg_id, gamma_id,lambda_id] = \
                    np.sum(y_predicted_cross_val_train == y_cross_val_train)/len(y_cross_val_train)
                accuracies_test_by_deg[deg_id, gamma_id,lambda_id] = \
                    np.sum(y_predicted_cross_val_test == y_cross_val_test)/len(y_cross_val_test)

        # Update the previous degree to the actual degree
        previous_deg = deg
        
    return accuracies_train_by_deg, accuracies_test_by_deg

In [57]:
def cross_validation_logistic_regularized(y_single_jet_train, tx_single_jet_train, degrees, gammas, lambdas, k_fold, seed, max_iters):
    
    # Get the indices so that we get the k'th subgroup in test, others in train, for each k
    k_indices = build_k_indices(y_single_jet_train, k_fold, seed)
    
    # Initialize matrix of computed accuracies for each degree and each fold
    accuracies_train_by_fold = np.zeros([len(degrees), len(gammas),len(lambdas), k_fold])
    accuracies_test_by_fold = np.zeros([len(degrees), len(gammas),len(lambdas), k_fold])
    
    # Preprocess training dataset
    tx_single_jet_train_preprocessed, len_kept_data, unique_cols = \
        preprocess_data(tx_single_jet_train, [], 'after')
    
    for k in range(k_fold):
        print('--- Fold', k, '---')
        # Create the testing set for this fold number
        k_index = k_indices[k] # Indices of the testing set for fold k
        y_cross_val_test = y_single_jet_train[k_index]
        tx_cross_val_test = tx_single_jet_train_preprocessed[k_index,:]
        
        # Create the training set for this fold number
        mask = np.ones(len(y_single_jet_train), dtype=bool) # set all elements to True
        mask[k_index] = False # set test elements to False
        y_cross_val_train = y_single_jet_train[mask] # select only True elements (ie train elements)
        tx_cross_val_train = tx_single_jet_train_preprocessed[mask,:]
        
        # Compute the accuracies for each degree
        accuracies_train_by_fold[:,:,:,k], accuracies_test_by_fold[:,:,:,k] = cross_validation_one_fold_logistic_regularized\
            (y_cross_val_train, y_cross_val_test, tx_cross_val_train, tx_cross_val_test, \
                                 degrees, gammas, lambdas, len_kept_data, max_iters, True)
    
    # Compute the mean accuracies over the folds, for each degree
    mean_accuracies_train_by_deg = np.mean(accuracies_train_by_fold, axis=3)
    mean_accuracies_test_by_deg = np.mean(accuracies_test_by_fold, axis=3)
    
    # Get the index of the best accuracy in the testing set
    max_id_deg_test, max_id_gamma_test,max_id_lambda = \
        np.unravel_index(mean_accuracies_test_by_deg.argmax(), mean_accuracies_test_by_deg.shape)
    
    # Find the optimal degree and the corresponding accuracies in the training and testing sets
    best_deg = degrees[max_id_deg_test]
    best_gamma = gammas[max_id_gamma_test]
    best_lambda=lambdas[max_id_lambda]
    best_accuracy_test = mean_accuracies_test_by_deg[max_id_deg_test, max_id_gamma_test,max_id_lambda]
    corresponding_accuracy_train = mean_accuracies_train_by_deg[max_id_deg_test, max_id_gamma_test,max_id_lambda]
    
    print('Best accuracy test =', best_accuracy_test, 'with degree =', best_deg)
    print('Corresponding accuracy train =', corresponding_accuracy_train)
    
    return best_deg, best_gamma, best_lambda, best_accuracy_test, corresponding_accuracy_train                        


In [58]:
degrees = range(6,11)
gammas = np.logspace(-8,-2,6)
lambdas = np.logspace(-8,-2,6)

k_fold = 5
seed = 1
max_iters = 300

In [None]:
mask_jets_train = split_jets_mask(tx_train)
mask_jets_test = split_jets_mask(tx_test)
len_mask = len(mask_jets_train)

y_predicted_train = np.zeros(len(y_train))
y_predicted_test = np.zeros(tx_test.shape[0])
best_degrees = np.zeros(len_mask)
best_gammas = np.zeros(len_mask)
best_lambdas=np.zeros(len_mask)

In [None]:
for jet_id in range(len_mask):
    print('** Jet ', jet_id, '**')
    # SEPARATE THE WHOLE DATA SET TO GET ONLY THE PART THAT HAVE THE RIGHT NUMBER OF JETS
    tx_single_jet_train = tx_train[mask_jets_train[jet_id]]
    tx_single_jet_test = tx_test[mask_jets_test[jet_id]]
    y_single_jet_train = y_train[mask_jets_train[jet_id]]
    
    # CALL CROSS VALIDATION FOR A SINGLE JET ON TRAIN PART, FIND BEST DEG, BEST ACCURACY ON TESTING CROSS VAL
    best_deg, best_gamma, best_lambda, best_accuracy_test, corresponding_accuracy_train = \
        cross_validation_logistic_regularized(y_single_jet_train, tx_single_jet_train, degrees, gammas, lambdas, k_fold, seed, max_iters)
    
    # KEEP IN MEMORY THE BEST DEGREE FOR THIS JET
    best_degrees[jet_id] = best_deg
    best_gammas[jet_id] = best_gamma
    best_lambdas[jet_id]=best_lambda
    # PREPROCESS FULL TRAINING AND TESTING DATA
    tx_single_jet_train_preprocessed, len_kept_data, unique_cols = \
        preprocess_data(tx_single_jet_train, [], 'after')
    tx_single_jet_test_preprocessed = preprocess_data(tx_single_jet_test, unique_cols, 'after')[0]
    
    # ADD POWERS TO THE CHOSEN COLUMNS
    len_data = tx_single_jet_train_preprocessed.shape[1]
    tx_single_jet_train_preprocessed = add_powers(tx_single_jet_train_preprocessed, range(2,best_deg+1), \
                                                  range(len_kept_data))
    tx_single_jet_test_preprocessed = add_powers(tx_single_jet_test_preprocessed, range(2,best_deg+1), \
                                                 range(len_kept_data))
    tx_single_jet_train_preprocessed[:,len_data:] = standardize(tx_single_jet_train_preprocessed[:,len_data:])[0]
    tx_single_jet_test_preprocessed[:,len_data:] = standardize(tx_single_jet_test_preprocessed[:,len_data:])[0]
    
    # COMPUTE THE BEST WEIGHTS AND FULL ACCURACY ON TRAINING FULL SET - ONE JET
    initial_w = np.zeros(tx_single_jet_train_preprocessed.shape[1])
    weights, loss = reg_logistic_regression(y_single_jet_train, tx_single_jet_train_preprocessed , best_lambda, initial_w, max_iters, best_gamma)
    
    # COMPUTE THE PREDICTIONS ON THE FULL TESTING SET - SINGLE JET
    y_predicted_single_jet_train = predict_labels(weights, tx_single_jet_train_preprocessed)
    y_predicted_single_jet_test = predict_labels(weights, tx_single_jet_test_preprocessed)
    
    # ADD THE PREDICTIONS TO y_predicted_test AND y_predicted_train
    y_predicted_train[mask_jets_train[jet_id]] = y_predicted_single_jet_train
    y_predicted_test[mask_jets_test[jet_id]] = y_predicted_single_jet_test
    
    # COMPUTE THE ACCURACY train ON JET
    accuracy_train_single_jet = np.sum(y_predicted_single_jet_train == y_single_jet_train)/len(y_single_jet_train)
    
    # PRINT ACCURACY train ON JET
    print('Accuracy full train on jet', jet_id, '=', accuracy_train_single_jet)

** Jet  0 **
--- Fold 0 ---
++ Degree 6 ++
>> Gamma 1e-08 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 1.58489319246e-07 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 2.51188643151e-06 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 3.98107170553e-05 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.00063095734448 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.01 <<
>> 

>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 2.51188643151e-06 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 3.98107170553e-05 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.00063095734448 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.01 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
++ Degree 9 ++
>> Gamma 1e-08 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<

>> Lambda 0.01 <<
>> Gamma 3.98107170553e-05 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.00063095734448 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.01 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
--- Fold 3 ---
++ Degree 6 ++
>> Gamma 1e-08 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 1.58489319246e-07 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 2.51188643151e-06 <

>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.01 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
++ Degree 8 ++
>> Gamma 1e-08 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 1.58489319246e-07 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 2.51188643151e-06 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 3.98107170553e-05 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <

>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
++ Degree 10 ++
>> Gamma 1e-08 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 1.58489319246e-07 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 2.51188643151e-06 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 3.98107170553e-05 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.00063095734448 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.5118

>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 1.58489319246e-07 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 2.51188643151e-06 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 3.98107170553e-05 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.00063095734448 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.01 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
++ Degree 8 ++
>

>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 3.98107170553e-05 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.00063095734448 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.01 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
++ Degree 10 ++
>> Gamma 1e-08 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 1.58489319246e-07 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <

>> Lambda 0.01 <<
>> Gamma 3.98107170553e-05 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.00063095734448 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<
>> Lambda 2.51188643151e-06 <<
>> Lambda 3.98107170553e-05 <<
>> Lambda 0.00063095734448 <<
>> Lambda 0.01 <<
>> Gamma 0.01 <<
>> Lambda 1e-08 <<
>> Lambda 1.58489319246e-07 <<


In [None]:
# CREATE CSV SUBMISSION
#create_csv_submission(ids_test, y_predicted_test, 'output/trial.csv')

# COMPUTE ACCURACY ON FULL train
total_accuracy_train = np.sum(y_predicted_train == y_train)/len(y_train)*100
print('Total accuracy train =', total_accuracy_train, 'with degrees =', best_degrees, \
      ', gammas =', best_gammas, 'and lambdas =', best_lambdas)

In [None]:
total_accuracy_test = np.sum(y_predicted_test == y_test)/len(y_test)*100
print('Total accuracy test =', total_accuracy_test, 'with degrees =', best_degrees)

In [None]:
np.sum(y_predicted_test==-1)

In [None]:
np.sum(y_predicted_test==1)

In [None]:
y_predicted_test[:200]