# Comparing Different Models

$\lambda=1$, $k=5$, $\text{degree}=9$

Using all feature engineering techniques.

In [1]:
%load_ext autoreload 
%autoreload 2

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from implementations import *
from utils.helpers import *
from utils.prediction import *
from utils.preprocess import *
from utils.cross_validation import *

In [3]:
TRAIN_PATH = './data/train.csv'
TEST_PATH = './data/test.csv'

In [4]:
lambda_ = 0
degree = 9
learning_rate = 0.1
max_iter = 2000
k_fold = 5
seed = 20221031
batch_size = 1

In [5]:
y_raw_tr, tx_raw_tr, ids_tr = load_csv_data(TRAIN_PATH)
_, tx_raw_te, ids_te = load_csv_data(TEST_PATH)

In [6]:
y_tr = process_y(y_raw_tr)
tx_tr = tx_raw_tr
tx_te = tx_raw_te
print(y_tr.shape)
print(tx_tr.shape)
print(tx_te.shape)

(250000, 1)
(250000, 30)
(568238, 30)


In [7]:
tx_tr[:, [22, 29]] = tx_tr[:, [29, 22]]
tx_te[:, [22, 29]] = tx_te[:, [29, 22]]
tx_tr[tx_tr[:, 0] == -999, 0] = np.nan
tx_te[tx_te[:, 0] == -999, 0] = np.nan

In [8]:
median = np.nanmedian(np.hstack((tx_tr[:, 0], tx_te[:, 0])))
tx_tr[np.isnan(tx_tr[:, 0]), 0] = median
tx_te[np.isnan(tx_te[:, 0]), 0] = median

In [9]:
tx_tr

array([[ 138.47 ,   51.655,   97.827, ...,    1.24 ,   -2.475,    2.   ],
       [ 160.937,   68.768,  103.235, ..., -999.   , -999.   ,    1.   ],
       [ 112.501,  162.172,  125.953, ..., -999.   , -999.   ,    1.   ],
       ...,
       [ 105.457,   60.526,   75.839, ..., -999.   , -999.   ,    1.   ],
       [  94.951,   19.362,   68.812, ..., -999.   , -999.   ,    0.   ],
       [ 112.501,   72.756,   70.831, ..., -999.   , -999.   ,    0.   ]])

In [10]:
# cross validation
k_indices = build_k_indices(y_tr, k_fold, seed)
tx_tr, tx_dev, y_tr, y_dev = cross_validation_dataset(y_tr, tx_tr, k_indices, k=k_fold-1)
print(tx_tr.shape)
print(tx_dev.shape)
print(y_tr.shape)
print(y_dev.shape)

(200000, 30)
(50000, 30)
(200000, 1)
(50000, 1)


In [11]:
# split datasets to different jet nums
# and remove columns with missing values for each jet num
tx_train_list, y_tr_list = split_jet_num(tx_tr, y_tr)
tx_dev_list, y_dev_list = split_jet_num(tx_dev, y_dev)

In [12]:
# remove outliers
means = []
stds = []
for i in range(3):
    mean = np.mean(tx_train_list[i], axis=0)
    std = np.std(tx_train_list[i], axis=0)
    tx_train_list[i] = np.clip(tx_train_list[i], mean-2*std, mean+2*std)
    tx_dev_list[i] = np.clip(tx_dev_list[i], mean-2*std, mean+2*std)
    means.append(mean)
    stds.append(std)

In [13]:
# add polynomial features
for i in range(3):
    tx_train_list[i] = build_poly(tx_train_list[i], degree)
    tx_dev_list[i] = build_poly(tx_dev_list[i], degree)

In [14]:
for i in range(3):
    print(tx_train_list[i].shape, tx_dev_list[i].shape)

(79917, 162) (19996, 162)
(62257, 198) (15287, 198)
(57826, 261) (14717, 261)


In [15]:
maxs = [0, 0, 0]
mins = [0, 0, 0]
for i in range(3):
    tx_train_list[i], tx_dev_list[i], maxs[i], mins[i] = normalization(
        tx_train_list[i],
        tx_dev_list[i]
    )

## Least Square

In [16]:
def ridge_regression_plot(y_tr, tx_tr, y_dev, tx_dev, lambda_):
    """Ridge regression using normal equations.
    Args:
        y: numpy array of shape (N, 1), N is the number of samples.
        tx: numpy array of shape (N, D), D is the number of features.
        lambda_: scalar.

    Returns:
        w: optimal weights, numpy array of shape(D, 1), D is the number of features.
        loss: scalar
    """
    N, D = tx_tr.shape
    I = np.eye(D)
    w = np.linalg.solve(tx_tr.T @ tx_tr + 2 * N * lambda_ * I, tx_tr.T @ y_tr).reshape(-1, 1)
    train_loss = compute_mse(y_tr, tx_tr, w)
    dev_loss = compute_mse(y_dev, tx_dev, w)

    return w, train_loss, dev_loss

In [17]:
train_losses = []
dev_losses = []
ws = []
y_tr_pred, y_tr_true = np.empty((0, 1)), np.empty((0, 1))
y_dev_pred, y_dev_true = np.empty((0, 1)), np.empty((0, 1))

for i in range(len(tx_train_list)):
    train_losses = []
    dev_losses = []
    w_list = []
    lambda_list = [0]

    y_tr = y_tr_list[i]
    tx_tr_fe = tx_train_list[i]
    y_dev = y_dev_list[i]
    tx_dev_fe = tx_dev_list[i]

    for lambda_ in lambda_list:
        w, train_loss, dev_loss = ridge_regression_plot(
            y_tr, tx_tr_fe,
            y_dev, tx_dev_fe,
            lambda_,
        )
        train_losses.append(train_loss)
        dev_losses.append(dev_loss)
        w_list.append(w)

    # cross_validation_visualization(lambda_list, train_losses, dev_losses, i)
    index = np.argmin(dev_losses)
    best_lambda = lambda_list[index]
    best_w = w_list[index]
    print("The best lambda for PRI_JET_NUM = {} is {}.".format(i, best_lambda))

    y_tr_pred = np.vstack((y_tr_pred, predict_linear(tx_tr_fe, best_w)))
    y_dev_pred = np.vstack((y_dev_pred, predict_linear(tx_dev_fe, best_w)))
    y_tr_true = np.vstack((y_tr_true, y_tr))
    y_dev_true = np.vstack((y_dev_true, y_dev))
    ws.append(best_w)

The best lambda for PRI_JET_NUM = 0 is 0.
The best lambda for PRI_JET_NUM = 1 is 0.
The best lambda for PRI_JET_NUM = 2 is 0.


In [18]:
accuracy, precision, recall, f1_score = compute_metrics(y_tr_true, y_tr_pred)
print("Training")
print(accuracy, precision, recall, f1_score)

accuracy, precision, recall, f1_score = compute_metrics(y_dev_true, y_dev_pred)
print("Validation")
print(accuracy, precision, recall, f1_score)

Training
0.8308 0.7798631790744467 0.7061198315188084 0.7411617127384541
Validation
0.82938 0.7735764267830776 0.7065791016770259 0.7385614906070913


## Ridge Regression

In [19]:
train_losses = []
dev_losses = []
ws = []
y_tr_pred, y_tr_true = np.empty((0, 1)), np.empty((0, 1))
y_dev_pred, y_dev_true = np.empty((0, 1)), np.empty((0, 1))

for i in range(len(tx_train_list)):
    train_losses = []
    dev_losses = []
    w_list = []
    lambda_list = [1e-8]

    y_tr = y_tr_list[i]
    tx_tr_fe = tx_train_list[i]
    y_dev = y_dev_list[i]
    tx_dev_fe = tx_dev_list[i]

    for lambda_ in lambda_list:
        w, train_loss, dev_loss = ridge_regression_plot(
            y_tr, tx_tr_fe,
            y_dev, tx_dev_fe,
            lambda_,
        )
        train_losses.append(train_loss)
        dev_losses.append(dev_loss)
        w_list.append(w)

    # cross_validation_visualization(lambda_list, train_losses, dev_losses, i)
    index = np.argmin(dev_losses)
    best_lambda = lambda_list[index]
    best_w = w_list[index]
    print("The best lambda for PRI_JET_NUM = {} is {}.".format(i, best_lambda))

    y_tr_pred = np.vstack((y_tr_pred, predict_linear(tx_tr_fe, best_w)))
    y_dev_pred = np.vstack((y_dev_pred, predict_linear(tx_dev_fe, best_w)))
    y_tr_true = np.vstack((y_tr_true, y_tr))
    y_dev_true = np.vstack((y_dev_true, y_dev))
    ws.append(best_w)

The best lambda for PRI_JET_NUM = 0 is 1e-08.
The best lambda for PRI_JET_NUM = 1 is 1e-08.
The best lambda for PRI_JET_NUM = 2 is 1e-08.


In [20]:
accuracy, precision, recall, f1_score = compute_metrics(y_tr_true, y_tr_pred)
print("Training")
print(accuracy, precision, recall, f1_score)

accuracy, precision, recall, f1_score = compute_metrics(y_dev_true, y_dev_pred)
print("Validation")
print(accuracy, precision, recall, f1_score)

Training
0.83079 0.7790923538760375 0.7073295148149767 0.741478618245153
Validation
0.83004 0.7738445781590065 0.7088659552011258 0.7399314481576692


## Linear Regression GD

In [21]:
def mean_squared_error_sgd(y_tr, tx_tr, y_dev, tx_dev, initial_w, max_iters, gamma, batch_size=1):
    """Linear regression using stochastic gradient descent.

    Args:
        y: numpy array of shape=(N, 1)
        tx: numpy array of shape=(N, D)
        initial_w: numpy array of shape=(D, 1). The initial guess (or the initialization) for the model parameters
        max_iters: a scalar denoting the total number of iterations of SGD
        gamma: a scalar denoting the stepsize
        batch_size: default 1, a scalar denoting the batch size

    Returns:
        w: the last weight vector of shape (D, 1)
        loss: the corresponding mse loss
    """

    # Define parameters to store w and loss
    w = initial_w
    train_loss = compute_mse(y_tr, tx_tr, w)
    ws = [initial_w]
    train_losses = [train_loss]

    for n_iter in range(max_iters):
        # implement stochastic gradient descent.
        for y_batch, tx_batch in batch_iter(y_tr, tx_tr, batch_size=batch_size, num_batches=1):

            # compute gradient
            grad = linear_reg_gradient(y_batch, tx_batch, w)

            # update w by gradient
            w = w - gamma * grad

            # compute loss
            train_loss = compute_mse(y_tr, tx_tr, w)
            dev_loss = compute_mse(y_dev, tx_dev, w)

            # store w and loss
            ws.append(w)
            train_losses.append(train_loss)
            dev_losses.append(dev_loss)

    index = np.argmin(dev_losses)
    return ws[index], train_losses[index], dev_losses[index]

In [22]:
train_losses = []
dev_losses = []
ws = []
y_tr_pred, y_tr_true = np.empty((0, 1)), np.empty((0, 1))
y_dev_pred, y_dev_true = np.empty((0, 1)), np.empty((0, 1))

for i in range(len(tx_train_list)):

    y_tr = y_tr_list[i]
    tx_tr_fe = tx_train_list[i]
    y_dev = y_dev_list[i]
    tx_dev_fe = tx_dev_list[i]
    initial_w = np.random.rand(tx_tr_fe.shape[1], 1)

    best_w, train_loss, dev_loss = mean_squared_error_sgd(
        y_tr, tx_tr_fe,
        y_dev, tx_dev_fe,
        initial_w,
        max_iter,
        learning_rate,
        batch_size=tx_tr_fe.shape[0]
    )

    y_tr_pred = np.vstack((y_tr_pred, predict_linear(tx_tr_fe, best_w)))
    y_dev_pred = np.vstack((y_dev_pred, predict_linear(tx_dev_fe, best_w)))
    y_tr_true = np.vstack((y_tr_true, y_tr))
    y_dev_true = np.vstack((y_dev_true, y_dev))
    ws.append(best_w)

  loss = 1 / (2 * N) * e.T @ e


In [23]:
accuracy, precision, recall, f1_score = compute_metrics(y_tr_true, y_tr_pred)
print("Training")
print(accuracy, precision, recall, f1_score)

accuracy, precision, recall, f1_score = compute_metrics(y_dev_true, y_dev_pred)
print("Validation")
print(accuracy, precision, recall, f1_score)

Training
0.7357 0.7266031819097212 0.36809350997624357 0.4886429594087373
Validation
0.73474 0.7226071638285378 0.3607951213791486 0.4812859321835035


## Linear Regression SGD

In [24]:
train_losses = []
dev_losses = []
ws = []
y_tr_pred, y_tr_true = np.empty((0, 1)), np.empty((0, 1))
y_dev_pred, y_dev_true = np.empty((0, 1)), np.empty((0, 1))

for i in range(len(tx_train_list)):

    y_tr = y_tr_list[i]
    tx_tr_fe = tx_train_list[i]
    y_dev = y_dev_list[i]
    tx_dev_fe = tx_dev_list[i]
    initial_w = np.random.rand(tx_tr_fe.shape[1], 1)

    best_w, train_loss, dev_loss = mean_squared_error_sgd(
        y_tr, tx_tr_fe,
        y_dev, tx_dev_fe,
        initial_w,
        max_iter,
        learning_rate,
        batch_size=1
    )

    y_tr_pred = np.vstack((y_tr_pred, predict_linear(tx_tr_fe, best_w)))
    y_dev_pred = np.vstack((y_dev_pred, predict_linear(tx_dev_fe, best_w)))
    y_tr_true = np.vstack((y_tr_true, y_tr))
    y_dev_true = np.vstack((y_dev_true, y_dev))
    ws.append(best_w)

In [25]:
accuracy, precision, recall, f1_score = compute_metrics(y_tr_true, y_tr_pred)
print("Training")
print(accuracy, precision, recall, f1_score)

accuracy, precision, recall, f1_score = compute_metrics(y_dev_true, y_dev_pred)
print("Validation")
print(accuracy, precision, recall, f1_score)

Training
0.46158 0.30450619940157514 0.4434873857723755 0.3610850707835436
Validation
0.46172 0.3014178683638121 0.43878269027794065 0.3573543457497612


## Logistic Regression GD

In [26]:
def reg_logistic_regression_plot(y_tr, tx_tr, y_dev, tx_dev, lambda_, initial_w, max_iters, gamma, batch_size=8):
    """Regularized logistic regression using gradient descent
    or SGD (y ∈ {0, 1}, with regularization term λ|w|2)

    Args:
        y_tr: numpy array of shape=(N_tr, 1)
        tx_tr: numpy array of shape=(N_tr, D)
        y_dev: numpy array of shape=(N_dev, 1)
        tx_dev: numpy array of shape=(N_dev, D)
        lambda_: a scalar denoting the regularization term
        initial_w: numpy array of shape=(D, 1). The initial guess (or the initialization) for the model parameters
        max_iters: a scalar denoting the total number of iterations of SGD
        gamma: a scalar denoting the stepsize
        batch_size: mini batch size. default 8.
        optimizer: 'gd' (batch sgd), 'ada' (adagrad), and 'adam'. default 'gd'.

    Returns:
        w: the best weight vector of shape (D, 1) for validation
        train_loss: the corresponding mse loss
        dev_loss: the corresponding mse loss
    """

    # Define parameters to store w and loss
    w = initial_w
    ws = [initial_w]
    train_losses = [compute_ce(y_tr, tx_tr, w)]
    dev_losses = [compute_ce(y_dev, tx_dev, w)]

    for n_iter in range(max_iters):
        for y_batch, tx_batch in batch_iter(
            y_tr, tx_tr, batch_size=batch_size, num_batches=1
        ):
            # compute gradient
            grad = logistic_reg_gradient(y_batch, tx_batch, w)

            # update w by gradient
            w = w - gamma * (grad + 2 * lambda_ * w)

            # compute loss
            loss = compute_ce(y_tr, tx_tr, w)

            # store w and loss
            ws.append(w)
            train_losses.append(loss)

            # compute dev loss
            dev_losses.append(compute_ce(y_dev, tx_dev, w))
    
    index = np.argmin(dev_losses)
    return ws[index], train_losses[index], dev_losses[index]

In [27]:
train_losses = []
dev_losses = []
ws = []
y_tr_pred, y_tr_true = np.empty((0, 1)), np.empty((0, 1))
y_dev_pred, y_dev_true = np.empty((0, 1)), np.empty((0, 1))

for i in range(len(tx_train_list)):
    train_losses = []
    dev_losses = []
    w_list = []
    lambda_list = [0]

    y_tr = y_tr_list[i]
    tx_tr_fe = tx_train_list[i]
    y_dev = y_dev_list[i]
    tx_dev_fe = tx_dev_list[i]

    for lambda_ in lambda_list:
        initial_w = np.random.rand(tx_tr_fe.shape[1], 1)
        w, train_loss, dev_loss = reg_logistic_regression_plot(
            y_tr, tx_tr_fe,
            y_dev, tx_dev_fe,
            lambda_,
            initial_w,
            max_iter,
            learning_rate,
            batch_size=tx_tr_fe.shape[0],
        )
        train_losses.append(train_loss)
        dev_losses.append(dev_loss)
        w_list.append(w)

    index = np.argmin(dev_losses)
    best_lambda = lambda_list[index]
    best_w = w_list[index]
    print("The best lambda for PRI_JET_NUM = {} is {}.".format(i, best_lambda))

    y_tr_pred = np.vstack((y_tr_pred, predict_logistic(tx_tr_fe, best_w)))
    y_dev_pred = np.vstack((y_dev_pred, predict_logistic(tx_dev_fe, best_w)))
    y_tr_true = np.vstack((y_tr_true, y_tr))
    y_dev_true = np.vstack((y_dev_true, y_dev))
    ws.append(best_w)

The best lambda for PRI_JET_NUM = 0 is 0.
The best lambda for PRI_JET_NUM = 1 is 0.
The best lambda for PRI_JET_NUM = 2 is 0.


In [28]:
accuracy, precision, recall, f1_score = compute_metrics(y_tr_true, y_tr_pred)
print("Training")
print(accuracy, precision, recall, f1_score)

accuracy, precision, recall, f1_score = compute_metrics(y_dev_true, y_dev_pred)
print("Validation")
print(accuracy, precision, recall, f1_score)

Training
0.80098 0.7423571969378312 0.6430559806450673 0.6891478195676621
Validation
0.8014 0.7382289994649546 0.6472381845901255 0.6897456726863713


## Logistic Regression SGD

In [29]:
train_losses = []
dev_losses = []
ws = []
y_tr_pred, y_tr_true = np.empty((0, 1)), np.empty((0, 1))
y_dev_pred, y_dev_true = np.empty((0, 1)), np.empty((0, 1))

for i in range(len(tx_train_list)):
    train_losses = []
    dev_losses = []
    w_list = []
    lambda_list = [0]

    y_tr = y_tr_list[i]
    tx_tr_fe = tx_train_list[i]
    y_dev = y_dev_list[i]
    tx_dev_fe = tx_dev_list[i]

    for lambda_ in lambda_list:
        initial_w = np.random.rand(tx_tr_fe.shape[1], 1)
        w, train_loss, dev_loss = reg_logistic_regression_plot(
            y_tr, tx_tr_fe,
            y_dev, tx_dev_fe,
            lambda_,
            initial_w,
            max_iter,
            learning_rate,
            batch_size=1,
        )
        train_losses.append(train_loss)
        dev_losses.append(dev_loss)
        w_list.append(w)

    index = np.argmin(dev_losses)
    best_lambda = lambda_list[index]
    best_w = w_list[index]
    print("The best lambda for PRI_JET_NUM = {} is {}.".format(i, best_lambda))

    y_tr_pred = np.vstack((y_tr_pred, predict_logistic(tx_tr_fe, best_w)))
    y_dev_pred = np.vstack((y_dev_pred, predict_logistic(tx_dev_fe, best_w)))
    y_tr_true = np.vstack((y_tr_true, y_tr))
    y_dev_true = np.vstack((y_dev_true, y_dev))
    ws.append(best_w)

The best lambda for PRI_JET_NUM = 0 is 0.
The best lambda for PRI_JET_NUM = 1 is 0.
The best lambda for PRI_JET_NUM = 2 is 0.


In [30]:
accuracy, precision, recall, f1_score = compute_metrics(y_tr_true, y_tr_pred)
print("Training")
print(accuracy, precision, recall, f1_score)

accuracy, precision, recall, f1_score = compute_metrics(y_dev_true, y_dev_pred)
print("Validation")
print(accuracy, precision, recall, f1_score)

Training
0.78109 0.6982894925973839 0.6372261816273884 0.6663618490238216
Validation
0.78104 0.6950051098620337 0.6380321332238771 0.6653011311525527


## Penalized Logistic Regression GD

In [31]:
train_losses = []
dev_losses = []
ws = []
y_tr_pred, y_tr_true = np.empty((0, 1)), np.empty((0, 1))
y_dev_pred, y_dev_true = np.empty((0, 1)), np.empty((0, 1))

for i in range(len(tx_train_list)):
    train_losses = []
    dev_losses = []
    w_list = []
    lambda_list = np.logspace(-10, 1, 12)

    y_tr = y_tr_list[i]
    tx_tr_fe = tx_train_list[i]
    y_dev = y_dev_list[i]
    tx_dev_fe = tx_dev_list[i]

    for lambda_ in lambda_list:
        initial_w = np.random.rand(tx_tr_fe.shape[1], 1)
        w, train_loss, dev_loss = reg_logistic_regression_plot(
            y_tr, tx_tr_fe,
            y_dev, tx_dev_fe,
            lambda_,
            initial_w,
            max_iter,
            learning_rate,
            batch_size=tx_tr_fe.shape[0],
        )
        train_losses.append(train_loss)
        dev_losses.append(dev_loss)
        w_list.append(w)

    index = np.argmin(dev_losses)
    best_lambda = lambda_list[index]
    best_w = w_list[index]
    print("The best lambda for PRI_JET_NUM = {} is {}.".format(i, best_lambda))

    y_tr_pred = np.vstack((y_tr_pred, predict_logistic(tx_tr_fe, best_w)))
    y_dev_pred = np.vstack((y_dev_pred, predict_logistic(tx_dev_fe, best_w)))
    y_tr_true = np.vstack((y_tr_true, y_tr))
    y_dev_true = np.vstack((y_dev_true, y_dev))
    ws.append(best_w)

The best lambda for PRI_JET_NUM = 0 is 1e-09.
The best lambda for PRI_JET_NUM = 1 is 0.0001.
The best lambda for PRI_JET_NUM = 2 is 1e-08.


In [32]:
accuracy, precision, recall, f1_score = compute_metrics(y_tr_true, y_tr_pred)
print("Training")
print(accuracy, precision, recall, f1_score)

accuracy, precision, recall, f1_score = compute_metrics(y_dev_true, y_dev_pred)
print("Validation")
print(accuracy, precision, recall, f1_score)

Training
0.801615 0.7432005379055303 0.6443822599215892 0.6902726712098858
Validation
0.8014 0.7394460876579726 0.6450099683358743 0.689007203257125


## Penalized Logistic Regression SGD

In [33]:
train_losses = []
dev_losses = []
ws = []
y_tr_pred, y_tr_true = np.empty((0, 1)), np.empty((0, 1))
y_dev_pred, y_dev_true = np.empty((0, 1)), np.empty((0, 1))

for i in range(len(tx_train_list)):
    train_losses = []
    dev_losses = []
    w_list = []
    lambda_list = np.logspace(-10, 1, 12)

    y_tr = y_tr_list[i]
    tx_tr_fe = tx_train_list[i]
    y_dev = y_dev_list[i]
    tx_dev_fe = tx_dev_list[i]

    for lambda_ in lambda_list:
        initial_w = np.random.rand(tx_tr_fe.shape[1], 1)
        w, train_loss, dev_loss = reg_logistic_regression_plot(
            y_tr, tx_tr_fe,
            y_dev, tx_dev_fe,
            lambda_,
            initial_w,
            max_iter,
            learning_rate,
            batch_size=1,
        )
        train_losses.append(train_loss)
        dev_losses.append(dev_loss)
        w_list.append(w)

    index = np.argmin(dev_losses)
    best_lambda = lambda_list[index]
    best_w = w_list[index]
    print("The best lambda for PRI_JET_NUM = {} is {}.".format(i, best_lambda))

    y_tr_pred = np.vstack((y_tr_pred, predict_logistic(tx_tr_fe, best_w)))
    y_dev_pred = np.vstack((y_dev_pred, predict_logistic(tx_dev_fe, best_w)))
    y_tr_true = np.vstack((y_tr_true, y_tr))
    y_dev_true = np.vstack((y_dev_true, y_dev))
    ws.append(best_w)

The best lambda for PRI_JET_NUM = 0 is 1e-09.
The best lambda for PRI_JET_NUM = 1 is 1e-07.
The best lambda for PRI_JET_NUM = 2 is 1e-07.


In [34]:
accuracy, precision, recall, f1_score = compute_metrics(y_tr_true, y_tr_pred)
print("Training")
print(accuracy, precision, recall, f1_score)

accuracy, precision, recall, f1_score = compute_metrics(y_dev_true, y_dev_pred)
print("Validation")
print(accuracy, precision, recall, f1_score)

Training
0.785035 0.7249183551638164 0.6017372801072683 0.6576090851895802
Validation
0.78552 0.7213596307175829 0.6047848012196552 0.6579484562388365
