# Cross validation

In [1]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
%reload_ext autoreload
%autoreload 2

In [3]:
# Import helpers that live in scripts/
from path import (add_src_to_path, create_out_dir, extract_archives,
                  DATA_TRAIN_PATH)
from proj1_helpers import load_csv_data

# Add src to path to import implementations.
# This call has to run before the imports below, which come from src/
# (presumably they are not importable until the path has been extended).
add_src_to_path()

# Import functions from src/
from cross_validation import get_best_degree
from print_utils import print_shapes, print_shapes_by_jet
from split_data import split_by_jet, split_train_test

In [4]:
# One-time environment setup. Both helpers are imported from the `path`
# module and take no arguments here.

# Extract archives if needed
# (presumably a no-op when already extracted -- TODO confirm in extract_archives)
extract_archives()

# Create output directory if needed
create_out_dir()

In [5]:
# Load the data: y, x and ids are read from the CSV at DATA_TRAIN_PATH,
# without sub-sampling (sub_sample=False).
# NOTE(review): label_b=0 presumably sets the numeric value used for the
# 'b' label -- confirm in load_csv_data.
# `ids` is not used by any of the cells visible in this part of the notebook.
y, x, ids = load_csv_data(DATA_TRAIN_PATH, sub_sample=False, label_b=0)
# Recorded output: y has shape (250000,), x has shape (250000, 30).
print_shapes(y, x)

Shape of y: (250000,)
Shape of x: (250000, 30)


In [6]:
# Split data into a training part and a held-out test part.
# Mind the ordering: arguments are passed as (y, x) but the results come
# back as (x_tr, x_te, y_tr, y_te).
# The recorded output shows 200000 training rows and 50000 test rows.
# NOTE(review): no ratio or seed is passed, so both are whatever
# split_train_test defaults to -- confirm the split is reproducible.
x_tr, x_te, y_tr, y_te = split_train_test(y, x)
print('Training data:')
print_shapes(y_tr, x_tr)
print('Test data:')
print_shapes(y_te, x_te)

Training data:
Shape of y: (200000,)
Shape of x: (200000, 30)
Test data:
Shape of y: (50000,)
Shape of x: (50000, 30)


In [7]:
# Split train data by jet. The results are per-jet collections of y and x
# (four jets, numbered 0-3, in the recorded output).
# The recorded shapes show fewer x columns per jet (18, 22, 29, 29) than the
# 30 columns of x_tr, so split_by_jet evidently also drops columns per jet;
# see its implementation for which ones and why.
y_tr_by_jet, x_tr_by_jet = split_by_jet(y_tr, x_tr)
print_shapes_by_jet(y_tr_by_jet, x_tr_by_jet)

Jet = 0:
Shape of y: (79923,)
Shape of x: (79923, 18)
Jet = 1:
Shape of y: (61985,)
Shape of x: (61985, 22)
Jet = 2:
Shape of y: (40333,)
Shape of x: (40333, 29)
Jet = 3:
Shape of y: (17759,)
Shape of x: (17759, 29)


In [8]:
# Split test data by jet, using the same helper as for the training data.
# The recorded output shows the same per-jet column counts (18, 22, 29, 29)
# as the training split.
# The test subsets are not used by the cross-validation cell visible below.
y_te_by_jet, x_te_by_jet = split_by_jet(y_te, x_te)
print_shapes_by_jet(y_te_by_jet, x_te_by_jet)

Jet = 0:
Shape of y: (19990,)
Shape of x: (19990, 18)
Jet = 1:
Shape of y: (15559,)
Shape of x: (15559, 22)
Jet = 2:
Shape of y: (10046,)
Shape of x: (10046, 29)
Jet = 3:
Shape of y: (4405,)
Shape of x: (4405, 29)


In [10]:
# Cross validation for degree by jet.
# For every jet subset of the training data, get_best_degree is run over the
# candidate degrees and its result is stored in best_degrees (index = jet).
gamma = 1e-10            # passed unchanged to get_best_degree for every jet
degrees = range(8, 10)   # candidate degrees: 8 and 9
best_degrees = []        # best_degrees[i] is the result for jet i
for i, (x_tr_jet, y_tr_jet) in enumerate(zip(x_tr_by_jet, y_tr_by_jet)):
    print(f'Jet = {i}:')
    # `degree` stays bound after the loop (last jet), as in the original cell.
    degree = get_best_degree(y_tr_jet, x_tr_jet, gamma, degrees, verbose=True)
    best_degrees.append(degree)
# NOTE(review): the recorded output reports "Best degree: 9" for every jet
# although degree 8 has the higher printed accuracy each time -- verify the
# selection criterion inside get_best_degree.

Jet = 0:
[Start] Cross validation
[CP] Degree = 8, Accuracy = 0.726
[CP] Degree = 9, Accuracy = 0.722
[End] Cross validation (time:  155.25 s.)
[Results] Best degree: 9
Jet = 1:
[Start] Cross validation
[CP] Degree = 8, Accuracy = 0.668
[CP] Degree = 9, Accuracy = 0.664
[End] Cross validation (time:  122.60 s.)
[Results] Best degree: 9
Jet = 2:
[Start] Cross validation
[CP] Degree = 8, Accuracy = 0.707
[CP] Degree = 9, Accuracy = 0.705
[End] Cross validation (time:  108.40 s.)
[Results] Best degree: 9
Jet = 3:
[Start] Cross validation
[CP] Degree = 8, Accuracy = 0.665
[CP] Degree = 9, Accuracy = 0.664
[End] Cross validation (time:  51.11 s.)
[Results] Best degree: 9
