In [1]:
import gc
# Make sure automatic garbage collection is switched on for this kernel.
gc.enable()
# Force a full collection now; as the last expression of the cell its return
# value is what the notebook shows as the cell output.
gc.collect()

35

In [2]:
import sys, os
import pandas as pd
import numpy as np

from sklearn.linear_model import Lasso, LassoCV
from sklearn.feature_selection import RFECV
from sklearn import preprocessing as pp
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV, ShuffleSplit, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, roc_auc_score, r2_score, make_scorer
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

import warnings


def warn(*args, **kwargs):
    """No-op stand-in for ``warnings.warn``: accepts any arguments, does nothing."""
    return None


# Rebind ``warnings.warn`` to the no-op above, so code that looks the function
# up through the ``warnings`` module from here on emits nothing.
warnings.warn = warn

# some heuristic settings
rfe_min_features = 11   # RFECV: lower bound on the number of features kept
rfe_step = 15           # RFECV: `step` passed to the recursive elimination
rfe_cv = 20             # RFECV: number of cross-validation folds
sss_n_splits = 20       # StratifiedShuffleSplit: number of splits (one candidate model each)
sss_test_size = 0.35    # StratifiedShuffleSplit: fraction of rows held out for validation
noise_std = 0.01        # standard deviation of the Gaussian noise added to the training matrix
r2_threshold = 0.185    # a model's validation R^2 must exceed this to join the ensemble
random_seed = 213       # shared seed for numpy and the scikit-learn objects
gridsearchCV = 20       # number of CV folds used by the hyper-parameter searches

np.random.seed(random_seed)

# import data
train = pd.read_csv("/Users/JoonH/dont-overfit-ii/train.csv")
train_y = train['target']
train_X = train.drop(['id','target'], axis=1).values

test = pd.read_csv("/Users/JoonH/dont-overfit-ii/test.csv")
test = test.drop(['id'], axis=1).values

# scale using RobustScaler
# fitting scaler on full data outperforms fitting on test_X only (+0.006 kaggle score)
# The scaler is fitted on train and test rows stacked together; remember how
# many rows belong to train so the split below does not rely on a magic number.
n_train = train_X.shape[0]
data = pp.RobustScaler().fit_transform(np.concatenate((train_X, test), axis=0))
train_X = data[:n_train]
test = data[n_train:]

# add a bit of noise to train_X to reduce overfitting
train_X += np.random.normal(0, noise_std, train_X.shape)


In [185]:
#possible idea: in place shuffle augmentation
#https://www.kaggle.com/jiweiliu/fast-inplace-shuffle-for-augmentation

In [9]:

# define a roc_auc metric that is robust to folds where the AUC is undefined
# (e.g. only one class present in the true labels)
def scoring_roc_auc(y, y_pred):
    """Return the ROC AUC of ``y_pred`` against ``y``, or 0.5 when it is undefined.

    Parameters
    ----------
    y : array-like
        True binary labels.
    y_pred : array-like
        Predicted scores (here: continuous outputs of the regressor).

    Returns
    -------
    float
        ``roc_auc_score(y, y_pred)``, or the chance-level value 0.5 when the
        metric raises ``ValueError`` or evaluates to NaN.
    """
    try:
        score = roc_auc_score(y, y_pred)
    except ValueError:
        # Only the metric's own "cannot compute" error is treated as chance
        # level; any other exception is a genuine bug and must surface instead
        # of being silently turned into a 0.5 score.
        return 0.5
    # Defensive: should the metric report an undefined AUC as NaN rather than
    # raising, keep the same chance-level fallback so CV rankings stay usable.
    return 0.5 if np.isnan(score) else score

robust_roc_auc = make_scorer(scoring_roc_auc)


def build_and_train_model(mtype, train_X, train_y):
    """Train one Lasso model per stratified shuffle split and collect test predictions.

    For every split the function
      1. fits the RFECV feature selector on the training part,
      2. runs a randomized hyper-parameter search on the selected features,
      3. scores the best estimator on the held-out part, and
      4. keeps that estimator's prediction for the module-level ``test`` matrix
         only if its validation R^2 exceeds the module-level ``r2_threshold``.

    Besides its arguments the function reads these module-level names:
    ``test``, ``random_seed``, ``rfe_min_features``, ``rfe_step``, ``rfe_cv``,
    ``sss_n_splits``, ``sss_test_size``, ``gridsearchCV``, ``r2_threshold`` and
    ``robust_roc_auc``. A progress table is printed as a side effect.

    Parameters
    ----------
    mtype : str
        Model family; only ``'lasso'`` is implemented.
    train_X : array-like
        Training feature matrix (rows are samples).
    train_y : array-like or pandas.Series
        Training targets, one per row of ``train_X``.

    Returns
    -------
    pandas.DataFrame
        One column per accepted model holding its predictions for ``test``;
        an empty DataFrame when no model passed the threshold.

    Raises
    ------
    ValueError
        If ``mtype`` is not ``'lasso'``.
    """
    if mtype != 'lasso':
        # Without this guard an unknown mtype only failed later with a NameError.
        raise ValueError("Unsupported model type {!r}; only 'lasso' is implemented.".format(mtype))

    model = Lasso(alpha=0.031, tol=0.01, random_state=random_seed, selection='random')
    param_grid = {
        'alpha' :  [0.019, 0.02, 0.021, 0.022, 0.023, 0.024, 0.025, 0.026, 0.027, 0.029, 0.031],
        'tol'   :  [0.001, 0.0011, 0.0012, 0.0013, 0.0014, 0.0015, 0.0016, 0.0017],
        'max_iter':[250, 500, 750, 1000, 1250, 1500, 1750, 2000]
    }

    # define recursive elimination feature selector
    feature_selector = RFECV(model, min_features_to_select=rfe_min_features, scoring=robust_roc_auc, step=rfe_step, verbose=0, cv=rfe_cv, n_jobs=-1)

    # Work on plain arrays so the split indices are applied positionally, no
    # matter which index a pandas input happens to carry.
    train_X = np.asarray(train_X)
    train_y = np.asarray(train_y)

    print("counter | val_mse  |  val_mae  |  val_roc  |  val_cos  |  val_dist  |  val_r2    | feature_count ")
    print("-------------------------------------------------------------------------------------------------")

    # Predictions of the accepted models; concatenated once after the loop.
    accepted = []

    # split training data to build one model on each training-data subset
    splitter = StratifiedShuffleSplit(n_splits=sss_n_splits, test_size=sss_test_size, random_state=random_seed)
    for counter, (train_index, val_index) in enumerate(splitter.split(train_X, train_y)):
        X, val_X = train_X[train_index], train_X[val_index]
        y, val_y = train_y[train_index], train_y[val_index]

        # get the best features for this data set
        feature_selector.fit(X, y)
        # remove irrelevant features from X, val_X and test
        X_important_features        = feature_selector.transform(X)
        val_X_important_features    = feature_selector.transform(val_X)
        test_important_features     = feature_selector.transform(test)

        # run a randomized search to find good Lasso parameters for this subset
        # of training data and subset of features; seeded so that reruns sample
        # the same candidates
        grid_search = RandomizedSearchCV(feature_selector.estimator_, param_distributions=param_grid, verbose=0, n_jobs=-1, scoring=robust_roc_auc, cv=gridsearchCV, random_state=random_seed)
        grid_search.fit(X_important_features, y)
        print(grid_search.best_params_ )
        best_model = grid_search.best_estimator_

        # score our fitted model on validation data
        val_y_pred = best_model.predict(val_X_important_features)
        val_mse = mean_squared_error(val_y, val_y_pred)
        val_mae = mean_absolute_error(val_y, val_y_pred)
        val_roc = roc_auc_score(val_y, val_y_pred)
        val_cos = cosine_similarity(val_y.reshape(1, -1), val_y_pred.reshape(1, -1))[0][0]
        val_dst = euclidean_distances(val_y.reshape(1, -1), val_y_pred.reshape(1, -1))[0][0]
        val_r2  = r2_score(val_y, val_y_pred)

        # if model did well on validation, save its prediction on test data, using only important features
        # r2_threshold (0.185) is a heuristic threshold for r2 error
        # you can use any other metric/metric combination that works for you
        if val_r2 > r2_threshold:
            message = '<-- OK'
            accepted.append(pd.DataFrame(best_model.predict(test_important_features)))
        else:
            message = '<-- skipping'

        print("{0:2}      | {1:.4f}   |  {2:.4f}   |  {3:.4f}   |  {4:.4f}   |  {5:.4f}    |  {6:.4f}    |  {7:3}         {8}  ".format(counter, val_mse, val_mae, val_roc, val_cos, val_dst, val_r2, feature_selector.n_features_, message))

    # pd.concat refuses an empty list, so fall back to an empty frame.
    predictions = pd.concat(accepted, axis=1) if accepted else pd.DataFrame()

    print("-------------------------------------------------------------------------------------------------")
    print("{}/{} models passed validation threshold and will be ensembled.".format(len(predictions.columns), sss_n_splits))

    return predictions
# Train the ensemble, average the accepted models' test predictions row by row
# and write the result as a submission file whose index column is named 'id'.
predictions = build_and_train_model('lasso', train_X, train_y)
mean_pred = predictions.mean(axis=1).to_frame('target')
# shift the 0-based row positions by 250 to obtain the ids written to the file
mean_pred.index = mean_pred.index + 250
mean_pred.to_csv('/Users/JoonH/DO2_lasso_kernel_submission1.csv', index=True, index_label='id')
#original approach saved without 1 suffix, 0.870 LB score
#mean_pred.to_csv('/Users/JoonH/DO2_lasso_kernel_submission.csv', index_label='id', index=True)


counter | val_mse  |  val_mae  |  val_roc  |  val_cos  |  val_dist  |  val_r2    | feature_count 
-------------------------------------------------------------------------------------------------
{'tol': 0.0013, 'max_iter': 500, 'alpha': 0.019}
 0      | 0.1970   |  0.3972   |  0.7394   |  0.8325   |  4.1641    |  0.1485    |  120         <-- skipping  
{'tol': 0.0015, 'max_iter': 1500, 'alpha': 0.019}
 1      | 0.1873   |  0.3825   |  0.7651   |  0.8405   |  4.0594    |  0.1908    |  210         <-- OK  
{'tol': 0.0014, 'max_iter': 1250, 'alpha': 0.019}
 2      | 0.1947   |  0.3830   |  0.7550   |  0.8364   |  4.1395    |  0.1585    |   11         <-- skipping  
{'tol': 0.0015, 'max_iter': 1500, 'alpha': 0.019}
 3      | 0.1917   |  0.3866   |  0.7695   |  0.8371   |  4.1074    |  0.1715    |   15         <-- skipping  
{'tol': 0.0017, 'max_iter': 250, 'alpha': 0.019}
 4      | 0.1965   |  0.3611   |  0.7561   |  0.8372   |  4.1588    |  0.1507    |   30         <-- skipping  
{'tol':

In [5]:

# define a roc_auc metric that is robust to folds where the AUC is undefined
# (e.g. only one class present in the true labels)
def scoring_roc_auc(y, y_pred):
    """Return the ROC AUC of ``y_pred`` against ``y``, or 0.5 when it is undefined.

    Parameters
    ----------
    y : array-like
        True binary labels.
    y_pred : array-like
        Predicted scores (here: continuous outputs of the regressor).

    Returns
    -------
    float
        ``roc_auc_score(y, y_pred)``, or the chance-level value 0.5 when the
        metric raises ``ValueError`` or evaluates to NaN.
    """
    try:
        score = roc_auc_score(y, y_pred)
    except ValueError:
        # Only the metric's own "cannot compute" error is treated as chance
        # level; any other exception is a genuine bug and must surface instead
        # of being silently turned into a 0.5 score.
        return 0.5
    # Defensive: should the metric report an undefined AUC as NaN rather than
    # raising, keep the same chance-level fallback so CV rankings stay usable.
    return 0.5 if np.isnan(score) else score

robust_roc_auc = make_scorer(scoring_roc_auc)


def build_and_train_model(mtype, train_X, train_y):
    """Train one Lasso model per stratified shuffle split and collect test predictions.

    Variant that tunes the Lasso before feature selection. For every split it
      1. runs an exhaustive grid search over ``param_grid`` on all features,
      2. fits an RFECV feature selector around the best estimator found,
      3. runs a randomized search over the same grid on the selected features,
      4. scores the resulting best estimator on the held-out part, and
      5. keeps its prediction for the module-level ``test`` matrix only if the
         validation R^2 exceeds the module-level ``r2_threshold``.

    Besides its arguments the function reads these module-level names:
    ``test``, ``random_seed``, ``rfe_min_features``, ``rfe_step``, ``rfe_cv``,
    ``sss_n_splits``, ``sss_test_size``, ``gridsearchCV``, ``r2_threshold`` and
    ``robust_roc_auc``. A progress table is printed as a side effect.

    Parameters
    ----------
    mtype : str
        Model family; only ``'lasso'`` is implemented.
    train_X : array-like
        Training feature matrix (rows are samples).
    train_y : array-like or pandas.Series
        Training targets, one per row of ``train_X``.

    Returns
    -------
    pandas.DataFrame
        One column per accepted model holding its predictions for ``test``;
        an empty DataFrame when no model passed the threshold.

    Raises
    ------
    ValueError
        If ``mtype`` is not ``'lasso'``.
    """
    if mtype != 'lasso':
        # Without this guard an unknown mtype only failed later with a NameError.
        raise ValueError("Unsupported model type {!r}; only 'lasso' is implemented.".format(mtype))

    model = Lasso(alpha=0.031, tol=0.01, random_state=random_seed, selection='random')
    param_grid = {
        'alpha' :  [0.019, 0.02, 0.021, 0.022, 0.023, 0.024, 0.025, 0.026, 0.027, 0.029, 0.031],
        'tol'   :  [0.001, 0.0011, 0.0012, 0.0013, 0.0014, 0.0015, 0.0016, 0.0017],
        'max_iter':[250, 500, 750, 1000, 1250, 1500, 1750, 2000]
    }

    # Work on plain arrays so the split indices are applied positionally, no
    # matter which index a pandas input happens to carry.
    train_X = np.asarray(train_X)
    train_y = np.asarray(train_y)

    print("counter | val_mse  |  val_mae  |  val_roc  |  val_cos  |  val_dist  |  val_r2    | feature_count ")
    print("-------------------------------------------------------------------------------------------------")

    # Predictions of the accepted models; concatenated once after the loop.
    accepted = []

    # split training data to build one model on each training-data subset
    splitter = StratifiedShuffleSplit(n_splits=sss_n_splits, test_size=sss_test_size, random_state=random_seed)
    for counter, (train_index, val_index) in enumerate(splitter.split(train_X, train_y)):
        X, val_X = train_X[train_index], train_X[val_index]
        y, val_y = train_y[train_index], train_y[val_index]

        # first pass: exhaustive grid search on all features to pick the
        # estimator that drives the feature elimination
        initial_search = GridSearchCV(model, param_grid=param_grid, verbose=0, n_jobs=-1, scoring=robust_roc_auc, cv=gridsearchCV)
        initial_search.fit(X, y)

        # define recursive elimination feature selector around that estimator
        feature_selector = RFECV(initial_search.best_estimator_, min_features_to_select=rfe_min_features, scoring=robust_roc_auc, step=rfe_step, verbose=0, cv=rfe_cv, n_jobs=-1)

        # get the best features for this data set
        feature_selector.fit(X, y)
        # remove irrelevant features from X, val_X and test
        X_important_features        = feature_selector.transform(X)
        val_X_important_features    = feature_selector.transform(val_X)
        test_important_features     = feature_selector.transform(test)

        # second pass: randomized search on the reduced feature set; seeded so
        # that reruns sample the same candidates
        grid_search = RandomizedSearchCV(feature_selector.estimator_, param_distributions=param_grid, verbose=0, n_jobs=-1, scoring=robust_roc_auc, cv=gridsearchCV, random_state=random_seed)
        grid_search.fit(X_important_features, y)
        print(grid_search.best_params_ )
        best_model = grid_search.best_estimator_

        # score our fitted model on validation data
        val_y_pred = best_model.predict(val_X_important_features)
        val_mse = mean_squared_error(val_y, val_y_pred)
        val_mae = mean_absolute_error(val_y, val_y_pred)
        val_roc = roc_auc_score(val_y, val_y_pred)
        val_cos = cosine_similarity(val_y.reshape(1, -1), val_y_pred.reshape(1, -1))[0][0]
        val_dst = euclidean_distances(val_y.reshape(1, -1), val_y_pred.reshape(1, -1))[0][0]
        val_r2  = r2_score(val_y, val_y_pred)

        # if model did well on validation, save its prediction on test data, using only important features
        # r2_threshold (0.185) is a heuristic threshold for r2 error
        # you can use any other metric/metric combination that works for you
        if val_r2 > r2_threshold:
            message = '<-- OK'
            accepted.append(pd.DataFrame(best_model.predict(test_important_features)))
        else:
            message = '<-- skipping'

        print("{0:2}      | {1:.4f}   |  {2:.4f}   |  {3:.4f}   |  {4:.4f}   |  {5:.4f}    |  {6:.4f}    |  {7:3}         {8}  ".format(counter, val_mse, val_mae, val_roc, val_cos, val_dst, val_r2, feature_selector.n_features_, message))

    # pd.concat refuses an empty list, so fall back to an empty frame.
    predictions = pd.concat(accepted, axis=1) if accepted else pd.DataFrame()

    print("-------------------------------------------------------------------------------------------------")
    print("{}/{} models passed validation threshold and will be ensembled.".format(len(predictions.columns), sss_n_splits))

    return predictions
# Train the ensemble, average the accepted models' test predictions row by row
# and write the result as a submission file whose index column is named 'id'.
predictions = build_and_train_model('lasso', train_X, train_y)
mean_pred = predictions.mean(axis=1).to_frame('target')
# shift the 0-based row positions by 250 to obtain the ids written to the file
mean_pred.index = mean_pred.index + 250
mean_pred.to_csv('/Users/JoonH/DO2_lasso_kernel_submission1.csv', index=True, index_label='id')

counter | val_mse  |  val_mae  |  val_roc  |  val_cos  |  val_dist  |  val_r2    | feature_count 
-------------------------------------------------------------------------------------------------
{'tol': 0.0017, 'max_iter': 1750, 'alpha': 0.019}
 0      | 0.2033   |  0.4033   |  0.7271   |  0.8272   |  4.2301    |  0.1213    |  105         <-- skipping  
{'tol': 0.0013, 'max_iter': 1000, 'alpha': 0.019}
 1      | 0.1869   |  0.3818   |  0.7640   |  0.8407   |  4.0559    |  0.1922    |   45         <-- OK  
{'tol': 0.0016, 'max_iter': 1750, 'alpha': 0.019}
 2      | 0.1975   |  0.3755   |  0.7545   |  0.8363   |  4.1688    |  0.1466    |   11         <-- skipping  
{'tol': 0.0013, 'max_iter': 500, 'alpha': 0.019}
 3      | 0.1684   |  0.3627   |  0.8281   |  0.8582   |  3.8498    |  0.2722    |   45         <-- OK  
{'tol': 0.0014, 'max_iter': 750, 'alpha': 0.019}
 4      | 0.1965   |  0.3611   |  0.7561   |  0.8372   |  4.1588    |  0.1507    |   30         <-- skipping  
{'tol': 0.001

In [80]:
# Idea: bootstrapping? Peek at the first averaged predictions before deciding.
mean_pred.head()

Unnamed: 0,target
250,0.755161
251,0.548979
252,0.613484
253,0.862792
254,0.557579


In [164]:
# Bootstrapping: pick the 5 rows with the highest and the 5 rows with the
# lowest averaged prediction.
top = mean_pred.sort_values(['target'], ascending=False)[:5]
# `predictions` ends up bound to the ascending-sorted frame.
predictions = mean_pred.sort_values(['target'], ascending=True)
btm = predictions[:5]

In [165]:
# Stack the highest- and lowest-scored rows into one frame (row-wise concat).
bootstrap = pd.concat((top, btm))

In [166]:
# Read the test CSV again, this time as a DataFrame indexed by its 'id' column.
test_df = pd.read_csv("/Users/JoonH/dont-overfit-ii/test.csv", index_col = 'id')
test_df.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
250,0.5,-1.033,-1.595,0.309,-0.714,0.502,0.535,-0.129,-0.687,1.291,...,-0.088,-2.628,-0.845,2.078,-0.277,2.132,0.609,-0.104,0.312,0.979
251,0.776,0.914,-0.494,1.347,-0.867,0.48,0.578,-0.313,0.203,1.356,...,-0.683,-0.066,0.025,0.606,-0.353,-1.133,-3.138,0.281,-0.625,-0.761
252,1.75,0.509,-0.057,0.835,-0.476,1.428,-0.701,-2.009,-1.378,0.167,...,-0.094,0.351,-0.607,-0.737,-0.031,0.701,0.976,0.135,-1.327,2.463
253,-0.556,-1.855,-0.682,0.578,1.592,0.512,-1.419,0.722,0.511,0.567,...,-0.336,-0.787,0.255,-0.031,-0.836,0.916,2.411,1.053,-1.601,-1.529
254,0.754,-0.245,1.173,-1.623,0.009,0.37,0.781,-1.763,-1.432,-0.93,...,2.184,-1.09,0.216,1.186,-0.143,0.322,-0.068,-0.156,-1.153,0.825


In [167]:
# Fetch the feature rows of the selected test samples. `bootstrap` is indexed
# by test id and so is `test_df`, so look the rows up by label instead of
# converting ids to positions with a hard-coded offset of 250.
bootstrap_features = test_df.loc[bootstrap.index]
bootstrap_features.head()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
19411,0.31,1.635,0.194,-1.526,0.943,0.003,-1.393,0.332,1.232,1.226,...,-0.266,0.202,-0.163,1.396,0.048,-1.877,-0.569,-0.285,-0.61,-0.042
18785,-0.557,-0.07,-1.466,1.205,-0.914,0.097,0.609,0.973,-1.483,0.849,...,1.002,-0.019,0.485,-0.723,0.923,1.825,0.673,1.927,-0.6,1.618
13610,0.941,-1.356,0.357,-0.075,-1.032,-0.096,-0.021,-0.123,-0.946,-0.032,...,2.809,0.227,1.232,-0.371,-1.706,-0.864,0.219,0.718,-0.906,-0.08
15265,0.308,1.972,0.921,-1.025,-0.124,-0.842,-0.857,-0.452,0.572,0.594,...,1.262,-0.093,0.007,0.142,0.891,0.32,-0.084,0.861,0.02,0.413
6192,-1.484,0.627,0.695,-1.296,-0.487,-0.412,-1.111,1.4,-1.533,1.364,...,-1.009,-1.045,-0.136,0.253,1.415,-0.121,0.476,-0.885,0.576,1.098


In [168]:
# Give the index the name 'id' (values unchanged).
bootstrap = bootstrap.rename_axis('id')

In [169]:
# Put the predicted target next to the matching feature columns (column-wise concat).
bootstrap_data = pd.concat((bootstrap, bootstrap_features), axis='columns')

In [170]:
def special_round(n):
    """Binarise a score: 1 if ``n`` is strictly greater than 0.5, otherwise 0."""
    return 1 if n > 0.5 else 0
    
# Turn the averaged scores into hard 0/1 labels (threshold 0.5), in row order.
bootstrap_data['target'] = list(map(special_round, bootstrap['target']))
bootstrap_data.head()

Unnamed: 0_level_0,target,0,1,2,3,4,5,6,7,8,...,290,291,292,293,294,295,296,297,298,299
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
19411,1,0.31,1.635,0.194,-1.526,0.943,0.003,-1.393,0.332,1.232,...,-0.266,0.202,-0.163,1.396,0.048,-1.877,-0.569,-0.285,-0.61,-0.042
18785,1,-0.557,-0.07,-1.466,1.205,-0.914,0.097,0.609,0.973,-1.483,...,1.002,-0.019,0.485,-0.723,0.923,1.825,0.673,1.927,-0.6,1.618
13610,1,0.941,-1.356,0.357,-0.075,-1.032,-0.096,-0.021,-0.123,-0.946,...,2.809,0.227,1.232,-0.371,-1.706,-0.864,0.219,0.718,-0.906,-0.08
15265,1,0.308,1.972,0.921,-1.025,-0.124,-0.842,-0.857,-0.452,0.572,...,1.262,-0.093,0.007,0.142,0.891,0.32,-0.084,0.861,0.02,0.413
6192,1,-1.484,0.627,0.695,-1.296,-0.487,-0.412,-1.111,1.4,-1.533,...,-1.009,-1.045,-0.136,0.253,1.415,-0.121,0.476,-0.885,0.576,1.098


In [171]:
# Throw away the id index and renumber the rows from 0.
# Note: this rebinds `bootstrap_data`, so the id labels are gone afterwards.
bootstrap_data = bootstrap_data.reset_index(drop = True)
bootstrap_data.head()

Unnamed: 0,target,0,1,2,3,4,5,6,7,8,...,290,291,292,293,294,295,296,297,298,299
0,1,0.31,1.635,0.194,-1.526,0.943,0.003,-1.393,0.332,1.232,...,-0.266,0.202,-0.163,1.396,0.048,-1.877,-0.569,-0.285,-0.61,-0.042
1,1,-0.557,-0.07,-1.466,1.205,-0.914,0.097,0.609,0.973,-1.483,...,1.002,-0.019,0.485,-0.723,0.923,1.825,0.673,1.927,-0.6,1.618
2,1,0.941,-1.356,0.357,-0.075,-1.032,-0.096,-0.021,-0.123,-0.946,...,2.809,0.227,1.232,-0.371,-1.706,-0.864,0.219,0.718,-0.906,-0.08
3,1,0.308,1.972,0.921,-1.025,-0.124,-0.842,-0.857,-0.452,0.572,...,1.262,-0.093,0.007,0.142,0.891,0.32,-0.084,0.861,0.02,0.413
4,1,-1.484,0.627,0.695,-1.296,-0.487,-0.412,-1.111,1.4,-1.533,...,-1.009,-1.045,-0.136,0.253,1.415,-0.121,0.476,-0.885,0.576,1.098


In [172]:
# Put the pseudo-labelled rows in front of the original training rows (minus
# the 'id' column) and renumber the combined rows from 0.
train_without_id = train.drop(['id'], axis=1)
comb_df = pd.concat([bootstrap_data, train_without_id], ignore_index=True)
comb_df.head()

Unnamed: 0,target,0,1,2,3,4,5,6,7,8,...,290,291,292,293,294,295,296,297,298,299
0,1.0,0.31,1.635,0.194,-1.526,0.943,0.003,-1.393,0.332,1.232,...,-0.266,0.202,-0.163,1.396,0.048,-1.877,-0.569,-0.285,-0.61,-0.042
1,1.0,-0.557,-0.07,-1.466,1.205,-0.914,0.097,0.609,0.973,-1.483,...,1.002,-0.019,0.485,-0.723,0.923,1.825,0.673,1.927,-0.6,1.618
2,1.0,0.941,-1.356,0.357,-0.075,-1.032,-0.096,-0.021,-0.123,-0.946,...,2.809,0.227,1.232,-0.371,-1.706,-0.864,0.219,0.718,-0.906,-0.08
3,1.0,0.308,1.972,0.921,-1.025,-0.124,-0.842,-0.857,-0.452,0.572,...,1.262,-0.093,0.007,0.142,0.891,0.32,-0.084,0.861,0.02,0.413
4,1.0,-1.484,0.627,0.695,-1.296,-0.487,-0.412,-1.111,1.4,-1.533,...,-1.009,-1.045,-0.136,0.253,1.415,-0.121,0.476,-0.885,0.576,1.098


In [173]:
# Separate the target column from the features; the test frame gets a plain
# 0-based index.
y_train = comb_df['target']
x_train = comb_df.drop(['target'], axis=1)
x_test = test_df.reset_index(drop=True)

In [174]:
# Convert both feature frames to numpy arrays.
x_train, x_test = np.array(x_train), np.array(x_test)

In [175]:
# Scale the augmented training rows and the test rows with one RobustScaler
# fitted on both, then split them again. The split point is taken from the
# actual number of training rows instead of a hard-coded 260.
n_boot_train = x_train.shape[0]
data_boot = pp.RobustScaler().fit_transform(np.concatenate((x_train, x_test), axis=0))
x_train = data_boot[:n_boot_train]
x_test = data_boot[n_boot_train:]



In [178]:
# Jitter the augmented training matrix in place with zero-mean Gaussian noise
# (std = noise_std) to reduce overfitting.
x_train += np.random.normal(loc=0, scale=noise_std, size=x_train.shape)

In [179]:
# Retrain on the augmented training set and write a second submission file.
# NOTE(review): build_and_train_model reads the module-level `test` matrix for
# its predictions, so the re-scaled `x_test` is not passed to it here — confirm
# whether that is intended.
r2_threshold = 0.185
predictions = build_and_train_model('lasso', x_train, y_train)
mean_pred = predictions.mean(axis=1).to_frame('target')
# shift the 0-based row positions by 250 to obtain the ids written to the file
mean_pred.index = mean_pred.index + 250
mean_pred.to_csv('/Users/JoonH/DO2_lasso_kernel_submission1_bootstrap.csv', index=True, index_label='id')

counter | val_mse  |  val_mae  |  val_roc  |  val_cos  |  val_dist  |  val_r2    | feature_count 
-------------------------------------------------------------------------------------------------
 0      | 0.1817   |  0.3825   |  0.7591   |  0.8455   |  4.0667    |  0.2137    |  285         <-- OK  
 1      | 0.1721   |  0.3737   |  0.8312   |  0.8549   |  3.9573    |  0.2554    |   45         <-- OK  
 2      | 0.2058   |  0.3816   |  0.7315   |  0.8268   |  4.3271    |  0.1098    |   11         <-- skipping  
 3      | 0.1833   |  0.3655   |  0.7978   |  0.8442   |  4.0838    |  0.2071    |   60         <-- OK  
 4      | 0.2285   |  0.4136   |  0.6411   |  0.8029   |  4.5597    |  0.0115    |   15         <-- skipping  
 5      | 0.1714   |  0.3721   |  0.8140   |  0.8554   |  3.9498    |  0.2583    |  240         <-- OK  
 6      | 0.2096   |  0.3917   |  0.7226   |  0.8193   |  4.3669    |  0.0933    |   30         <-- skipping  
 7      | 0.2093   |  0.3999   |  0.7074   |  0.821