In [1]:
import numpy as np
import shutil
import os

import sys
sys.path.append("../utils/")
sys.path.append("../calculating_predictions")
sys.path.append("../evaluation/")
from calculate_predictions import calculate_predictions
from evaluate import evaluate
from run_evaluation import *
from argparse import Namespace
from catboost import CatBoostRegressor, CatBoostClassifier

%reload_ext autoreload
%autoreload 2

In [2]:
def get_baseline_metric(need_validation):
    """Run the CatBoost regression baseline once and return its metric.

    The function builds the argument namespace expected by
    ``calculate_predictions``, wipes and recreates the prediction folder
    ("res") and the metric folder ("metrics") relative to the current working
    directory, trains on days 0-2, predicts day 6, runs ``evaluate`` with
    ``argmax_positions`` and finally reads the metric back from
    ``<metric_folder>/metrics.txt``.

    Note that any previous content of the two folders is deleted on every call.

    Parameters
    ----------
    need_validation : bool
        When truthy, day 3 is passed as ``validation_day``; otherwise
        ``validation_day`` is ``None``.

    Returns
    -------
    float
        The first whitespace-separated token on the first line of the metrics
        file, converted to ``float``.

    Raises
    ------
    ValueError
        If the metrics file is empty, its first line is blank, or the first
        token is not a valid float.
    """
    args = Namespace()
    args.verbose = False
    args.first_feature = 0
    args.last_feature = -1
    args.data_folder = "../../../data/best_features_days_data/"
    args.out_folder = "res"
    args.type = "regression"
    args.metric_folder = "metrics"
    args.model_constructor = lambda verbose: CatBoostRegressor(verbose=verbose)
    args.additional_features = None
    args.train_days = [0, 1, 2]
    args.validation_day = 3 if need_validation else None
    args.test_day = 6

    def clear(folder):
        # Start every run from an empty folder so stale predictions or
        # metrics from a previous run cannot be picked up.
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.makedirs(folder)

    clear(args.out_folder)
    clear(args.metric_folder)

    calculate_predictions(args)
    evaluate(args.out_folder, args.data_folder, args.metric_folder, argmax_positions)

    # Read from the same folder that was handed to `evaluate` instead of a
    # second hard-coded copy of the path, so the two can never diverge.
    metrics_path = os.path.join(args.metric_folder, "metrics.txt")
    with open(metrics_path) as handler:
        tokens = handler.readline().split()

    # Fail with a clear message rather than leaking StopIteration/IndexError
    # when the evaluation step produced no metric.
    if not tokens:
        raise ValueError(
            "metrics file {!r} has no metric on its first line".format(metrics_path)
        )
    return float(tokens[0])

In [5]:
# Report the runs made without a validation day: raw values, mean, std.
# NOTE(review): `no_val_metrics` is built by the cell labelled `In [3]`, which
# appears further down in this file.
for summary in (no_val_metrics, np.mean(no_val_metrics), np.std(no_val_metrics)):
    print(summary)

[0.622552224779459, 0.6084019246234298, 0.6001719741512457, 0.6035806440807945, 0.6410533974448633, 0.635420558829577, 0.6387686172565953, 0.6080499032464564, 0.6055790911555483, 0.6386901600445535, 0.6724322107183247, 0.619606495315432, 0.6275450725643599, 0.6082089581695699, 0.6151517057685155]
0.6230141958765816
0.018920250879526538


In [6]:
# Report the runs made with a validation day: raw values, mean, std.
# NOTE(review): `val_metrics` is built by the cell labelled `In [4]`, which
# appears further down in this file.
for summary in (val_metrics, np.mean(val_metrics), np.std(val_metrics)):
    print(summary)

[0.6361418544809591, 0.6109498263235139, 0.5756743203725229, 0.6606114715258377, 0.6322742158854051, 0.6070590707340403, 0.629777940796023, 0.6299392332497199, 0.5950429452486984, 0.6074039085046664, 0.610024716177584, 0.6446146106102801, 0.6186540187640717, 0.6250432260994494, 0.6128655233957128]
0.6197384588112324
0.02005503270737271


In [3]:
%%time
# Collect one baseline metric per run over 15 runs with no validation day.
no_val_metrics = [get_baseline_metric(need_validation=False) for _ in range(15)]

"2018-06-18 01:08:29.839216": preprocesing started
"2018-06-18 01:08:39.337453": train features shape: (67567, 80)
"2018-06-18 01:08:39.337559": preprocesing finished
"2018-06-18 01:08:39.337660": start training on days [0, 1, 2]
"2018-06-18 01:08:39.337676": using fit without validation
"2018-06-18 01:09:02.477044": built 1000 trees
"2018-06-18 01:09:02.477153": start predicting on day 6
"2018-06-18 01:09:18.271295": saveing results
"2018-06-18 01:09:18.279102": results saved
"2018-06-18 01:09:18.282971": predictions_filenames: ['res/train_0_1_2_test_6.npy']
"2018-06-18 01:09:18.283016": will evaluate days: [6]
"2018-06-18 01:09:18.283149": predict on file "res/train_0_1_2_test_6.npy"
"2018-06-18 01:09:20.238268": calculating metric
"2018-06-18 01:09:20.239491": preprocesing started
"2018-06-18 01:09:29.180509": train features shape: (67567, 80)
"2018-06-18 01:09:29.180599": preprocesing finished
"2018-06-18 01:09:29.180712": start training on days [0, 1, 2]
"2018-06-18 01:09:29.18072

"2018-06-18 01:18:10.761389": train features shape: (67567, 80)
"2018-06-18 01:18:10.761494": preprocesing finished
"2018-06-18 01:18:10.761663": start training on days [0, 1, 2]
"2018-06-18 01:18:10.761678": using fit without validation
"2018-06-18 01:18:33.675940": built 1000 trees
"2018-06-18 01:18:33.676015": start predicting on day 6
"2018-06-18 01:18:48.619415": saveing results
"2018-06-18 01:18:48.627822": results saved
"2018-06-18 01:18:48.631695": predictions_filenames: ['res/train_0_1_2_test_6.npy']
"2018-06-18 01:18:48.631742": will evaluate days: [6]
"2018-06-18 01:18:48.631895": predict on file "res/train_0_1_2_test_6.npy"
"2018-06-18 01:18:50.624009": calculating metric
"2018-06-18 01:18:50.625427": preprocesing started
"2018-06-18 01:18:59.840004": train features shape: (67567, 80)
"2018-06-18 01:18:59.840098": preprocesing finished
"2018-06-18 01:18:59.840215": start training on days [0, 1, 2]
"2018-06-18 01:18:59.840231": using fit without validation
"2018-06-18 01:19:

In [4]:
%%time
# Collect one baseline metric per run over 15 runs that use a validation day.
val_metrics = [get_baseline_metric(need_validation=True) for _ in range(15)]

"2018-06-18 01:21:17.458705": preprocesing started
"2018-06-18 01:21:26.793120": train features shape: (67567, 80)
"2018-06-18 01:21:26.906248": preprocesing finished
"2018-06-18 01:21:26.906408": start training on days [0, 1, 2]
"2018-06-18 01:21:26.906427": using fit with validation
"2018-06-18 01:22:12.383705": built 1371 trees
"2018-06-18 01:22:12.383794": start predicting on day 6
"2018-06-18 01:22:27.375829": saveing results
"2018-06-18 01:22:27.383983": results saved
"2018-06-18 01:22:27.388465": predictions_filenames: ['res/train_0_1_2_test_6.npy']
"2018-06-18 01:22:27.388521": will evaluate days: [6]
"2018-06-18 01:22:27.388686": predict on file "res/train_0_1_2_test_6.npy"
"2018-06-18 01:22:29.346735": calculating metric
"2018-06-18 01:22:29.347693": preprocesing started
"2018-06-18 01:22:38.554234": train features shape: (67567, 80)
"2018-06-18 01:22:38.668936": preprocesing finished
"2018-06-18 01:22:38.668998": start training on days [0, 1, 2]
"2018-06-18 01:22:38.669053":

"2018-06-18 01:34:40.689764": train features shape: (67567, 80)
"2018-06-18 01:34:40.801205": preprocesing finished
"2018-06-18 01:34:40.801316": start training on days [0, 1, 2]
"2018-06-18 01:34:40.801341": using fit with validation
"2018-06-18 01:35:26.396704": built 1475 trees
"2018-06-18 01:35:26.396776": start predicting on day 6
"2018-06-18 01:35:42.186739": saveing results
"2018-06-18 01:35:42.195478": results saved
"2018-06-18 01:35:42.200174": predictions_filenames: ['res/train_0_1_2_test_6.npy']
"2018-06-18 01:35:42.200228": will evaluate days: [6]
"2018-06-18 01:35:42.200311": predict on file "res/train_0_1_2_test_6.npy"
"2018-06-18 01:35:44.165225": calculating metric
"2018-06-18 01:35:44.166482": preprocesing started
"2018-06-18 01:35:53.281604": train features shape: (67567, 80)
"2018-06-18 01:35:53.389248": preprocesing finished
"2018-06-18 01:35:53.389399": start training on days [0, 1, 2]
"2018-06-18 01:35:53.389416": using fit with validation
"2018-06-18 01:36:39.626