In [5]:
import numpy as np
import shutil
import os

import sys
# Make the project's helper directories importable. These entries are relative,
# so they resolve against the notebook's current working directory.
sys.path.append("../utils/")
sys.path.append("../calculating_predictions")
sys.path.append("../evaluation/")
from calculate_predictions import calculate_predictions
from evaluate import evaluate
# NOTE(review): `argmax_positions`, used later in this notebook, is not imported
# by name anywhere in this cell, so it presumably arrives via this star import — confirm.
from run_evaluation import *
from argparse import Namespace
from catboost import CatBoostRegressor, CatBoostClassifier

# Pick up edits to the imported project modules without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [6]:
def get_baseline_metric():
    """Run the baseline pipeline once and return the resulting metric.

    Builds the argument namespace passed to ``calculate_predictions``
    (configured with ``train_days=[3]`` and ``test_days=[6]``), wipes and
    recreates the output and metric folders, runs prediction followed by
    ``evaluate``, and finally reads the metric back from ``metrics.txt``
    inside ``args.metric_folder``.

    Returns:
        float: the first whitespace-separated token on the first line of
        the metrics file.

    Raises:
        StopIteration: if the metrics file is empty.
        IndexError: if the first line of the metrics file is blank.
        ValueError: if the first token cannot be parsed as a float.
    """
    args = Namespace()
    args.verbose = True
    args.first_feature = 0
    args.last_feature = -1
    args.data_folder = "../../../data/best_features_days_data/"
    args.out_folder = "res"
    args.type = "regression"
    args.need_position_feature = True
    args.labels_to_substruct = None
    args.metric_folder = "metrics"
    # The constructor receives a `verbose` flag but ignores it: CatBoost's own
    # logging is always switched off here.
    args.model_constructor = lambda verbose: CatBoostRegressor(verbose=False)
    args.additional_features = None
    args.add_base_features = True
    args.train_days = [3]
    args.validation_day = None
    args.test_days = [6]

    def clear(folder):
        """Delete ``folder`` (if it exists) and recreate it empty."""
        if os.path.exists(folder):
            shutil.rmtree(folder)
        # makedirs also handles nested paths, unlike os.mkdir.
        os.makedirs(folder)

    # Start from empty folders so results of a previous run cannot leak in.
    clear(args.out_folder)
    clear(args.metric_folder)

    calculate_predictions(args)
    evaluate(args.out_folder, args.data_folder, args.metric_folder, argmax_positions)

    # Derive the path from the configured folder instead of repeating the
    # literal "metrics/..." so the two cannot drift apart.
    metrics_path = os.path.join(args.metric_folder, "metrics.txt")
    with open(metrics_path) as handler:
        first_line = next(handler)
    return float(first_line.strip().split()[0])

In [7]:
%%time
metrics = [get_baseline_metric() for i in range(15)]

"2018-06-21 21:18:34.388884": preprocesing started
"2018-06-21 21:18:35.172799":     base features shape: (20203, 81)
"2018-06-21 21:18:35.175488":     0 features added
"2018-06-21 21:18:35.175541":     base features not included
"2018-06-21 21:18:35.175634":     result shape: (20203, 81)
"2018-06-21 21:18:35.176694": train features shape: (20203, 81)
"2018-06-21 21:18:35.790138": preprocesing finished
"2018-06-21 21:18:35.790215": start training on days [3]
"2018-06-21 21:18:35.790368": using fit without validation
"2018-06-21 21:18:58.192787": built 1000 trees
"2018-06-21 21:19:00.448281":     base features shape: (247423, 81)
"2018-06-21 21:19:00.513481":     0 features added
"2018-06-21 21:19:00.513567":     base features not included
"2018-06-21 21:19:00.513613":     result shape: (247423, 81)
"2018-06-21 21:19:00.517993": days to predict: [6]
"2018-06-21 21:19:00.518165": features shape: (247423, 81)
"2018-06-21 21:19:00.518238": start predicting on day 6
"2018-06-21 21:19:02.011

"2018-06-21 21:22:14.254833": preprocesing finished
"2018-06-21 21:22:14.254916": start training on days [3]
"2018-06-21 21:22:14.254935": using fit without validation
"2018-06-21 21:22:38.006000": built 1000 trees
"2018-06-21 21:22:40.547003":     base features shape: (247423, 81)
"2018-06-21 21:22:40.625429":     0 features added
"2018-06-21 21:22:40.625507":     base features not included
"2018-06-21 21:22:40.625534":     result shape: (247423, 81)
"2018-06-21 21:22:40.630376": days to predict: [6]
"2018-06-21 21:22:40.630438": features shape: (247423, 81)
"2018-06-21 21:22:40.630556": start predicting on day 6
"2018-06-21 21:22:42.303068": predictions shape: (22493, 11)
"2018-06-21 21:22:42.303136": saveing results to res/train_3_test_6
"2018-06-21 21:22:42.304968": results saved
"2018-06-21 21:22:44.474125": preprocesing started
"2018-06-21 21:22:45.232185":     base features shape: (20203, 81)
"2018-06-21 21:22:45.233368":     0 features added
"2018-06-21 21:22:45.233416":     ba

"2018-06-21 21:26:14.518721": built 1000 trees
"2018-06-21 21:26:16.794976":     base features shape: (247423, 81)
"2018-06-21 21:26:16.861285":     0 features added
"2018-06-21 21:26:16.861372":     base features not included
"2018-06-21 21:26:16.861498":     result shape: (247423, 81)
"2018-06-21 21:26:16.868005": days to predict: [6]
"2018-06-21 21:26:16.868137": features shape: (247423, 81)
"2018-06-21 21:26:16.868198": start predicting on day 6
"2018-06-21 21:26:18.398886": predictions shape: (22493, 11)
"2018-06-21 21:26:18.399012": saveing results to res/train_3_test_6
"2018-06-21 21:26:18.401212": results saved
CPU times: user 37min 8s, sys: 1min 18s, total: 38min 27s
Wall time: 7min 46s


In [8]:
# Summary of the repeated baseline runs: raw values, mean, and standard error of the mean.
n_runs = len(metrics)
print(metrics)
print(np.mean(metrics))
# Divide by the actual number of runs rather than a hard-coded 15, so the
# figure stays correct if the repeat count changes.
# np.std defaults to ddof=0 (population standard deviation).
print(np.std(metrics) / np.sqrt(n_runs))

[0.6151258954509214, 0.6214447105349333, 0.605708517387299, 0.5921477096082469, 0.6395085730852217, 0.6265452837596254, 0.6209911104586073, 0.6239194625397643, 0.5973835119448839, 0.6369614547500089, 0.6202054766702059, 0.6170991908440882, 0.6251285396260383, 0.6055082962994239, 0.6162554566862585]
0.6175955459763685
0.003277448665366997
