In [1]:
import numpy as np
import shutil
import os

import sys
sys.path.append("../utils/")
sys.path.append("../calculating_predictions")
sys.path.append("../evaluation/")
from calculate_predictions import calculate_predictions
from evaluate import evaluate
from run_evaluation import *
from argparse import Namespace
from catboost import CatBoostRegressor, CatBoostClassifier

%reload_ext autoreload
%autoreload 2

In [2]:
def clear(folder):
    """Reset ``folder`` to an empty directory.

    Any existing directory tree at ``folder`` is removed and the directory is
    then re-created. Missing parent directories are created too, so nested
    paths such as ``regression_predictions/0`` work even when the parent
    folder does not exist yet.

    Args:
        folder: Path of the directory to (re)create empty.
    """
    if os.path.exists(folder):
        shutil.rmtree(folder)
    # makedirs rather than mkdir: mkdir raises FileNotFoundError when the
    # parent directory is missing.
    os.makedirs(folder)

def get_regression_stacking_metric(i):
    """Run one regression stacking experiment and return its metric.

    Builds the argument namespace for ``calculate_predictions``, adding every
    file found in ``features_models_with_pos/<i>`` as an additional feature
    for each train and test day. Predictions are written to
    ``regression_predictions/<i>``, then ``evaluate`` is run and the metric is
    read back from ``metrics.txt`` inside the metric folder.

    Both the output folder and the metric folder are wiped before the run.

    Args:
        i: Index of the run; selects the ``features_models_with_pos/<i>``
            input folder and the ``regression_predictions/<i>`` output folder.

    Returns:
        float: The first whitespace-separated token on the first line of
        ``metrics.txt`` (presumably written by ``evaluate`` -- confirm there).
    """
    args = Namespace()
    args.verbose = True
    args.first_feature = 0
    args.last_feature = -1
    args.data_folder = "../../../data/best_features_days_data/"
    out_folder = "regression_predictions"
    args.type = "regression"
    args.need_position_feature = True
    args.labels_to_substruct = None
    args.metric_folder = "metrics"
    # The ``verbose`` argument is ignored here: CatBoost output is always
    # silenced, regardless of what the caller passes.
    args.model_constructor = lambda verbose: CatBoostRegressor(verbose=False)
    args.validation_day = None
    args.train_days = [3]
    args.test_days = [6]

    # List the feature folder once and sort it: os.listdir returns entries in
    # arbitrary order, and the feature order should be reproducible.
    features_folder = os.path.join("features_models_with_pos", str(i))
    feature_paths = [
        os.path.join(features_folder, filename)
        for filename in sorted(os.listdir(features_folder))
    ]
    # Every train/test day gets its own copy of the same list of files.
    args.additional_features = {
        day: list(feature_paths)
        for day in args.train_days + args.test_days
    }

    args.out_folder = os.path.join(out_folder, str(i))

    clear(args.out_folder)
    clear(args.metric_folder)

    calculate_predictions(args)
    evaluate(args.out_folder, args.data_folder, args.metric_folder, argmax_positions)

    # Read from the configured metric folder instead of a hard-coded path so
    # the two cannot drift apart.
    with open(os.path.join(args.metric_folder, "metrics.txt")) as handler:
        return float(next(handler).strip().split()[0])

In [3]:
%%time
metrics = [get_regression_stacking_metric(i) for i in range(15)]

"2018-06-21 12:48:14.764440": preprocesing started
"2018-06-21 12:48:15.523908":     base features shape: (20203, 80)
"2018-06-21 12:48:16.569804":     28 features added
"2018-06-21 12:48:16.569911":     result shape: (20203, 108)
"2018-06-21 12:48:16.573656": train features shape: (20203, 108)
"2018-06-21 12:48:17.153490": preprocesing finished
"2018-06-21 12:48:17.153576": start training on days [3]
"2018-06-21 12:48:17.153690": using fit without validation
"2018-06-21 12:48:39.485175": built 1000 trees
"2018-06-21 12:48:41.513654":     base features shape: (247423, 80)
"2018-06-21 12:48:46.465870":     28 features added
"2018-06-21 12:48:46.465957":     result shape: (247423, 108)
"2018-06-21 12:48:46.466941": days to predict: [6]
"2018-06-21 12:48:46.466976": features shape: (247423, 108)
"2018-06-21 12:48:46.467244": start predicting on day 6
"2018-06-21 12:48:48.255299": predictions shape: (22493, 11)
"2018-06-21 12:48:48.255488": saveing results to regression_predictions/0/train

"2018-06-21 12:55:23.600901": preprocesing started
"2018-06-21 12:55:24.930192":     base features shape: (20203, 80)
"2018-06-21 12:55:25.720880":     28 features added
"2018-06-21 12:55:25.721062":     result shape: (20203, 108)
"2018-06-21 12:55:25.724989": train features shape: (20203, 108)
"2018-06-21 12:55:26.817452": preprocesing finished
"2018-06-21 12:55:26.817574": start training on days [3]
"2018-06-21 12:55:26.819856": using fit without validation
"2018-06-21 12:56:37.704065": built 1000 trees
"2018-06-21 12:56:41.274871":     base features shape: (247423, 80)
"2018-06-21 12:56:48.130634":     28 features added
"2018-06-21 12:56:48.130759":     result shape: (247423, 108)
"2018-06-21 12:56:48.139279": days to predict: [6]
"2018-06-21 12:56:48.139452": features shape: (247423, 108)
"2018-06-21 12:56:48.139491": start predicting on day 6
"2018-06-21 12:56:51.807298": predictions shape: (22493, 11)
"2018-06-21 12:56:51.807682": saveing results to regression_predictions/8/train

In [4]:
# Mean metric over all runs and the standard error of that mean.
# The sample size comes from len(metrics) rather than a hard-coded 15, so the
# estimate stays correct if the number of runs changes.
np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics))

(0.6252553925171769, 0.004246191562662895)

In [5]:
# Show the individual per-run metric values (one entry per run index).
metrics

[0.5987401087500452,
 0.6000036806370098,
 0.6203009932308671,
 0.628834717659364,
 0.6198430810073711,
 0.6344424637022905,
 0.6151145886251218,
 0.6283846820140015,
 0.65703237714631,
 0.6199507936402507,
 0.6540612834895604,
 0.6070102348604651,
 0.623445917715271,
 0.6323140458465449,
 0.6393519194331805]