In [1]:
import numpy as np
import shutil
import os

from argparse import Namespace
from catboost import CatBoostRegressor, CatBoostClassifier
from sklearn.svm import LinearSVR
from sklearn.linear_model import Lars, ElasticNet, Perceptron, SGDRegressor
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor

import sys
sys.path.append("../utils/")
sys.path.append("../calculating_predictions")
sys.path.append("../evaluation/")
from calculate_predictions import calculate_predictions
from evaluate import evaluate
from run_evaluation import *

FEATURES_NUMBER = 79

%reload_ext autoreload
%autoreload 2

In [2]:
def get_stacking_metric(i):
    args = Namespace()
    args.verbose = False
    args.first_feature = 0
    args.last_feature = -1
    args.data_folder = "../../../data/best_features_days_data/"
    args.out_folder = "substructed_target_prediction_with_stacked_features"
    args.model_constructor=lambda verbose: CatBoostRegressor(verbose=verbose)
    args.type = "regression"
    args.metric_folder = "metrics"
    args.labels_to_substruct = {
        day: os.path.join("targets_to_substruct", str(i), "train_2_test_{}.npy".format(day))
        for day in [3, 6]
    }
    args.additional_features = {
        day: [
            os.path.join("res", folder, "train_1_test_{}.npy".format(day))
            for folder in os.listdir("res")
            if folder.startswith(str(i))
        ] for day in [3, 6]
    }
    args.train_days = [3]
    args.validation_day = None
    args.test_days = [6]

    def clear(folder):
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.mkdir(folder)

    args.out_folder = os.path.join(args.out_folder, str(i))
    os.mkdir(args.out_folder)
    clear(args.metric_folder)    

    calculate_predictions(args)
    evaluate(args.out_folder, args.data_folder, args.metric_folder, argmax_positions)

    with open("metrics/metrics.txt") as handler:
        metric = float(next(handler).strip().split()[0])
        return metric

In [3]:
metrics = [get_stacking_metric(i) for i in range(16)]

"2018-06-19 01:14:11.132176": preprocesing started
"2018-06-19 01:14:12.999719": train features shape: (20203, 108)
"2018-06-19 01:14:13.601741": preprocesing finished
"2018-06-19 01:14:13.601831": start training on days [3]
"2018-06-19 01:14:13.601867": using fit without validation
"2018-06-19 01:14:35.874789": built 1000 trees
"2018-06-19 01:14:35.874900": start predicting on day 6
"2018-06-19 01:14:52.177352": saveing results
"2018-06-19 01:14:52.185749": results saved
"2018-06-19 01:14:52.189041": predictions_filenames: ['substructed_target_prediction_with_stacked_features/0/train_3_test_6.npy']
"2018-06-19 01:14:52.189113": will evaluate days: [6]
"2018-06-19 01:14:52.189311": predict on file "substructed_target_prediction_with_stacked_features/0/train_3_test_6.npy"
"2018-06-19 01:14:54.168673": calculating metric
"2018-06-19 01:14:54.171007": preprocesing started
"2018-06-19 01:14:57.661379": train features shape: (20203, 276)
"2018-06-19 01:14:58.262982": preprocesing finished
"

"2018-06-19 01:21:47.230162": calculating metric
"2018-06-19 01:21:47.231988": preprocesing started
"2018-06-19 01:21:49.092196": train features shape: (20203, 108)
"2018-06-19 01:21:49.674397": preprocesing finished
"2018-06-19 01:21:49.674498": start training on days [3]
"2018-06-19 01:21:49.674517": using fit without validation
"2018-06-19 01:22:12.444362": built 1000 trees
"2018-06-19 01:22:12.444493": start predicting on day 6
"2018-06-19 01:22:28.313055": saveing results
"2018-06-19 01:22:28.321437": results saved
"2018-06-19 01:22:28.324485": predictions_filenames: ['substructed_target_prediction_with_stacked_features/10/train_3_test_6.npy']
"2018-06-19 01:22:28.324532": will evaluate days: [6]
"2018-06-19 01:22:28.324720": predict on file "substructed_target_prediction_with_stacked_features/10/train_3_test_6.npy"
"2018-06-19 01:22:30.271380": calculating metric
"2018-06-19 01:22:30.273047": preprocesing started
"2018-06-19 01:22:32.106285": train features shape: (20203, 108)
"2

In [8]:
np.mean(metrics), np.std(metrics) / np.sqrt(16)

(0.6248993667037508, 0.004702781000031007)

In [9]:
x = np.load("substructed_target_prediction_with_stacked_features/0/train_3_test_6.npy")

In [10]:
x

array([[ 0.07490666,  0.0757687 ,  0.07574219, ...,  0.07006508,
         0.07049821,  0.06911759],
       [ 0.04848479,  0.04934684,  0.05076668, ...,  0.04966179,
         0.05843498,  0.05940922],
       [ 0.02200016,  0.0228622 ,  0.02283569, ...,  0.02343735,
         0.02343373,  0.02462495],
       ...,
       [-0.01906072, -0.01819867, -0.0157887 , ..., -0.01736168,
        -0.01549999, -0.01569779],
       [ 0.04103111,  0.03899885,  0.04076628, ...,  0.04596376,
         0.04584027,  0.04445965],
       [-0.02985216, -0.02899011, -0.02901663, ..., -0.01336781,
        -0.01187144, -0.01068021]])