In [1]:
import numpy as np
import shutil
import os

from argparse import Namespace
from catboost import CatBoostRegressor, CatBoostClassifier
from sklearn.svm import LinearSVR
from sklearn.linear_model import Lars, ElasticNet, Perceptron, SGDRegressor
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor

import sys
sys.path.append("../utils/")
sys.path.append("../calculating_predictions")
sys.path.append("../evaluation/")
from calculate_predictions import calculate_predictions
from evaluate import evaluate
from run_evaluation import *

# Number of input features.
# NOTE(review): this constant is not referenced by any cell visible in this
# notebook; the training log reports 80 columns, so it presumably excludes one
# non-feature column -- confirm before relying on it.
FEATURES_NUMBER = 79

# Re-import edited modules automatically before each cell is executed, so
# changes to the project code on sys.path are picked up without a kernel restart.
%reload_ext autoreload
%autoreload 2

In [2]:
def get_stacking_metric(i):
    """Run one train/predict/evaluate cycle for run ``i`` and return its metric.

    The run is configured through an ``argparse.Namespace`` that is handed to
    ``calculate_predictions``: a ``CatBoostRegressor`` is trained on day 3 and
    used to predict day 6, with the per-day label files taken from
    ``targets_to_substruct/<i>/``. Predictions go to
    ``substructed_target_predictions/<i>/`` and are then passed to ``evaluate``
    together with the ``metrics`` folder.

    Both the prediction folder of this run and the metric folder are emptied
    and re-created (including missing parent folders) before anything is
    computed, so the function works on a fresh checkout and can be re-run
    without a ``FileExistsError`` or leftover files from a previous run.

    Args:
        i: Index of the run; converted with ``str(i)`` to select the
            ``targets_to_substruct`` sub-folder and to name the output
            sub-folder.

    Returns:
        float: The first whitespace-separated token of the first line of
        ``<metric_folder>/metrics.txt``, which the evaluation step is expected
        to have written.
    """

    def reset_folder(folder):
        # Start from an empty folder; makedirs also creates missing parents.
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.makedirs(folder)

    # NOTE: attribute names and paths (including the "substruct" spelling) are
    # kept exactly as they are, since they are consumed outside this notebook.
    args = Namespace()
    args.verbose = False
    args.first_feature = 0
    args.last_feature = -1
    args.data_folder = "../../../data/best_features_days_data/"
    args.out_folder = os.path.join("substructed_target_predictions", str(i))
    args.model_constructor = lambda verbose: CatBoostRegressor(verbose=verbose)
    args.type = "regression"
    args.metric_folder = "metrics"
    args.labels_to_substruct = {
        day: os.path.join("targets_to_substruct", str(i), "train_2_test_{}.npy".format(day))
        for day in [3, 6]
    }
    args.additional_features = None
    args.train_days = [3]
    args.validation_day = None
    args.test_days = [6]

    reset_folder(args.out_folder)
    reset_folder(args.metric_folder)

    calculate_predictions(args)
    evaluate(args.out_folder, args.data_folder, args.metric_folder, argmax_positions)

    # Read the metric from the folder that was actually passed to evaluate().
    metrics_path = os.path.join(args.metric_folder, "metrics.txt")
    with open(metrics_path) as handler:
        return float(next(handler).strip().split()[0])

In [3]:
# One metric per run: indices 0..15 are evaluated in order.
metrics = list(map(get_stacking_metric, range(16)))

"2018-06-19 01:37:11.049638": preprocesing started
"2018-06-19 01:37:12.631023": train features shape: (20203, 80)
"2018-06-19 01:37:13.194581": preprocesing finished
"2018-06-19 01:37:13.194667": start training on days [3]
"2018-06-19 01:37:13.194777": using fit without validation
"2018-06-19 01:37:31.811154": built 1000 trees
"2018-06-19 01:37:31.811273": start predicting on day 6
"2018-06-19 01:37:48.250629": saveing results
"2018-06-19 01:37:48.258484": results saved
"2018-06-19 01:37:48.261864": predictions_filenames: ['substructed_target_predictions/0/train_3_test_6.npy']
"2018-06-19 01:37:48.261955": will evaluate days: [6]
"2018-06-19 01:37:48.262136": predict on file "substructed_target_predictions/0/train_3_test_6.npy"
"2018-06-19 01:37:50.197651": calculating metric
"2018-06-19 01:37:50.198444": preprocesing started
"2018-06-19 01:37:51.678964": train features shape: (20203, 80)
"2018-06-19 01:37:52.263198": preprocesing finished
"2018-06-19 01:37:52.263284": start training 

"2018-06-19 01:44:10.104701": saveing results
"2018-06-19 01:44:10.116820": results saved
"2018-06-19 01:44:10.121116": predictions_filenames: ['substructed_target_predictions/10/train_3_test_6.npy']
"2018-06-19 01:44:10.121216": will evaluate days: [6]
"2018-06-19 01:44:10.121372": predict on file "substructed_target_predictions/10/train_3_test_6.npy"
"2018-06-19 01:44:12.076258": calculating metric
"2018-06-19 01:44:12.077620": preprocesing started
"2018-06-19 01:44:13.671680": train features shape: (20203, 80)
"2018-06-19 01:44:14.275214": preprocesing finished
"2018-06-19 01:44:14.275313": start training on days [3]
"2018-06-19 01:44:14.275408": using fit without validation
"2018-06-19 01:44:32.899632": built 1000 trees
"2018-06-19 01:44:32.899749": start predicting on day 6
"2018-06-19 01:44:49.365819": saveing results
"2018-06-19 01:44:49.374695": results saved
"2018-06-19 01:44:49.378056": predictions_filenames: ['substructed_target_predictions/11/train_3_test_6.npy']
"2018-06-1

In [5]:
# Mean and standard deviation (NumPy default, ddof=0) of the metric across runs.
np.asarray(metrics).mean(), np.asarray(metrics).std()

(0.6194472682461306, 0.016315691282294053)

In [6]:
# Show the individual metric values, one per run, in run order.
metrics

[0.6448146841164871,
 0.61918952874568,
 0.6020240154954177,
 0.6323119017501837,
 0.59994079899329,
 0.6209904934567039,
 0.6210794920624425,
 0.6439845693803984,
 0.6136699681832587,
 0.6261769058298543,
 0.5924704162011646,
 0.6124579918774751,
 0.6202758237509662,
 0.5988583604868071,
 0.6486414459601554,
 0.6142698956478049]

In [8]:
# Load the predictions saved for run 0 for manual inspection
# (per the file name: trained on day 3, predicted on day 6).
x = np.load("substructed_target_predictions/0/train_3_test_6.npy")

In [9]:
# Display the loaded prediction array (NumPy abbreviates large arrays with "...").
x

array([[ 0.13274615,  0.13118206,  0.13084053, ...,  0.12008382,
         0.11908894,  0.11908894],
       [ 0.02705355,  0.02711887,  0.0281676 , ...,  0.02486455,
         0.02924033,  0.02924033],
       [ 0.02459531,  0.02667005,  0.02632851, ...,  0.0108111 ,
         0.00769965,  0.00769965],
       ...,
       [-0.00276214, -0.00317481, -0.00351634, ..., -0.00480333,
        -0.00472398, -0.00472398],
       [ 0.12568453,  0.1285459 ,  0.12820437, ...,  0.1167956 ,
         0.11554036,  0.11554036],
       [ 0.13993629,  0.14699612,  0.14804485, ...,  0.14385995,
         0.13551051,  0.13551051]])