In [1]:
import numpy as np
import shutil
import os

from argparse import Namespace
from catboost import CatBoostRegressor, CatBoostClassifier
from sklearn.svm import LinearSVR
from sklearn.linear_model import Lars, ElasticNet, Perceptron, SGDRegressor
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor

import sys
sys.path.append("../utils/")
sys.path.append("../calculating_predictions")
sys.path.append("../evaluation/")
from calculate_predictions import calculate_predictions
from evaluate import evaluate
from run_evaluation import *

FEATURES_NUMBER = 79

%reload_ext autoreload
%autoreload 2

In [2]:
def get_stacking_metric(i):
    args = Namespace()
    args.verbose = False
    args.first_feature = 0
    args.last_feature = -1
    args.data_folder = "../../../data/best_features_days_data/"
    args.out_folder = "predictions"
    args.model_constructor=lambda verbose: CatBoostRegressor(verbose=verbose)
    args.type = "regression"
    args.metric_folder = "metrics"
    args.additional_features = {
        2: [
            os.path.join("res", folder, "train_1_test_2.npy")
            for folder in os.listdir("res")
            if folder.startswith(str(i))
        ],
        6: [
            os.path.join("res", folder, "train_1_test_6.npy")
            for folder in os.listdir("res")
            if folder.startswith(str(i))
        ]
    }
    args.train_days = [2]
    args.validation_day = None
    args.test_days = [6]

    def clear(folder):
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.mkdir(folder)

    clear(args.out_folder)
    clear(args.metric_folder)
    
    calculate_predictions(args)
    evaluate(args.out_folder, args.data_folder, args.metric_folder, argmax_positions)
    
    with open("metrics/metrics.txt") as handler:
        metric = float(next(handler).strip().split()[0])
        return metric

In [5]:
np.mean(metrics), np.std(metrics)

(0.616844273469954, 0.015565988017839271)

In [3]:
metrics = [get_stacking_metric(i) for i in range(16)]

"2018-06-18 20:06:10.357777": preprocesing started
"2018-06-18 20:06:12.409074": train features shape: (22314, 108)
"2018-06-18 20:06:16.635310": preprocesing finished
"2018-06-18 20:06:16.635398": start training on days [2]
"2018-06-18 20:06:16.635505": using fit without validation
"2018-06-18 20:06:40.864354": built 1000 trees
"2018-06-18 20:06:40.864475": start predicting on day 6
"2018-06-18 20:06:56.648576": saveing results
"2018-06-18 20:06:56.657214": results saved
"2018-06-18 20:06:56.660803": predictions_filenames: ['predictions/train_2_test_6.npy']
"2018-06-18 20:06:56.660855": will evaluate days: [6]
"2018-06-18 20:06:56.661018": predict on file "predictions/train_2_test_6.npy"
"2018-06-18 20:06:58.655513": calculating metric
"2018-06-18 20:06:58.657804": preprocesing started
"2018-06-18 20:07:02.219164": train features shape: (22314, 276)
"2018-06-18 20:07:06.402063": preprocesing finished
"2018-06-18 20:07:06.402148": start training on days [2]
"2018-06-18 20:07:06.402257"

"2018-06-18 20:15:27.165233": train features shape: (22314, 108)
"2018-06-18 20:15:31.415142": preprocesing finished
"2018-06-18 20:15:31.415241": start training on days [2]
"2018-06-18 20:15:31.415262": using fit without validation
"2018-06-18 20:15:55.881113": built 1000 trees
"2018-06-18 20:15:55.881239": start predicting on day 6
"2018-06-18 20:16:10.459126": saveing results
"2018-06-18 20:16:10.467326": results saved
"2018-06-18 20:16:10.470409": predictions_filenames: ['predictions/train_2_test_6.npy']
"2018-06-18 20:16:10.470450": will evaluate days: [6]
"2018-06-18 20:16:10.470567": predict on file "predictions/train_2_test_6.npy"
"2018-06-18 20:16:12.647343": calculating metric
"2018-06-18 20:16:12.649271": preprocesing started
"2018-06-18 20:16:14.697517": train features shape: (22314, 108)
"2018-06-18 20:16:19.036329": preprocesing finished
"2018-06-18 20:16:19.036415": start training on days [2]
"2018-06-18 20:16:19.036528": using fit without validation
"2018-06-18 20:16:43

In [6]:
metrics

[0.5958344449019258,
 0.6145928913246959,
 0.618692261496208,
 0.6243788805427603,
 0.6185580187941483,
 0.621221794320235,
 0.6447465507372773,
 0.6276703039406661,
 0.633696806285413,
 0.6288142013284951,
 0.6138080119236777,
 0.5834505321330907,
 0.623941933998769,
 0.6221379836419182,
 0.5893872228663154,
 0.6085765372836692]

In [7]:
x = np.load("predictions/train_2_test_6.npy")

In [8]:
x

array([[0.61812625, 0.61846679, 0.61890057, ..., 0.61478642, 0.61478642,
        0.61478642],
       [0.34391105, 0.34409557, 0.34577321, ..., 0.34736335, 0.34736335,
        0.34736335],
       [0.66238071, 0.66272125, 0.66203065, ..., 0.66103397, 0.66103397,
        0.66103397],
       ...,
       [0.37748273, 0.37782327, 0.37825705, ..., 0.37628596, 0.37628596,
        0.37628596],
       [0.87094201, 0.8729542 , 0.87522218, ..., 0.87679949, 0.87679949,
        0.87679949],
       [0.63196891, 0.63215344, 0.63566528, ..., 0.63515088, 0.63515088,
        0.63515088]])