In [4]:
import numpy as np
import shutil
import os

from argparse import Namespace
from catboost import CatBoostRegressor, CatBoostClassifier
from sklearn.svm import LinearSVR
from sklearn.linear_model import Lars, ElasticNet, Perceptron, SGDRegressor
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor

import sys
sys.path.append("../utils/")
sys.path.append("../calculating_predictions")
from make_feature_generator_model import make_feature_generator_model

# Total number of feature columns available for training; used below as the
# upper bound when the feature set is cut into consecutive slices. It matches
# the "train features shape: (142298, 127)" line logged for the all-features run.
FEATURES_NUMBER = 127

%reload_ext autoreload
%autoreload 2

In [6]:
# Settings object handed to make_feature_generator_model; the training cell
# adds further attributes to it (out_folder, model_name, feature bounds, ...).
args = Namespace(
    verbose=True,
    data_folder="../../../data/new_days_data_best_features/",
    need_position_feature=False,
    add_base_features=True,
    train_days=[1],
)

# Root directory that receives one sub-folder per repetition of the sweep.
out_folder = "features_models_no_pos"

# Each entry pairs the name used as a prefix of the saved model name with a
# factory for the estimator. Every factory accepts a `verbose` argument and
# ignores it, building the estimator with its default hyper-parameters.
# NOTE(review): Perceptron is a scikit-learn classifier while the other entries
# are regressors -- confirm that this is intended.
# NOTE(review): the "preceptron" spelling ends up in the saved model paths, so
# it is kept unchanged here.
_named_constructors = [
    ("svr", lambda verbose: LinearSVR()),
    ("lars", lambda verbose: Lars()),
    ("elastic", lambda verbose: ElasticNet()),
    ("preceptron", lambda verbose: Perceptron()),
    ("ada_boost", lambda verbose: AdaBoostRegressor()),
    ("gradient_boost", lambda verbose: GradientBoostingRegressor()),
    ("random_forest", lambda verbose: RandomForestRegressor()),
]

# Parallel lists (same order, same length) consumed by the training cell.
model_names, model_constructors = (
    list(column) for column in zip(*_named_constructors)
)

In [7]:
%%time

# Train every model family on the full feature set and on consecutive
# FEATURES_STEP-wide feature slices, repeating the whole sweep N_REPEATS times
# into out_folder/<repetition index>/.

N_REPEATS = 10      # number of independent repetitions of the sweep
FEATURES_STEP = 43  # slice width; the logged slice shapes are 43, 43 and 41 columns

# True keeps the historical behaviour: delete out_folder and retrain everything.
# Set to False to keep existing results and train only the models whose saved
# file is missing (e.g. to resume an interrupted multi-hour run).
FORCE_RETRAIN = True


def _saved_model_candidates(args):
    """Return the paths under which the model described by ``args`` may already exist.

    The first candidate is the bare ``<out_folder>/<model_name>`` path that the
    original guard tested. The second appends the ``_trained_on_<days>`` suffix
    that the helper's log shows in its "saving model in path" lines.
    """
    base_path = os.path.join(args.out_folder, args.model_name)
    # The log only shows the suffix for train_days == [1] ("_trained_on_1");
    # joining several days with "_" is an assumption -- TODO confirm against
    # make_feature_generator_model. A wrong guess only causes a retrain.
    days_suffix = "_".join(str(day) for day in args.train_days)
    return [base_path, "{}_trained_on_{}".format(base_path, days_suffix)]


if FORCE_RETRAIN and os.path.exists(out_folder):
    shutil.rmtree(out_folder)
if not os.path.isdir(out_folder):
    os.makedirs(out_folder)

# Loop-invariant, so it is set once instead of on every repetition.
args.step = FEATURES_STEP

# (first_feature, last_feature, model-name suffix) for every run of one model.
# first_feature=0 with last_feature=-1 is the setting used for the all-features
# run (its logged training matrix has all 127 columns).
feature_ranges = [(0, -1, "_all_features")]
for first in range(0, FEATURES_NUMBER, FEATURES_STEP):
    # The last slice is clipped so that it never runs past FEATURES_NUMBER.
    last = min(first + FEATURES_STEP, FEATURES_NUMBER)
    feature_ranges.append(
        (first, last, "_features_from_{}_to_{}".format(first, last))
    )

for repeat in range(N_REPEATS):
    args.out_folder = os.path.join(out_folder, str(repeat))
    if not os.path.isdir(args.out_folder):
        os.makedirs(args.out_folder)
    for model_name, model_constructor in zip(model_names, model_constructors):
        args.model_constructor = model_constructor
        for first, last, suffix in feature_ranges:
            args.first_feature = first
            args.last_feature = last
            args.model_name = model_name + suffix
            # Skip models already on disk; only relevant when FORCE_RETRAIN is
            # False, because otherwise the output folder starts out empty.
            if not any(os.path.exists(path) for path in _saved_model_candidates(args)):
                make_feature_generator_model(args)

"2018-06-21 22:35:24.203818": preprocesing started
"2018-06-21 22:35:28.675022": train features shape: (142298, 127)
"2018-06-21 22:35:31.753955": preprocesing finished
"2018-06-21 22:35:31.754048": start training on days [1]
"2018-06-21 22:36:37.577214": training finished
"2018-06-21 22:36:37.577329": saving model in path "features_models_no_pos/0/svr_all_features_trained_on_1"
"2018-06-21 22:36:37.584270": preprocesing started
"2018-06-21 22:36:41.129583": train features shape: (142298, 43)
"2018-06-21 22:36:44.151470": preprocesing finished
"2018-06-21 22:36:44.151580": start training on days [1]
"2018-06-21 22:37:21.608697": training finished
"2018-06-21 22:37:21.608796": saving model in path "features_models_no_pos/0/svr_features_from_0_to_43_trained_on_1"
"2018-06-21 22:37:21.611028": preprocesing started
"2018-06-21 22:37:25.154827": train features shape: (142298, 43)
"2018-06-21 22:37:28.148949": preprocesing finished
"2018-06-21 22:37:28.149068": start training on days [1]
"20



"2018-06-21 22:39:57.094216": training finished
"2018-06-21 22:39:57.094308": saving model in path "features_models_no_pos/0/preceptron_all_features_trained_on_1"
"2018-06-21 22:39:57.101850": preprocesing started
"2018-06-21 22:40:00.665525": train features shape: (142298, 43)
"2018-06-21 22:40:03.692578": preprocesing finished
"2018-06-21 22:40:03.692692": start training on days [1]
"2018-06-21 22:40:05.689679": training finished
"2018-06-21 22:40:05.689772": saving model in path "features_models_no_pos/0/preceptron_features_from_0_to_43_trained_on_1"
"2018-06-21 22:40:05.692687": preprocesing started
"2018-06-21 22:40:09.434557": train features shape: (142298, 43)
"2018-06-21 22:40:12.476324": preprocesing finished
"2018-06-21 22:40:12.476401": start training on days [1]
"2018-06-21 22:40:14.484668": training finished
"2018-06-21 22:40:14.484759": saving model in path "features_models_no_pos/0/preceptron_features_from_43_to_86_trained_on_1"
"2018-06-21 22:40:14.487459": preprocesing

"2018-06-21 23:00:58.646091": training finished
"2018-06-21 23:00:58.646190": saving model in path "features_models_no_pos/1/lars_features_from_0_to_43_trained_on_1"
"2018-06-21 23:00:58.647021": preprocesing started
"2018-06-21 23:01:02.053652": train features shape: (142298, 43)
"2018-06-21 23:01:04.942857": preprocesing finished
"2018-06-21 23:01:04.942947": start training on days [1]
"2018-06-21 23:01:05.180864": training finished
"2018-06-21 23:01:05.180945": saving model in path "features_models_no_pos/1/lars_features_from_43_to_86_trained_on_1"
"2018-06-21 23:01:05.181464": preprocesing started
"2018-06-21 23:01:08.597262": train features shape: (142298, 41)
"2018-06-21 23:01:11.632531": preprocesing finished
"2018-06-21 23:01:11.632628": start training on days [1]
"2018-06-21 23:01:11.848990": training finished
"2018-06-21 23:01:11.849080": saving model in path "features_models_no_pos/1/lars_features_from_86_to_127_trained_on_1"
"2018-06-21 23:01:11.849754": preprocesing starte



"2018-06-21 23:01:53.046744": training finished
"2018-06-21 23:01:53.046839": saving model in path "features_models_no_pos/1/preceptron_all_features_trained_on_1"
"2018-06-21 23:01:53.053595": preprocesing started
"2018-06-21 23:01:56.846650": train features shape: (142298, 43)
"2018-06-21 23:02:00.003170": preprocesing finished
"2018-06-21 23:02:00.003460": start training on days [1]
"2018-06-21 23:02:01.992294": training finished
"2018-06-21 23:02:01.992389": saving model in path "features_models_no_pos/1/preceptron_features_from_0_to_43_trained_on_1"
"2018-06-21 23:02:01.995206": preprocesing started
"2018-06-21 23:02:05.468479": train features shape: (142298, 43)
"2018-06-21 23:02:08.369976": preprocesing finished
"2018-06-21 23:02:08.370090": start training on days [1]
"2018-06-21 23:02:10.313112": training finished
"2018-06-21 23:02:10.313218": saving model in path "features_models_no_pos/1/preceptron_features_from_43_to_86_trained_on_1"
"2018-06-21 23:02:10.315847": preprocesing

"2018-06-21 23:20:27.583326": train features shape: (142298, 43)
"2018-06-21 23:20:30.408605": preprocesing finished
"2018-06-21 23:20:30.408693": start training on days [1]
"2018-06-21 23:20:30.588828": training finished
"2018-06-21 23:20:30.588904": saving model in path "features_models_no_pos/2/lars_features_from_43_to_86_trained_on_1"
"2018-06-21 23:20:30.589457": preprocesing started
"2018-06-21 23:20:34.012426": train features shape: (142298, 41)
"2018-06-21 23:20:36.835123": preprocesing finished
"2018-06-21 23:20:36.835230": start training on days [1]
"2018-06-21 23:20:37.003200": training finished
"2018-06-21 23:20:37.003281": saving model in path "features_models_no_pos/2/lars_features_from_86_to_127_trained_on_1"
"2018-06-21 23:20:37.003816": preprocesing started
"2018-06-21 23:20:40.997996": train features shape: (142298, 127)
"2018-06-21 23:20:43.860628": preprocesing finished
"2018-06-21 23:20:43.860721": start training on days [1]
"2018-06-21 23:20:44.304861": training f



"2018-06-21 23:21:14.876992": training finished
"2018-06-21 23:21:14.877076": saving model in path "features_models_no_pos/2/preceptron_all_features_trained_on_1"
"2018-06-21 23:21:14.877742": preprocesing started
"2018-06-21 23:21:18.831417": train features shape: (142298, 43)
"2018-06-21 23:21:21.901911": preprocesing finished
"2018-06-21 23:21:21.902004": start training on days [1]
"2018-06-21 23:21:23.640465": training finished
"2018-06-21 23:21:23.640559": saving model in path "features_models_no_pos/2/preceptron_features_from_0_to_43_trained_on_1"
"2018-06-21 23:21:23.641109": preprocesing started
"2018-06-21 23:21:26.969822": train features shape: (142298, 43)
"2018-06-21 23:21:29.856340": preprocesing finished
"2018-06-21 23:21:29.856428": start training on days [1]
"2018-06-21 23:21:31.563080": training finished
"2018-06-21 23:21:31.563174": saving model in path "features_models_no_pos/2/preceptron_features_from_43_to_86_trained_on_1"
"2018-06-21 23:21:31.563731": preprocesing

"2018-06-21 23:40:03.683676": train features shape: (142298, 43)
"2018-06-21 23:40:06.530024": preprocesing finished
"2018-06-21 23:40:06.530205": start training on days [1]
"2018-06-21 23:40:06.711622": training finished
"2018-06-21 23:40:06.711705": saving model in path "features_models_no_pos/3/lars_features_from_43_to_86_trained_on_1"
"2018-06-21 23:40:06.712384": preprocesing started
"2018-06-21 23:40:10.093821": train features shape: (142298, 41)
"2018-06-21 23:40:12.913524": preprocesing finished
"2018-06-21 23:40:12.913599": start training on days [1]
"2018-06-21 23:40:13.088476": training finished
"2018-06-21 23:40:13.088564": saving model in path "features_models_no_pos/3/lars_features_from_86_to_127_trained_on_1"
"2018-06-21 23:40:13.089241": preprocesing started
"2018-06-21 23:40:17.108092": train features shape: (142298, 127)
"2018-06-21 23:40:19.978797": preprocesing finished
"2018-06-21 23:40:19.978882": start training on days [1]
"2018-06-21 23:40:20.413973": training f



"2018-06-21 23:40:49.230404": training finished
"2018-06-21 23:40:49.230496": saving model in path "features_models_no_pos/3/preceptron_all_features_trained_on_1"
"2018-06-21 23:40:49.231308": preprocesing started
"2018-06-21 23:40:52.570614": train features shape: (142298, 43)
"2018-06-21 23:40:55.468864": preprocesing finished
"2018-06-21 23:40:55.468959": start training on days [1]
"2018-06-21 23:40:57.172351": training finished
"2018-06-21 23:40:57.172445": saving model in path "features_models_no_pos/3/preceptron_features_from_0_to_43_trained_on_1"
"2018-06-21 23:40:57.173163": preprocesing started
"2018-06-21 23:41:00.548378": train features shape: (142298, 43)
"2018-06-21 23:41:03.441615": preprocesing finished
"2018-06-21 23:41:03.441689": start training on days [1]
"2018-06-21 23:41:05.153687": training finished
"2018-06-21 23:41:05.153774": saving model in path "features_models_no_pos/3/preceptron_features_from_43_to_86_trained_on_1"
"2018-06-21 23:41:05.154454": preprocesing

"2018-06-22 00:00:07.892773": train features shape: (142298, 43)
"2018-06-22 00:00:10.834056": preprocesing finished
"2018-06-22 00:00:10.834167": start training on days [1]
"2018-06-22 00:00:11.027378": training finished
"2018-06-22 00:00:11.027465": saving model in path "features_models_no_pos/4/lars_features_from_43_to_86_trained_on_1"
"2018-06-22 00:00:11.028102": preprocesing started
"2018-06-22 00:00:14.581461": train features shape: (142298, 41)
"2018-06-22 00:00:17.500053": preprocesing finished
"2018-06-22 00:00:17.500200": start training on days [1]
"2018-06-22 00:00:17.674373": training finished
"2018-06-22 00:00:17.674450": saving model in path "features_models_no_pos/4/lars_features_from_86_to_127_trained_on_1"
"2018-06-22 00:00:17.674975": preprocesing started
"2018-06-22 00:00:21.869239": train features shape: (142298, 127)
"2018-06-22 00:00:24.802341": preprocesing finished
"2018-06-22 00:00:24.802433": start training on days [1]
"2018-06-22 00:00:25.163995": training f



"2018-06-22 00:00:55.054155": training finished
"2018-06-22 00:00:55.054237": saving model in path "features_models_no_pos/4/preceptron_all_features_trained_on_1"
"2018-06-22 00:00:55.054864": preprocesing started
"2018-06-22 00:00:58.524342": train features shape: (142298, 43)
"2018-06-22 00:01:01.513513": preprocesing finished
"2018-06-22 00:01:01.513621": start training on days [1]
"2018-06-22 00:01:03.259944": training finished
"2018-06-22 00:01:03.260041": saving model in path "features_models_no_pos/4/preceptron_features_from_0_to_43_trained_on_1"
"2018-06-22 00:01:03.260804": preprocesing started
"2018-06-22 00:01:06.836704": train features shape: (142298, 43)
"2018-06-22 00:01:09.792757": preprocesing finished
"2018-06-22 00:01:09.792834": start training on days [1]
"2018-06-22 00:01:11.536596": training finished
"2018-06-22 00:01:11.536775": saving model in path "features_models_no_pos/4/preceptron_features_from_43_to_86_trained_on_1"
"2018-06-22 00:01:11.537477": preprocesing

"2018-06-22 00:20:46.266259": train features shape: (142298, 43)
"2018-06-22 00:20:49.201308": preprocesing finished
"2018-06-22 00:20:49.201400": start training on days [1]
"2018-06-22 00:20:49.393513": training finished
"2018-06-22 00:20:49.393599": saving model in path "features_models_no_pos/5/lars_features_from_43_to_86_trained_on_1"
"2018-06-22 00:20:49.394222": preprocesing started
"2018-06-22 00:20:52.890320": train features shape: (142298, 41)
"2018-06-22 00:20:55.852598": preprocesing finished
"2018-06-22 00:20:55.852692": start training on days [1]
"2018-06-22 00:20:56.031846": training finished
"2018-06-22 00:20:56.031935": saving model in path "features_models_no_pos/5/lars_features_from_86_to_127_trained_on_1"
"2018-06-22 00:20:56.032568": preprocesing started
"2018-06-22 00:21:00.258763": train features shape: (142298, 127)
"2018-06-22 00:21:03.172275": preprocesing finished
"2018-06-22 00:21:03.172387": start training on days [1]
"2018-06-22 00:21:03.528647": training f



"2018-06-22 00:21:33.252539": training finished
"2018-06-22 00:21:33.252624": saving model in path "features_models_no_pos/5/preceptron_all_features_trained_on_1"
"2018-06-22 00:21:33.253392": preprocesing started
"2018-06-22 00:21:36.746576": train features shape: (142298, 43)
"2018-06-22 00:21:39.673813": preprocesing finished
"2018-06-22 00:21:39.673905": start training on days [1]
"2018-06-22 00:21:41.423268": training finished
"2018-06-22 00:21:41.423363": saving model in path "features_models_no_pos/5/preceptron_features_from_0_to_43_trained_on_1"
"2018-06-22 00:21:41.424027": preprocesing started
"2018-06-22 00:21:44.990901": train features shape: (142298, 43)
"2018-06-22 00:21:47.944187": preprocesing finished
"2018-06-22 00:21:47.944295": start training on days [1]
"2018-06-22 00:21:49.692251": training finished
"2018-06-22 00:21:49.692344": saving model in path "features_models_no_pos/5/preceptron_features_from_43_to_86_trained_on_1"
"2018-06-22 00:21:49.693061": preprocesing

"2018-06-22 00:41:03.876162": train features shape: (142298, 43)
"2018-06-22 00:41:06.793947": preprocesing finished
"2018-06-22 00:41:06.794025": start training on days [1]
"2018-06-22 00:41:06.984268": training finished
"2018-06-22 00:41:06.984353": saving model in path "features_models_no_pos/6/lars_features_from_43_to_86_trained_on_1"
"2018-06-22 00:41:06.985070": preprocesing started
"2018-06-22 00:41:10.485945": train features shape: (142298, 41)
"2018-06-22 00:41:13.450426": preprocesing finished
"2018-06-22 00:41:13.450576": start training on days [1]
"2018-06-22 00:41:13.625533": training finished
"2018-06-22 00:41:13.625614": saving model in path "features_models_no_pos/6/lars_features_from_86_to_127_trained_on_1"
"2018-06-22 00:41:13.626310": preprocesing started
"2018-06-22 00:41:17.774866": train features shape: (142298, 127)
"2018-06-22 00:41:20.749834": preprocesing finished
"2018-06-22 00:41:20.749947": start training on days [1]
"2018-06-22 00:41:21.104226": training f



"2018-06-22 00:41:51.189605": training finished
"2018-06-22 00:41:51.189707": saving model in path "features_models_no_pos/6/preceptron_all_features_trained_on_1"
"2018-06-22 00:41:51.190456": preprocesing started
"2018-06-22 00:41:54.616417": train features shape: (142298, 43)
"2018-06-22 00:41:57.568044": preprocesing finished
"2018-06-22 00:41:57.568137": start training on days [1]
"2018-06-22 00:41:59.302841": training finished
"2018-06-22 00:41:59.302933": saving model in path "features_models_no_pos/6/preceptron_features_from_0_to_43_trained_on_1"
"2018-06-22 00:41:59.303642": preprocesing started
"2018-06-22 00:42:02.829738": train features shape: (142298, 43)
"2018-06-22 00:42:05.784246": preprocesing finished
"2018-06-22 00:42:05.784352": start training on days [1]
"2018-06-22 00:42:07.547567": training finished
"2018-06-22 00:42:07.547676": saving model in path "features_models_no_pos/6/preceptron_features_from_43_to_86_trained_on_1"
"2018-06-22 00:42:07.548361": preprocesing

"2018-06-22 01:01:50.198084": training finished
"2018-06-22 01:01:50.198170": saving model in path "features_models_no_pos/7/lars_features_from_0_to_43_trained_on_1"
"2018-06-22 01:01:50.198860": preprocesing started
"2018-06-22 01:01:53.696045": train features shape: (142298, 43)
"2018-06-22 01:01:56.667090": preprocesing finished
"2018-06-22 01:01:56.667184": start training on days [1]
"2018-06-22 01:01:56.857739": training finished
"2018-06-22 01:01:56.857818": saving model in path "features_models_no_pos/7/lars_features_from_43_to_86_trained_on_1"
"2018-06-22 01:01:56.858398": preprocesing started
"2018-06-22 01:02:00.359546": train features shape: (142298, 41)
"2018-06-22 01:02:03.332740": preprocesing finished
"2018-06-22 01:02:03.332864": start training on days [1]
"2018-06-22 01:02:03.511411": training finished
"2018-06-22 01:02:03.511488": saving model in path "features_models_no_pos/7/lars_features_from_86_to_127_trained_on_1"
"2018-06-22 01:02:03.512010": preprocesing starte



"2018-06-22 01:02:41.234715": training finished
"2018-06-22 01:02:41.234801": saving model in path "features_models_no_pos/7/preceptron_all_features_trained_on_1"
"2018-06-22 01:02:41.235562": preprocesing started
"2018-06-22 01:02:44.745489": train features shape: (142298, 43)
"2018-06-22 01:02:47.699861": preprocesing finished
"2018-06-22 01:02:47.700006": start training on days [1]
"2018-06-22 01:02:49.443839": training finished
"2018-06-22 01:02:49.443930": saving model in path "features_models_no_pos/7/preceptron_features_from_0_to_43_trained_on_1"
"2018-06-22 01:02:49.444709": preprocesing started
"2018-06-22 01:02:53.018105": train features shape: (142298, 43)
"2018-06-22 01:02:56.036524": preprocesing finished
"2018-06-22 01:02:56.036610": start training on days [1]
"2018-06-22 01:02:57.796095": training finished
"2018-06-22 01:02:57.796193": saving model in path "features_models_no_pos/7/preceptron_features_from_43_to_86_trained_on_1"
"2018-06-22 01:02:57.796884": preprocesing

"2018-06-22 01:22:00.237950": training finished
"2018-06-22 01:22:00.238084": saving model in path "features_models_no_pos/8/lars_features_from_0_to_43_trained_on_1"
"2018-06-22 01:22:00.238721": preprocesing started
"2018-06-22 01:22:03.669494": train features shape: (142298, 43)
"2018-06-22 01:22:06.588855": preprocesing finished
"2018-06-22 01:22:06.588947": start training on days [1]
"2018-06-22 01:22:06.778005": training finished
"2018-06-22 01:22:06.778084": saving model in path "features_models_no_pos/8/lars_features_from_43_to_86_trained_on_1"
"2018-06-22 01:22:06.778703": preprocesing started
"2018-06-22 01:22:10.256550": train features shape: (142298, 41)
"2018-06-22 01:22:13.187133": preprocesing finished
"2018-06-22 01:22:13.187224": start training on days [1]
"2018-06-22 01:22:13.361438": training finished
"2018-06-22 01:22:13.361516": saving model in path "features_models_no_pos/8/lars_features_from_86_to_127_trained_on_1"
"2018-06-22 01:22:13.362043": preprocesing starte



"2018-06-22 01:22:50.686222": training finished
"2018-06-22 01:22:50.686306": saving model in path "features_models_no_pos/8/preceptron_all_features_trained_on_1"
"2018-06-22 01:22:50.686927": preprocesing started
"2018-06-22 01:22:54.129899": train features shape: (142298, 43)
"2018-06-22 01:22:57.101553": preprocesing finished
"2018-06-22 01:22:57.101670": start training on days [1]
"2018-06-22 01:22:58.842395": training finished
"2018-06-22 01:22:58.842488": saving model in path "features_models_no_pos/8/preceptron_features_from_0_to_43_trained_on_1"
"2018-06-22 01:22:58.843029": preprocesing started
"2018-06-22 01:23:02.532147": train features shape: (142298, 43)
"2018-06-22 01:23:05.547141": preprocesing finished
"2018-06-22 01:23:05.547276": start training on days [1]
"2018-06-22 01:23:07.306275": training finished
"2018-06-22 01:23:07.306367": saving model in path "features_models_no_pos/8/preceptron_features_from_43_to_86_trained_on_1"
"2018-06-22 01:23:07.306926": preprocesing

"2018-06-22 01:42:54.956605": train features shape: (142298, 43)
"2018-06-22 01:42:57.908940": preprocesing finished
"2018-06-22 01:42:57.909035": start training on days [1]
"2018-06-22 01:42:58.118275": training finished
"2018-06-22 01:42:58.118366": saving model in path "features_models_no_pos/9/lars_features_from_43_to_86_trained_on_1"
"2018-06-22 01:42:58.119012": preprocesing started
"2018-06-22 01:43:01.586856": train features shape: (142298, 41)
"2018-06-22 01:43:04.530453": preprocesing finished
"2018-06-22 01:43:04.530560": start training on days [1]
"2018-06-22 01:43:04.708491": training finished
"2018-06-22 01:43:04.708571": saving model in path "features_models_no_pos/9/lars_features_from_86_to_127_trained_on_1"
"2018-06-22 01:43:04.709150": preprocesing started
"2018-06-22 01:43:08.833864": train features shape: (142298, 127)
"2018-06-22 01:43:11.813878": preprocesing finished
"2018-06-22 01:43:11.813984": start training on days [1]
"2018-06-22 01:43:12.184741": training f



"2018-06-22 01:43:42.264616": training finished
"2018-06-22 01:43:42.264702": saving model in path "features_models_no_pos/9/preceptron_all_features_trained_on_1"
"2018-06-22 01:43:42.265351": preprocesing started
"2018-06-22 01:43:45.733776": train features shape: (142298, 43)
"2018-06-22 01:43:48.710725": preprocesing finished
"2018-06-22 01:43:48.710818": start training on days [1]
"2018-06-22 01:43:50.475584": training finished
"2018-06-22 01:43:50.475680": saving model in path "features_models_no_pos/9/preceptron_features_from_0_to_43_trained_on_1"
"2018-06-22 01:43:50.476273": preprocesing started
"2018-06-22 01:43:54.042810": train features shape: (142298, 43)
"2018-06-22 01:43:57.017326": preprocesing finished
"2018-06-22 01:43:57.017422": start training on days [1]
"2018-06-22 01:43:58.768513": training finished
"2018-06-22 01:43:58.768610": saving model in path "features_models_no_pos/9/preceptron_features_from_43_to_86_trained_on_1"
"2018-06-22 01:43:58.769288": preprocesing