In [1]:
import numpy as np
import shutil
import os

from argparse import Namespace
from catboost import CatBoostRegressor, CatBoostClassifier
from sklearn.svm import LinearSVR
from sklearn.linear_model import Lars, ElasticNet, Perceptron, SGDRegressor
from sklearn.ensemble import AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor

import sys
sys.path.append("../utils/")
sys.path.append("../calculating_predictions")
sys.path.append("../evaluation/")
from calculate_predictions import calculate_predictions
from evaluate import evaluate
from run_evaluation import *

# Upper bound on feature indices: prepare_features clips its feature windows to
# this value. Presumably the total number of feature columns in the data set —
# TODO confirm against the data loader.
FEATURES_NUMBER = 79

%reload_ext autoreload
%autoreload 2

In [2]:
def prepare_features(i):
    """Run ``calculate_predictions`` for every baseline model and feature subset.

    For each model two kinds of runs are made:

    1. One run per consecutive window of ``args.step`` features, covering
       ``[0, FEATURES_NUMBER)``; the last window is clipped to
       ``FEATURES_NUMBER``. Results go to
       ``res/<i>_<model>_features_from_<first>_to_<last>``.
    2. After all windowed runs, one run with ``first_feature=0`` and
       ``last_feature=-1``, written to ``res/<i>_<model>_all_features``
       (presumably ``-1`` selects every feature — confirm in
       ``calculate_predictions``).

    The same ``Namespace`` object is reused for every call; only
    ``first_feature``, ``last_feature``, ``model_constructor`` and
    ``out_folder`` change between calls.

    Output folders are neither created nor cleared here.
    NOTE(review): this assumes ``calculate_predictions`` handles its own
    ``out_folder`` — confirm.

    Args:
        i: Run identifier; only used as the prefix of the output folder names.

    Returns:
        None. The function is called for its side effects.
    """
    args = Namespace(
        verbose=False,
        data_folder="../../../data/best_features_days_data/",
        out_folder="res",
        type="binary_regression",
        additional_features=None,
        train_days=[1],
        validation_day=None,
        test_days=[3],
    )
    # Width of one feature window.
    args.step = 30

    # Each entry pairs the folder-name tag with its constructor so the two
    # cannot drift out of sync. Every constructor accepts a ``verbose``
    # argument and ignores it.
    # NOTE(review): "preceptron" is misspelled, but it is part of the output
    # folder names, so it is kept byte-for-byte.
    models = [
        ("svr", lambda verbose: LinearSVR()),
        ("lars", lambda verbose: Lars()),
        ("elastic", lambda verbose: ElasticNet()),
        ("preceptron", lambda verbose: Perceptron()),
        ("ada_boost", lambda verbose: AdaBoostRegressor()),
        ("gradient_boost", lambda verbose: GradientBoostingRegressor()),
        ("random_forest", lambda verbose: RandomForestRegressor()),
    ]

    # args.out_folder is overwritten before every call, so remember the root.
    base_folder = args.out_folder
    run_prefix = str(i) + '_'

    # Phase 1: every model on every window of ``args.step`` features.
    for model_name, model_constructor in models:
        for window_start in range(0, FEATURES_NUMBER, args.step):
            args.first_feature = window_start
            # Clip the final window so it never runs past the last feature.
            args.last_feature = min(window_start + args.step, FEATURES_NUMBER)
            args.model_constructor = model_constructor
            args.out_folder = os.path.join(
                base_folder,
                run_prefix + model_name + "_features_from_{}_to_{}".format(
                    args.first_feature, args.last_feature
                )
            )
            calculate_predictions(args)

    # Phase 2: every model once more with the (0, -1) feature range.
    args.first_feature = 0
    args.last_feature = -1
    for model_name, model_constructor in models:
        args.out_folder = os.path.join(
            base_folder, run_prefix + model_name + "_all_features"
        )
        args.model_constructor = model_constructor
        calculate_predictions(args)

In [3]:
# Run the full model/feature sweep for run ids 14 and 15; each id becomes the
# prefix of that run's output folder names inside prepare_features.
for i in range(14, 16):
    prepare_features(i)

"2018-06-19 00:14:57.684789": preprocesing started
"2018-06-19 00:14:59.113559": train features shape: (22452, 30)
"2018-06-19 00:15:03.128605": preprocesing finished
"2018-06-19 00:15:03.128693": start training on days [1]
"2018-06-19 00:15:03.128800": using fit without validation
"2018-06-19 00:15:06.861271": start predicting on day 3
"2018-06-19 00:15:06.862295": saveing results
"2018-06-19 00:15:06.862929": results saved
"2018-06-19 00:15:06.863069": preprocesing started
"2018-06-19 00:15:08.211513": train features shape: (22452, 30)
"2018-06-19 00:15:12.322905": preprocesing finished
"2018-06-19 00:15:12.323008": start training on days [1]
"2018-06-19 00:15:12.323027": using fit without validation
"2018-06-19 00:15:14.977824": start predicting on day 3
"2018-06-19 00:15:14.978740": saveing results
"2018-06-19 00:15:14.979294": results saved
"2018-06-19 00:15:14.979396": preprocesing started
"2018-06-19 00:15:16.254962": train features shape: (22452, 19)
"2018-06-19 00:15:20.313207



"2018-06-19 00:16:02.581089": train features shape: (22452, 30)
"2018-06-19 00:16:06.597987": preprocesing finished
"2018-06-19 00:16:06.598091": start training on days [1]
"2018-06-19 00:16:06.598202": using fit without validation
"2018-06-19 00:16:06.715801": start predicting on day 3
"2018-06-19 00:16:06.721166": saveing results
"2018-06-19 00:16:06.721773": results saved
"2018-06-19 00:16:06.721882": preprocesing started
"2018-06-19 00:16:08.054666": train features shape: (22452, 19)
"2018-06-19 00:16:12.129267": preprocesing finished
"2018-06-19 00:16:12.129357": start training on days [1]
"2018-06-19 00:16:12.129471": using fit without validation
"2018-06-19 00:16:12.208607": start predicting on day 3
"2018-06-19 00:16:12.212549": saveing results
"2018-06-19 00:16:12.213174": results saved
"2018-06-19 00:16:12.213283": preprocesing started
"2018-06-19 00:16:13.635016": train features shape: (22452, 30)
"2018-06-19 00:16:17.752927": preprocesing finished
"2018-06-19 00:16:17.75303



"2018-06-19 00:17:58.513583": start predicting on day 3
"2018-06-19 00:17:58.528470": saveing results
"2018-06-19 00:17:58.529137": results saved
"2018-06-19 00:17:58.529477": preprocesing started
"2018-06-19 00:18:00.060981": train features shape: (22452, 79)
"2018-06-19 00:18:04.501592": preprocesing finished
"2018-06-19 00:18:04.501680": start training on days [1]
"2018-06-19 00:18:04.501698": using fit without validation
"2018-06-19 00:18:10.094912": start predicting on day 3
"2018-06-19 00:18:10.132744": saveing results
"2018-06-19 00:18:10.133374": results saved
"2018-06-19 00:18:10.133718": preprocesing started
"2018-06-19 00:18:11.676762": train features shape: (22452, 79)
"2018-06-19 00:18:16.040531": preprocesing finished
"2018-06-19 00:18:16.040608": start training on days [1]
"2018-06-19 00:18:16.040707": using fit without validation
"2018-06-19 00:18:27.530971": start predicting on day 3
"2018-06-19 00:18:27.564707": saveing results
"2018-06-19 00:18:27.565306": results sa



"2018-06-19 00:19:50.809790": train features shape: (22452, 30)
"2018-06-19 00:19:54.718448": preprocesing finished
"2018-06-19 00:19:54.718553": start training on days [1]
"2018-06-19 00:19:54.718572": using fit without validation
"2018-06-19 00:19:54.806044": start predicting on day 3
"2018-06-19 00:19:54.810953": saveing results
"2018-06-19 00:19:54.811478": results saved
"2018-06-19 00:19:54.811563": preprocesing started
"2018-06-19 00:19:56.138897": train features shape: (22452, 19)
"2018-06-19 00:19:59.983960": preprocesing finished
"2018-06-19 00:19:59.984050": start training on days [1]
"2018-06-19 00:19:59.984085": using fit without validation
"2018-06-19 00:20:00.057701": start predicting on day 3
"2018-06-19 00:20:00.061533": saveing results
"2018-06-19 00:20:00.062090": results saved
"2018-06-19 00:20:00.062220": preprocesing started
"2018-06-19 00:20:01.355801": train features shape: (22452, 30)
"2018-06-19 00:20:05.249285": preprocesing finished
"2018-06-19 00:20:05.24935



"2018-06-19 00:21:50.517025": start predicting on day 3
"2018-06-19 00:21:50.533306": saveing results
"2018-06-19 00:21:50.533859": results saved
"2018-06-19 00:21:50.534176": preprocesing started
"2018-06-19 00:21:52.056929": train features shape: (22452, 79)
"2018-06-19 00:21:56.356909": preprocesing finished
"2018-06-19 00:21:56.357010": start training on days [1]
"2018-06-19 00:21:56.357027": using fit without validation
"2018-06-19 00:22:08.457852": start predicting on day 3
"2018-06-19 00:22:08.553371": saveing results
"2018-06-19 00:22:08.554059": results saved
"2018-06-19 00:22:08.554527": preprocesing started
"2018-06-19 00:22:10.256811": train features shape: (22452, 79)
"2018-06-19 00:22:14.704486": preprocesing finished
"2018-06-19 00:22:14.704564": start training on days [1]
"2018-06-19 00:22:14.704685": using fit without validation
"2018-06-19 00:22:28.440282": start predicting on day 3
"2018-06-19 00:22:28.480287": saveing results
"2018-06-19 00:22:28.481124": results sa