In [1]:
import numpy as np
import shutil
import os
from argparse import Namespace
from catboost import CatBoostRegressor, CatBoostClassifier
import sys

sys.path.append("../utils/")
from constants import POSITIONS_VARIANTS
sys.path.append("../calculating_predictions")
from calculate_predictions import calculate_predictions
sys.path.append("../evaluation/")
from evaluate import evaluate
from run_evaluation import *

# Upper bound of the feature range: get_greedy_metric passes this value as
# `args.last_feature` (with `args.first_feature = 0`).
FEATURES_NUMBER = 16

%reload_ext autoreload
%autoreload 2

In [None]:
def greedy_positions(predictions):
    """Pick, for every sample, the first slot whose arg-max label is 1.

    ``predictions`` is reduced with ``argmax`` over axis 2, giving one label
    per (sample, slot). For each sample the index of the first slot labelled
    ``1`` is returned. The sentinel ``100`` is returned instead when no slot
    is labelled ``1`` or when the first such slot is index ``10``.

    Returns:
        np.ndarray with one position per sample.
    """
    labels = np.argmax(predictions, axis=2)

    def first_positive(row):
        # Index of the first slot labelled 1, or the sentinel when absent.
        hit = next((idx for idx, label in enumerate(row) if label == 1), 100)
        # Slot index 10 is mapped to the same sentinel as "nothing found".
        return 100 if hit == 10 else hit

    return np.array(list(map(first_positive, labels)))


def smart_positions(predictions):
    """Return, per sample, the position variant with the highest class-1 score.

    Takes index 1 along the last axis of ``predictions``, finds the arg-max
    along axis 1 of that slice for every sample, and maps the winning index
    through ``POSITIONS_VARIANTS``.

    Returns:
        np.ndarray of entries taken from ``POSITIONS_VARIANTS``, one per sample.
    """
    class_one_scores = predictions[:, :, 1]
    best_variant = np.argmax(class_one_scores, axis=1)
    variants = np.array(POSITIONS_VARIANTS)
    return variants[best_variant]


def smart_positions_with_threshold(predictions, threshold, fallback_position=8,
                                   positions_variants=None):
    """Pick the best-scoring position variant, falling back when the score is low.

    Takes index 1 along the last axis of ``predictions``, selects for every
    sample the variant with the highest such score, and replaces the choice
    with ``fallback_position`` whenever that highest score is below
    ``threshold``.

    Args:
        predictions: Array indexable as ``predictions[:, :, 1]``
            (sample, variant, class).
        threshold: Minimum best score required to keep the selected variant.
        fallback_position: Position assigned when the best score is below
            ``threshold``. Defaults to 8, the value previously hard-coded.
        positions_variants: Sequence mapping a variant index to a position.
            Defaults to the module-level ``POSITIONS_VARIANTS``.

    Returns:
        np.ndarray with one position per sample.
    """
    if positions_variants is None:
        positions_variants = POSITIONS_VARIANTS

    scores = predictions[:, :, 1]
    best_variant = np.argmax(scores, axis=1)
    # Fancy indexing yields a fresh array, so the assignment below does not
    # modify the variants table.
    positions = np.array(positions_variants)[best_variant]

    # Compare the best *score* with the threshold. Comparing the arg-max
    # *index* (as was done before) is true only for index 0 for any threshold
    # in (0, 1), which made the threshold value irrelevant.
    best_score = np.max(scores, axis=1)
    positions[best_score < threshold] = fallback_position
    return positions

def get_greedy_metric(threshold, positions):
    """Run one predict-and-evaluate cycle and return the metric it reports.

    Builds the argument namespace passed to ``calculate_predictions``
    (configured with ``train_days=[2]`` and ``test_days=[3]``), recreates the
    output and metric folders from scratch, runs ``calculate_predictions`` and
    ``evaluate``, then reads the metric back from ``metrics.txt`` inside the
    metric folder.

    Args:
        threshold: Stored as ``args.threshold`` for ``calculate_predictions``.
        positions: Forwarded unchanged as the last argument of ``evaluate``;
            this notebook passes callables that turn predictions into positions.

    Returns:
        float: The first whitespace-separated token on the first line of the
        metrics file.
    """
    args = Namespace(
        verbose=True,
        first_feature=0,
        last_feature=FEATURES_NUMBER,
        data_folder="../../../data/best_features_days_data/",
        out_folder="res",
        type="binary_classification",
        need_position_feature=True,
        labels_to_substruct=None,
        metric_folder="metrics",
        # The `verbose` argument supplied by the caller is deliberately ignored.
        model_constructor=lambda verbose: CatBoostClassifier(verbose=False),
        # A discarded dict of per-day file lists under
        # "features_models_with_pos" used to be built here. It was never
        # assigned, referenced an undefined name `i`, and only triggered
        # useless directory listings, so it has been removed.
        additional_features=None,
        train_days=[2],
        validation_day=None,
        test_days=[3],
        threshold=threshold,
    )

    def clear(folder):
        """Delete ``folder`` (if present) and recreate it empty."""
        if os.path.exists(folder):
            shutil.rmtree(folder)
        os.mkdir(folder)

    # Both folders are relative to the current working directory and are wiped
    # on every call.
    clear(args.out_folder)
    clear(args.metric_folder)

    calculate_predictions(args)
    evaluate(args.out_folder, args.data_folder, args.metric_folder, positions)

    # Derive the path from the configured metric folder instead of repeating
    # the folder name as a literal.
    metrics_path = os.path.join(args.metric_folder, "metrics.txt")
    with open(metrics_path) as handler:
        return float(next(handler).strip().split()[0])

In [None]:
# Passed as the `threshold` argument of get_greedy_metric in the cells below.
# NOTE(review): presumably the mean target value of the data — confirm where
# this number comes from.
average_target = 0.6118888430078808

In [None]:
# Repeat the full predict-and-evaluate cycle 15 times with the greedy position
# selector and collect the metric from each run.
metrics = [get_greedy_metric(average_target, greedy_positions) for i in range(15)]

In [None]:
# Mean metric and its standard error. The sample size is taken from `metrics`
# itself so the result stays correct if the number of runs changes.
# NOTE: np.std defaults to ddof=0 (population standard deviation).
np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics))

In [None]:
# Repeat the full predict-and-evaluate cycle 15 times with the smart position
# selector and collect the metric from each run.
metrics = [get_greedy_metric(average_target, smart_positions) for i in range(15)]

In [None]:
# Mean metric and its standard error. The sample size is taken from `metrics`
# itself so the result stays correct if the number of runs changes.
# NOTE: np.std defaults to ddof=0 (population standard deviation).
np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics))

In [None]:
# Repeat the full predict-and-evaluate cycle 15 times using
# smart_positions_with_threshold with a fixed threshold of 0.5, collecting the
# metric from each run.
metrics = [
    get_greedy_metric(
        average_target,
        lambda predictions: smart_positions_with_threshold(predictions, 0.5))
    for i in range(15)
]

In [None]:
# Mean metric and its standard error. The sample size is taken from `metrics`
# itself so the result stays correct if the number of runs changes.
# NOTE: np.std defaults to ddof=0 (population standard deviation).
np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics))

In [None]:
# Sweep the threshold of smart_positions_with_threshold over 10 evenly spaced
# values in [0.1, 0.9]; every threshold is evaluated with 5 runs.
res = {}
for th in np.linspace(0.1, 0.9, 10):
    metrics = [
        get_greedy_metric(
            average_target,
            lambda predictions: smart_positions_with_threshold(predictions, th))
        for i in range(5)
    ]
    # Standard error over the actual number of runs. This used to divide by
    # sqrt(10) (the number of thresholds, not runs) and relied on a separate
    # cell that rescaled `res` in place, which corrupted the values whenever
    # that cell was executed more than once.
    res[th] = (np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))

# threshold -> (mean metric, standard error)
res

In [None]:
# Hard-coded threshold -> (mean metric, standard error) table for a 10-point
# sweep from 0.1 to 0.9.
# NOTE(review): this assignment replaces any `res` computed earlier in the
# session; presumably pasted from a previous run — confirm it is still current.
res = {
    0.1: (0.6355749236135495, 0.003162723271489616),
    0.18888888888888888: (0.6359131047916202, 0.004209740060774447),
    0.2777777777777778: (0.6290267875927288, 0.004640652431359883),
    0.3666666666666667: (0.622029501396152, 0.005567964267894078),
    0.4555555555555556: (0.6295784621436431, 0.004142436307717166),
    0.5444444444444445: (0.6378853378229242, 0.002472572099384837),
    0.6333333333333333: (0.6346278003512318, 0.0048027658295530665),
    0.7222222222222222: (0.6226123616463015, 0.003784136582314592),
    0.8111111111111111: (0.6308403814607049, 0.0034276639715340013),
    0.9: (0.6259451185491536, 0.004869667873788324),
}

In [None]:
# Display the threshold -> (mean metric, standard error) table.
res