In [1]:
import numpy as np
import os
import sys
sys.path.append("../utils")
from constants import DAYS_NUMBER
from pool_iterator import  pool_iterator
from json_tools import get_regression_labels

In [2]:
# Folder that stores one JSON pool file per day.
data_folder = "../../../data/best_features_days_data/"

# Path of the JSON file of every day index in range(DAYS_NUMBER).
json_filenames = list(
    os.path.join(data_folder, "day_{}.json".format(day_index))
    for day_index in range(DAYS_NUMBER)
)

# Regression labels keyed by day index (position of the file in the list).
labels = dict(
    (day_index, get_regression_labels(pool_iterator(day_path)))
    for day_index, day_path in enumerate(json_filenames)
)

In [3]:
def get_mse_regression_metrics(targets_folder):
    """Compute the MSE of every saved regression prediction file.

    Every entry of ``targets_folder`` is treated as a sub-folder whose files
    are loaded with ``np.load``. The day index of a file is read from the
    digits that end its name (before the extension) and selects the reference
    values from the module-level ``labels`` mapping.

    Args:
        targets_folder: path of the folder whose sub-folders hold predictions.

    Returns:
        list with one MSE value per prediction file, ordered by sub-folder
        name and then by file name.

    Raises:
        ValueError: if a file name does not end with a day index.
        KeyError: if the parsed day index has no entry in ``labels``.
    """
    import re  # local import so this cell does not depend on the import cell

    metrics = []
    # os.listdir returns entries in arbitrary order; sorting keeps the order
    # of the returned list reproducible between runs and machines.
    for folder in sorted(os.listdir(targets_folder)):
        folder_path = os.path.join(targets_folder, folder)
        for name in sorted(os.listdir(folder_path)):
            # Take all trailing digits of the stem: reading only the single
            # character before the extension maps day 12 to day 2.
            day_match = re.search(r"\d+$", os.path.splitext(name)[0])
            if day_match is None:
                raise ValueError(
                    "cannot parse day index from file name: {}".format(name)
                )
            predictions = np.load(os.path.join(folder_path, name))
            current_labels = labels[int(day_match.group())]
            metrics.append(np.mean((predictions - current_labels) ** 2))
    return metrics


def get_mse_expect_classification_metrics(targets_folder):
    """Compute the MSE of class-index-weighted sums of saved predictions.

    Every entry of ``targets_folder`` is treated as a sub-folder whose files
    are loaded with ``np.load``. Each loaded array is indexed as 2-D
    (rows x classes); every row is reduced to ``sum_k row[k] * k`` and the
    result is compared with the module-level ``labels`` of the file's day.
    NOTE(review): this is an expected value only if rows are probabilities
    summing to 1 -- confirm against the code that writes these files.

    Args:
        targets_folder: path of the folder whose sub-folders hold predictions.

    Returns:
        list with one MSE value per prediction file, ordered by sub-folder
        name and then by file name.

    Raises:
        ValueError: if a file name does not end with a day index.
        KeyError: if the parsed day index has no entry in ``labels``.
    """
    import re  # local import so this cell does not depend on the import cell

    metrics = []
    # os.listdir returns entries in arbitrary order; sorting keeps the order
    # of the returned list reproducible between runs and machines.
    for folder in sorted(os.listdir(targets_folder)):
        folder_path = os.path.join(targets_folder, folder)
        for name in sorted(os.listdir(folder_path)):
            # Take all trailing digits of the stem: reading only the single
            # character before the extension maps day 12 to day 2.
            day_match = re.search(r"\d+$", os.path.splitext(name)[0])
            if day_match is None:
                raise ValueError(
                    "cannot parse day index from file name: {}".format(name)
                )
            predictions = np.load(os.path.join(folder_path, name))
            # Broadcasting the class indices over the rows performs the same
            # arithmetic as a repeated index matrix without materialising it.
            class_indices = np.arange(np.shape(predictions)[1])
            predictions = np.sum(predictions * class_indices, axis=1)
            current_labels = labels[int(day_match.group())]
            metrics.append(np.mean((predictions - current_labels) ** 2))
    return metrics

def get_mse_softmax_classification_metrics(targets_folder, temperature):
    """Compute the MSE of softmax-weighted class indices of saved predictions.

    Every entry of ``targets_folder`` is treated as a sub-folder whose files
    are loaded with ``np.load``. Each loaded array is indexed as 2-D
    (rows x classes); every row is divided by ``temperature``, passed through
    a softmax and reduced to ``sum_k softmax[k] * k``. The result is compared
    with the module-level ``labels`` of the file's day.

    Args:
        targets_folder: path of the folder whose sub-folders hold predictions.
        temperature: divisor applied to the loaded values before the softmax.

    Returns:
        list with one MSE value per prediction file, ordered by sub-folder
        name and then by file name.

    Raises:
        ValueError: if a file name does not end with a day index.
        KeyError: if the parsed day index has no entry in ``labels``.
    """
    import re  # local import so this cell does not depend on the import cell

    metrics = []
    # os.listdir returns entries in arbitrary order; sorting keeps the order
    # of the returned list reproducible between runs and machines.
    for folder in sorted(os.listdir(targets_folder)):
        folder_path = os.path.join(targets_folder, folder)
        for name in sorted(os.listdir(folder_path)):
            # Take all trailing digits of the stem: reading only the single
            # character before the extension maps day 12 to day 2.
            day_match = re.search(r"\d+$", os.path.splitext(name)[0])
            if day_match is None:
                raise ValueError(
                    "cannot parse day index from file name: {}".format(name)
                )
            predictions = np.load(os.path.join(folder_path, name))
            class_indices = np.arange(np.shape(predictions)[1])
            scaled = predictions / temperature
            # Subtracting the row maximum leaves the softmax unchanged but
            # keeps np.exp from overflowing to inf (and the ratio to nan)
            # for large values or small temperatures.
            scaled = scaled - np.max(scaled, axis=1, keepdims=True)
            exps = np.exp(scaled)
            predictions = np.sum(exps * class_indices, axis=1) / np.sum(exps, axis=1)
            current_labels = labels[int(day_match.group())]
            metrics.append(np.mean((predictions - current_labels) ** 2))
    return metrics

def get_mse_argmax_classification_metrics(targets_folder):
    """Compute the MSE of the arg-max class index of saved predictions.

    Every entry of ``targets_folder`` is treated as a sub-folder whose files
    are loaded with ``np.load``. Each loaded array is reduced along axis 1 to
    the index of its largest value, and that index is compared with the
    module-level ``labels`` of the file's day.

    Args:
        targets_folder: path of the folder whose sub-folders hold predictions.

    Returns:
        list with one MSE value per prediction file, ordered by sub-folder
        name and then by file name.

    Raises:
        ValueError: if a file name does not end with a day index.
        KeyError: if the parsed day index has no entry in ``labels``.
    """
    import re  # local import so this cell does not depend on the import cell

    metrics = []
    # os.listdir returns entries in arbitrary order; sorting keeps the order
    # of the returned list reproducible between runs and machines.
    for folder in sorted(os.listdir(targets_folder)):
        folder_path = os.path.join(targets_folder, folder)
        for name in sorted(os.listdir(folder_path)):
            # Take all trailing digits of the stem: reading only the single
            # character before the extension maps day 12 to day 2.
            day_match = re.search(r"\d+$", os.path.splitext(name)[0])
            if day_match is None:
                raise ValueError(
                    "cannot parse day index from file name: {}".format(name)
                )
            predictions = np.argmax(np.load(os.path.join(folder_path, name)), axis=1)
            current_labels = labels[int(day_match.group())]
            metrics.append(np.mean((predictions - current_labels) ** 2))
    return metrics

In [4]:
# Regression predictions: mean MSE over all prediction files and its
# standard error.
metrics = get_mse_regression_metrics("targets_to_substruct")
# Use the actual number of metrics instead of a hard-coded 15 so the standard
# error stays correct when the number of prediction files changes.
print(np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))

0.525436802845968 0.004115428630544515


In [5]:
# max_clicks_0 predictions, scored first with the class-index-weighted sum
# and then with the arg-max class. Each line prints the mean MSE and its
# standard error; the denominator is the actual number of metrics rather
# than a hard-coded 15.
metrics = get_mse_expect_classification_metrics(
    "classification_targets_to_substruct_max_clicks_0"
)
print(np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))
metrics = get_mse_argmax_classification_metrics(
    "classification_targets_to_substruct_max_clicks_0"
)
print(np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))

0.5433750969183098 0.004866064557117116
0.642852622314692 0.0036590592717397567


In [6]:
# max_clicks_1 predictions, scored first with the class-index-weighted sum
# and then with the arg-max class. Each line prints the mean MSE and its
# standard error; the denominator is the actual number of metrics rather
# than a hard-coded 15.
metrics = get_mse_expect_classification_metrics(
    "classification_targets_to_substruct_max_clicks_1"
)
print(np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))
metrics = get_mse_argmax_classification_metrics(
    "classification_targets_to_substruct_max_clicks_1"
)
print(np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))

0.5244411473731525 0.00446401522231749
0.687960481980466 0.005171432661184939


In [7]:
# max_clicks_2 predictions, scored first with the class-index-weighted sum
# and then with the arg-max class. Each line prints the mean MSE and its
# standard error; the denominator is the actual number of metrics rather
# than a hard-coded 15.
metrics = get_mse_expect_classification_metrics(
    "classification_targets_to_substruct_max_clicks_2"
)
print(np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))
metrics = get_mse_argmax_classification_metrics(
    "classification_targets_to_substruct_max_clicks_2"
)
print(np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))

0.522108665315979 0.004245636768204069
0.686681535722337 0.004846568448984567


In [8]:
# max_clicks_3 predictions, scored first with the class-index-weighted sum
# and then with the arg-max class. Each line prints the mean MSE and its
# standard error; the denominator is the actual number of metrics rather
# than a hard-coded 15.
metrics = get_mse_expect_classification_metrics(
    "classification_targets_to_substruct_max_clicks_3"
)
print(np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))
metrics = get_mse_argmax_classification_metrics(
    "classification_targets_to_substruct_max_clicks_3"
)
print(np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))

0.521906416210771 0.004232799730499929
0.6881919747098552 0.005124028476085855


In [9]:
# max_clicks_4 predictions, scored first with the class-index-weighted sum
# and then with the arg-max class. Each line prints the mean MSE and its
# standard error; the denominator is the actual number of metrics rather
# than a hard-coded 15.
metrics = get_mse_expect_classification_metrics(
    "classification_targets_to_substruct_max_clicks_4"
)
print(np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))
metrics = get_mse_argmax_classification_metrics(
    "classification_targets_to_substruct_max_clicks_4"
)
print(np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))

0.5219278134068053 0.004190988519672796
0.6877532600118812 0.0050017800411887355


In [10]:
# max_clicks_5 predictions, scored first with the class-index-weighted sum
# and then with the arg-max class. Each line prints the mean MSE and its
# standard error; the denominator is the actual number of metrics rather
# than a hard-coded 15.
metrics = get_mse_expect_classification_metrics(
    "classification_targets_to_substruct_max_clicks_5"
)
print(np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))
metrics = get_mse_argmax_classification_metrics(
    "classification_targets_to_substruct_max_clicks_5"
)
print(np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))

0.5217916759759469 0.004201494569045797
0.6875614772246965 0.004942825738470855


In [11]:
# max_clicks_3 predictions scored with the softmax-weighted class index.
# NOTE(review): the temperature 0.15 is not explained anywhere in view --
# record how it was chosen.
metrics = get_mse_softmax_classification_metrics(
    "classification_targets_to_substruct_max_clicks_3",
    0.15
)
# Standard error uses the actual number of metrics rather than a hard-coded 15.
print("", np.mean(metrics), np.std(metrics) / np.sqrt(len(metrics)))

 0.5446472438663498 0.004141963237655343
