In [2]:
!pip install numpy matplotlib

Collecting numpy
  Downloading numpy-1.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting matplotlib
  Downloading matplotlib-3.6.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (11.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.8/11.8 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting pillow>=6.2.0
  Downloading Pillow-9.3.0-cp310-cp310-manylinux_2_28_x86_64.whl (3.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting fonttools>=4.22.0
  Downloading fonttools-4.38.0-py3-none-any.whl (965 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m965.4/965.4 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hColle

In [53]:
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt

In [11]:
# Root of the data tree: the 'work' directory two levels above the current
# working directory (so results depend on where the kernel was started).
BASE_FOLDER = Path.cwd().parent.parent / 'work'
# Per-model results live under <BASE_FOLDER>/results.
RESULT_FOLDER = BASE_FOLDER / 'results'
# (model_type, model_id) pairs whose predictions are loaded and plotted.
# Both elements are strings; model_id is interpolated into 'model_<id>'.
# NOTE(review): presumably the best-performing model of each type - confirm.
BEST_MODELS = [
        ('hp_pr', '57'),
        ('hp_po', '63'),
        ('hp_tf', '69'),
        ('hp_cr', '55'),
        ('hp_sh', '72'),
        ('hp_ro', '33'),
    ]

In [8]:
def get_csv_predictions(file_path):
    """Return the first column of a comma-separated numeric file.

    Args:
        file_path: Location of the file, in any form ``np.loadtxt`` accepts.

    Returns:
        1-D ``numpy.ndarray`` holding column 0, one value per row of the file.
    """
    # ndmin=2 keeps a single-row file two-dimensional. Without it loadtxt
    # squeezes the result to 1-D and the column slice below raises IndexError.
    data = np.loadtxt(file_path, delimiter=',', ndmin=2)
    return data[:, 0]

In [9]:
def get_csv_true(file_path):
    """Return the second column of a comma-separated numeric file.

    Args:
        file_path: Location of the file, in any form ``np.loadtxt`` accepts.

    Returns:
        1-D ``numpy.ndarray`` holding column 1, one value per row of the file.
    """
    # ndmin=2 keeps a single-row file two-dimensional. Without it loadtxt
    # squeezes the result to 1-D and the column slice below raises IndexError.
    data = np.loadtxt(file_path, delimiter=',', ndmin=2)
    return data[:, 1]

In [39]:
def get_model_predictions(model_type, model_id, data_set_name):
    """Collect one model's predictions for every pullback file of a data set.

    Walks ``RESULT_FOLDER/<model_type>/model_<model_id>/predictions``; each
    entry there is treated as a fold folder containing a sub-folder named
    after the upper-cased data set name.

    Args:
        model_type: Folder name of the model family (e.g. an entry of BEST_MODELS).
        model_id: Identifier interpolated into ``model_<model_id>``.
        data_set_name: Data set name; upper-cased to find the sub-folder.

    Returns:
        dict mapping each file's stem to the array returned by
        ``get_csv_predictions``. If the same stem occurs under several folds,
        the one visited last wins.
    """
    predictions_root = RESULT_FOLDER / model_type / f'model_{model_id}' / 'predictions'
    predictions_by_pullback = {}
    for fold_dir in predictions_root.iterdir():
        data_set_dir = fold_dir / data_set_name.upper()
        for prediction_path in data_set_dir.iterdir():
            predictions_by_pullback[prediction_path.stem] = get_csv_predictions(prediction_path)
    return predictions_by_pullback

In [21]:
def get_all_model_predictions(data_set_name):
    """Load the predictions of every model listed in BEST_MODELS.

    Args:
        data_set_name: Data set name forwarded to ``get_model_predictions``.

    Returns:
        dict keyed by ``(model_type, model_id)`` whose values are the
        per-pullback dicts returned by ``get_model_predictions``.
    """
    predictions_by_model = {}
    for model_type, model_id in BEST_MODELS:
        model_key = (model_type, model_id)
        predictions_by_model[model_key] = get_model_predictions(
            model_type, model_id, data_set_name
        )
    return predictions_by_model

In [33]:
def get_smoothed_predictions(data_set_name):
    """Load the first column of every file in the smoothed-predictions folder.

    Args:
        data_set_name: Data set name; lower-cased to select the sub-folder of
            ``BASE_FOLDER/new tuning/smoothed/predictions``.

    Returns:
        dict mapping each file's stem to the array returned by
        ``get_csv_predictions``.
    """
    smoothed_dir = BASE_FOLDER.joinpath(
        'new tuning', 'smoothed', 'predictions', data_set_name.lower()
    )
    smoothed_by_pullback = {}
    for prediction_path in smoothed_dir.iterdir():
        smoothed_by_pullback[prediction_path.stem] = get_csv_predictions(prediction_path)
    return smoothed_by_pullback

In [36]:
def get_all_real(data_set_name):
    """Load the second column of every file in the smoothed-predictions folder.

    The files read here are the same ones ``get_smoothed_predictions`` reads;
    only the extracted column differs (``get_csv_true`` takes column 1).

    Args:
        data_set_name: Data set name; lower-cased to select the sub-folder of
            ``BASE_FOLDER/new tuning/smoothed/predictions``.

    Returns:
        dict mapping each file's stem to the array returned by ``get_csv_true``.
    """
    source_dir = BASE_FOLDER.joinpath(
        'new tuning', 'smoothed', 'predictions', data_set_name.lower()
    )
    return dict(
        (source_path.stem, get_csv_true(source_path))
        for source_path in source_dir.iterdir()
    )

In [47]:
def get_pullback_data(pullback_name, models_data, smoothed_data, real_data):
    """Gather every series available for one pullback into a single dict.

    Args:
        pullback_name: Key looked up in each of the three mappings.
        models_data: Mapping of model key -> {pullback name -> series}.
        smoothed_data: Mapping of pullback name -> smoothed series.
        real_data: Mapping of pullback name -> real series.

    Returns:
        dict with the keys ``'smoothed'`` and ``'real'`` followed by one entry
        per key of ``models_data``, each holding that source's series for
        ``pullback_name``.

    Raises:
        KeyError: If ``pullback_name`` is missing from any of the mappings.
    """
    series_by_source = {'smoothed': smoothed_data[pullback_name]}
    series_by_source['real'] = real_data[pullback_name]
    for model_key, per_pullback in models_data.items():
        series_by_source[model_key] = per_pullback[pullback_name]
    return series_by_source

In [48]:
def get_all_pullbacks_data(data_set_name):
    """Load model, smoothed and real series for every pullback of a data set.

    The set of pullbacks is taken from the keys returned by ``get_all_real``;
    each one must also be present in the smoothed data and in every model's
    predictions, otherwise ``get_pullback_data`` raises KeyError.

    Args:
        data_set_name: Data set name forwarded to the three loaders.

    Returns:
        dict mapping pullback name -> dict produced by ``get_pullback_data``.
    """
    models_data = get_all_model_predictions(data_set_name)
    smoothed_data = get_smoothed_predictions(data_set_name)
    real_data = get_all_real(data_set_name)

    data_by_pullback = {}
    for pullback_name in real_data:
        data_by_pullback[pullback_name] = get_pullback_data(
            pullback_name, models_data, smoothed_data, real_data
        )
    return data_by_pullback

In [50]:
# Load every series for the 'val' data set once and keep it at module level;
# generate_all_pullback_plots() reads this variable directly.
all_pullbacks_data = get_all_pullbacks_data('val')

In [108]:
def get_style(source):
    """Look up the plot styling for a data source.

    Args:
        source: Either a ``(model_type, model_id)`` tuple, ``'real'`` or
            ``'smoothed'``.

    Returns:
        A ``(color, label, linestyle)`` tuple for a known source, or ``None``
        when the source is not recognised.
    """
    # Ordered (source, style) pairs, compared with == in the listed order.
    known_styles = (
        (('hp_pr', '57'), ('#1abc9c', 'PR-57', 'solid')),
        (('hp_po', '63'), ('#e67e22', 'PO-63', 'solid')),
        (('hp_tf', '69'), ('#3498db', 'TF-69', 'solid')),
        (('hp_cr', '55'), ('#9b59b6', 'CR-55', 'solid')),
        (('hp_sh', '72'), ('#34495e', 'SH-72', 'solid')),
        (('hp_ro', '33'), ('#7f8c8d', 'RO-33', 'solid')),
        ('real', ('#e74c3c', 'Real', 'dotted')),
        ('smoothed', ('#f1c40f', 'Smoothing', 'dashed')),
    )
    for known_source, style in known_styles:
        if source == known_source:
            return style
    return None

In [132]:
def get_pullback_fold_mapping():
    """Map each pullback name to the fold folder its 'VAL' file sits in.

    The mapping is derived from the folder layout of the ('hp_sh', '72')
    model: every entry under its ``predictions`` folder is a fold folder
    whose stem is parsed as an integer.

    Returns:
        dict mapping file stem -> integer fold number.

    Raises:
        ValueError: If a fold folder's stem is not an integer literal.
    """
    model_type = 'hp_sh'
    model_id = '72'
    predictions_root = RESULT_FOLDER / model_type / f'model_{model_id}' / 'predictions'

    fold_by_pullback = {}
    for fold_dir in predictions_root.iterdir():
        for pullback_path in (fold_dir / 'VAL').iterdir():
            fold_by_pullback[pullback_path.stem] = int(fold_dir.stem)
    return fold_by_pullback

In [133]:
def get_theta(pullback_name):
    """Return the theta value of the fold that a pullback belongs to.

    The fold number comes from ``get_pullback_fold_mapping`` (which rescans
    the results folder on every call) and selects an entry of a hard-coded
    list with ``fold - 1``.

    NOTE(review): this assumes fold folders are numbered from 1. A fold of 0
    would index -1 and silently return the last value - confirm the folder
    numbering.

    Args:
        pullback_name: Name of the pullback to look up.

    Returns:
        The float theta value for the pullback's fold.

    Raises:
        KeyError: If the pullback is not in the fold mapping.
        IndexError: If the fold number is beyond the list of theta values.
    """
    # One theta per fold; position i holds the value used for fold i + 1.
    per_fold_theta = [
        0.398990, 0.213131, 0.447475, 0.245455, 0.334343,
        0.132323, 0.342424, 0.188889, 0.269697, 0.237374,
    ]
    fold_number = get_pullback_fold_mapping()[pullback_name]
    return per_fold_theta[fold_number - 1]

In [165]:
def generate_pullback_plot(pullback_name, pullback_data):
    """Plot every series of one pullback and save the figure as a PDF.

    Draws the 'real' and 'smoothed' series plus one line per entry of
    BEST_MODELS against the sample index, adds horizontal reference lines at
    0.5 and at the pullback's theta, and writes the result to
    ``RESULT_FOLDER/assets/pullback_plots/<pullback_name>.pdf``.

    Args:
        pullback_name: Name of the pullback; used for the theta lookup and as
            the output file name.
        pullback_data: Mapping of source -> series, as produced by
            ``get_pullback_data``. It must contain 'real', 'smoothed' and every
            BEST_MODELS key.

    Raises:
        KeyError: If a required source is missing from ``pullback_data`` or
            the pullback has no fold mapping.
    """
    pullback_length = len(pullback_data['real'])
    x_positions = np.arange(0, pullback_length, 1)
    last_position = pullback_length - 1
    sources = ['real', 'smoothed', *BEST_MODELS]

    fig, ax = plt.subplots(figsize=(15, 3), dpi=144)
    try:
        # Fixed y-range, set before plotting so the data does not rescale it.
        ax.set_ylim(bottom=-0.1, top=1.1)

        for source in sources:
            color, label, linestyle = get_style(source)
            ax.plot(x_positions, pullback_data[source],
                    color=color, label=label, linestyle=linestyle)

        ax.hlines(y=0.5, xmin=0, xmax=last_position,
                  colors='g', linestyles='--', lw=1, label='0.5')
        theta = get_theta(pullback_name)
        ax.hlines(y=theta, xmin=0, xmax=last_position,
                  colors='#bdc3c7', linestyles='--', lw=1,
                  label=rf'$\theta$={theta:.3f}')

        # Legend is anchored just outside the right edge of the axes.
        ax.legend(loc=(1.02, 0))
        fig.tight_layout()

        # savefig does not create missing directories, so make sure the
        # target folder exists before writing.
        output_folder = RESULT_FOLDER / 'assets' / 'pullback_plots'
        output_folder.mkdir(parents=True, exist_ok=True)
        fig.savefig(output_folder / f'{pullback_name}.pdf')
    finally:
        # Always release the figure, even if plotting or saving failed, so
        # figures do not accumulate when many plots are generated in a loop.
        plt.close(fig)

In [168]:
def generate_all_pullback_plots():
    """Generate a plot for every pullback in the module-level ``all_pullbacks_data``.

    Each entry is passed to ``generate_pullback_plot`` in the dict's iteration
    order.
    """
    for pullback_name in all_pullbacks_data:
        generate_pullback_plot(pullback_name, all_pullbacks_data[pullback_name])

In [169]:
# Render and save one PDF per pullback in all_pullbacks_data.
generate_all_pullback_plots()