In [1]:
!pip install numpy matplotlib



In [131]:
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
from collections import OrderedDict

In [3]:
# Working tree: the `work` directory located two levels above the current working directory.
BASE_FOLDER = Path.cwd().parent.parent.joinpath('work')

# Folder holding per-model results (and the generated assets).
RESULT_FOLDER = BASE_FOLDER.joinpath('results')

# (model_type, model_id) pairs of the models that are plotted.
BEST_MODELS = [
    ('hp_pr', '57'), ('hp_po', '63'), ('hp_tf', '69'),
    ('hp_cr', '55'), ('hp_sh', '72'), ('hp_ro', '33'),
]

In [4]:
def get_csv_predictions(file_path):
    """Read a comma-separated numeric file and return its first column.

    Args:
        file_path: Path (or anything `np.loadtxt` accepts) of the CSV file.

    Returns:
        A 1-D float array holding column 0 of every row.
    """
    # ndmin=2 keeps the result two-dimensional even when the file has a single
    # row; without it loadtxt squeezes to 1-D and the column slice below fails.
    data = np.loadtxt(file_path, delimiter=',', ndmin=2)
    return data[:, 0]

In [5]:
def get_csv_true(file_path):
    """Read a comma-separated numeric file and return its second column.

    Args:
        file_path: Path (or anything `np.loadtxt` accepts) of the CSV file.

    Returns:
        A 1-D float array holding column 1 of every row.
    """
    # ndmin=2 keeps the result two-dimensional even when the file has a single
    # row; without it loadtxt squeezes to 1-D and the column slice below fails.
    data = np.loadtxt(file_path, delimiter=',', ndmin=2)
    return data[:, 1]

In [6]:
def get_model_predictions(model_type, model_id, data_set_name):
    """Collect one model's prediction series across all of its fold folders.

    Args:
        model_type: Name of the model-type folder under `RESULT_FOLDER`.
        model_id: Identifier used to build the `model_<id>` folder name.
        data_set_name: Data set name; its upper-cased form is the sub-folder
            read inside every fold folder.

    Returns:
        Dict mapping each prediction file's stem to the array returned by
        `get_csv_predictions` for that file.
    """
    fold_root = RESULT_FOLDER / model_type / f'model_{model_id}' / 'predictions'
    predictions = {}
    for fold_dir in fold_root.iterdir():
        split_dir = fold_dir / data_set_name.upper()
        for csv_path in split_dir.iterdir():
            predictions[csv_path.stem] = get_csv_predictions(csv_path)
    return predictions

In [7]:
def get_all_model_predictions(data_set_name):
    """Load the predictions of every model listed in `BEST_MODELS`.

    Args:
        data_set_name: Data set name forwarded to `get_model_predictions`.

    Returns:
        Dict keyed by `(model_type, model_id)` whose values are the
        per-pullback dicts produced by `get_model_predictions`.
    """
    per_model = {}
    for model_type, model_id in BEST_MODELS:
        model_key = (model_type, model_id)
        per_model[model_key] = get_model_predictions(model_type, model_id, data_set_name)
    return per_model

In [8]:
def get_smoothed_predictions(data_set_name):
    """Load the first CSV column of every file in the smoothed-predictions folder.

    Args:
        data_set_name: Data set name; its lower-cased form selects the
            sub-folder of `new tuning/smoothed/predictions` that is read.

    Returns:
        Dict mapping each file's stem to the array returned by
        `get_csv_predictions`.
    """
    folder = BASE_FOLDER / 'new tuning' / 'smoothed' / 'predictions' / data_set_name.lower()
    smoothed = {}
    for csv_path in folder.iterdir():
        smoothed[csv_path.stem] = get_csv_predictions(csv_path)
    return smoothed

In [9]:
def get_all_real(data_set_name):
    """Load the second CSV column of every file in the smoothed-predictions folder.

    The files read are the same ones `get_smoothed_predictions` reads; here the
    column returned by `get_csv_true` is kept instead.

    Args:
        data_set_name: Data set name; its lower-cased form selects the
            sub-folder of `new tuning/smoothed/predictions` that is read.

    Returns:
        Dict mapping each file's stem to the array returned by `get_csv_true`.
    """
    folder = BASE_FOLDER / 'new tuning' / 'smoothed' / 'predictions' / data_set_name.lower()
    real = {}
    for csv_path in folder.iterdir():
        real[csv_path.stem] = get_csv_true(csv_path)
    return real

In [10]:
def get_pullback_data(pullback_name, models_data, smoothed_data, real_data):
    """Gather every series available for a single pullback into one dict.

    Args:
        pullback_name: Key identifying the pullback in all input mappings.
        models_data: Mapping of model key -> {pullback name -> series}.
        smoothed_data: Mapping of pullback name -> smoothed series.
        real_data: Mapping of pullback name -> real series.

    Returns:
        Dict with the entries `'smoothed'` and `'real'` followed by one entry
        per model key, each holding that source's series for the pullback.

    Raises:
        KeyError: If the pullback is missing from any of the mappings.
    """
    series_by_source = {
        'smoothed': smoothed_data[pullback_name],
        'real': real_data[pullback_name],
    }
    for model_key, per_pullback in models_data.items():
        series_by_source[model_key] = per_pullback[pullback_name]
    return series_by_source

In [11]:
def get_all_pullbacks_data(data_set_name):
    """Load model, smoothed and real series and regroup them per pullback.

    Args:
        data_set_name: Data set name forwarded to the three loaders.

    Returns:
        Dict mapping each pullback name found in the real data to the
        per-source dict built by `get_pullback_data`.
    """
    models_data = get_all_model_predictions(data_set_name)
    smoothed_data = get_smoothed_predictions(data_set_name)
    real_data = get_all_real(data_set_name)

    # The real data defines which pullbacks exist; the other sources are
    # indexed by the same names.
    grouped = {}
    for pullback_name in real_data:
        grouped[pullback_name] = get_pullback_data(
            pullback_name, models_data, smoothed_data, real_data
        )
    return grouped

In [12]:
# Load the series of every pullback in the 'val' data set once;
# generate_all_pullback_plots() iterates over this module-level dict.
all_pullbacks_data = get_all_pullbacks_data('val')

In [13]:
def get_style(source):
    """Look up the line-style keyword arguments used to plot a series.

    Args:
        source: A `(model_type, model_id)` tuple for one of the known models,
            or one of the strings `'real'` / `'smoothed'`.

    Returns:
        A new dict with `color`, `label` and `linestyle` entries (plus
        `marker` for `'real'`), or `None` when `source` is not recognised.
    """
    known_styles = (
        (('hp_pr', '57'), {'color': '#1abc9c', 'label': 'PR-57', 'linestyle': 'solid'}),
        (('hp_po', '63'), {'color': '#e67e22', 'label': 'PO-63', 'linestyle': 'solid'}),
        (('hp_tf', '69'), {'color': '#3498db', 'label': 'TF-69', 'linestyle': 'solid'}),
        (('hp_cr', '55'), {'color': '#9b59b6', 'label': 'CR-55', 'linestyle': 'solid'}),
        (('hp_sh', '72'), {'color': '#34495e', 'label': 'SH-72', 'linestyle': 'solid'}),
        (('hp_ro', '33'), {'color': '#7f8c8d', 'label': 'RO-33', 'linestyle': 'solid'}),
        ('real', {'color': '#000000', 'label': 'Real', 'linestyle': 'dashed', 'marker': 'x'}),
        ('smoothed', {'color': '#f1c40f', 'label': 'Smoothing', 'linestyle': 'solid'}),
    )
    # Entries are compared in order with ==, so any value of `source` is accepted.
    for key, style in known_styles:
        if source == key:
            return dict(style)
    return None

In [14]:
def get_pullback_fold_mapping():
    """Map every pullback name to the fold whose `VAL` folder contains it.

    The mapping is derived from the prediction folders of model
    `('hp_sh', '72')`; each fold folder's stem is converted to `int`.

    Returns:
        Dict of pullback file stem -> integer fold number.
    """
    model_type, model_id = 'hp_sh', '72'
    fold_root = RESULT_FOLDER / model_type / f'model_{model_id}' / 'predictions'
    fold_of_pullback = {}
    for fold_dir in fold_root.iterdir():
        for csv_path in (fold_dir / 'VAL').iterdir():
            fold_of_pullback[csv_path.stem] = int(fold_dir.stem)
    return fold_of_pullback

In [15]:
def get_theta(pullback_name):
    """Return the theta value of the fold that contains `pullback_name`.

    Args:
        pullback_name: Pullback name as it appears in
            `get_pullback_fold_mapping()`.

    Returns:
        The entry of the per-fold theta table for the pullback's fold
        (fold numbers are treated as 1-based).

    Raises:
        KeyError: If the pullback is not present in the fold mapping.
        IndexError: If the fold number is outside `1..len(theta_values)`.
    """
    # One value per fold; position i holds the value used for fold i + 1.
    # NOTE(review): provenance of these numbers is not visible here — confirm.
    theta_values = [0.398990,0.213131,0.447475,0.245455,0.334343,0.132323,0.342424,0.188889,0.269697,0.237374]
    fold = get_pullback_fold_mapping()[pullback_name]
    # Reject fold <= 0 explicitly: a negative index would otherwise wrap around
    # and silently return the theta of a different fold.
    if not 1 <= fold <= len(theta_values):
        raise IndexError(
            f'fold {fold} of pullback {pullback_name!r} is outside 1..{len(theta_values)}'
        )
    return theta_values[fold - 1]

In [261]:
def generate_rnn_pullback_plot(axis, pullback_name, pullback_data):
    """Draw the real series and every `BEST_MODELS` series on `axis`.

    A dashed green reference line is drawn at y = 0.5 and the y-range is fixed
    to [-0.1, 1.1].

    Args:
        axis: Matplotlib axes to draw on.
        pullback_name: Unused here; kept so the signature matches
            `generate_cnn_pullback_plot`.
        pullback_data: Per-source series dict; must contain `'real'` and one
            entry per `BEST_MODELS` key.
    """
    n_points = len(pullback_data['real'])
    positions = np.arange(n_points)

    axis.set_ylim(ymin=-0.1, ymax=1.1)
    axis.hlines(
        y=0.5,
        xmin=0,
        xmax=n_points - 1,
        colors='g',
        linestyle='dashed',
        lw=1,
        label='0.5',
    )
    axis.margins(x=0.01)

    # The real series goes first, followed by the models in BEST_MODELS order.
    for source in ('real', *BEST_MODELS):
        axis.plot(positions, pullback_data[source], **get_style(source))

In [268]:
def generate_cnn_pullback_plot(axis, pullback_name, pullback_data):
    """Draw the real and smoothed series of one pullback on `axis`.

    A dashed green line marks the theta value returned by `get_theta` for this
    pullback, and the y-range is fixed to [-0.1, 1.1].

    Args:
        axis: Matplotlib axes to draw on.
        pullback_name: Pullback name, used to look up its theta value.
        pullback_data: Per-source series dict; must contain `'real'` and
            `'smoothed'`.
    """
    n_points = len(pullback_data['real'])
    positions = np.arange(n_points)
    theta = get_theta(pullback_name)

    axis.set_ylim(ymin=-0.1, ymax=1.1)
    axis.hlines(
        y=theta,
        xmin=0,
        xmax=n_points - 1,
        colors='g',
        linestyle='dashed',
        lw=1,
        label=rf'$\theta$={theta:.3f}',
    )
    axis.margins(x=0.01)

    for source in ('real', 'smoothed'):
        axis.plot(positions, pullback_data[source], **get_style(source))

In [269]:
def labels_order(label):
    """Return the sort rank that fixes a legend label's position.

    `'Real'` and `'Smoothing'` are matched exactly (ranks 0 and 1); the theta
    label and the model labels are matched by substring (ranks 2-8, first
    match wins); `'0.5'` is matched exactly (rank 9).

    Args:
        label: Legend label text.

    Returns:
        The integer rank, or `None` if the label matches nothing.
    """
    if label == 'Real':
        return 0
    if label == 'Smoothing':
        return 1
    fragments = ('theta', 'PR', 'PO', 'TF', 'CR', 'SH', 'RO')
    for rank, fragment in enumerate(fragments, start=2):
        if fragment in label:
            return rank
    return 9 if label == '0.5' else None

In [270]:
def extract_handles_and_labels(figure):
    """Merge the legend entries of all axes in `figure` into one ordered list.

    Entries are sorted by `labels_order`. A label that occurs on several axes
    is listed once, paired with the handle from the last axis that used it.

    Args:
        figure: Object exposing an `axes` sequence whose items provide
            `get_legend_handles_labels()`.

    Returns:
        Tuple `(handles, labels)` of two equally long lists; both are empty
        when the figure has no axes or no labelled artists.
    """
    # Collect (label, handle) pairs axis by axis. This also copes with a figure
    # without axes and avoids repeated list concatenation.
    pairs = []
    for axis in figure.axes:
        axis_handles, axis_labels = axis.get_legend_handles_labels()
        pairs.extend(zip(axis_labels, axis_handles))

    # The sort is stable, so entries with equal rank keep their axis order.
    pairs.sort(key=lambda pair: labels_order(pair[0]))

    # Building the dict removes duplicate labels: the first occurrence fixes
    # the position, the last occurrence supplies the handle.
    unique = OrderedDict(pairs)
    return list(unique.values()), list(unique.keys())

In [328]:
def generate_pullback_plots(pullback_name, pullback_data):
    """Render the two-panel figure of one pullback and save it as a PDF.

    The upper panel is drawn by `generate_cnn_pullback_plot`, the lower one by
    `generate_rnn_pullback_plot`; both share their x and y axes. The file is
    written to `RESULT_FOLDER/assets/pullback_plots/<pullback_name>.pdf`.

    Args:
        pullback_name: Pullback name, used in the title and the file name.
        pullback_data: Per-source series dict for this pullback.
    """
    figure = plt.figure(figsize=(16, 10), dpi=144)
    try:
        gridspec = figure.add_gridspec(2, hspace=0)
        (axis_1, axis_2) = gridspec.subplots(sharex=True, sharey=True)
        figure.suptitle(f'Pullback {pullback_name}', y=.91)

        generate_cnn_pullback_plot(axis_1, pullback_name, pullback_data)
        generate_rnn_pullback_plot(axis_2, pullback_name, pullback_data)

        handles, labels = extract_handles_and_labels(figure)
        figure.legend(handles, labels, loc='upper left', bbox_to_anchor=(0.9, 0.67))

        # Create the target folder on first use so a fresh checkout does not
        # fail with FileNotFoundError.
        output_folder = RESULT_FOLDER / 'assets' / 'pullback_plots'
        output_folder.mkdir(parents=True, exist_ok=True)
        figure.savefig(output_folder / f'{pullback_name}.pdf', bbox_inches='tight')
    finally:
        # Always release this figure, even if plotting or saving failed;
        # the function is called in a loop, once per pullback.
        plt.close(figure)

In [329]:
def generate_all_pullback_plots():
    """Set the global font sizes, then save one plot per loaded pullback.

    Reads the module-level `all_pullbacks_data` dict and calls
    `generate_pullback_plots` for each of its entries. The `plt.rc` settings
    stay in effect after the function returns.
    """
    rc_settings = (
        ('font', {'size': 15}),
        ('xtick', {'labelsize': 12}),
        ('ytick', {'labelsize': 12}),
    )
    for group, options in rc_settings:
        plt.rc(group, **options)

    for name in all_pullbacks_data:
        generate_pullback_plots(name, all_pullbacks_data[name])

In [330]:
# Render and save one PDF per pullback in all_pullbacks_data.
generate_all_pullback_plots()