### This notebook contains the code for comparing the estimation values, the time required to obtain them, and the expected performance profiles of the resulting stopping methods

In [None]:
import os, sys, json

import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.ticker import FormatStrFormatter, MultipleLocator

%matplotlib inline

In [None]:
# ---------------------------------------------------------------------------
# Datasets
# ---------------------------------------------------------------------------

# Every dataset for which precalculated results are looked up.
ALL_DATASETS = ['midv500', 'midv2019', 'ic15', 'yvt']

# Each inner list is pooled into one panel of the corresponding figure.
ESTIMATION_PLOT_DATASETS = [['midv500', 'midv2019'], ['ic15', 'yvt']]
TIMING_PLOT_DATASETS = [['midv500', 'midv2019'], ['ic15', 'yvt']]

# Datasets shown in the expected-performance-profile figure (one panel each).
# Swap the two lines below to produce the figure for the other pair.
# EPPS_DATASETS = ['midv500', 'midv2019']
EPPS_DATASETS = ['ic15', 'yvt']

# Y-axis range of the expected-performance-profile panel of each dataset.
EPPS_YLIMITS = dict(
    midv500=[0.06, 0.125],
    midv2019=[0.09, 0.25],
    ic15=[0.15, 0.35],
    yvt=[0.195, 0.24],
)

# Human-readable dataset names used in titles.
DATASET_LABELS = dict(
    midv500='MIDV-500',
    midv2019='MIDV-2019',
    ic15='IC15-Train',
    yvt='YVT',
)

# ---------------------------------------------------------------------------
# Methods
# ---------------------------------------------------------------------------

# Order in which the methods are drawn (and therefore listed in legends).
METHODS = ['summation', 'treap', 'base']

# PRECALC_DIRECTORIES[dataset][method] -> directory with precalculated results.
PRECALC_DIRECTORIES = {}
for dataset in ALL_DATASETS:
    PRECALC_DIRECTORIES[dataset] = {
        method: './precalc_%s_%s' % (method, dataset)
        for method in ('base', 'summation', 'treap')
    }

# ---------------------------------------------------------------------------
# Per-method line appearance
# ---------------------------------------------------------------------------

PLOT_COLOR = dict(base='0.0', summation='0.5', treap='0.2')
PLOT_LINESTYLE = dict(base='-', summation='--', treap=':')
PLOT_MARKER = dict(base=None, summation='o', treap=None)
PLOT_MARKERSIZE = dict(base=None, summation=6, treap=None)
PLOT_LINEWIDTH = dict(base=1.5, summation=1.5, treap=2.0)

# Legend text for each method.
PLOT_LABEL = dict(
    base='Base method',
    summation='Method A',
    treap='Method B',
)

#### Comparing the estimation values

In [None]:
def collect_estimation_datapoints(method, dataset):
    '''
    Collects the mean estimation value per stage for one method and dataset.

    Every regular file in PRECALC_DIRECTORIES[dataset][method] is parsed as
    JSON and must hold at least 30 records. For stage i (0-based) element [1]
    of record i is read and converted to (SMALL_DELTA + value) / (i + 2), the
    same quantity collect_modelling_stopper_epp compares with its threshold.
    The converted values are averaged over all files.

    Returns a tuple (x, y, n_files):
        x       -- stage numbers 1.0 .. 30.0
        y       -- mean converted estimation value for each stage
        n_files -- number of files averaged (callers use it as a weight
                   when pooling several datasets)

    Raises ValueError if the directory contains no files.
    '''
    precalc_dir = PRECALC_DIRECTORIES[dataset][method]
    SMALL_DELTA = 0.1
    STAGES = 30

    # os.listdir returns entries in arbitrary order; sorting makes the
    # floating-point accumulation below reproducible between runs.
    # Sub-directories (e.g. .ipynb_checkpoints) are skipped instead of
    # crashing open().
    candidates = (os.path.join(precalc_dir, name) for name in os.listdir(precalc_dir))
    precalc_files = sorted(path for path in candidates if os.path.isfile(path))
    if not precalc_files:
        # Without this check the averaging below fails with a bare
        # ZeroDivisionError that does not say which directory is at fault.
        raise ValueError('No precalculated files found in %r' % precalc_dir)

    x = [1.0 * (i + 1) for i in range(STAGES)]
    y = [0.0] * STAGES

    for precalc_file in precalc_files:
        with open(precalc_file) as js:
            precalc_data = json.load(js)
        for i in range(STAGES):
            y[i] += (SMALL_DELTA + precalc_data[i][1]) / (i + 2)

    y = [total / len(precalc_files) for total in y]

    return x, y, len(precalc_files)

In [None]:
plt.rcParams['figure.figsize'] = (8, 4)
plt.rcParams.update({'font.size': 12})

# Create the figure and one Axes per dataset group explicitly.
# NOTE: the previous plt.clf() / plt.cla() preamble implicitly created a
# full-figure Axes (cla() calls gca()). Recent Matplotlib releases no longer
# remove Axes that a later plt.subplot() overlaps, so that Axes stayed behind
# the panels as a stray frame in the saved figure.
fig, axes = plt.subplots(1, len(ESTIMATION_PLOT_DATASETS), squeeze=False)

for i_plot, plot in enumerate(ESTIMATION_PLOT_DATASETS):
    ax = axes[0][i_plot]
    ax.set_title(('%s) ' % chr(ord('a') + i_plot)) + ' and '.join([DATASET_LABELS[dataset] for dataset in plot]))

    X = {method: None for method in METHODS}  # stage numbers per method
    Y = {method: None for method in METHODS}  # pooled sums, later pooled means
    D = {method: 0 for method in METHODS}     # total number of pooled files

    # Pool the datasets of this panel: each per-dataset mean is weighted by
    # the number of files it was computed from.
    for dataset in plot:
        for method in METHODS:
            x, y, d = collect_estimation_datapoints(method, dataset)
            X[method] = x
            D[method] += d
            weighted = [value * d for value in y]
            if Y[method] is None:
                Y[method] = weighted
            else:
                Y[method] = [acc + value for acc, value in zip(Y[method], weighted)]

    for method in METHODS:
        Y[method] = [total / D[method] for total in Y[method]]

    ax.xaxis.set_minor_locator(MultipleLocator(1))
    ax.set_xticks([1] + [5 * (i + 1) for i in range(6)])
    ax.yaxis.set_major_locator(MultipleLocator(0.01))
    ax.grid(which='minor', alpha=0.3)
    ax.grid(which='major', alpha=0.6)

    for method in METHODS:
        ax.plot(X[method], Y[method],
                label=PLOT_LABEL[method],
                color=PLOT_COLOR[method],
                linestyle=PLOT_LINESTYLE[method],
                marker=PLOT_MARKER[method],
                markersize=PLOT_MARKERSIZE[method],
                linewidth=PLOT_LINEWIDTH[method])

    ax.set_xlim([1, 30])
    ax.set_ylim([0.005, 0.065])

    ax.legend(loc='upper right', prop={'size': 11})

    ax.set_xlabel(r'Number of processed frame results')
    ax.set_ylabel('Mean estimation value')

fig.tight_layout(w_pad=1, h_pad=0)
fig.savefig('estimations-composite.pdf', dpi=1200, bbox_inches='tight', pad_inches=0)

#### Comparing the estimation time

In [None]:
def collect_timing_datapoints(method, dataset):
    '''
    Collects the mean time per stage for one method and dataset.

    Every regular file in PRECALC_DIRECTORIES[dataset][method] is parsed as
    JSON and must hold at least 30 records. For stage i (0-based) the sum of
    elements [2] and [3] of record i is taken as the time of that stage
    (presumably two timing components of one decision -- confirm against the
    code that writes the precalculated files) and averaged over all files.

    Returns a tuple (x, y, n_files):
        x       -- stage numbers 1.0 .. 30.0
        y       -- mean time for each stage
        n_files -- number of files averaged (callers use it as a weight
                   when pooling several datasets)

    Raises ValueError if the directory contains no files.
    '''
    precalc_dir = PRECALC_DIRECTORIES[dataset][method]
    STAGES = 30

    # os.listdir returns entries in arbitrary order; sorting makes the
    # floating-point accumulation below reproducible between runs.
    # Sub-directories (e.g. .ipynb_checkpoints) are skipped instead of
    # crashing open().
    candidates = (os.path.join(precalc_dir, name) for name in os.listdir(precalc_dir))
    precalc_files = sorted(path for path in candidates if os.path.isfile(path))
    if not precalc_files:
        # Without this check the averaging below fails with a bare
        # ZeroDivisionError that does not say which directory is at fault.
        raise ValueError('No precalculated files found in %r' % precalc_dir)

    x = [1.0 * (i + 1) for i in range(STAGES)]
    y = [0.0] * STAGES

    for precalc_file in precalc_files:
        with open(precalc_file) as js:
            precalc_data = json.load(js)
        for i in range(STAGES):
            y[i] += precalc_data[i][2] + precalc_data[i][3]

    y = [total / len(precalc_files) for total in y]

    return x, y, len(precalc_files)

In [None]:
plt.rcParams['figure.figsize'] = (8, 4)
plt.rcParams.update({'font.size': 12})

# Create the figure and one Axes per dataset group explicitly.
# NOTE: the previous plt.clf() / plt.cla() preamble implicitly created a
# full-figure Axes (cla() calls gca()). Recent Matplotlib releases no longer
# remove Axes that a later plt.subplot() overlaps, so that Axes stayed behind
# the panels as a stray frame in the saved figure.
fig, axes = plt.subplots(1, len(TIMING_PLOT_DATASETS), squeeze=False)

for i_plot, plot in enumerate(TIMING_PLOT_DATASETS):
    ax = axes[0][i_plot]
    ax.set_title(('%s) ' % chr(ord('a') + i_plot)) + ' and '.join([DATASET_LABELS[dataset] for dataset in plot]))

    X = {method: None for method in METHODS}  # stage numbers per method
    Y = {method: None for method in METHODS}  # pooled sums, later pooled means
    D = {method: 0 for method in METHODS}     # total number of pooled files

    # Pool the datasets of this panel: each per-dataset mean is weighted by
    # the number of files it was computed from.
    for dataset in plot:
        for method in METHODS:
            x, y, d = collect_timing_datapoints(method, dataset)
            X[method] = x
            D[method] += d
            weighted = [value * d for value in y]
            if Y[method] is None:
                Y[method] = weighted
            else:
                Y[method] = [acc + value for acc, value in zip(Y[method], weighted)]

    for method in METHODS:
        Y[method] = [total / D[method] for total in Y[method]]

    ax.xaxis.set_minor_locator(MultipleLocator(1))
    ax.set_xticks([1] + [5 * (i + 1) for i in range(6)])
    ax.grid(which='minor', alpha=0.3)
    ax.grid(which='major', alpha=0.6)

    for method in METHODS:
        ax.plot(X[method], Y[method],
                label=PLOT_LABEL[method],
                color=PLOT_COLOR[method],
                linestyle=PLOT_LINESTYLE[method],
                marker=PLOT_MARKER[method],
                markersize=PLOT_MARKERSIZE[method],
                linewidth=PLOT_LINEWIDTH[method])

    ax.set_xlim([1, 30])
    ax.set_ylim([-0.05, 0.65])

    ax.legend(loc='upper right', prop={'size': 11})

    ax.set_xlabel(r'Number of processed frame results')
    ax.set_ylabel('Mean time in sec. per decision')

fig.tight_layout(w_pad=1, h_pad=0)
fig.savefig('timing-composite.pdf', dpi=1200, bbox_inches='tight', pad_inches=0)

In [None]:
# Text version of the timing comparison: one table per dataset group,
# one row per method, one column per selected number of processed frames.
for table_number, group in enumerate(TIMING_PLOT_DATASETS, start=1):
    group_title = ' and '.join(DATASET_LABELS[name] for name in group)
    print('Table %d: ' % table_number + group_title)

    # Numbers of processed frames reported in the table columns.
    reported_counts = [5, 10, 15, 20, 25]

    frame_counts = {}   # method -> stage numbers
    weighted_sums = {}  # method -> per-stage sum of (mean * number of files)
    file_totals = {}    # method -> total number of pooled files

    # Pool the datasets of the group, weighting every per-dataset mean by
    # the number of files it was computed from.
    for name in group:
        for method in METHODS:
            stages, means, n_files = collect_timing_datapoints(method, name)
            frame_counts[method] = stages
            file_totals[method] = file_totals.get(method, 0) + n_files
            contribution = [mean * n_files for mean in means]
            if method in weighted_sums:
                weighted_sums[method] = [
                    acc + value for acc, value in zip(weighted_sums[method], contribution)
                ]
            else:
                weighted_sums[method] = contribution

    header_cells = ['n = %02d' % count for count in reported_counts]
    print('          ' + ' ; '.join(header_cells))

    for method in ('base', 'summation', 'treap'):
        pooled_means = [total / file_totals[method] for total in weighted_sums[method]]
        row_cells = [
            '%.3f' % mean
            for stage, mean in zip(frame_counts[method], pooled_means)
            if stage in reported_counts
        ]
        print('% 9s  ' % method + '    '.join(row_cells))

    print('')
    

#### Comparing expected performance profiles

In [None]:
def collect_counting_stopper_epp(dataset):
    '''
    Collects the expected performance profile of a simple stopper which
    stops after a fixed number of processed frames.

    Every regular file in PRECALC_DIRECTORIES[dataset]['base'] is parsed as
    JSON and must hold at least 30 records. Element [0] of record i is used
    as the error level obtained when stopping after i + 1 frames, and is
    averaged over all files.

    Returns a tuple (x, y):
        x -- numbers of processed frames 1.0 .. 30.0
        y -- mean error level for each number of frames

    Raises ValueError if the directory contains no files.
    '''
    precalc_dir = PRECALC_DIRECTORIES[dataset]['base']
    STAGES = 30

    # os.listdir returns entries in arbitrary order; sorting makes the
    # floating-point accumulation below reproducible between runs.
    # Sub-directories (e.g. .ipynb_checkpoints) are skipped instead of
    # crashing open().
    candidates = (os.path.join(precalc_dir, name) for name in os.listdir(precalc_dir))
    precalc_files = sorted(path for path in candidates if os.path.isfile(path))
    if not precalc_files:
        # Without this check the averaging below fails with a bare
        # ZeroDivisionError that does not say which directory is at fault.
        raise ValueError('No precalculated files found in %r' % precalc_dir)

    x = [1.0 * (i + 1) for i in range(STAGES)]
    y = [0.0] * STAGES

    for precalc_file in precalc_files:
        with open(precalc_file) as js:
            precalc_data = json.load(js)
        for i in range(STAGES):
            y[i] += precalc_data[i][0]

    y = [total / len(precalc_files) for total in y]

    return x, y

def collect_modelling_stopper_epp(method, dataset):
    '''
    Collects the expected performance profile for a next combination result
    modelling stopping method.

    Every regular file in PRECALC_DIRECTORIES[dataset][method] is parsed as
    JSON and must hold at least 30 records. For stage i (0-based) the
    stopping criterion is delta_i = (SMALL_DELTA + record[i][1]) / (i + 2)
    and the error level obtained when stopping there is record[i][0].

    For each of 300 thresholds evenly spaced in [-0.001, 0.15] every clip is
    stopped at the first permitted stage with delta_i <= threshold, or after
    30 frames if no stage satisfies it. The number of processed frames and
    the error level at the stopping stage are averaged over all clips.

    Returns a tuple (x, y) with one point per threshold:
        x -- mean number of processed frames
        y -- mean error level

    Raises ValueError if the directory contains no files.
    '''
    precalc_dir = PRECALC_DIRECTORIES[dataset][method]
    SMALL_DELTA = 0.1
    STAGES = 30

    DATAPOINTS_COUNT = 300
    MIN_THRESHOLD = -0.001
    MAX_THRESHOLD = 0.15
    THRESHOLDS = [MIN_THRESHOLD + (MAX_THRESHOLD - MIN_THRESHOLD) * i / (DATAPOINTS_COUNT - 1) \
                  for i in range(DATAPOINTS_COUNT)]

    # os.listdir returns entries in arbitrary order; sorting makes the
    # floating-point accumulation below reproducible between runs.
    # Sub-directories (e.g. .ipynb_checkpoints) are skipped instead of
    # crashing open().
    candidates = (os.path.join(precalc_dir, name) for name in os.listdir(precalc_dir))
    precalc_files = sorted(path for path in candidates if os.path.isfile(path))
    if not precalc_files:
        # Without this check the averaging below fails with a bare
        # ZeroDivisionError that does not say which directory is at fault.
        raise ValueError('No precalculated files found in %r' % precalc_dir)

    # The per-stage criterion and error level do not depend on the threshold,
    # so they are computed once per clip instead of once per threshold.
    clips = []
    for precalc_file in precalc_files:
        with open(precalc_file) as js:
            precalc_data = json.load(js)
        deltas = [(SMALL_DELTA + precalc_data[i][1]) / (i + 2) for i in range(STAGES)]
        errors = [precalc_data[i][0] for i in range(STAGES)]
        clips.append((deltas, errors))

    x = []
    y = []

    for threshold in THRESHOLDS:
        # Every generated threshold is <= MAX_THRESHOLD (0.15), so this is
        # always 1 here: stopping right after the first frame is never allowed.
        clip_start = 1 if threshold <= 1.0 else 0

        sum_clip_length = 0.0
        sum_error_level = 0.0

        for deltas, errors in clips:
            # First permitted stage meeting the threshold. If there is none,
            # the clip runs to the last stage, and the error is read from
            # that same stage (index STAGES - 1) so that it always matches
            # the 30 frames charged, even if a file holds more records.
            stop_stage = next(
                (i for i in range(clip_start, STAGES) if deltas[i] <= threshold),
                STAGES - 1,
            )
            sum_clip_length += stop_stage + 1
            sum_error_level += errors[stop_stage]

        x.append(sum_clip_length / len(clips))
        y.append(sum_error_level / len(clips))

    return x, y

In [None]:
plt.rcParams['figure.figsize'] = (14, 4)
plt.rcParams.update({'font.size': 12})

# Line widths used only by this figure. They are kept under their own name:
# rebinding the shared PLOT_LINEWIDTH here would silently change the line
# widths of the estimation and timing figures whenever those cells are
# re-run after this one.
EPPS_LINEWIDTH = { 'base': 2.0, 'summation': 2.0, 'treap': 2.5 }

# Create the figure and one Axes per dataset explicitly.
# NOTE: the previous plt.clf() / plt.cla() preamble implicitly created a
# full-figure Axes (cla() calls gca()). Recent Matplotlib releases no longer
# remove Axes that a later plt.subplot() overlaps, so that Axes stayed behind
# the panels as a stray frame in the saved figure.
fig, axes = plt.subplots(1, len(EPPS_DATASETS), squeeze=False)

for i_dataset, dataset in enumerate(EPPS_DATASETS):
    ax = axes[0][i_dataset]
    ax.set_title(('%s) ' % chr(ord('a') + i_dataset)) + DATASET_LABELS[dataset])

    ax.xaxis.set_minor_locator(MultipleLocator(0.5))
    ax.xaxis.set_major_locator(MultipleLocator(2))

    # Reference curve: stopping after a fixed number of frames.
    c_x, c_y = collect_counting_stopper_epp(dataset)
    ax.plot(c_x, c_y, label=r'Stopping at fixed stage', color='0.5', linestyle='--', linewidth=1)

    # One profile per modelling-based stopping method.
    for method in METHODS:
        x, y = collect_modelling_stopper_epp(method, dataset)
        ax.plot(x, y,
                label=PLOT_LABEL[method],
                color=PLOT_COLOR[method],
                linestyle=PLOT_LINESTYLE[method],
                linewidth=EPPS_LINEWIDTH[method])

    ax.set_xlim([1, 20])
    ax.set_ylim(EPPS_YLIMITS[dataset])
    ax.grid()
    ax.legend()

    ax.set_xlabel(r'Mean number of frames')
    ax.set_ylabel('Mean error level')
    ax.xaxis.set_major_formatter(FormatStrFormatter('%.2f'))

fig.savefig('epps_%s.pdf' % '_'.join(EPPS_DATASETS), dpi=1200, bbox_inches='tight', pad_inches=0)