In [1]:
import matplotlib
%matplotlib notebook
import matplotlib.pyplot as plt
import numpy as np
import os
import json

In [2]:
def visualize_case(files, title, show_methods=None, name_map={}, sizes=None):
    """Draw one line per method: its score as a function of the training-set size.

    Parameters
    ----------
    files : list of str
        Paths of JSON result files. The file used for a given size is the one
        whose path contains ``train_cnt<size>`` not followed by another digit.
        If several paths match, the last one in ``files`` is used.
    title : str
        Figure title.
    show_methods : list of str, optional
        Method keys to plot, in plotting order. When None, every key of the
        first loaded result file is plotted, in sorted order.
    name_map : dict, optional
        Maps a method key to the label shown in the legend; keys without an
        entry are shown unchanged. The default dict is only read, never mutated.
    sizes : list, optional
        Training-set sizes in x-axis order. Defaults to 8, 16, 32, ... with one
        size per entry of ``files``.

    Raises
    ------
    ValueError
        If no path in ``files`` matches one of the sizes.
    KeyError
        If a plotted method is missing from a result file, or its entry has
        neither a 'mean' nor a 'test_score' field.
    """
    import re  # local import: only the file lookup below needs it

    plt.figure(figsize=(8,6))

    if sizes is None:
        sizes = [2**(3+i) for i in range(len(files))]

    datas = []
    for ns in sizes:
        # The negative lookahead keeps size 1 from matching 'train_cnt16',
        # which a plain substring test would accept.
        pattern = re.compile(r'train_cnt{}(?!\d)'.format(re.escape(str(ns))))
        found = None
        for path in files:
            if pattern.search(path):
                found = path
        if found is None:
            raise ValueError(
                "no result file containing 'train_cnt{}' among the {} given paths".format(ns, len(files)))
        with open(found, 'r') as f:
            datas.append(json.load(f))

    if show_methods is None:
        # Default: every method of the first result file (none if nothing was loaded).
        show_methods = sorted(datas[0].keys()) if datas else []

    legend = []
    for k in show_methods:
        # An entry stores its score under 'mean' or, failing that, 'test_score'.
        mas = [data[k]['mean'] if 'mean' in data[k] else data[k]['test_score'] for data in datas]
        plt.plot(mas)
        legend.append(name_map.get(k, k))

    plt.legend(legend)
    # Points are plotted at x = 0, 1, 2, ...; label those positions with the sizes.
    plt.xticks(range(len(sizes)), sizes)
    plt.title(title)
    plt.xlabel('number of samples')
    plt.ylabel('average negative log-likelihood')
    plt.show()

In [3]:
def visualize_case_barplot(files, title, show_methods=None, name_map={}, sizes=None, yrange=None):
    """Draw grouped bars: one group per training-set size, one bar per method.

    Parameters
    ----------
    files : list of str
        Paths of JSON result files. The file used for a given size is the one
        whose path contains ``train_cnt<size>`` not followed by another digit.
        If several paths match, the last one in ``files`` is used.
    title : str
        Figure title.
    show_methods : list of str, optional
        Method keys to plot, in bar order inside each group. When None, every
        key of the first loaded result file is plotted, in sorted order.
    name_map : dict, optional
        Maps a method key to the label shown in the legend; keys without an
        entry are shown unchanged. The default dict is only read, never mutated.
    sizes : list, optional
        Training-set sizes in group order. Defaults to 8, 16, 32, ... with one
        size per entry of ``files``.
    yrange : (low, high), optional
        Limits of the y axis; left to matplotlib when None.

    Raises
    ------
    ValueError
        If no path in ``files`` matches one of the sizes.

    Notes
    -----
    A method missing from a result file, or whose score exceeds 1e4, gets a bar
    height of ``np.inf``.
    """
    import re  # local import: only the file lookup below needs it

    plt.figure(figsize=(8,6))

    if sizes is None:
        sizes = [2**(3+i) for i in range(len(files))]

    datas = []
    for ns in sizes:
        # The negative lookahead keeps size 1 from matching 'train_cnt16',
        # which a plain substring test would accept.
        pattern = re.compile(r'train_cnt{}(?!\d)'.format(re.escape(str(ns))))
        found = None
        for path in files:
            if pattern.search(path):
                found = path
        if found is None:
            raise ValueError(
                "no result file containing 'train_cnt{}' among the {} given paths".format(ns, len(files)))
        with open(found, 'r') as f:
            datas.append(json.load(f))

    if show_methods is None:
        # Default: every method of the first result file (none if nothing was loaded).
        show_methods = sorted(datas[0].keys()) if datas else []

    width = 0.2
    score_cap = 1e4  # scores above this are replaced by np.inf
    n_methods = len(show_methods)
    # A group is n_methods bars wide and is followed by a gap half that wide.
    shift = n_methods / 2.0 * width
    block_len = n_methods * width
    # Multiplying (instead of np.arange with a step) also works when the step is 0.
    begin_xs = np.arange(len(sizes)) * (block_len + shift)

    legend = []
    for index, k in enumerate(show_methods):
        mas = []
        for data in datas:
            if k not in data:
                mas.append(np.inf)
                continue
            entry = data[k]
            # An entry stores its score under 'mean' or, failing that, 'test_score'.
            x = entry['mean'] if 'mean' in entry else entry['test_score']
            mas.append(np.inf if x > score_cap else x)
        plt.bar(begin_xs + index * width, mas, width=width, align='center')
        legend.append(name_map.get(k, k))

    if yrange is not None:
        plt.ylim(ymin=yrange[0], ymax=yrange[1])
    plt.legend(legend)
    # Bars are centred on begin_xs + index * width, so the middle of a group is
    # (n_methods - 1) / 2 bar widths to the right of its first bar.
    plt.xticks(begin_xs + (n_methods - 1) / 2.0 * width, sizes)
    plt.title(title)
    plt.xlabel('number of samples')
    plt.ylabel('average negative log-likelihood')
    plt.show()

#     plt.ylim(0.9*data['Ground Truth']['mean'])
#     plt.xticks(range(len(keys)), keys, rotation=90)
#     for i, v in enumerate(values):
#         plt.text(i - 0.4, v+1, '{:.2f}'.format(v))        

# List result files

In [4]:
# Result files of the syn_nglf_buckets runs (nt10, snr5.00), used below for the
# "sudden change" plots. The three lists differ only in the m/bs part of the
# file name (A1: m8.bs16, A2: m32.bs4, A3: m4.bs32); inside a list only the
# train_cnt value changes (8, 16, 32, 64, 128).
# The entries are not sorted by train_cnt; visualize_case selects a file by its
# 'train_cnt<N>' substring, so the order inside a list does not matter to it.
syn_sudden_buckets_A1 = [
    'results/syn_nglf_buckets.nt10.m8.bs16.train_cnt16.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets.nt10.m8.bs16.train_cnt32.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets.nt10.m8.bs16.train_cnt8.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets.nt10.m8.bs16.train_cnt64.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets.nt10.m8.bs16.train_cnt128.val_cnt16.test_cnt100.snr5.00.results.json'
]

syn_sudden_buckets_A2 = [
    'results/syn_nglf_buckets.nt10.m32.bs4.train_cnt16.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets.nt10.m32.bs4.train_cnt32.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets.nt10.m32.bs4.train_cnt8.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets.nt10.m32.bs4.train_cnt64.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets.nt10.m32.bs4.train_cnt128.val_cnt16.test_cnt100.snr5.00.results.json'
]

syn_sudden_buckets_A3 = [
    'results/syn_nglf_buckets.nt10.m4.bs32.train_cnt16.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets.nt10.m4.bs32.train_cnt8.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets.nt10.m4.bs32.train_cnt32.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets.nt10.m4.bs32.train_cnt64.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets.nt10.m4.bs32.train_cnt128.val_cnt16.test_cnt100.snr5.00.results.json'
]

In [5]:
# Result files of the syn_nglf_ts runs (nt30, snr5.00) for the m8.bs16 (A1) and
# m32.bs4 (A2) settings; only train_cnt 8, 16 and 32 are listed.
# NOTE(review): neither list is passed to a plotting call in the visible cells —
# confirm whether they are still needed.
syn_smooth_timeseries_A1 = [
    'results/syn_nglf_ts.nt30.m8.bs16.train_cnt16.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_ts.nt30.m8.bs16.train_cnt32.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_ts.nt30.m8.bs16.train_cnt8.val_cnt16.test_cnt100.snr5.00.results.json'
]

syn_smooth_timeseries_A2 = [
    'results/syn_nglf_ts.nt30.m32.bs4.train_cnt16.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_ts.nt30.m32.bs4.train_cnt32.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_ts.nt30.m32.bs4.train_cnt8.val_cnt16.test_cnt100.snr5.00.results.json'
]

In [6]:
# Result files of the syn_nglf_buckets_smooth runs (nt10, snr5.00), used below
# for the "smooth change" plots. The three lists differ only in the m/bs part of
# the file name (A1: m8.bs16, A2: m32.bs4, A3: m4.bs32); inside a list only the
# train_cnt value changes (8, 16, 32, 64, 128), in no particular order.
syn_smooth_buckets_A1 = [
    'results/syn_nglf_buckets_smooth.nt10.m8.bs16.train_cnt16.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets_smooth.nt10.m8.bs16.train_cnt32.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets_smooth.nt10.m8.bs16.train_cnt8.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets_smooth.nt10.m8.bs16.train_cnt64.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets_smooth.nt10.m8.bs16.train_cnt128.val_cnt16.test_cnt100.snr5.00.results.json'
]

syn_smooth_buckets_A2 = [
    'results/syn_nglf_buckets_smooth.nt10.m32.bs4.train_cnt16.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets_smooth.nt10.m32.bs4.train_cnt32.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets_smooth.nt10.m32.bs4.train_cnt8.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets_smooth.nt10.m32.bs4.train_cnt64.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets_smooth.nt10.m32.bs4.train_cnt128.val_cnt16.test_cnt100.snr5.00.results.json'
]

syn_smooth_buckets_A3 = [
    'results/syn_nglf_buckets_smooth.nt10.m4.bs32.train_cnt16.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets_smooth.nt10.m4.bs32.train_cnt8.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets_smooth.nt10.m4.bs32.train_cnt32.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets_smooth.nt10.m4.bs32.train_cnt64.val_cnt16.test_cnt100.snr5.00.results.json',
    'results/syn_nglf_buckets_smooth.nt10.m4.bs32.train_cnt128.val_cnt16.test_cnt100.snr5.00.results.json'
]

In [7]:
# Result files of the stock_day runs (nt20) for nv64 and nv128.
# train_cnt takes the values 3, 6, 12, 24, 48; val_cnt and test_cnt grow with it
# (1, 2, 4, 8, 16), so the plotting cells must pass these sizes explicitly
# instead of relying on the 8, 16, 32, ... default.
stock_day_64 = [
    'results/stock_day.nt20.nv64.train_cnt3.val_cnt1.test_cnt1.results.json',
    'results/stock_day.nt20.nv64.train_cnt6.val_cnt2.test_cnt2.results.json',
    'results/stock_day.nt20.nv64.train_cnt12.val_cnt4.test_cnt4.results.json',
    'results/stock_day.nt20.nv64.train_cnt24.val_cnt8.test_cnt8.results.json',
    'results/stock_day.nt20.nv64.train_cnt48.val_cnt16.test_cnt16.results.json'
]

stock_day_128 = [
    'results/stock_day.nt20.nv128.train_cnt3.val_cnt1.test_cnt1.results.json',
    'results/stock_day.nt20.nv128.train_cnt6.val_cnt2.test_cnt2.results.json',
    'results/stock_day.nt20.nv128.train_cnt12.val_cnt4.test_cnt4.results.json',
    'results/stock_day.nt20.nv128.train_cnt24.val_cnt8.test_cnt8.results.json',
    'results/stock_day.nt20.nv128.train_cnt48.val_cnt16.test_cnt16.results.json'
]

In [8]:
# File lists intended for the "weekly stock returns" plots (train_cnt 3, 6, 12, 24).
# NOTE(review): every path below is a 'stock_day' file and is identical to the
# first four entries of stock_day_64 / stock_day_128 — presumably a copy-paste
# that was never renamed; confirm the intended weekly result file names.
stock_week_64 = [
    'results/stock_day.nt20.nv64.train_cnt3.val_cnt1.test_cnt1.results.json',
    'results/stock_day.nt20.nv64.train_cnt6.val_cnt2.test_cnt2.results.json',
    'results/stock_day.nt20.nv64.train_cnt12.val_cnt4.test_cnt4.results.json',
    'results/stock_day.nt20.nv64.train_cnt24.val_cnt8.test_cnt8.results.json',
]

stock_week_128 = [
    'results/stock_day.nt20.nv128.train_cnt3.val_cnt1.test_cnt1.results.json',
    'results/stock_day.nt20.nv128.train_cnt6.val_cnt2.test_cnt2.results.json',
    'results/stock_day.nt20.nv128.train_cnt12.val_cnt4.test_cnt4.results.json',
    'results/stock_day.nt20.nv128.train_cnt24.val_cnt8.test_cnt8.results.json',
]

# Visualizations

In [8]:
# Sudden-change bucket experiments: one line plot per m/bs configuration.

# Methods drawn, in plotting order.
show_methods = [
    'Ground Truth',
    'T-GLASSO (no reg)',
    'T-GLASSO (more iters)',
    'Linear CorEx (applied bucket-wise)',
    'T-Corex (W)',
    'T-Corex (W, weighted samples)',
]
# Method keys currently switched off (move one into the list above to draw it):
#     'T-GLASSO'
#     'Linear CorEx (applied on whole data)'
#     'T-Corex + priors (W, method 1)'
#     'T-Corex + priors (W, method 2)'
#     'T-Corex + priors (W, method 2, weighted samples)'
#     'T-Corex (W, weighted samples, no init)'
#     'Ledoit-Wolf'
#     'Factor Analysis'
#     'T-Corex (WWT)'
#     'T-Corex (MI)'
#     'T-Corex (Sigma)'
#     'Oracle approximating shrinkage'
#     'Diagonal'
#     'Graphical LASSO (sklearn)'
#     'PCA'
#     'T-GLASSO (more iters, no reg)'

# Legend label overrides: method key -> displayed name.
name_map = {'T-GLASSO (more iters)': 'T-GLASSO'}

visualize_case(files=syn_sudden_buckets_A1,
               title='m=8, bs=16, buckets, sudden change',
               show_methods=show_methods,
               name_map=name_map)
visualize_case(files=syn_sudden_buckets_A2,
               title='m=32, bs=4, buckets, sudden change',
               show_methods=show_methods,
               name_map=name_map)
visualize_case(files=syn_sudden_buckets_A3,
               title='m=4, bs=32, buckets, sudden change',
               show_methods=show_methods,
               name_map=name_map)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [9]:
# Smooth-change bucket experiments: one line plot per m/bs configuration.

# Methods drawn, in plotting order.
show_methods = [
    'Ground Truth',
    'Linear CorEx (applied bucket-wise)',
    'T-GLASSO (no reg)',
    'T-GLASSO (L2)',
    'T-GLASSO (L1)',
    'T-Corex (W, L2)',
    'T-Corex (W, L1)',
    'T-Corex (W, L2, weighted samples)',
    'T-Corex (W, L1, weighted samples)',
]
# Method keys currently switched off (move one into the list above to draw it):
#     'Linear CorEx (applied on whole data)'
#     'PCA'
#     'Graphical LASSO (sklearn)'
#     'Diagonal'
#     'Oracle approximating shrinkage'
#     'Factor Analysis'
#     'Ledoit-Wolf'

# No name_map is given, so the legend shows the method keys unchanged.
visualize_case(files=syn_smooth_buckets_A1,
               title='m=8, bs=16, buckets, smooth change',
               show_methods=show_methods)
visualize_case(files=syn_smooth_buckets_A2,
               title='m=32, bs=4, buckets, smooth change',
               show_methods=show_methods)
visualize_case(files=syn_smooth_buckets_A3,
               title='m=4, bs=32, buckets, smooth change',
               show_methods=show_methods)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [12]:
# Daily stock returns: grouped bar charts for nv=64 and nv=128.

# train_cnt values of the stock result files, in group order.
sizes = [3, 6, 12, 24, 48]

# Methods drawn, in bar order inside each group.
show_methods = [
    'Ledoit-Wolf',
    'T-Corex (W, weighted samples)',
    'Oracle approximating shrinkage',
    'Linear CorEx (applied on whole data)',
    'Graphical LASSO (sklearn)',
    'T-GLASSO (no reg)',
    'PCA',
    'T-GLASSO',
]
# Method keys currently switched off (move one into the list above to draw it):
#     'Factor Analysis'
#     'T-Corex (W, weighted samples, no reg)'
#     'T-Corex (W)'
#     'Diagonal'
#     'Linear CorEx (applied on buckets)'

# The y axis is clipped to a fixed range per figure.
visualize_case_barplot(files=stock_day_64,
                       title='daily stock returns, nv=64',
                       show_methods=show_methods,
                       name_map={},
                       sizes=sizes,
                       yrange=(0,200))
visualize_case_barplot(files=stock_day_128,
                       title='daily stock returns, nv=128',
                       show_methods=show_methods,
                       name_map={},
                       sizes=sizes,
                       yrange=(0,250))

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [9]:
# Weekly stock returns: grouped bar charts, y axis left to matplotlib (yrange=None).
# train_cnt values of the stock result files, in group order.
sizes = [3, 6, 12, 24, 48]

# Methods drawn, in bar order inside each group; commented entries are switched off.
show_methods = ['Ledoit-Wolf',
#                 'Factor Analysis',
                'T-Corex (W, weighted samples)',
#                 'T-Corex (W, weighted samples, no reg)',
#                 'T-Corex (W)',
#                 'Diagonal',
                'Oracle approximating shrinkage',
#                 'Linear CorEx (applied on buckets)',
                'Linear CorEx (applied on whole data)',
                'Graphical LASSO (sklearn)',
                'T-GLASSO (no reg)',
                'PCA',
                'T-GLASSO']

# The first call uses sizes[1:-1], i.e. [6, 12, 24].
# NOTE(review): the second call passes stock_day_128 with all five sizes under a
# 'weekly' title, while stock_week_128 is not used by any visible cell —
# presumably stock_week_128 with a matching sizes slice was intended; confirm.
visualize_case_barplot(stock_week_64, 'weekly stock returns, nv=64', show_methods, {}, sizes[1:-1], yrange=None)
visualize_case_barplot(stock_day_128, 'weekly stock returns, nv=128', show_methods, {}, sizes, yrange=None)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#### Barplots

In [3]:
def bar_plot_baselines(jsonfile, title):
    """Draw one bar per method found in a single JSON result file.

    Parameters
    ----------
    jsonfile : str
        Path of a JSON result file mapping each method name to a dict that
        stores its score under 'mean' or, failing that, 'test_score' (the same
        fallback the line and grouped-bar plots use).
    title : str
        Figure title.

    Raises
    ------
    KeyError
        If an entry has neither a 'mean' nor a 'test_score' field.

    Notes
    -----
    Methods are shown in sorted name order and infinite scores are drawn as 0.
    When the file has a 'Ground Truth' entry, the lower y limit is set to 0.9
    times its score; otherwise the limits are left to matplotlib.
    """
    with open(jsonfile) as f:
        data = json.load(f)

    def score_of(entry):
        # Some result entries only carry 'test_score'.
        return entry['mean'] if 'mean' in entry else entry['test_score']

    keys = sorted(data.keys())
    values = [score_of(data[k]) for k in keys]
    values = [0 if np.isinf(x) else x for x in values]

    plt.figure(figsize=(9,7))
    plt.bar(range(len(keys)), values, width=0.6)
    if 'Ground Truth' in data:
        # A single positional argument sets only the lower limit.
        plt.ylim(0.9*score_of(data['Ground Truth']))
    plt.xticks(range(len(keys)), keys, rotation=90)
    # Print each value just above its bar.
    for i, v in enumerate(values):
        plt.text(i - 0.4, v+1, '{:.2f}'.format(v))
    plt.tight_layout()
    plt.title(title)
    # Re-open some space at the top for the title after tight_layout.
    plt.subplots_adjust(top=0.9)
    plt.show()
        

In [None]:
# NOTE(review): A1_C3 is not defined in any visible cell of this notebook —
# presumably it came from a cell that was later removed; confirm before re-running.
bar_plot_baselines(A1_C3[0], "m=8,bs=16 time-series")

In [39]:
# NOTE(review): A2_C3 is not defined in any visible cell of this notebook —
# presumably it came from a cell that was later removed; confirm before re-running.
bar_plot_baselines(A2_C3[1], "m=32,bs=4 time-series")

<IPython.core.display.Javascript object>

In [46]:
# Result files of the change_structure.syn_nglf_buckets runs (nt10, m8.bs16).
# Unlike the other synthetic file lists, these names carry min_cor0.60/max_cor1.00
# instead of an snr value; only train_cnt 8, 16 and 32 are listed.
A1_C1_change_structure = [
    'results/change_structure.syn_nglf_buckets.nt10.m8.bs16.train_cnt16.val_cnt16.test_cnt100.min_cor0.60.max_cor1.00.results.json',
    'results/change_structure.syn_nglf_buckets.nt10.m8.bs16.train_cnt32.val_cnt16.test_cnt100.min_cor0.60.max_cor1.00.results.json',
    'results/change_structure.syn_nglf_buckets.nt10.m8.bs16.train_cnt8.val_cnt16.test_cnt100.min_cor0.60.max_cor1.00.results.json'
]

In [47]:
# Neither show_methods nor sizes is passed here, so visualize_case receives
# show_methods=None and derives the sizes 8, 16, 32 from the three listed files.
visualize_case(A1_C1_change_structure, 'm=8, bs=16 buckets (without fixing SNR)')

<IPython.core.display.Javascript object>

# draft

In [64]:
# Draft: bar chart of every method in one result file (the same path as the
# train_cnt8 entry of syn_sudden_buckets_A1).
bar_plot_baselines('results/syn_nglf_buckets.nt10.m8.bs16.train_cnt8.val_cnt16.test_cnt100.snr5.00.results.json',
                   'm=8,bs=16 buckets')

<IPython.core.display.Javascript object>