In [1]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import os
import pandas as pd
import pickle
import re
import seaborn as sns

plt.style.use('./stylefiles/plot.mplstyle')
mpl.rcParams['xtick.labelsize'] = 10

In [2]:
PATH = './results/sparse/'

In [3]:
# List the files
results_folders = os.listdir(PATH)

# Load the data
list_results = []
for folder in results_folders:
    if folder[0] == '.':
        continue
    folder_split = re.split("_", folder)
    N = int(folder_split[0][1:])
    M = folder_split[1][1:]
    eps = float(folder_split[2][3:])
    
    files = os.listdir(PATH + folder)
    list_times = len(files) * [None]
    for idx, file in enumerate(files):
        with open(PATH + folder + "/" + file, "rb") as f:
            data = pickle.load(f)
        times = {
            'ratiocov': data['time_covariance'] / data['time_gram'],
            'ratiopsplines': data['time_psplines'] / data['time_gram']
        }
        #times = {k: v for k, v in data.items() if k.startswith('time')}
        times = pd.DataFrame.from_records([times])
        times.insert(0, "eps", eps)
        times.insert(0, "N", N)
        times.insert(0, "M", M)
        list_times[idx] = times
    list_results.append(pd.concat(list_times))
results = pd.concat(list_results)

In [4]:
results

Unnamed: 0,M,N,eps,ratiocov,ratiopsplines
0,101-51-201,250,0.025,0.782651,0.480174
0,101-51-201,250,0.025,1.331477,0.774502
0,101-51-201,250,0.025,1.429316,0.818271
0,101-51-201,250,0.025,1.369570,0.781608
0,101-51-201,250,0.025,1.427246,0.851948
...,...,...,...,...,...
0,101-51-201,250,0.100,1.229118,0.852806
0,101-51-201,250,0.100,1.356059,1.102426
0,101-51-201,250,0.100,1.340118,0.965552
0,101-51-201,250,0.100,1.153282,0.841252


In [5]:
SORT_VALUES = ['11-11-21', '26-26-51', '101-51-201']

results_pp = pd.melt(
    results,
    id_vars=['M', 'N', 'eps'],
    value_vars=['ratiocov', 'ratiopsplines']
)
results_pp.M = results_pp.M.astype("category")
results_pp.M = results_pp.M.cat.set_categories(SORT_VALUES)
results_pp = results_pp.sort_values(by=['N', 'M', 'eps'])

In [6]:
results_pp

Unnamed: 0,M,N,eps,variable,value
0,101-51-201,250,0.025,ratiocov,0.782651
1,101-51-201,250,0.025,ratiocov,1.331477
2,101-51-201,250,0.025,ratiocov,1.429316
3,101-51-201,250,0.025,ratiocov,1.369570
4,101-51-201,250,0.025,ratiocov,1.427246
...,...,...,...,...,...
795,101-51-201,250,0.100,ratiopsplines,0.852806
796,101-51-201,250,0.100,ratiopsplines,1.102426
797,101-51-201,250,0.100,ratiopsplines,0.965552
798,101-51-201,250,0.100,ratiopsplines,0.841252


In [7]:
gg = sns.catplot(
    data=results_pp,
    x="value", y="variable", col="eps",
    kind="violin",
    fill=False,
    color="#111111",
    height=2,
    aspect=2
)
#gg.set_titles(template="$N = {col_name} \:|\: M = {row_name}$", size=12)
gg.set(xlim=(10e-2, 30))
gg.set_xlabels("Ratio of computation time (log scale)", fontsize=10)
gg.set_ylabels("")
gg.set_yticklabels(["(Tensor) PCA / Gram", "2D/1D B-Splines / Gram"], size=10)
for ax in gg.axes.flat:
    ax.axvline(x=1, color='r', lw=1, ls='--')
    ax.set_xscale("log")
gg.axes.flat[0].set_title("Sparsity: high", size=12)
gg.axes.flat[1].set_title("Sparsity: medium", size=12)
gg.fig.tight_layout()

plt.savefig(
    f'computation_time_sparse.eps',
    format='eps',
    bbox_inches='tight'
)
plt.close()