In [2]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import sys
import math
sys.path.append('/home/eduardo/PycharmProjects/treemap-analysis/code')

import Parser
import SpatialMetrics
import TemporalMetrics

In [3]:
technique_list = Parser.list_techniques()
dataset_list = Parser.list_datasets()
print(technique_list)
print(dataset_list)

['app', 'git', 'hil', 'moo', 'nac', 'new', 'pbm', 'pbs', 'pbz', 'snd', 'spi', 'sqr', 'str']
['animate.css', 'AudioKit', 'bdb', 'beets', 'brackets', 'caffe', 'calcuta', 'cpython', 'earthdata-search', 'emcee', 'exo', 'exports', 'fsharp', 'gimp', 'hiv', 'hospitalrun-frontend', 'Hystrix', 'iina', 'jenkins', 'Leaflet', 'm-coffee', 'm-names', 'OptiKey', 'osquery', 'PhysicsJS', 'pybuilder', 'scikitlearn', 'shellcheck', 'soundnode-app', 'spacemacs', 'standard', 'uws']


# Visual Metrics

Let $w_k$ and $h_k$ be the weight and height of cell $c_k$, and $a_k$ be the relative weight of item $k$ relative to the complete tree weight.

$Q^{AR}_k = \min(w_k,h_k)/\max(w_k,h_k)$

$Q^{WAR}_k = a_k * Q^{AR}_k$

In [4]:
# If the metrics were already computed, read from file. Else, recompute and save to file.
try:
    ar_df = pd.read_csv('ar.csv', index_col=0)
    war_df = pd.read_csv('war.csv', index_col=0)
    
except:
    ar_df  = pd.DataFrame(columns=dataset_list, index=technique_list) # Aspect ratio dataframe
    war_df = pd.DataFrame(columns=dataset_list, index=technique_list) # Weighted aspect ratio dataframe
    for technique in technique_list:
        for dataset in dataset_list:
            history = Parser.parse_rectangles(technique, dataset)
            for i, df in enumerate(history):
                ar_df[dataset][technique]  = SpatialMetrics.q_ar(df)['q_ar'].mean()
                war_df[dataset][technique] = SpatialMetrics.q_weighted_ar(df)['q_w_ar'].mean()

    ar_df.to_csv('ar.csv')
    ar_df.to_csv('war.csv')    

### How much each technique in $Q^{ar}$ score for each dataset 

In [5]:
ar_df

Unnamed: 0,animate.css,AudioKit,bdb,beets,brackets,caffe,calcuta,cpython,earthdata-search,emcee,...,OptiKey,osquery,PhysicsJS,pybuilder,scikitlearn,shellcheck,soundnode-app,spacemacs,standard,uws
app,0.684565,0.659055,0.445995,0.662962,0.635311,0.690907,0.603092,0.684429,0.666995,0.681847,...,0.562277,0.675816,0.650434,0.710802,0.665772,0.668284,0.687974,0.626794,0.739188,0.70198
git,0.461645,0.454805,0.251037,0.400137,0.392522,0.453855,0.433477,0.317625,0.212654,0.344499,...,0.395375,0.474618,0.363336,0.527232,0.274224,0.484265,0.370244,0.421906,0.413298,0.356951
hil,0.630774,0.435393,0.332571,0.479805,0.303583,0.370615,0.389041,0.336044,0.383148,0.465973,...,0.46293,0.341557,0.332893,0.378496,0.422687,0.256631,0.436462,0.406484,0.413442,0.400631
moo,0.629194,0.368385,0.25587,0.416297,0.334311,0.251678,0.448974,0.40812,0.199319,0.513805,...,0.46293,0.412599,0.368525,0.284382,0.437752,0.227899,0.458906,0.396532,0.359182,0.477957
nac,0.551992,0.398819,0.286815,0.343779,0.319154,0.395018,0.373129,0.360574,0.296851,0.390329,...,0.336626,0.401643,0.423617,0.385819,0.355646,0.387849,0.3895,0.388563,0.315709,0.353041
new,0.667569,0.567579,0.335528,0.530474,0.520653,0.554568,0.561581,0.521457,0.497821,0.50886,...,0.535917,0.552576,0.572394,0.566798,0.528815,0.6068,0.558541,0.495402,0.275742,0.477449
pbm,0.622484,0.550473,0.334689,0.42414,0.495053,0.528365,0.463841,0.488546,0.487231,0.505689,...,0.448904,0.528203,0.507423,0.533881,0.511144,0.328786,0.473934,0.482408,0.367774,0.476632
pbs,0.660418,0.614854,0.33712,0.548278,0.475022,0.59493,0.459847,0.55153,0.541601,0.591333,...,0.562277,0.569044,0.552945,0.574005,0.537025,0.589508,0.561601,0.537996,0.463491,0.548236
pbz,0.67336,0.553062,0.344971,0.51748,0.524025,0.550637,0.527375,0.548549,0.533954,0.513561,...,0.506441,0.548509,0.524762,0.58316,0.532376,0.417262,0.56104,0.48025,0.473946,0.475757
snd,0.518291,0.114074,0.228111,0.10817,0.191458,0.150805,0.209768,0.116347,0.097881,0.361535,...,0.273351,0.214281,0.282249,0.025534,0.223167,0.100708,0.150015,0.209306,0.270424,0.198423


### Count and sort ranks for $Q^{ar}$ and $Q^{war}$

In [6]:
ar_rank_count = pd.DataFrame(0, columns=range(1, len(technique_list) + 1), index=technique_list)

for column in ar_df:
    df = ar_df.sort_values(by=column, ascending=False)[column]
    for rank, tech in enumerate(df.index.values):
        ar_rank_count[rank + 1][tech] += 1

# for column in war_df:
#     df = ar_df.sort_values(by=column, ascending=False)[column]
#     for rank, tech in enumerate(df.index.values):
#         ar_rank_count[rank + 1][tech] += 1
    
ar_rank_count

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13
app,27,3,1,1,0,0,0,0,0,0,0,0,0
git,0,0,0,0,1,0,0,2,9,2,9,7,2
hil,0,0,1,0,0,1,3,6,2,7,6,6,0
moo,0,1,0,2,0,1,1,1,7,9,4,5,1
nac,0,0,0,0,0,0,0,0,3,6,12,11,0
new,0,3,5,6,9,5,0,2,0,0,1,1,0
pbm,0,0,0,0,3,11,9,3,3,3,0,0,0
pbs,0,6,13,7,2,1,2,1,0,0,0,0,0
pbz,0,1,7,11,6,2,3,0,1,1,0,0,0
snd,0,0,0,0,0,0,0,0,0,1,0,2,29


In [7]:
def rank_row(row):
    value = 0
    for rank, n_times in enumerate(row):
        value += (rank + 1) * n_times
    return value
    
ar_rank_count['avg'] = ar_rank_count.apply(lambda row: rank_row(row.values), axis=1)
ar_rank_count.sort_values(by='avg', ascending=True)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,avg
app,27,3,1,1,0,0,0,0,0,0,0,0,0,40
sqr,5,17,3,1,6,0,0,0,0,0,0,0,0,82
pbs,0,6,13,7,2,1,2,1,0,0,0,0,0,117
pbz,0,1,7,11,6,2,3,0,1,1,0,0,0,149
new,0,3,5,6,9,5,0,2,0,0,1,1,0,159
spi,0,0,2,4,4,6,9,3,4,0,0,0,0,201
pbm,0,0,0,0,3,11,9,3,3,3,0,0,0,225
str,0,1,0,0,1,5,5,14,3,3,0,0,0,241
moo,0,1,0,2,0,1,1,1,7,9,4,5,1,301
hil,0,0,1,0,0,1,3,6,2,7,6,6,0,304


# Stability Metrics

Let $c_k(t_i)$ and $c_k(t_j)$ be two cells in two consecutive versions $T(t_i)$ and $T(t_j=t_{i+1})$ for the same node in a dynamic tree. We use for $\delta c_k$ the average sum of distances between the four closest corresponding corners of $c_k(t_i)$ and $c_k(t_j)$, normalized by the treemap diagonal $\sqrt{W^2+H^2}$, so $\delta \in [0,1]$.

We next define the \emph{data change} between nodes $n_k(t_i)$ and $n_k(t_j)$ as $\delta a_k = |a_k(t_i)-a_k(t_j)|$, where $a_k$ is the data attribute of $n_k$. If either of $n_k(t_i)$ or $n_k(t_j)$ does not exist, \emph{i.e.}, a node was created or deleted in versions $t_i$ or $t_j$, we set the respective attribute to zero. We normalize $\delta a_k$ by the maximal $a_k(t_i)$ for all nodes $n_k$ in all versions $t_j$ in a dataset, so $\delta a_k \in [0,1]$.


$Q^{RATIO}_k =  (1-\delta c_k) / (1 - \delta a_k)$

$Q^{WRATIO}_k =  a_k * Q^{RATIO}_k$


In [51]:
try:
    ratio_df = pd.read_csv('ratio_g.csv', index_col=0)
    wratio_df = pd.read_csv('wratio_g.csv', index_col=0)
    
except:
    ratio_df  = pd.DataFrame(columns=dataset_list, index=technique_list) # Ratio dataframe
    wratio_df = pd.DataFrame(columns=dataset_list, index=technique_list) # Weighted ratio dataframe
    p_df  = pd.DataFrame(columns=dataset_list, index=technique_list)     # Pearson correlation dataframe
    wp_df = pd.DataFrame(columns=dataset_list, index=technique_list)     # Weighted Pearson correlation dataframe
    
    for technique in technique_list:
        for dataset in dataset_list:
            history = Parser.parse_rectangles(technique, dataset)
            for i in range(1, len(history)):
                df = TemporalMetrics.delta_vis(history[i - 1], history[i])
                df = pd.merge(df, TemporalMetrics.delta_data_by_area(history[i - 1], history[i]))
                df = pd.merge(df, TemporalMetrics.relative_weight(history[i - 1], history[i]))

                # ratio metric
                ratio = TemporalMetrics.q_ratio(df)['q_ratio'].mean()
                if (ratio != 1.0):
                    ratio_df[dataset][technique]  = ratio

                ratio = TemporalMetrics.q_weighted_ratio(df)['q_w_ratio'].mean()
                if (ratio != 1.0):
                    wratio_df[dataset][technique]  = ratio

    ratio_df.to_csv('ratio_g.csv', float_format='%g')
    wratio_df.to_csv('wratio_g.csv', float_format='%g')
    
#             # pearson correlation
#             p_df[dataset][technique]  = max(TemporalMetrics.pearson(df)[0], 0)
#             wp_df[dataset][technique] = max(TemporalMetrics.weighted_pearson(df), 0)

#     ratio_df.to_csv('ratio.csv')
#     wratio_df.to_csv('wratio.csv')
#     p_df.to_csv('p.csv')
#     wp_df.to_csv('wp.csv')
    
#     ratio_df.to_csv('ratio_12.csv', float_format='%.12f')
#     wratio_df.to_csv('wratio_12.csv', float_format='%.12f')
#     p_df.to_csv('p_12.csv', float_format='%.12f')
#     wp_df.to_csv('wp_12.csv', float_format='%.12f')
    
#     ratio_df.to_csv('ratio_g.csv', float_format='%g')
#     wratio_df.to_csv('wratio_g.csv', float_format='%g')
#     p_df.to_csv('p_g.csv', float_format='%g')
#     wp_df.to_csv('wp_g.csv', float_format='%g')

In [57]:
ratio_df.to_csv('ratio_g.csv', float_format='%g')
wratio_df.to_csv('wratio_g.csv', float_format='%g')


In [63]:
wratio_df

Unnamed: 0,animate.css,AudioKit,bdb,beets,brackets,caffe,calcuta,cpython,earthdata-search,emcee,...,OptiKey,osquery,PhysicsJS,pybuilder,scikitlearn,shellcheck,soundnode-app,spacemacs,standard,uws
app,0.0118322,0.00137719,0.0136974,0.00577195,0.00342466,0.00242316,0.0107613,0.000403112,0.00166903,0.04,...,0.166667,0.00175316,0.00787402,0.0769231,0.00184668,0.0411322,0.0106383,0.00246826,0.125,0.0162587
git,0.0128966,0.0013696,0.0135127,0.00568061,0.000716846,0.00229706,0.0111766,0.000397631,0.00159968,0.0277778,...,0.00273089,0.00173777,0.0078125,0.00740685,0.00131774,0.0399734,0.0106383,0.00255793,0.125,0.0163846
hil,0.0122659,0.00137718,0.0136979,0.00577889,0.00342466,0.00242834,0.0108842,0.000402648,0.00166938,0.04,...,0.166667,0.00175312,0.00787402,0.0769231,0.00185032,0.041677,0.0106383,0.00251,0.125,0.0166593
moo,0.0122591,0.00137719,0.013698,0.00577877,0.00342466,0.00242636,0.0111028,0.000402889,0.0016692,0.04,...,0.166667,0.00175282,0.00787402,0.0769231,0.0018504,0.0416796,0.0106383,0.00250093,0.125,0.0166562
nac,0.0129265,0.00136969,0.0135128,0.00568023,0.000718391,0.00229677,0.0109459,0.000399132,0.00159978,0.0277778,...,0.00273013,0.00174414,0.0078125,0.00740681,0.00131853,0.0400122,0.0106383,0.00255176,0.125,0.0163816
new,0.0127775,0.00136961,0.0135128,0.00567966,0.000718391,0.00229575,0.0109745,0.000398023,0.00160004,0.0277778,...,0.0027087,0.00174385,0.0078125,0.00740682,0.00131832,0.0400113,0.0106383,0.00238933,0.125,0.0163872
pbm,0.0128401,0.000916672,0.0135128,0.00568128,0.000718391,0.00229698,0.011199,0.000399141,0.00159945,0.0277778,...,0.00271991,0.00174471,0.0078125,0.0074072,0.00131075,0.0399911,0.0106383,0.002527,0.125,0.016395
pbs,0.0122437,0.00137716,0.0136979,0.00575669,0.00342466,0.00242275,0.011163,0.000400393,0.00166827,0.04,...,0.166667,0.00175166,0.00787402,0.0769231,0.00185061,0.0416573,0.0106383,0.00253021,0.125,0.0164211
pbz,0.01271,0.00136961,0.0135128,0.00568071,0.000718391,0.00229468,0.0107906,0.000398356,0.0015991,0.0277778,...,0.00273314,0.00174379,0.0078125,0.00740682,0.00131828,0.040002,0.0106383,0.0025186,0.125,0.016308
snd,0.0129152,0.00136956,0.0135129,0.00568079,0.000718391,0.00229737,0.0112502,0.000399123,0.00160026,0.0277778,...,0.00273922,0.00174395,0.0078125,0.00740715,0.00131856,0.0400017,0.0106383,0.00254822,0.125,0.0163866


In [67]:
stab_rank_count = pd.DataFrame(0, columns=range(1, len(technique_list) + 1), index=technique_list)

for column in ratio_df:
    df = ratio_df.sort_values(by=column, ascending=False)[column]
    for rank, tech in enumerate(df.index.values):
        stab_rank_count[rank + 1][tech] += 1

# for column in wratio_df:
#     df = wratio_df.sort_values(by=column, ascending=False)[column]
#     for rank, tech in enumerate(df.index.values):
#         stab_rank_count[rank + 1][tech] += 1
        
# for column in p_df:
#     df = p_df.sort_values(by=column, ascending=False)[column]
#     for rank, tech in enumerate(df.index.values):
#         stab_rank_count[rank + 1][tech] += 1

# for column in wp_df:
#     df = wp_df.sort_values(by=column, ascending=False)[column]
#     for rank, tech in enumerate(df.index.values):
#         stab_rank_count[rank + 1][tech] += 1
    
stab_rank_count

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13
app,2,2,1,3,0,1,0,2,1,3,4,9,4
git,5,7,6,3,4,1,3,0,2,0,1,0,0
hil,2,1,0,3,3,1,3,6,2,4,3,3,1
moo,1,2,1,1,1,5,7,3,1,5,3,1,1
nac,2,7,4,0,6,4,2,3,0,3,1,0,0
new,2,2,1,5,2,3,1,2,9,3,1,0,1
pbm,3,3,4,1,3,3,3,4,0,4,1,1,2
pbs,2,0,1,5,4,3,1,1,3,0,5,3,4
pbz,0,1,1,1,2,2,4,1,8,1,5,2,4
snd,9,6,5,3,2,1,1,1,0,1,1,1,1


In [68]:
stab_rank_count['avg'] = stab_rank_count.apply(lambda row: rank_row(row.values), axis=1)
stab_rank_count.sort_values(by='avg', ascending=True)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,avg
git,5,7,6,3,4,1,3,0,2,0,1,0,0,125
snd,9,6,5,3,2,1,1,1,0,1,1,1,1,125
nac,2,7,4,0,6,4,2,3,0,3,1,0,0,161
pbm,3,3,4,1,3,3,3,4,0,4,1,1,2,200
new,2,2,1,5,2,3,1,2,9,3,1,0,1,215
moo,1,2,1,1,1,5,7,3,1,5,3,1,1,237
hil,2,1,0,3,3,1,3,6,2,4,3,3,1,246
pbs,2,0,1,5,4,3,1,1,3,0,5,3,4,248
spi,1,0,4,2,1,5,0,4,1,3,3,5,3,259
str,2,0,2,1,4,1,3,3,3,3,2,4,4,262


In [70]:
stab_rank_count = pd.DataFrame(0, columns=range(1, len(technique_list) + 1), index=technique_list)

# for column in ratio_df:
#     df = ratio_df.sort_values(by=column, ascending=False)[column]
#     for rank, tech in enumerate(df.index.values):
#         stab_rank_count[rank + 1][tech] += 1

for column in wratio_df:
    df = wratio_df.sort_values(by=column, ascending=False)[column]
    for rank, tech in enumerate(df.index.values):
        stab_rank_count[rank + 1][tech] += 1
        
# for column in p_df:
#     df = p_df.sort_values(by=column, ascending=False)[column]
#     for rank, tech in enumerate(df.index.values):
#         stab_rank_count[rank + 1][tech] += 1

# for column in wp_df:
#     df = wp_df.sort_values(by=column, ascending=False)[column]
#     for rank, tech in enumerate(df.index.values):
#         stab_rank_count[rank + 1][tech] += 1
    
stab_rank_count['avg'] = stab_rank_count.apply(lambda row: rank_row(row.values), axis=1)
stab_rank_count.sort_values(by='avg', ascending=True)

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,11,12,13,avg
hil,7,8,5,1,2,0,2,4,1,1,0,0,1,130
moo,4,7,5,5,1,4,1,2,3,0,0,0,0,132
pbs,4,2,6,10,1,2,1,0,3,2,1,0,0,148
snd,4,2,2,3,4,4,7,1,1,2,1,1,0,179
app,7,2,6,5,0,0,1,1,1,1,2,2,4,181
nac,1,4,3,0,5,4,1,5,2,2,3,1,1,210
pbm,1,1,1,2,7,5,3,0,0,5,2,1,4,236
git,2,3,3,2,3,1,0,3,1,3,2,4,5,244
new,0,1,0,1,1,3,7,3,5,6,2,2,1,266
spi,2,0,1,1,4,2,0,3,4,3,5,4,3,273


### The APP technique looks suspicious. It ranked first 36 times!! 
### Even though I don't know much about it, I believe it is a very unstable technique. Maybe there is an implementation issue.
### Lets see for which datasets it ranks high.

In [62]:
for column in ratio_df:
    df = ratio_df.sort_values(by=column, ascending=False)[column]
    for rank, tech in enumerate(df.index.values):
        if rank == 0 and tech == 'app':
            print(df)
            print()

app    0.998354
git     0.99769
nac    0.997282
hil    0.997236
pbs    0.996703
sqr    0.996615
moo    0.996612
spi    0.996543
pbz    0.996472
pbm    0.996465
new    0.996027
snd    0.993764
str    0.977137
Name: iina, dtype: object

app    0.998829
snd     0.99805
git    0.997347
sqr    0.993673
pbm     0.99238
spi    0.992106
str    0.990784
new    0.988418
pbz    0.988253
nac    0.988002
pbs    0.986101
moo    0.980023
hil    0.977563
Name: Leaflet, dtype: object



### It looks like the metric precision is not enough, whenever values get rounded to 1, it uses alphabetical order to rank indexes, putting APP at the top. I'll try to fix it next. 

In [30]:
ratio_df  = pd.DataFrame(columns=dataset_list, index=technique_list) # Ratio dataframe

for technique in technique_list:
    history = Parser.parse_rectangles(technique, 'brackets')
    for i in range(1, len(history)):
        df = TemporalMetrics.delta_vis(history[i - 1], history[i])
        df = pd.merge(df, TemporalMetrics.delta_data_by_area(history[i - 1], history[i]))

        # ratio metric
        ratio = TemporalMetrics.q_ratio(df)['q_ratio'].mean()
        if (ratio != 1.0):
            ratio_df[dataset][technique]  = ratio

In [31]:
ratio_df

Unnamed: 0,animate.css,AudioKit,bdb,beets,brackets,caffe,calcuta,cpython,earthdata-search,emcee,...,OptiKey,osquery,PhysicsJS,pybuilder,scikitlearn,shellcheck,soundnode-app,spacemacs,standard,uws
app,,,,0.999836,,,,,,,...,,,,,,,,,,
git,,,,0.998272,,,,,,,...,,,,,,,,,,
hil,,,,0.999853,,,,,,,...,,,,,,,,,,
moo,,,,0.999842,,,,,,,...,,,,,,,,,,
nac,,,,0.993026,,,,,,,...,,,,,,,,,,
new,,,,0.99153,,,,,,,...,,,,,,,,,,
pbm,,,,0.997632,,,,,,,...,,,,,,,,,,
pbs,,,,0.99987,,,,,,,...,,,,,,,,,,
pbz,,,,0.900904,,,,,,,...,,,,,,,,,,
snd,,,,0.998025,,,,,,,...,,,,,,,,,,


In [35]:
ratio_df.sort_values(by='beets', ascending=True)

Unnamed: 0,animate.css,AudioKit,bdb,beets,brackets,caffe,calcuta,cpython,earthdata-search,emcee,...,OptiKey,osquery,PhysicsJS,pybuilder,scikitlearn,shellcheck,soundnode-app,spacemacs,standard,uws
pbz,,,,0.900904,,,,,,,...,,,,,,,,,,
spi,,,,0.98083,,,,,,,...,,,,,,,,,,
str,,,,0.990526,,,,,,,...,,,,,,,,,,
sqr,,,,0.990903,,,,,,,...,,,,,,,,,,
new,,,,0.99153,,,,,,,...,,,,,,,,,,
nac,,,,0.993026,,,,,,,...,,,,,,,,,,
pbm,,,,0.997632,,,,,,,...,,,,,,,,,,
snd,,,,0.998025,,,,,,,...,,,,,,,,,,
git,,,,0.998272,,,,,,,...,,,,,,,,,,
app,,,,0.999836,,,,,,,...,,,,,,,,,,


In [53]:
import os
duration = .1  # second
freq = 440  # Hz
os.system('play --no-show-progress --null --channels 1 synth %s sine %f' % (duration, freq))

0