In [1]:
import os
import json
import pandas as pd
import altair as alt
from collections import defaultdict

In [18]:
## Cumulative Chart
# load input data
max_data = defaultdict(lambda: {"recall": 0.0, "precision": 0.0, "f1-score": 0.0})

experiment_folder = '../output_models/cross_subject-batch_1_lr_00005_rs_2021_03_09-16_05_14'
with open(os.path.join(experiment_folder, 'metrics.json'), 'r') as mf:
    for line in mf:
        data = json.loads(line.rstrip('\n'))['thresholds']
        for th, values in data.items():
            max_data[th]['recall'] = max(max_data[th]['recall'], round(values['recall'], 4))
            max_data[th]['precision'] = max(max_data[th]['precision'], round(values['precision'], 4))
            max_data[th]['f1-score'] = max(max_data[th]['f1-score'], round(values['f1-score'], 4))

In [55]:
# generate dataframe
type_column = ['recall'] * 9 + ['precision'] * 9 + ['f1-score'] * 9
th_column = list(max_data.keys()) * 3

value_column = [v['recall'] for _, v in max_data.items()]
value_column += [v['precision'] for _, v in max_data.items()]
value_column += [v['f1-score'] for _, v in max_data.items()]

th_df = pd.DataFrame({'metric': type_column, 'threshold': th_column, 'value': value_column})
th_df

Unnamed: 0,metric,threshold,value
0,recall,0.1,0.2156
1,recall,0.2,0.1626
2,recall,0.3,0.1367
3,recall,0.4,0.1176
4,recall,0.5,0.103
5,recall,0.6,0.09
6,recall,0.7,0.0769
7,recall,0.8,0.061
8,recall,0.9,0.0423
9,precision,0.1,0.1516


In [56]:
# generate chart
chart = alt.Chart(th_df).mark_line().encode(
    x='threshold',
    y='value',
    color='metric',
    #strokeDash='metric',
)
chart.configure_view(
    height=250,
    width=350
)

In [40]:
data_folder = '../data/board-data/detect/'

In [27]:
# list folder
data_files = sorted(list(filter(lambda x: x.endswith('.csv'), os.listdir(data_folder))))
data_files

['cs_oe_lr_00005-AP.csv',
 'cs_overlap_20_10_lr_0001-AP.csv',
 'cs_overlap_20_10_lr_0001-accuracy.csv',
 'cv_oe_lr_00005-AP.csv',
 'cv_overlap_20_10_lr_0001-AP.csv',
 'cv_overlap_20_10_lr_0001-accuracy.csv']

In [20]:
data_files = [
    'cs_overlap_20_10_lr_0001-AP.csv'
]

In [31]:
legend_files = [
    #'cs_base_lr_0001-accuracy.csv',
    #'cs_base_lr_0001-epoch_loss.csv',
    'cs_oe_lr_00005-AP.csv',
    'cv_oe_lr_00005-AP.csv'
]

In [7]:
def load_file_as_df(file_path: str):
    return pd.read_csv(file_path), file_name.split('-')[0]    

In [8]:
def save_chart(chart: alt.Chart, file_path: str):
    print(file_path)
    chart.save(file_path)

In [9]:
def add_legend() -> alt.Chart:
    x_min = 265
    y_min = 10
    b_size = 10
    box = alt.Chart({'values':[{}]}).mark_rect(stroke='#FF7F50', color='#FF7F50', opacity=0.4).encode(
        x=alt.value(x_min),
        y=alt.value(y_min),
        x2=alt.value(x_min + b_size),
        y2=alt.value(y_min + b_size)
    )
    text = alt.Chart({'values':[{}]}).mark_text(align="left", baseline="top").encode(
        x=alt.value(x_min + b_size + 5),  # pixels from left
        y=alt.value(y_min),  # pixels from top
        text=alt.value(['True Values'])
    )
    
    box2 = alt.Chart({'values':[{}]}).mark_rect(stroke='#FF7F50', color='#FF7F50').encode(
        x=alt.value(x_min),
        y=alt.value(y_min + 15),
        x2=alt.value(x_min + b_size),
        y2=alt.value(y_min + b_size + 15)
    )
    text2 = alt.Chart({'values':[{}]}).mark_text(align="left", baseline="top").encode(
        x=alt.value(x_min + b_size + 5),  # pixels from left
        y=alt.value(y_min + 15),  # pixels from top
        text=alt.value(['Scatterplot Smoothing'])
    )
    return box + text + box2 + text2

In [10]:
def generate_loss_chart(df_data: pd.DataFrame, chart_name: str, legend: bool = False) -> alt.Chart:
    line = alt.Chart(df_data).mark_line(interpolate='linear', size=2, opacity=0.4, color='#FF7F50').encode(
        x=alt.X(
            'Step',
            axis=alt.Axis(title='Epoch', titleFontSize=17, labelFontSize=17)
        ),
        y=alt.Y(
            'Value:Q',
            axis=alt.Axis(title='Loss Value', titleFontSize=17, labelFontSize=17),
            scale=alt.Scale(domain=(0.03, 0.13))
        )
    ).properties(
        title=f'{chart_name} Epoch-Loss',
    )

    smooth = line.transform_loess('Step', 'Value').mark_line(color='#FF7F50')
    b = line + smooth
    if legend:
        b += add_legend()
    
    b = b.configure_title(
        fontSize=20,
    )

    save_chart(b, os.path.join(folder, f'{chart_name.lower()}-epoch_loss.png'))
    return b

In [15]:
def generate_accuracy_chart(df_data: pd.DataFrame, chart_name: str, legend: bool = False) -> alt.Chart:
    line = alt.Chart(df_data).mark_line(interpolate='linear', size=2, opacity=0.4, color='#FF7F50').encode(
        x=alt.X(
            'Step',
            axis=alt.Axis(title='Epoch', titleFontSize=17, labelFontSize=17)
        ),
        y=alt.Y(
            'Value:Q',
            axis=alt.Axis(title='Accuracy', titleFontSize=17, labelFontSize=17),
            scale=alt.Scale(domain=(0.0, 0.95))
        )
    ).properties(
        title=f'{chart_name} Accuracy'
    )
    smooth = line.transform_loess('Step', 'Value').mark_line(color='#FF7F50')
    b = line + smooth
    if legend:
        b += add_legend()
    
    b = b.configure_title(
        fontSize=20,
    )
    
    save_chart(b, os.path.join(folder, f'{chart_name.lower()}-accuracy.png'))
    return b

In [29]:
def generate_AP_chart(df_data: pd.DataFrame, chart_name: str, legend: bool = False) -> alt.Chart:
    line = alt.Chart(df_data).mark_line(interpolate='linear', size=2, opacity=0.4, color='#FF7F50').encode(
        x=alt.X(
            'Step',
            axis=alt.Axis(title='Epoch', titleFontSize=17, labelFontSize=17)
        ),
        y=alt.Y(
            'Value:Q',
            axis=alt.Axis(title='AP Score', titleFontSize=17, labelFontSize=17),
            scale=alt.Scale(domain=(0.0, 0.6))
        )
    ).properties(
        title=f'{chart_name} AP Score'
    )
    smooth = line.transform_loess('Step', 'Value').mark_line(color='#FF7F50')
    b = line + smooth
    if legend:
        b += add_legend()
    
    b = b.configure_title(
        fontSize=20,
    )
    
    save_chart(b, os.path.join(folder, f'{chart_name.lower()}-AP.png'))
    return b

In [32]:
for file_name in data_files:
    gen_legend = file_name in legend_files
    
    file_path = os.path.join(folder, file_name)
    df = pd.read_csv(file_path)
    
    split_name = file_name.split('-')
    chart_name = split_name[0]
    chart_type = split_name[-1][:-4]
    
    if chart_type == 'epoch_loss':
        chart = generate_loss_chart(df, chart_name, gen_legend)
    elif chart_type == 'accuracy':
        #pass
        chart = generate_accuracy_chart(df, chart_name, gen_legend)
    elif chart_type == 'AP':
        chart = generate_AP_chart(df, chart_name, gen_legend)
    else:
        print(f"Unknown type of the chart: {chart_type}")

../data/board-data/detect/cs_oe_lr_00005-AP.png
../data/board-data/detect/cs_overlap_20_10_lr_0001-AP.png
../data/board-data/detect/cs_overlap_20_10_lr_0001-accuracy.png
../data/board-data/detect/cv_oe_lr_00005-AP.png
../data/board-data/detect/cv_overlap_20_10_lr_0001-AP.png
../data/board-data/detect/cv_overlap_20_10_lr_0001-accuracy.png


In [162]:
# one chart
file_name = data_files[0]

file_path = os.path.join(folder, file_name)
df = pd.read_csv(file_path)

split_name = file_name.split('-')
chart_name = split_name[0]

generate_accuracy_chart(df, chart_name, True)

../data/board-data/cs_base_lr_0001-accuracy.png


In [45]:
## Generate cumulative Chart
cfolder = '../output_models/cross_view-batch_1_lr_0001_rs_overlap_50_20_2021_03_20-16_01_57' #'../output_models/cross_subject-batch_1_lr_0001_rs_overlap_50_20_2021_03_14-12_23_02/'
cfile = 'cumulative_2021_06_06-17_51_34.csv'

In [46]:
df = pd.read_csv(os.path.join(cfolder, cfile))
df

Unnamed: 0,step,metric,value
0,0.1,precision,0.3641
1,0.1,recall,0.6543
2,0.1,f1-score,0.4595
3,0.15,precision,0.4318
4,0.15,recall,0.6057
5,0.15,f1-score,0.4944
6,0.2,precision,0.4825
7,0.2,recall,0.5678
8,0.2,f1-score,0.5125
9,0.25,precision,0.5242


In [47]:
# generate chart
chart = alt.Chart(df).mark_line().encode(
    x=alt.X(
        'step',
        axis=alt.Axis(title='Threshold', titleFontSize=17, labelFontSize=17)
    ),
    y=alt.Y(
        'value:Q',
        axis=alt.Axis(title='Value', titleFontSize=17, labelFontSize=17),
    ),
    color=alt.Color('metric', legend=alt.Legend(orient='left', offset=-130, title=None, padding=10)),
    #strokeDash='metric',
)
chart.save(os.path.join(data_folder, 'cv_overlap_50_20_lr_0001-cumulative.png'))

chart

In [2]:
## Generate histogram for data samples
with open('../data/other-data/actions_histogram.json', 'r') as jf:
    data = json.load(jf)

{'23': 418,
 '3': 458,
 '22': 413,
 '29': 919,
 '17': 510,
 '30': 417,
 '15': 342,
 '20': 923,
 '8': 439,
 '16': 513,
 '14': 399,
 '32': 416,
 '7': 469,
 '9': 425,
 '35': 431,
 '10': 398,
 '12': 440,
 '36': 422,
 '33': 389,
 '34': 419,
 '41': 503,
 '2': 429,
 '1': 462,
 '39': 456,
 '28': 438,
 '18': 423,
 '24': 552,
 '25': 550,
 '5': 435,
 '6': 429,
 '11': 424,
 '13': 495,
 '27': 433,
 '40': 432,
 '21': 429,
 '42': 459,
 '37': 429,
 '31': 417,
 '0': 402,
 '4': 420,
 '38': 423,
 '26': 397,
 '19': 423}

In [57]:
LABELS = {
    1: 0,
    2: 1,
    3: 2,
    4: 3,
    5: 4,
    6: 5,
    7: 6,
    8: 7,
    9: 8,
    10: 9,
    11: 10,
    13: 11,
    15: 12,
    17: 13,
    19: 14,
    20: 15,
    22: 16,
    23: 17,
    25: 18,
    28: 19,
    29: 20,
    30: 21,
    31: 22,
    32: 23,
    33: 24,
    34: 25,
    35: 26,
    36: 27,
    37: 28,
    38: 29,
    39: 30,
    40: 31,
    41: 32,
    42: 33,
    43: 34,
    44: 35,
    45: 36,
    46: 37,
    47: 38,
    48: 39,
    49: 40,
    50: 41,
    51: 42
}
OPPOSITE_LABELS = dict((v,k) for k, v in LABELS.items())

ldf = pd.read_csv('../data/meta/categories.csv', index_col='Label')

TEXT_LABELS = []

In [80]:
ldf.loc[ldf.index==1]['Action'].values[0]

'bow'

In [58]:
ldf

Unnamed: 0_level_0,Action
Label,Unnamed: 1_level_1
1,bow
2,brushing hair
3,brushing teeth
4,check time (from watch)
5,cheer up
6,clapping
7,cross hands in front (say stop)
8,drink water
9,drop
10,eat meal/snack


In [38]:
ldf.iloc[0]['Action']

'bow'

In [90]:
opposite_keys = [OPPOSITE_LABELS[int(k)] for k in data.keys()]
opposite_keys

[32,
 4,
 31,
 38,
 23,
 39,
 20,
 29,
 9,
 22,
 19,
 41,
 8,
 10,
 44,
 11,
 15,
 45,
 42,
 43,
 50,
 3,
 2,
 48,
 37,
 25,
 33,
 34,
 6,
 7,
 13,
 17,
 36,
 49,
 30,
 51,
 46,
 40,
 1,
 5,
 47,
 35,
 28]

In [111]:
df = pd.DataFrame({
    'number': [ldf.iloc[k - 1]['Action'][:25] + f' ({k})' for k in opposite_keys], #[str(OPPOSITE_LABELS[int(k)]) for k in  data.keys()],
    'values': list(data.values())
})
alt.Chart(df).mark_bar().encode(
    y=alt.Y(
        'number:O',
        axis=alt.Axis(title='Categories')#, titleFontSize=17, labelFontSize=17)
    ),
    x=alt.X(
        'values:Q',
        axis=alt.Axis(title='Number of samples')#, titleFontSize=17, labelFontSize=17),
    )
)