In [None]:
from google.colab import drive

import matplotlib.pyplot as plt
import numpy as np
import json
import os
import pprint as pp

In [None]:
drive.mount('/Drive')
!ln -s '/Drive/MyDrive/google_colab_files_for_CSI' '/content/REU'
root_data_path = os.path.join('REU','csi_data_all')
REU = os.path.join('REU')

In [None]:
from matplotlib.font_manager import FontProperties
# For each person, index 0 is the precision, index 1 is the recall.
# I may automate this in the future if it bothers me enough

def join(*args):
    """Shorthand for ``os.path.join``: combine the given path components."""
    combined = os.path.join(*args)
    return combined

def add_json_to_dict(path, existing_dict):
    """Append the values stored in one JSON file to the running per-key lists.

    For every key already present in ``existing_dict``, the value found under
    the same key in the JSON file is appended as ONE new element, so each list
    gains exactly one entry per file (the file's data is one dimension lower
    than the accumulated data). Keys that exist only in the file are ignored.

    Args:
        path: Path of the JSON file to read.
        existing_dict: Mapping of key -> list; updated in place.

    Returns:
        The same ``existing_dict`` object, after the update.

    Raises:
        KeyError: If the file lacks any key of ``existing_dict``. The check
            runs before anything is modified, so a bad file can no longer
            leave the lists with unequal lengths.
    """
    with open(path, 'r', encoding='utf-8') as file:
        new_dict = json.load(file)

    # Validate first: failing half-way through the update loop would leave
    # some keys one entry longer than others.
    missing_keys = [key for key in existing_dict if key not in new_dict]
    if missing_keys:
        raise KeyError(f'{path} is missing expected keys: {missing_keys}')

    for key in existing_dict:
        # Build a new list instead of appending so list objects that the
        # caller may share elsewhere are not modified behind its back.
        existing_dict[key] = existing_dict[key] + [new_dict[key]]

    return existing_dict

def array_vals_to_numpy(data_dict):
    """Return a new dict with the same keys, each value passed through ``np.asarray``.

    The input mapping itself is not modified.
    """
    return {name: np.asarray(values) for name, values in data_dict.items()}

def get_label(impact_factor):
    """Translate an impact-factor identifier into the text shown on a chart axis.

    Three identifiers have hand-written labels; any other value is returned
    via ``str.capitalize()`` (first character upper-cased, the rest lower-cased).
    """
    special_labels = (
        ('dist-phone', 'Distance of Phone'),
        ('dist-AP', 'Distance of AP'),
        ('data-points', 'Number of Datasets'),
    )
    for identifier, label in special_labels:
        if impact_factor == identifier:
            return label
    return impact_factor.capitalize()

def group_bar_chart(impact_factor, data_dict, groups, exclude=(), save_path=None, ylim=(80, 100)):
    """Draw a grouped bar chart of averaged metrics, one cluster of bars per group.

    An 'f1' series is derived from the 'precision' and 'recall' entries and
    plotted alongside the supplied metrics. Every plotted metric is averaged
    along axis 1 and multiplied by 100 before being drawn.

    Args:
        impact_factor: Identifier passed to ``get_label`` for the x-axis label.
        data_dict: Mapping of metric name -> nested sequence. Must contain
            'precision' and 'recall'. Presumably one row per entry of
            ``groups`` — TODO confirm against the JSON files.
        groups: Tick labels, one per cluster of bars.
        exclude: Metric names (including 'f1') to leave out of the chart.
        save_path: If truthy, the figure is also written to this path.
        ylim: ``(bottom, top)`` limits of the y-axis. The default keeps the
            previously hard-coded 80-100 window; bars below ``bottom`` are
            not visible.

    Side effects:
        Sets ``plt.rcParams['font.weight']`` to 'bold' globally, shows the
        figure, and closes it afterwards.
    """
    # Global setting (kept from the original): every later figure is bold too.
    plt.rcParams['font.weight'] = 'bold'

    # array_vals_to_numpy returns a new dict, so adding 'f1' below does not
    # leak into the caller's data.
    data_dict = array_vals_to_numpy(data_dict)

    # f1 = 2 * (precision * recall) / (precision + recall)
    # Defined as 0 where precision + recall == 0 instead of producing NaN.
    precision = data_dict['precision']
    recall = data_dict['recall']
    denominator = precision + recall
    data_dict['f1'] = np.divide(
        2 * precision * recall,
        denominator,
        out=np.zeros(np.shape(denominator), dtype=float),
        where=denominator != 0,
    )

    series = [(name, values) for name, values in data_dict.items() if name not in exclude]
    n_series = len(series)

    # 0.3 is the original bar width; shrink it only when the bars of one
    # cluster would otherwise spill into the neighbouring cluster.
    width = min(0.3, 0.9 / n_series) if n_series else 0.3
    x = np.arange(len(groups))

    fig, ax = plt.subplots(figsize=(4, 3))

    for position, (name, values) in enumerate(series):
        averaged = np.average(values, axis=1) * 100
        bars = ax.bar(x + position * width, averaged, width, label=name.capitalize())

        # Value labels only fit inside the bars when there are few clusters.
        if len(groups) < 4:
            ax.bar_label(bars, labels=np.round(averaged, 2), label_type='center')

    ax.set_ylim(ylim)
    ax.set_xlabel(get_label(impact_factor), weight='bold')
    ax.set_ylabel(get_label('percentage'), weight='bold')
    # Centre each tick under its cluster for any number of series
    # (equals the original x + width / 2 when two series are drawn).
    ax.set_xticks(x + width * max(n_series - 1, 0) / 2, groups)

    ax.legend()
    fig.tight_layout(pad=0., w_pad=10, h_pad=5)
    if save_path:
        print(f'saving in {save_path}')
        fig.savefig(save_path)
    plt.show()
    # Release the figure; this function is called once per impact factor.
    plt.close(fig)

def _natural_sort_key(name):
    """Sort key that orders embedded digit runs numerically ('2m' before '10m')."""
    import re  # local import: not among the notebook's top-level imports

    # re.split with a capturing group alternates text, digits, text, ... so odd
    # positions are always digit runs and any two keys stay comparable.
    return [int(part) if index % 2 else part
            for index, part in enumerate(re.split(r'(\d+)', name))]


def generate_plots(impact_factor):
    """Load every JSON result file for one impact factor and chart the comparison.

    Files are looked up in ``<REU>/paper_charts``. A file is used when its name
    contains ``impact_factor`` and ends in '.json'. The group shown on the
    x-axis is the second '_'-separated token of the file name without its
    extension. The chart shows accuracy and F1 (precision and recall are
    excluded) and is saved as ``<impact_factor>_compared.pdf`` in the same
    directory.

    Args:
        impact_factor: Identifier used both to select files and to label the chart.
    """
    data_files_parent = join(REU, 'paper_charts')

    # Natural ordering keeps numeric groups in numeric order on the x-axis.
    # endswith() avoids picking up names that merely contain '.json'.
    data_json_names = sorted(
        (name for name in os.listdir(data_files_parent)
         if impact_factor in name and name.endswith('.json')),
        key=_natural_sort_key,
    )

    if not data_json_names:
        # Without this guard the plotting code fails later with an obscure
        # NumPy axis error on the empty arrays.
        print(f'no JSON files found for impact factor {impact_factor!r} in {data_files_parent}; skipping')
        return

    impact_factor_data_dict = {  # data will be stored in this dict
        'accuracy': [],
        'precision': [],
        'recall': [],
    }

    # Define the groups by looking at the file names. splitext() strips only
    # the extension, so a group such as '1.5m' is no longer cut off at its dot.
    groups = [os.path.splitext(name)[0].split('_')[1] for name in data_json_names]

    for json_name in data_json_names:
        impact_factor_data_dict = add_json_to_dict(
            join(data_files_parent, json_name),
            impact_factor_data_dict,
        )

    # DEBUG
    print('impact_factor:', impact_factor)
    print('groups:', groups)

    group_bar_chart(
        impact_factor,
        impact_factor_data_dict,
        groups,
        exclude=['precision', 'recall'],
        save_path=join(data_files_parent, f'{impact_factor}_compared.pdf'),
    )

In [None]:
# Impact factors to chart. generate_plots() uses each string to select the
# JSON files whose names contain it.
impact_factors = [
    'overall',
    'phones',
    'people',
    'dist-AP',
    'dist-phone',
    'data-points',
]

# Produce (and save) one comparison chart per impact factor.
for impact_factor in impact_factors:
    generate_plots(impact_factor)