# charts

In [None]:
import os

import numpy as np
import matplotlib.pyplot as plt

from sites.bookcave import bookcave

In [None]:
# Display names of the plotted metrics; entry i labels slice [:, :, i] of the
# metric arrays built in the cells below.
metric_names = ['Accuracy', 'F1 Score', 'Mean Squared Error']

# One display name per entry of bookcave.CATEGORIES, followed by a final
# 'Average' entry.
category_names = [bookcave.CATEGORY_NAMES[key] for key in bookcave.CATEGORIES]
category_names.append('Average')

## Paragraphs

In [None]:
def read_model_category_metrics(path, n_categories=None, n_metrics=None):
    """Read per-model, per-category metric values from a plain-text file.

    Expected layout, one item per line:

    1. the number of models ``n``;
    2. ``n`` model names;
    3. for every model, for every category, one numeric value per metric.

    Args:
        path: Path of the UTF-8 text file to read.
        n_categories: Number of categories stored per model. Defaults to
            ``len(category_names)``.
        n_metrics: Number of values stored per category. Defaults to
            ``len(metric_names)``.

    Returns:
        A ``(model_names, metrics)`` tuple: ``model_names`` is a list of
        strings and ``metrics`` is a NumPy array of shape
        ``(n_models, n_categories, n_metrics)``.

    Raises:
        ValueError: If the model count or a metric line is missing or is not
            a number.
    """
    if n_categories is None:
        n_categories = len(category_names)
    if n_metrics is None:
        n_metrics = len(metric_names)
    with open(path, 'r', encoding='utf-8') as fd:
        # int() and float() ignore surrounding whitespace, so the lines are
        # parsed as-is. Slicing off the last character instead would eat a
        # digit whenever the final line of the file has no trailing newline.
        n_models = int(fd.readline())
        model_names = [fd.readline().rstrip('\n') for _ in range(n_models)]
        # The comprehensions consume the file strictly in order:
        # model -> category -> metric.
        model_category_metrics = [
            [
                [float(fd.readline()) for _ in range(n_metrics)]
                for _ in range(n_categories)
            ]
            for _ in range(n_models)
        ]
    return model_names, np.array(model_category_metrics)

In [None]:
# Load the names and metrics stored in 'ppb.txt' (judging by the variable
# names, the paragraph-level baseline classifiers — confirm against the file).
baseline_names, baseline_category_metrics = read_model_category_metrics('ppb.txt')
# Bare expression so the notebook displays the array shape:
# (number of models, len(category_names), len(metric_names)).
baseline_category_metrics.shape

In [None]:
# Load the names and metrics stored in 'pp.txt'. The paths below look like the
# saved models those results belong to — TODO confirm they match the file order.
# models/paragraph_cnn_max_ordinal/33063788_overall_max-agg.h5
# models/paragraph_rnn_max_ordinal/33063789_overall_max-agg.h5
# models/paragraph_rnncnn_max_ordinal/33063790_overall_max-agg.h5
model_names, model_category_metrics = read_model_category_metrics('pp.txt')
# Bare expression so the notebook displays the array shape.
model_category_metrics.shape

In [None]:
# Merge baselines and models into one collection of classifiers. Baselines come
# first in both the name list and the array, so names and rows stay aligned.
classifier_names = baseline_names + model_names
# np.concatenate joins along axis 0 by default, i.e. the model axis.
classifier_category_metrics = np.concatenate([baseline_category_metrics, model_category_metrics])
# Bare expression so the notebook displays the combined shape.
classifier_category_metrics.shape

## Books

In [None]:
def read_model_category_metrics_book(path, metric_indices, n_categories=None):
    """Read selected per-model, per-category metrics from a '|'-delimited file.

    Expected layout:

    1. a line holding the number of models ``n``;
    2. ``n`` lines holding the model names;
    3. for every model, one row per category. A row is split on ``'|'``; the
       text before the first ``'|'`` and after the last ``'|'`` is discarded
       and every remaining cell is parsed as a float.

    Args:
        path: Path of the UTF-8 text file to read.
        metric_indices: Positions, within the parsed cells of a row, of the
            values to keep. The kept values follow the order given here.
        n_categories: Number of rows stored per model. Defaults to
            ``len(category_names)``.

    Returns:
        A ``(model_names, metrics)`` tuple: ``model_names`` is a list of
        strings and ``metrics`` is a NumPy array of shape
        ``(n_models, n_categories, len(metric_indices))``.

    Raises:
        ValueError: If the model count or any cell of a row is not a number.
        IndexError: If a row has fewer cells than ``metric_indices`` requires.
    """
    if n_categories is None:
        n_categories = len(category_names)
    with open(path, 'r', encoding='utf-8') as fd:
        n_models = int(fd.readline())
        model_names = [fd.readline().rstrip('\n') for _ in range(n_models)]
        model_category_metrics = []
        for _ in range(n_models):
            category_metrics = []
            for _ in range(n_categories):
                # strip() rather than dropping the last character: on a final
                # row without a trailing newline, slicing would remove the
                # closing '|' and [1:-1] would then lose the last column.
                cells = fd.readline().strip().split('|')[1:-1]
                all_metrics = [float(cell) for cell in cells]
                category_metrics.append([all_metrics[index] for index in metric_indices])
            model_category_metrics.append(category_metrics)
    return model_names, np.array(model_category_metrics)

In [None]:
# Positions of the wanted values within each '|'-separated row of the
# book-level result files.
metric_indices = [0, 3, 7]  # [Accuracy, F1 Macro, MSE]
# Load the names and selected metrics stored in 'pb.txt'.
model_names_book, model_category_metrics_book = read_model_category_metrics_book('pb.txt', metric_indices)
# Bare expression so the notebook displays the array shape:
# (number of models, len(category_names), len(metric_indices)).
model_category_metrics_book.shape

In [None]:
# Load the names and selected metrics stored in 'pbb.txt' (judging by the
# variable names, the book-level baseline classifiers — confirm against the file).
baseline_names_book, baseline_category_metrics_book = read_model_category_metrics_book('pbb.txt', metric_indices)
# Bare expression so the notebook displays the array shape.
baseline_category_metrics_book.shape

In [None]:
# Merge book-level baselines and models. Baselines come first in both the name
# list and the array, so names and rows stay aligned.
classifier_names_book = baseline_names_book + model_names_book
# np.concatenate joins along axis 0 by default, i.e. the model axis.
classifier_category_metrics_book = np.concatenate([baseline_category_metrics_book, model_category_metrics_book])
# Bare expression so the notebook displays the combined shape.
classifier_category_metrics_book.shape

## Plot

In [None]:
def plot_bar(
        classifier_values,
        classifier_names,
        tick_names,
        title,
        ylabel,
        xlabel=None,
        legend=False,
        save_path=None,
        figsize=(16, 4.8),
        gap=.15):
    """Show a grouped bar chart with one bar per classifier at every tick.

    The bars belonging to one tick sit side by side in a group of total width
    ``1 - gap`` centred on that tick.

    Args:
        classifier_values: Sized sequence with one entry per classifier; each
            entry holds the bar heights, one per entry of ``tick_names``.
        classifier_names: Legend labels, in the order of ``classifier_values``.
            Only used when ``legend`` is true.
        tick_names: Labels of the x-axis ticks.
        title: Figure title.
        ylabel: Label of the y axis.
        xlabel: Label of the x axis; left unset when ``None``.
        legend: Whether to add a legend, anchored below the axes.
        save_path: Where to save the figure; nothing is saved when ``None``.
        figsize: Figure size passed to ``plt.figure``.
        gap: Part of each unit-wide tick slot left empty between groups.
    """
    plt.figure(figsize=figsize)
    positions = np.arange(len(tick_names))
    group_width = 1 - gap
    bar_width = group_width / len(classifier_values)
    for index, heights in enumerate(classifier_values):
        # Start from the tick, shift to this classifier's slot, then move the
        # whole group left by half its width and re-centre the bar in its slot.
        centers = positions + index * bar_width - group_width / 2 + bar_width / 2
        plt.bar(centers, heights, width=bar_width)
    plt.xticks(positions, tick_names, rotation=-16.875, ha='left')
    plt.title(title)
    plt.ylabel(ylabel)
    if xlabel is not None:
        plt.xlabel(xlabel)
    if legend:
        plt.legend(
            classifier_names,
            loc='upper center',
            bbox_to_anchor=(.5, -0.25),
            ncol=5,
        )
    if save_path is not None:
        plt.savefig(save_path, bbox_inches='tight')
    plt.show()

In [None]:
# One figure per metric (last axis of the array) for the paragraph-level
# results, each saved under ../figures.
for metric_index in range(classifier_category_metrics.shape[2]):
    metric_name = metric_names[metric_index]
    plot_bar(
        classifier_category_metrics[:, :, metric_index],
        classifier_names,
        category_names,
        '{} of All Classifiers over Individual Paragraphs by Category'.format(metric_name),
        metric_name,
        # Only the figure for metric index 2 carries the legend.
        legend=metric_index == 2,
        save_path=os.path.join('..', 'figures', 'classifier_category_metrics_{:d}'.format(metric_index)),
    )

In [None]:
# One figure per metric (last axis of the array) for the book-level results,
# each saved under ../figures.
for metric_index in range(classifier_category_metrics_book.shape[2]):
    metric_name = metric_names[metric_index]
    plot_bar(
        classifier_category_metrics_book[:, :, metric_index],
        classifier_names_book,
        category_names,
        '{} of All Classifiers over Entire Books by Category'.format(metric_name),
        metric_name,
        # Only the figure for metric index 2 carries the legend.
        legend=metric_index == 2,
        save_path=os.path.join('..', 'figures', 'classifier_category_metrics_book_{:d}'.format(metric_index)),
    )