# plot_results

In [None]:
import os

import numpy as np
import matplotlib.pyplot as plt

import folders
from sites.bookcave import bookcave

## History

In [None]:
def plot_history(path):
    """Plot the training curves stored in a history file.

    Every non-blank line of the file is split on whitespace: the first token
    is used as the metric name and the remaining tokens are parsed as
    ``float32`` values. Two figures are produced:

    * the overall ``loss`` / ``val_loss`` curves, and
    * one row per category from ``bookcave.CATEGORIES`` that has a
      ``'<category>_loss'`` entry, with loss on the left and accuracy on the
      right.

    Args:
        path: Path of the history text file to read.

    Raises:
        KeyError: If a plotted category has no ``'val_<category>_loss'``
            entry, or none of the known accuracy metric names is present for
            it (both the training and the ``val_`` series are required).
    """
    # Collect history data.
    history = dict()
    with open(path, 'r') as fd:
        for line in fd:
            parts = line.split()
            if not parts:
                # Blank or whitespace-only lines carry no metric name.
                continue
            history[parts[0]] = np.asarray(parts[1:], dtype=np.float32)

    # Plot loss.
    try:
        plt.plot(history['loss'], color='red', label='loss')
        plt.plot(history['val_loss'], color='blue', label='val_loss')
    except KeyError:
        # Best effort: show which metrics the file does contain.
        print(history.keys())
    plt.legend()
    plt.title('Overall')
    plt.ylabel('Loss')
    plt.xlabel('Epochs')

    # Divide into categories.
    categories = [category for category in bookcave.CATEGORIES if '{}_loss'.format(category) in history]
    if not categories:
        # `plt.subplots` rejects a grid with zero rows, so only the overall
        # figure can be shown.
        plt.show()
        return

    # Accuracy metric names to look for, in order of preference.
    accuracy_suffixes = ('binary_accuracy', 'categorical_accuracy', 'accuracy', 'acc')

    # Plot categories.
    # `squeeze=False` keeps `axes` two-dimensional even for a single category,
    # so the `axes[row][column]` indexing below is always valid.
    figure, axes = plt.subplots(nrows=len(categories), ncols=2, figsize=(len(categories), 18), squeeze=False)
    for category_i, category in enumerate(categories):
        loss_axis, accuracy_axis = axes[category_i][0], axes[category_i][1]

        loss_axis.plot(history['{}_loss'.format(category)], color='red', label='loss')
        loss_axis.plot(history['val_{}_loss'.format(category)], color='blue', label='val_loss')
        loss_axis.legend()
        loss_axis.set_title(category)
        loss_axis.set_ylabel('Loss')
        loss_axis.set_xlabel('Epochs')

        # Use the first accuracy metric for which both series exist, so a
        # half-present metric never leaves a stray line on the axes.
        for suffix in accuracy_suffixes:
            train_key = '{}_{}'.format(category, suffix)
            val_key = 'val_{}_{}'.format(category, suffix)
            if train_key in history and val_key in history:
                accuracy_axis.plot(history[train_key], color='orange', label=suffix)
                accuracy_axis.plot(history[val_key], color='green', label='val_{}'.format(suffix))
                break
        else:
            raise KeyError('no accuracy metric found for category {!r}'.format(category))
        accuracy_axis.legend()
        accuracy_axis.set_title(category)
        accuracy_axis.set_ylabel('Accuracy')
        accuracy_axis.set_xlabel('Epochs')
    figure.tight_layout()
    plt.show()

In [None]:
# Entries of the history directory, in sorted order.
model_names = os.listdir(folders.HISTORY_PATH)
model_names.sort()
# Bare expression so the notebook displays the list.
model_names

In [None]:
# Plot every history file found under each model's directory.
# `skip_names` lists file-system clutter that must never be treated as a
# model directory or as a history file.
skip_names = {'.DS_Store'}
for model_name in model_names:
    model_path = os.path.join(folders.HISTORY_PATH, model_name)
    # Stray regular files at the top level cannot be listed as directories.
    if model_name in skip_names or not os.path.isdir(model_path):
        continue
    fnames = sorted(os.listdir(model_path))
    for fname in fnames:
        # The same clutter can appear inside a model directory.
        if fname in skip_names:
            continue
        print('{}: {}'.format(model_name, fname))
        path = os.path.join(model_path, fname)
        plot_history(path)

## Accuracy

In [None]:
# Hard-coded accuracy scores, one list per classifier run.
# Each list holds nine values; where these lists are plotted, the values are
# matched to one tick per `bookcave.CATEGORIES` entry plus a final 'Average'.
# The trailing notes are the original run annotations.
acc_sv = [0.69507, 0.66380, 0.83737, 0.69820, 0.74668, 0.59578, 0.64816, 0.89836, 0.70733]  # 'paragraph_tokens'
acc_bn = [0.66562, 0.62594, 0.79030, 0.68073, 0.69018, 0.55353, 0.52456, 0.89798, 0.67861]
acc_pr = [0.68100, 0.63409, 0.82408, 0.66302, 0.67866, 0.52463, 0.46130, 0.89289, 0.66996]  # glove300-emb
acc_pc = [0.66849, 0.62471, 0.82486, 0.67240, 0.69977, 0.51759, 0.56998, 0.89289, 0.68384]
acc_sr = [0.15181, 0.57038, 0.69049, 0.15770, 0.60575, 0.19823, 0.31540, 0.90420, 0.44924]  # glove300-emb
acc_sc = [0.42741, 0.60648, 0.74282, 0.54016, 0.60575, 0.32056, 0.38836, 0.90420, 0.56697]  # regression

In [None]:
def plot_results_bar(classifier_values, classifier_names, tick_names, gap=.2, figsize=(12, 9)):
    """Draw a grouped bar chart with one bar per classifier at every tick.

    Args:
        classifier_values: One sequence of scores per classifier; each
            sequence supplies one bar height per entry of `tick_names`.
        classifier_names: Legend labels, in the same order as
            `classifier_values`.
        tick_names: Labels for the x-axis ticks (one group of bars each).
        gap: Fraction of the unit spacing between ticks that is left empty
            between neighbouring groups.
        figsize: Figure size passed to `plt.figure`.
    """
    plt.figure(figsize=figsize)
    ticks = np.arange(len(tick_names))
    n_classifiers = len(classifier_values)
    # `max(..., 1)` avoids a division by zero when there is nothing to draw.
    width = (1. - gap) / max(n_classifiers, 1)
    for i, values in enumerate(classifier_values):
        # Bars are centre-aligned, so offsetting by the distance from the
        # middle bar index centres the whole group on its tick for any
        # number of classifiers.
        offset = (i - (n_classifiers - 1) / 2.) * width
        plt.bar(ticks + offset, values, width=width)
    plt.xticks(ticks, tick_names, rotation=-45, ha='left')
    plt.legend(classifier_names)
    plt.title('Accuracy for Classifiers')
    plt.xlabel('Maturity Categories')
    plt.ylabel('Classification Accuracy')
    plt.show()

In [None]:
# Grouped bar chart of the per-classifier accuracies; `acc_bn` is not plotted.
classifier_names = ['SVM', 'ParaRNN', 'ParaCNN', 'SentRNN', 'SentCNN']
classifier_values = [acc_sv, acc_pr, acc_pc, acc_sr, acc_sc]
# One tick per category display name, followed by a final 'Average' tick.
tick_names = [bookcave.CATEGORY_NAMES[category] for category in bookcave.CATEGORIES]
tick_names.append('Average')
plot_results_bar(classifier_values, classifier_names, tick_names)

## Overall

In [None]:
def plot_overall_bar(scores, names, ticks, title, save_path=None):
    """Draw a single bar series and optionally write the figure to a file.

    Args:
        scores: Bar heights, one per entry of `ticks`.
        names: Tick labels, one per entry of `ticks`.
        ticks: x positions of the bars and of their labels.
        title: Title placed above the chart.
        save_path: When not `None`, the figure is saved to this path before
            it is shown.
    """
    # Bars and their rotated tick labels.
    plt.bar(ticks, scores)
    plt.xticks(ticks, names, rotation=-45, ha='left')

    # Axis annotations.
    plt.ylabel('Classification Accuracy')
    plt.xlabel('Classifiers')
    plt.title(title)

    # Write the image (if requested), then display the figure.
    wants_file = save_path is not None
    if wants_file:
        plt.savefig(save_path, bbox_inches='tight')
    plt.show()

In [None]:
# Classifier labels shared by the "overall" bar charts, in plotting order.
names = [
    'Zero Rule',
    'KNN',
    'Linear Regression',
    'Logistic Regression',
    'Multinomial Naive Bayes',
    'Random Forest',
    'SVM',
    'Multi-layer Perceptron',
    'Paragraph CNN',
    'Paragraph RNN',
]
# One integer x position per label.
ticks = np.arange(len(names))

In [None]:
# Book-level chart: one hard-coded score per entry of `names`, saved as
# 'overall_book.png' under the figures folder.
book_save_path = os.path.join(folders.FIGURES_PATH, 'overall_book.png')
book_scores = [
    0.2783, 0.6521, 0.5559, 0.6388, 0.5520,
    0.5714, 0.7091, 0.6247, 0.6286, 0.5762,
]
plot_overall_bar(
    book_scores,
    names,
    ticks,
    'Overall Accuracy for Classifiers for Books',
    save_path=book_save_path,
)

In [None]:
# Paragraph-level chart: one hard-coded score per entry of `names`, saved as
# 'overall_paragraph.png' under the figures folder.
paragraph_save_path = os.path.join(folders.FIGURES_PATH, 'overall_paragraph.png')
paragraph_scores = [
    0.5193, 0.2876, 0.2318, 0.5308, 0.0773,
    0.3977, 0.4578, 0.0572, 0.5107, 0.5293,
]
plot_overall_bar(
    paragraph_scores,
    names,
    ticks,
    'Overall Accuracy for Classifiers for Paragraphs',
    save_path=paragraph_save_path,
)