# Plotting results

This notebook contains the code for generating the bar plots of model performance for various chunk sizes. The graphs are generated using Plotnine, which is a Python data visualization package that replicates R's ggplot.

In [1]:
import pandas as pd
from plotnine import *
from pathlib import Path
from pandas.api.types import CategoricalDtype
import numpy as np

In [2]:
data_path = Path('./results')

In [15]:
colors = ["#00BA38", "#619CFF", "#e60000"]

In [16]:
model_categories = CategoricalDtype(categories=["CNN", "AWD-LSTM", "RoBERTa", "GPT-2"], ordered=True)

### Plot of proxy task accuracy for various pretraining levels

In [17]:
chunks_df = pd.read_csv(data_path/'accuracy_by_chunk_size.csv')

In [18]:
chunks_df.head()

Unnamed: 0,Chunk,Pretraining amount,Model,Accuracy
0,Fragment 64,,CNN,37.34
1,Fragment 64,,AWD-LSTM,39.75
2,Fragment 64,,RoBERTa,34.5
3,Fragment 64,,GPT-2,32.1
4,Fragment 64,Labeled,AWD-LSTM,42.71


In [19]:
# convert these columns from object type to category and impose an ordering for plotting
model_categories = CategoricalDtype(categories=["CNN", "AWD-LSTM", "RoBERTa", "GPT-2"], ordered=True)
chunks_df.Model = chunks_df.Model.astype(model_categories)
chunk_categories = CategoricalDtype(categories=["Fragment 64", "Fragment 128", "Fragment 256"], ordered=True)
chunks_df.Chunk = chunks_df.Chunk.astype(chunk_categories)
pretraining_categories = CategoricalDtype(categories=["None", "Labeled", "Unlabeled"], ordered=True)
chunks_df['Pretraining amount'] = chunks_df['Pretraining amount'].astype(pretraining_categories)

In [20]:
# generate abbreviated labels for each model
model_to_name = {"CNN": "CNN", "AWD-LSTM":"ALSTM", "RoBERTa": "RBRTa", "GPT-2": "GPT2"}
chunks_df.Model = chunks_df.Model.apply(lambda x: model_to_name[x])

In [22]:
# proxy task accuracies
(ggplot(chunks_df, aes(x="Model", y="Accuracy", fill="Pretraining amount")) +
    geom_bar(width = 0.75, position=position_dodge2(preserve='single', width=0.95), stat='identity') +
    scale_y_continuous(expand = [0, 0], breaks=range(0, 65, 10), limits = [0, 65]) +
    scale_fill_manual(values=colors) +
    facet_grid('. ~ Chunk') +
    theme_bw() + 
    labs(y = "Accuracy (%)") +
    theme(dpi=300, legend_position=(0.5, 0.03), legend_direction="horizontal", legend_title_align='bottom', 
            legend_background=element_blank(),
            legend_title = element_text(size=10),
            strip_background = element_rect(fill="#ebebeb"),
            plot_title = element_text(size=14, colour='black', margin={'b': 12}),
            axis_text_x = element_text(size = 8, colour='black'),
            axis_text_y = element_text(size = 12, colour='black'), 
            axis_title_x = element_blank(),
            axis_title_y = element_text(size = 12, margin={'r': 6})) +
    guides(fill=guide_legend(title="Pretraining:", title_position='left', label_position="right"))).save('./accuracy_plot.png')



### Plot of Macro F1 for various pretraining levels

In [34]:
finalized_f1_df = pd.read_csv(data_path/'finalized_f1.csv')

In [35]:
# convert these columns from object type to category and impose an ordering for plotting
model_categories = CategoricalDtype(categories=["CNN", "AWD-LSTM", "RoBERTa", "GPT-2"], ordered=True)
finalized_f1_df.Model = finalized_f1_df.Model.astype(model_categories)
chunk_categories = CategoricalDtype(categories=["Fragment 64", "Fragment 128", "Fragment 256"], ordered=True)
finalized_f1_df.Chunk = finalized_f1_df.Chunk.astype(chunk_categories)
pretraining_categories = CategoricalDtype(categories=["None", "Labeled", "Unlabeled"], ordered=True)
finalized_f1_df['Pretraining'] = finalized_f1_df['Pretraining'].astype(pretraining_categories)

In [36]:
# generate abbreviated labels for each model
model_to_name = {"CNN": "CNN", "AWD-LSTM":"ALSTM", "RoBERTa": "RBRTa", "GPT-2": "GPT2"}
finalized_f1_df.Model = finalized_f1_df.Model.apply(lambda x: model_to_name[x])

In [37]:
finalized_f1_df.head()

Unnamed: 0,Chunk,Pretraining,Model,Macro F1
0,Fragment 64,,CNN,0.3932
1,Fragment 64,,ALSTM,0.331
2,Fragment 64,,RBRTa,0.452
3,Fragment 64,,GPT2,0.441
4,Fragment 64,Labeled,ALSTM,0.506


In [38]:
# proxy task F1 scores
(ggplot(finalized_f1_df, aes(x="Model", y="Macro F1", fill="Pretraining")) +
    geom_bar(width = 0.75, position=position_dodge2(preserve='single', width=0.95), stat='identity') +
    scale_y_continuous(expand = [0, 0], breaks=np.arange(0.0, 0.75, 0.1), limits = [0, 0.71]) +
    scale_fill_manual(values=colors) +
    facet_grid('. ~ Chunk') +
    theme_bw() + 
    labs(y = "Macro F1") +
    theme(dpi=300, legend_position=(0.5, 0.03), legend_direction="horizontal", legend_title_align='bottom', 
            legend_background=element_blank(),
            legend_title = element_text(size=10),
            strip_background = element_rect(fill="#ebebeb"),
            plot_title = element_text(size=14, colour='black', margin={'b': 12}),
            axis_text_x = element_text(size = 8, colour='black'),
            axis_text_y = element_text(size = 12, colour='black'), 
            axis_title_x = element_blank(),
            axis_title_y = element_text(size = 12, margin={'r': 6})) +
    guides(fill=guide_legend(title="Pretraining:", title_position='left', label_position="right"))).save('./f1_plot.png')

