# Plotting
This notebook produces various plots of the datasets and of the in-context learning results.

In [1]:
import random
import matplotlib

from datasets import load_dataset
from data_manipulation.data_processing import count_and_reformat
from data_manipulation.data_processing import sample_uniform_subset
from data_manipulation.data_analysis import plot_distribution, plot_histogram, plot_scatter, plot_loss_curves, plot_newline_frequencies, plot_confusion_matrix, plot_confidence_scores, plot_metric_score_thresholds

#### Load datasets

In [11]:
# Expensive cell: counting takes a couple of minutes, so only run it when
# `arxiv` is actually needed by the cells below.
arxiv = count_and_reformat(
    dataset=load_dataset("gfissore/arxiv-abstracts-2021")['train'],
    count_column='abstract',
    retain_columns=['title', 'abstract'],
)

Downloading readme:   0%|          | 0.00/6.75k [00:00<?, ?B/s]

Downloading and preparing dataset json/gfissore--arxiv-abstracts-2021 to /Users/nicolaisivesind/.cache/huggingface/datasets/gfissore___json/gfissore--arxiv-abstracts-2021-23556c248bdbe0fc/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/940M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Generating train split: 0 examples [00:00, ? examples/s]

Dataset json downloaded and prepared to /Users/nicolaisivesind/.cache/huggingface/datasets/gfissore___json/gfissore--arxiv-abstracts-2021-23556c248bdbe0fc/0.0.0/fe5dd6ea2639a6df622901539cb550cf8797e5a6b2dd7af1cf934bed8e233e6e. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

 Counting words: 100%

#### Plotting

In [2]:
# Load the raw and the final research-abstracts CSV files, keeping the 'train' split of each.
# NOTE(review): the two files live under differently named directories
# ('research-abstracts' vs 'ChatGPT-Research-Abstracts') - confirm both paths are current.
chatgpt_abstracts_raw = load_dataset(
    'csv',
    data_files='../../datasets/origins/research-abstracts/research_abstracts-raw.csv',
)['train']
chatgpt_abstracts_clean = load_dataset(
    'csv',
    data_files='../../datasets/origins/ChatGPT-Research-Abstracts/research_abstracts-final.csv',
)['train']

Found cached dataset csv (/Users/nicolaisivesind/.cache/huggingface/datasets/csv/default-b7975ec30e73b117/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


  0%|          | 0/1 [00:00<?, ?it/s]

Found cached dataset csv (/Users/nicolaisivesind/.cache/huggingface/datasets/csv/default-6a0f094caf40a32a/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


  0%|          | 0/1 [00:00<?, ?it/s]

In [12]:
# Seed the global RNG before sampling so the 10k random subset is the same on
# every Restart & Run All (and so is any sampling inside sample_uniform_subset,
# if it draws from the `random` module - TODO confirm).
random.seed(42)

# Random 10k subset of the arXiv data, used as a comparison series in the plots below.
arxiv_10k = random.sample(arxiv, k=10000)

# Subset sampled on 'word_count' with the arguments 10000, 325, 900
# (presumably target size and lower/upper word-count bounds - verify against
# sample_uniform_subset's signature).
uniform = sample_uniform_subset(arxiv, 'word_count', 10000, 325, 900)
print(len(uniform))

 Sorting into lists: 99%
 Sampling data points: 63%6368


In [None]:
# Word-count distributions: the full arXiv set, its random 10k subset, and the
# real/generated word-count columns of the raw ChatGPT-Research-Abstracts data.
plot_distribution(
    plots=[
        {'dataset': arxiv, 'column_name': 'word_count',
         'color': 'darkmagenta', 'alpha': 0.6,
         'display': 'arXiv-abstracts-2021 (~2m)', 'mode': False},
        {'dataset': arxiv_10k, 'column_name': 'word_count',
         'color': 'red', 'alpha': 0.6,
         'display': 'arXiv-abstracts-2021 random subset (10k)', 'mode': True},
        {'dataset': chatgpt_abstracts_raw, 'column_name': 'real_word_count',
         'color': 'blue', 'alpha': 0.6,
         'display': 'ChatGPT-Research-Abstracts, real (10k)', 'mode': False},
        {'dataset': chatgpt_abstracts_raw, 'column_name': 'generated_word_count',
         'color': 'limegreen', 'alpha': 0.8,
         'display': 'ChatGPT-Research-Abstracts, generated (10k)', 'mode': True},
    ],
    # Annotated reference lines: one horizontal at 18, one vertical at 360.
    h_lines=[
        {'value': 18, 'color': 'grey', 'alpha': 0.8,
         'text': 'IASS_10k ≈ 18', 'offset': (400, 1)},
    ],
    v_lines=[
        {'value': 360, 'color': 'grey', 'alpha': 0.8,
         'text': 'WC = 360', 'offset': (5, 50)},
    ],
    start=50,
    end=600,
    x_label='WC (length of data points in words)',
    y_label='n (number of data points)',
    y_lim=(0, 75),
    legend_offset=1.02,
)

In [5]:
# Word-count histograms between 350 and 600: the full arXiv set against the
# real/generated columns of the final ChatGPT-Research-Abstracts data.
plot_histogram(
    plots=[
        {'dataset': arxiv, 'column_name': 'word_count',
         'color': 'red', 'alpha': 0.6,
         'display': 'arXiv-abstracts-2021 (~2m)'},
        {'dataset': chatgpt_abstracts_clean, 'column_name': 'real_word_count',
         'color': 'blue', 'alpha': 0.4,
         'display': 'ChatGPT-Research-Abstracts, real (10k)'},
        {'dataset': chatgpt_abstracts_clean, 'column_name': 'generated_word_count',
         'color': 'limegreen', 'alpha': 0.8,
         'display': 'ChatGPT-Research-Abstracts, generated (10k)'},
    ],
    start=350,
    end=600,
    x_label="WC (length of data points in words)",
    y_label="n (number of data points)",
    y_lim=(0, 52),
)

In [6]:
# Scatter of real vs. generated word count on the raw ChatGPT-Research-Abstracts data,
# with correlation annotations for three sub-intervals and the full 50-600 range.
plot_scatter(
    plots=[
        {'dataset': chatgpt_abstracts_raw,
         'x': 'real_word_count', 'y': 'generated_word_count',
         'color': 'blue', 'alpha': 0.1,
         'display': 'Data point in ChatGPT-Research-Abstracts'},
    ],
    correlations=[
        {'interval': (50, 325), 'spaces': (2, 2, 1), 'positioning': (400, 160),
         'color': 'magenta', 'alpha': 0.8},
        {'interval': (325, 420), 'spaces': (0, 2, 1), 'positioning': (400, 120),
         'color': 'green', 'alpha': 0.8},
        {'interval': (420, 600), 'spaces': (0, 4, 1), 'positioning': (400, 80),
         'color': 'darkblue', 'alpha': 0.8},
        {'interval': (50, 600), 'spaces': (2, 0, 1), 'positioning': (400, 40),
         'color': 'black', 'alpha': 0.95},
    ],
    # Diagonal from (0, 0) in steps of (1, 1), labelled as the perfect-correlation reference.
    d_lines=[
        {'start': (0, 0), 'increment': (1, 1), 'color': 'orange', 'alpha': 0.8,
         'display': 'Perfect correlation', 'offset': (0, 0)},
    ],
    # Vertical markers at the interval boundaries used in `correlations`.
    v_lines=[
        {'value': 325, 'color': 'grey', 'alpha': 0.8, 'text': 'x=325', 'offset': (5, 520)},
        {'value': 420, 'color': 'grey', 'alpha': 0.8, 'text': 'x=420', 'offset': (5, 520)},
    ],
    x_label='x: Real abstract word count',
    y_label='y: Generated abstract word count',
    y_lim=(0, 600),
    legend_offset=(0.43, 0.95),
    average_curve={'color': 'red', 'alpha': 0.8,
                   'display': 'Average word count correlation', 'offset': (10, 10)},
    sigma=2,
)

In [10]:
# Deviation curves of generated word count ('predicted') against real word count
# ('benchmark') on the raw ChatGPT-Research-Abstracts data.
plot_loss_curves(
    plots=[
        {'dataset': chatgpt_abstracts_raw,
         'benchmark': 'real_word_count',
         'predicted': 'generated_word_count',
         'positive-color': 'blue',
         'negative-color': 'red',
         'alpha': 0.6,
         'positive-display': 'Mean absolute positive deviation (MAPD)',
         'negative-display': 'Mean absolute negative deviation (MAND)',
         'mean-abs-display': 'Mean absolute total deviation (MATD)',
         'mean-abs-color': 'purple'},
    ],
    # The trailing spaces inside the label strings are intentional padding; keep them as-is.
    deviations=[
        {'zero-text': 'Non-deviates:          ', 'positioning': (465, 30),
         'color': 'black', 'alpha': 0.7},
        {'positive-text': 'Positive deviates:    ', 'positioning': (465, 20),
         'color': 'blue', 'alpha': 0.8},
        {'negative-text': 'Negative deviates:  ', 'positioning': (465, 10),
         'color': 'red', 'alpha': 0.8},
    ],
    v_lines=[
        {'value': 325, 'color': 'grey', 'alpha': 0.8, 'text': 'x=325', 'offset': (5, 175)},
        {'value': 420, 'color': 'grey', 'alpha': 0.8, 'text': 'x=420', 'offset': (5, 175)},
    ],
    x_label='Target word count',
    y_label='Average absolute deviation',
    legend_offset=(0.43, 0.95),
    sigma=5,
)

In [None]:
# Newline frequencies of the real vs. generated abstracts in the final
# ChatGPT-Research-Abstracts data, plotted against their word-count columns.
plot_newline_frequencies(
    plots=[
        {'dataset': chatgpt_abstracts_clean,
         'column': 'real_abstract',
         'word_count': 'real_word_count',
         'color': 'lavender',
         'alpha': 0.8,
         'display': 'Real abstracts, (CRA-Real)',
         'mean_color': 'blue'},
        {'dataset': chatgpt_abstracts_clean,
         'column': 'generated_abstract',
         'word_count': 'generated_word_count',
         'color': 'mistyrose',
         'alpha': 0.8,
         'display': 'Generated abstracts (CRA-Generated)',
         'mean_color': 'red'},
    ],
    x_label='x: Word Count',
    y_label='y: Number of words per paragraph',
    sigma=4,
    legend_coords=(0.45, 0.95),
    text_coords=(0.039, 0.66),
    v_lines=[
        {'value': 185, 'color': 'lightgrey', 'alpha': 0.8,
         'text': 'x=185', 'offset': (5, 280)},
    ],
)

2543
27


## In-context learning

In [2]:
# In-context-learning result CSVs: {human, inductive} x {zero-shot, few-shot}.
# Each file is loaded with the csv builder and its 'train' split is kept.
icl_human_zero_shot = load_dataset(
    "csv", data_files="../results/in-context-learning/icl-human-zero-shot.csv"
)['train']
icl_human_few_shot = load_dataset(
    "csv", data_files="../results/in-context-learning/icl-human-few-shot.csv"
)['train']
icl_inductive_zero_shot = load_dataset(
    "csv", data_files="../results/in-context-learning/icl-inductive-zero-shot.csv"
)['train']
icl_inductive_few_shot = load_dataset(
    "csv", data_files="../results/in-context-learning/icl-inductive-few-shot.csv"
)['train']

Found cached dataset csv (/Users/nicolaisivesind/.cache/huggingface/datasets/csv/default-9d2b72df7f5e5ead/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


  0%|          | 0/1 [00:00<?, ?it/s]

Found cached dataset csv (/Users/nicolaisivesind/.cache/huggingface/datasets/csv/default-b1e90685e2ae0d34/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


  0%|          | 0/1 [00:00<?, ?it/s]

Found cached dataset csv (/Users/nicolaisivesind/.cache/huggingface/datasets/csv/default-653b07a1df5e0e7d/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


  0%|          | 0/1 [00:00<?, ?it/s]

Found cached dataset csv (/Users/nicolaisivesind/.cache/huggingface/datasets/csv/default-e821fc1ced51601f/0.0.0/6954658bab30a358235fa864b05cf819af0e179325c740e4bc853bcc7ec513e1)


  0%|          | 0/1 [00:00<?, ?it/s]

### ICL Confidence box plots

In [3]:
# Confidence scores for the human zero-shot results (labels: Human / Machine).
plot_confidence_scores(
    plots=[
        {'dataset': icl_human_zero_shot,
         'true_label_column': 'label',
         'human_score_column': 'human_probability',
         'generated_score_column': 'generated_probability'},
    ],
    human_label='Human',
    generated_label='Machine',
)

In [None]:
# Confidence scores for the inductive zero-shot results (labels: Human / AI).
plot_confidence_scores(
    plots=[
        {'dataset': icl_inductive_zero_shot,
         'true_label_column': 'label',
         'human_score_column': 'human_probability',
         'generated_score_column': 'generated_probability'},
    ],
    human_label='Human',
    generated_label='AI',
)

In [None]:
# Confidence scores for the human few-shot results (labels: Human / Machine).
plot_confidence_scores(
    plots=[
        {'dataset': icl_human_few_shot,
         'true_label_column': 'label',
         'human_score_column': 'human_probability',
         'generated_score_column': 'generated_probability'},
    ],
    human_label='Human',
    generated_label='Machine',
)

In [None]:
# Confidence scores for the inductive few-shot results (labels: Human / AI).
plot_confidence_scores(
    plots=[
        {'dataset': icl_inductive_few_shot,
         'true_label_column': 'label',
         'human_score_column': 'human_probability',
         'generated_score_column': 'generated_probability'},
    ],
    human_label='Human',
    generated_label='AI',
)

### ICL Confusion matrices

In [6]:
# Confusion matrix for the human zero-shot results.
# NOTE(review): generated_threshold is a hand-picked constant; its origin is not
# recorded in this notebook - confirm where 0.000 comes from.
plot_confusion_matrix(
    dataset=icl_human_zero_shot,
    true_label_column='label',
    human_score_column='human_probability',
    generated_score_column='generated_probability',
    human_label='Human',
    generated_label='Machine',
    generated_threshold=0.000,
)

In [8]:
# Confusion matrix for the inductive zero-shot results.
# NOTE(review): generated_threshold is a hand-picked constant; its origin is not
# recorded in this notebook - confirm where 0.078 comes from.
plot_confusion_matrix(
    dataset=icl_inductive_zero_shot,
    true_label_column='label',
    human_score_column='human_probability',
    generated_score_column='generated_probability',
    human_label='Human',
    generated_label='AI',
    generated_threshold=0.078,
)

In [9]:
# Confusion matrix for the human few-shot results.
# NOTE(review): generated_threshold is a hand-picked constant; its origin is not
# recorded in this notebook - confirm where 0.002 comes from.
plot_confusion_matrix(
    dataset=icl_human_few_shot,
    true_label_column='label',
    human_score_column='human_probability',
    generated_score_column='generated_probability',
    human_label='Human',
    generated_label='Machine',
    generated_threshold=0.002,
)

In [10]:
# Confusion matrix for the inductive few-shot results.
# NOTE(review): generated_threshold is a hand-picked constant; its origin is not
# recorded in this notebook - confirm where 0.168 comes from.
plot_confusion_matrix(
    dataset=icl_inductive_few_shot,
    true_label_column='label',
    human_score_column='human_probability',
    generated_score_column='generated_probability',
    human_label='Human',
    generated_label='AI',
    generated_threshold=0.168,
)

### ICL GCS thresholds

In [103]:
# 'Accuracy' metric across thresholds from 0.0 to 1 (step 0.001) for all four ICL result sets.
plot_metric_score_thresholds(
    plots=[
        {'dataset': icl_human_zero_shot,
         'true_label_column': 'label',
         'human_score_column': 'human_probability',
         'generated_score_column': 'generated_probability',
         'human_label': 'Human', 'generated_label': 'Machine',
         'color': 'green', 'alpha': 0.6, 'display': 'Human-zero-shot'},
        {'dataset': icl_inductive_zero_shot,
         'true_label_column': 'label',
         'human_score_column': 'human_probability',
         'generated_score_column': 'generated_probability',
         'human_label': 'Human', 'generated_label': 'AI',
         'color': 'orange', 'alpha': 0.6, 'display': 'Inductive-zero-shot'},
        {'dataset': icl_human_few_shot,
         'true_label_column': 'label',
         'human_score_column': 'human_probability',
         'generated_score_column': 'generated_probability',
         'human_label': 'Human', 'generated_label': 'Machine',
         'color': 'red', 'alpha': 0.8, 'display': 'Human-few-shot'},
        {'dataset': icl_inductive_few_shot,
         'true_label_column': 'label',
         'human_score_column': 'human_probability',
         'generated_score_column': 'generated_probability',
         'human_label': 'Human', 'generated_label': 'AI',
         'color': 'blue', 'alpha': 0.8, 'display': 'Inductive-few-shot'},
    ],
    start=0.0,
    end=1,
    step_size=0.001,
    metric='Accuracy',
)

In [None]:
# 'F1' metric across thresholds from 0.0 to 1 (step 0.001) for all four ICL result sets.
plot_metric_score_thresholds(
    plots=[
        {'dataset': icl_human_zero_shot,
         'true_label_column': 'label',
         'human_score_column': 'human_probability',
         'generated_score_column': 'generated_probability',
         'human_label': 'Human', 'generated_label': 'Machine',
         'color': 'green', 'alpha': 0.6, 'display': 'Human-zero-shot'},
        {'dataset': icl_inductive_zero_shot,
         'true_label_column': 'label',
         'human_score_column': 'human_probability',
         'generated_score_column': 'generated_probability',
         'human_label': 'Human', 'generated_label': 'AI',
         'color': 'orange', 'alpha': 0.6, 'display': 'Inductive-zero-shot'},
        {'dataset': icl_human_few_shot,
         'true_label_column': 'label',
         'human_score_column': 'human_probability',
         'generated_score_column': 'generated_probability',
         'human_label': 'Human', 'generated_label': 'Machine',
         'color': 'red', 'alpha': 0.8, 'display': 'Human-few-shot'},
        {'dataset': icl_inductive_few_shot,
         'true_label_column': 'label',
         'human_score_column': 'human_probability',
         'generated_score_column': 'generated_probability',
         'human_label': 'Human', 'generated_label': 'AI',
         'color': 'blue', 'alpha': 0.8, 'display': 'Inductive-few-shot'},
    ],
    start=0.0,
    end=1,
    step_size=0.001,
    metric='F1',
)