# Install

## Install Package to write CAS XMI files
See https://github.com/dkpro/dkpro-cassis

In [1]:
#pip install numpy dkpro-cassis "scikit-learn==0.23.1" datasets transformers[torch] ipywidgets

In [2]:
import torch
import numpy as np
import random
import pandas as pd
from IPython.display import display, HTML

# Import Project Data

## Load CAS

In [3]:
from cassis import *

with open('./data/TypeSystem.xml', 'rb') as f:
    typesystem = load_typesystem(f)

cas = []
# Similarity Dataset    
with open('./data/similarity.xmi', 'rb') as f:
   cas.append(load_cas_from_xmi(f, typesystem=typesystem))

# Similarity Categories Dataset    
with open('./data/similarity_categories.xmi', 'rb') as f:
   cas.append(load_cas_from_xmi(f, typesystem=typesystem))

# Random Dataset    
with open('./data/random.xmi', 'rb') as f:
   cas.append(load_cas_from_xmi(f, typesystem=typesystem))

# Random Categories Dataset    
with open('./data/random_categories.xmi', 'rb') as f:
   cas.append(load_cas_from_xmi(f, typesystem=typesystem))

# NSP Dataset    
with open('./data/nsp.xmi', 'rb') as f:
   cas.append(load_cas_from_xmi(f, typesystem=typesystem))

# NSP Categories Dataset    
with open('./data/nsp_categories.xmi', 'rb') as f:
   cas.append(load_cas_from_xmi(f, typesystem=typesystem))

# Similarity Categories Dataset 03.06.    
with open('./data/similarity_categories_03_06.xmi', 'rb') as f:
   cas.append(load_cas_from_xmi(f, typesystem=typesystem))


## Get Labels

In [4]:
# Without Translation (not used)
labels = ["none", "attribution", "causal", "conditional", "contrast", "description", "equivalence", "fulfillment", "identity", "purpose", "summary", "temporal"]

## Get Annotations

### Set annotation Preference
- Set whether to include news article headings or not
- If news headings are included, define separator (heading1 + separator + sentence1)
- Set whether to include timestamp of article
- If timestamp is used, define separator

In [5]:
annotation_with_news_title = True
annotation_title_separator = ". "
annotation_with_timestamp = True
annotation_timestamp_separator = " "

### Get Additional Doc Meta data (timestamp)

In [6]:
if annotation_with_timestamp:
    doc_df = pd.read_csv("./malte-candidates/meta-output.docs.tsv", sep="\t")
    doc_df = doc_df.set_index("doc_id")
    display(doc_df)

Unnamed: 0_level_0,url,title,categories,timestamp
doc_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
736,https://en.wikinews.org/wiki?curid=736,President of China lunches with Brazilian Pres...,"Politics and conflicts,South America,Asia,Braz...","November 12, 2004"
741,https://en.wikinews.org/wiki?curid=741,Palestinians to elect new president on January 9,"Palestine,Elections,Mahmoud Abbas,Yasser Arafa...","November 14, 2004"
743,https://en.wikinews.org/wiki?curid=743,Brazilian delegation returns from Arafat funeral,"Palestine,Brazil,Politics and conflicts,Middle...","November 13, 2004"
764,https://en.wikinews.org/wiki?curid=764,Hearing begins over David Hookes death,"Australia,Cricket,Crime and law,Oceania","November 15, 2004"
797,https://en.wikinews.org/wiki?curid=797,Brazilian soccer player's mother has been kidn...,"South America,Brazil,Football (soccer),Crime a...","November 15, 2004"
...,...,...,...,...
2909791,https://en.wikinews.org/wiki?curid=2909791,"Mohsen Fakhrizadeh, leader of Iranian nuclear ...","Iran,Asia,Middle East,Nuclear technology,Obitu...",2020-11-30T21:59:34Z
2909805,https://en.wikinews.org/wiki?curid=2909805,Former US national security advisor Michael Fl...,"United States,North America,Crime and law,Poli...",2020-11-28T22:49:40Z
2909818,https://en.wikinews.org/wiki?curid=2909818,"Wikinews interviews Sandra Jephcott, Sustainab...","Australia,Elections,Climate change,COVID-19,Qu...",2020-11-27T15:33:59Z
2909884,https://en.wikinews.org/wiki?curid=2909884,"Wikinews interviews Craig Farquharson, Liberal...","Australia,Elections,Queensland,Democracy,Polit...","November 27, 2020"


In [7]:
#doc_df.loc[741]

In [8]:
def get_timestamp_from_doc(doc_id):
    global doc_df
    try: 
        item = doc_df.loc[int(doc_id)]
        return item.timestamp
    except (TypeError, ValueError, KeyError) as e:
        print(f"Cannot find Doc #{doc_id}: {e}")
        return ""

### Read Annotations

In [9]:
def read_annotations(tmp_cas, controlPairList = []):
    global labels, annotation_with_news_title, annotation_title_separator, annotation_with_timestamp, annotation_timestamp_separator
    origin = []
    target = []
    label = []
    for sentence in tmp_cas.select('webanno.custom.Sentence'):
        for token in tmp_cas.select_covered('webanno.custom.SentenceRelation', sentence):
            # Only use annotated data
            if token.label != "unset":
                # Check for redundant pairs
                uid = f"g{token.Governor.sent_id}_d_{token.Dependent.sent_id}"
                if uid not in controlPairList:
                    origin_string = ""
                    target_string = ""
                    label.append(labels.index(token.label))
                    # Also add meta data: title
                    if annotation_with_news_title:
                        origin_string += token.Governor.title + annotation_title_separator
                        target_string += token.Dependent.title + annotation_title_separator
                    # Add Sentences
                    origin_string += token.Governor.get_covered_text()
                    target_string += token.Dependent.get_covered_text()
                    # Also add meta data: timestamp
                    if annotation_with_timestamp:
                        origin_string += annotation_timestamp_separator + get_timestamp_from_doc(token.Governor.doc_id)
                        target_string += annotation_timestamp_separator + get_timestamp_from_doc(token.Dependent.doc_id)
                    # Add String to list
                    origin.append(origin_string)
                    target.append(target_string)
                    controlPairList.append(uid)
                    #print('Dependent: ' + token.Dependent.get_*covered_text())
                    #print('Governor: ' + token.Governor.get_covered_text())
                    #print('Label: ' + token.label)
                    #print('')
    return origin, target, label, controlPairList


## Combine CAS Systems

In [10]:
def combined_cas_read(cas_list):
    origin = []
    target = []
    label = []
    # control list of pairs to not add redundant pairs
    controlPairList = []
    for cas in cas_list:
        origin_tmp, target_tmp, label_tmp, controlPairList_tmp = read_annotations(cas, controlPairList)
        origin += origin_tmp
        target += target_tmp
        label += label_tmp
        controlPairList += controlPairList_tmp
    return origin, target, label

origin, target, label = combined_cas_read(cas)

## Split validation, test & train data

### Set Random Seeds for reproducability

In [11]:
def set_seed(seed_number: int):
    global seed
    seed = seed_number
    np.random.seed(seed_number)
    torch.manual_seed(seed_number)
    torch.cuda.manual_seed_all(seed_number)

set_seed(122)

### Split with Seed State

In [12]:
#from sklearn.model_selection import train_test_split
#tmp_train_origin, test_origin, tmp_train_target, test_target, tmp_train_labels, test_labels = train_test_split(origin, target, label, test_size=.15, random_state=seed)
#train_origin, val_origin, train_target, val_target, train_labels, val_labels = train_test_split(tmp_train_origin, tmp_train_target, tmp_train_labels, test_size=.2, random_state=seed)

### k-Fold Split with same distribution

In [13]:
from sklearn.model_selection import StratifiedKFold, train_test_split

# o: origin, t: target, l: label list
def k_fold_train_test_split(o, t, l, random_state=None):
    skf = StratifiedKFold(n_splits=4, shuffle=True, random_state=random_state)
    # Empty dataset (not needed)
    empty_X = np.zeros(len(l))
    skf.get_n_splits(empty_X, l)
    origin_array = np.array(o)
    target_array = np.array(t)
    label_array = np.array(l)
    # Store all k-folds
    k_fold_origin = []
    k_fold_target = []
    k_fold_labels = []
    for train_index, test_index in skf.split(empty_X, l):
        #print("TRAIN:", train_index, "TEST:", test_index)
        train_origin, test_origin = origin_array[train_index], origin_array[test_index]
        train_target, test_target = target_array[train_index], target_array[test_index]
        train_labels, test_labels = label_array[train_index], label_array[test_index]
        k_fold_origin.append([train_origin.tolist(), test_origin.tolist()])
        k_fold_target.append([train_target.tolist(), test_target.tolist()])
        k_fold_labels.append([train_labels.tolist(), test_labels.tolist()])
    return k_fold_origin, k_fold_target, k_fold_labels

train_origin, test_origin, train_target, test_target, train_labels, test_labels = train_test_split(origin, target, label,random_state=seed, stratify=label)

#tmp_train_origin, val_origin, tmp_train_target, val_target, tmp_train_labels, val_labels = train_test_split(origin, target, label, random_state=seed)
#train_origin, test_origin, train_target, test_target, train_labels, test_labels = train_test_split(tmp_train_origin, tmp_train_target, tmp_train_labels,random_state=seed)
k_fold_origin, k_fold_target, k_fold_labels = k_fold_train_test_split(train_origin, train_target, train_labels,random_state=seed)

### Split Information

In [14]:
print("Dataset Size \n   train: {0} \n   test: {1} ".format(len(train_origin), len(test_origin)))

Dataset Size 
   train: 3168 
   test: 1056 


In [15]:
def get_label_statistics(label_list):
    global labels
    count = [0] * len(labels)
    total = 0
    # total numbers
    for l in label_list:
        total = total + 1
        count[l] = count[l] + 1
    # display percentages
    if total > 0:
        for i in range(len(count)):
            count[i] = str(count[i]) + " (" + str(round(count[i] * 100 / total, 2)) + "%)"
    return count

def print_label_statistics(*results, names=None):
    global labels
    df = pd.DataFrame(results, columns=labels, index=names)
    display(df)
    
    
print_label_statistics(
    get_label_statistics(train_labels),
    get_label_statistics(test_labels),
    #get_label_statistics(val_labels),
    #names = ["train", "test", "val"],
    names = ["train", "test"]
)


Unnamed: 0,none,attribution,causal,conditional,contrast,description,equivalence,fulfillment,identity,purpose,summary,temporal
train,2072 (65.4%),16 (0.51%),279 (8.81%),35 (1.1%),104 (3.28%),29 (0.92%),204 (6.44%),24 (0.76%),53 (1.67%),14 (0.44%),12 (0.38%),326 (10.29%)
test,690 (65.34%),5 (0.47%),93 (8.81%),12 (1.14%),35 (3.31%),10 (0.95%),68 (6.44%),8 (0.76%),17 (1.61%),5 (0.47%),4 (0.38%),109 (10.32%)


## Get Metric from MNLI Glue Task

In [16]:
#from datasets import load_metric
#metric = load_metric('glue', 'mnli')
#metric_name = "accuracy"

### Create Metric Function

In [17]:
#import numpy as np
#def compute_metrics(eval_pred):
#    predictions, labels = eval_pred
#    predictions = np.argmax(predictions, axis=1)
#    return metric.compute(predictions=predictions, references=labels)

## New Metric 

In [18]:
from sklearn.metrics import classification_report
import collections

#classification_threshold = 0.

def flatten(d, parent_key='', sep='__'):
    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, collections.MutableMapping):
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

def compute_metrics(eval_pred):
    global labels
    predictions, true_labels = eval_pred
    # take most probable guess
    predictions = np.argmax(predictions, axis=-1)
    return flatten(classification_report(
        y_true=true_labels,
        y_pred=predictions,
        target_names=labels,
        zero_division=0,
        output_dict=True))

In [19]:
#TEST
#flatten(classification_report(
#    y_true=[0,1,2,3,4,5,6,7,8,9,10,11,12],
#    y_pred=[0,0,0,1,3,0,0,0,0,0,0,0,0],
#    target_names=labels,
#    zero_division=0,
#    output_dict=True))

## Model Settings

In [20]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

model_checkpoint = 'bert-base-uncased'
batch_size = 8

args = TrainingArguments(
    "semantic-test",
    evaluation_strategy = "epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=5,
    weight_decay=0.01,
    load_best_model_at_end=True,
#    metric_for_best_model=metric_name,
)

## Tokenize

In [21]:
from transformers import BertTokenizerFast
tokenizer = BertTokenizerFast.from_pretrained(model_checkpoint)

In [22]:
# k-fold
def tokenize_train_val(k_fold_origin, k_fold_target):
    train_encodings = []
    val_encodings = []
    for i in range(len(k_fold_origin)):
        train_encodings.append(tokenizer(k_fold_origin[i][0], k_fold_target[i][0], truncation=True, padding=True, return_token_type_ids=True))
        val_encodings.append(tokenizer(k_fold_origin[i][1], k_fold_target[i][1], truncation=True, padding=True, return_token_type_ids=True))
    return train_encodings, val_encodings

train_encodings, val_encodings = tokenize_train_val(k_fold_origin, k_fold_target)


test_encodings = tokenizer(test_origin, test_target, truncation=True, padding=True, return_token_type_ids=True)

## Print Examples

In [23]:
#train_encodings

In [24]:
def show_random_elements(origin_list, target_list, label_list, encodings, num_examples=10):
    global labels
    assert num_examples <= len(origin_list), "Can't pick more elements than there are in the dataset."
    picks = []
    for _ in range(num_examples):
        pick = random.randint(0, len(origin_list)-1)
        while pick in picks:
            pick = random.randint(0, len(origin_list)-1)
        picks.append(pick)
    data = []
    for n in picks:
        data.append([n, origin_list[n], labels[label_list[n]], target_list[n], encodings.input_ids[n], encodings.token_type_ids[n], encodings.attention_mask[n]])
    df = pd.DataFrame(data, columns=['index', 'Origin', 'Label', 'Target', 'Input_ids', 'Token_type_ids', 'Attention_mask'])
    display(HTML(df.to_html()))

In [25]:
# show_random_elements(train_origin, train_target, train_labels, train_encodings)
# Output adjustet to Folds
show_random_elements(k_fold_origin[0][0], k_fold_target[0][0], k_fold_labels[0][0], train_encodings[0])

Unnamed: 0,index,Origin,Label,Target,Input_ids,Token_type_ids,Attention_mask
0,2068,"Egypt opens border crossing with Gaza. Israel has raised concerns about the opening of the border, claiming that weapons will be smuggled through. May 28, 2011",none,"Palestinians to elect new president on January 9. Futtuh, head of the Palestinian parliament, was sworn in hours after the death of Yasser Arafat on Thursday, and Palestinian Basic Law dictates that he may only serve up to two months before elections are held. November 14, 2004","[101, 5279, 7480, 3675, 5153, 2007, 14474, 1012, 3956, 2038, 2992, 5936, 2055, 1996, 3098, 1997, 1996, 3675, 1010, 6815, 2008, 4255, 2097, 2022, 20673, 11533, 2083, 1012, 2089, 2654, 1010, 2249, 102, 21524, 2000, 11322, 2047, 2343, 2006, 2254, 1023, 1012, 11865, 4779, 27225, 1010, 2132, 1997, 1996, 9302, 3323, 1010, 2001, 10741, 1999, 2847, 2044, 1996, 2331, 1997, 8038, 18116, 19027, 27753, 2006, 9432, 1010, 1998, 9302, 3937, 2375, 4487, 25572, 4570, 2008, 2002, 2089, 2069, 3710, 2039, 2000, 2048, 2706, 2077, 3864, 2024, 2218, 1012, 2281, 2403, 1010, 2432, 102, 0, 0, 0, 0, 0, 0, 0, ...]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, ...]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, ...]"
1,315,"President of China lunches with Brazilian President. Lunch was a traditional Brazilian barbecue with different kinds of meat. November 12, 2004",none,"Brazilian President meets President of Colombia. Brazilian President Luiz Inácio Lula da Silva travelled to the Colombian city of Leticia, in the triborder region, where he met with Colombian President Alvaro Uribe. November 3, 2002","[101, 2343, 1997, 2859, 6265, 2229, 2007, 6142, 2343, 1012, 6265, 2001, 1037, 3151, 6142, 26375, 2007, 2367, 7957, 1997, 6240, 1012, 2281, 2260, 1010, 2432, 102, 6142, 2343, 6010, 2343, 1997, 7379, 1012, 6142, 2343, 11320, 10993, 27118, 9793, 11320, 2721, 4830, 11183, 7837, 2000, 1996, 13598, 2103, 1997, 2292, 24108, 1010, 1999, 1996, 13012, 12821, 4063, 2555, 1010, 2073, 2002, 2777, 2007, 13598, 2343, 24892, 24471, 20755, 1012, 2281, 1017, 1010, 2526, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]"
2,1161,"Massive flooding in China; over 550 thousand evacuated. Overall, 40 rivers have risen above safe limits, stressing dykes and dams including those on China's largest waterway, the Yangtze River, which is flooding downstream while experiencing a severe drought near its source in the north. April 17, 2011",none,"Much of inland New South Wales, Australia affected by flooding. Like many river crossings in inland New South Wales, this low-level bridge in Bathurst is closed due to flooding. December 5, 2010","[101, 5294, 9451, 1999, 2859, 1025, 2058, 13274, 4595, 13377, 1012, 3452, 1010, 2871, 5485, 2031, 13763, 2682, 3647, 6537, 1010, 6911, 2075, 22212, 2015, 1998, 17278, 2164, 2216, 2006, 2859, 1005, 1055, 2922, 23668, 1010, 1996, 8675, 23102, 2314, 1010, 2029, 2003, 9451, 13248, 2096, 13417, 1037, 5729, 14734, 2379, 2049, 3120, 1999, 1996, 2167, 1012, 2258, 2459, 1010, 2249, 102, 2172, 1997, 9514, 2047, 2148, 3575, 1010, 2660, 5360, 2011, 9451, 1012, 2066, 2116, 2314, 20975, 1999, 9514, 2047, 2148, 3575, 1010, 2023, 2659, 1011, 2504, 2958, 1999, 21897, 2003, 2701, 2349, 2000, 9451, 1012, 2285, 1019, 1010, ...]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]"
3,264,"2007 Rugby World Cup: England 36 - 20 Tonga. At the interval England led 19-10 after Paul Sackey scored two tries and Jonny Wilkinson added another penalty and a drop goal. September 28, 2007",temporal,"2007 Rugby World Cup: South Africa 37 - 20 Fiji. Francois Steyn opened the scoring with a penalty for South Africa, and Jaque Fourie and John Smit scored tries. October 7, 2007","[101, 2289, 4043, 2088, 2452, 1024, 2563, 4029, 1011, 2322, 20188, 1012, 2012, 1996, 13483, 2563, 2419, 2539, 1011, 2184, 2044, 2703, 12803, 3240, 3195, 2048, 5363, 1998, 26937, 16237, 2794, 2178, 6531, 1998, 1037, 4530, 3125, 1012, 2244, 2654, 1010, 2289, 102, 2289, 4043, 2088, 2452, 1024, 2148, 3088, 4261, 1011, 2322, 11464, 1012, 8173, 26261, 6038, 2441, 1996, 4577, 2007, 1037, 6531, 2005, 2148, 3088, 1010, 1998, 14855, 4226, 2176, 2666, 1998, 2198, 15488, 4183, 3195, 5363, 1012, 2255, 1021, 1010, 2289, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]"
4,1775,"Alberta premier Ralph Klein joke outrages Liberal MP Belinda Stronach. I've worked with this gentleman for 17 years, and have never been treated with anything more than the utmost respect. October 27, 2006",temporal,"Canadian Liberal Gerard Kennedy to run in Parkdale-High Park. The NDP's Cheri DiNovo also took the provincial riding, formerly held by Kennedy, in a by-election last year. February 5, 2007","[101, 7649, 4239, 6798, 12555, 8257, 19006, 2015, 4314, 6131, 24574, 2358, 26788, 2818, 1012, 1045, 1005, 2310, 2499, 2007, 2023, 10170, 2005, 2459, 2086, 1010, 1998, 2031, 2196, 2042, 5845, 2007, 2505, 2062, 2084, 1996, 27917, 4847, 1012, 2255, 2676, 1010, 2294, 102, 3010, 4314, 11063, 5817, 2000, 2448, 1999, 2380, 5634, 1011, 2152, 2380, 1012, 1996, 21915, 1005, 1055, 24188, 2072, 22412, 6767, 2036, 2165, 1996, 4992, 5559, 1010, 3839, 2218, 2011, 5817, 1010, 1999, 1037, 2011, 1011, 2602, 2197, 2095, 1012, 2337, 1019, 1010, 2289, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]"
5,819,"Palestinians to elect new president on January 9. The Haaretz had initially reported that former prime minister Mahmoud Abbas was selected by the Fatah central committee as their candidate for president, but Abbas has denied this, saying, the matter is still being discussed. November 14, 2004",none,"Egypt seals border with Gaza after 11 days. He said he would be heading home to Gaza later in the day, but once the border crossing is officially reopened, he wants to return to Egypt legally, allowing him freedom of movement. February 3, 2008","[101, 21524, 2000, 11322, 2047, 2343, 2006, 2254, 1023, 1012, 1996, 5292, 12069, 5753, 2018, 3322, 2988, 2008, 2280, 3539, 2704, 27278, 17532, 2001, 3479, 2011, 1996, 6638, 4430, 2430, 2837, 2004, 2037, 4018, 2005, 2343, 1010, 2021, 17532, 2038, 6380, 2023, 1010, 3038, 1010, 1996, 3043, 2003, 2145, 2108, 6936, 1012, 2281, 2403, 1010, 2432, 102, 5279, 13945, 3675, 2007, 14474, 2044, 2340, 2420, 1012, 2002, 2056, 2002, 2052, 2022, 5825, 2188, 2000, 14474, 2101, 1999, 1996, 2154, 1010, 2021, 2320, 1996, 3675, 5153, 2003, 3985, 11882, 1010, 2002, 4122, 2000, 2709, 2000, 5279, 10142, 1010, 4352, 2032, 4071, ...]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]"
6,1516,"New BP oil spill plan. Two relief oil wells are also currently under construction for what is hoped will be a permanent solution to the oil spill. May 25, 2010",none,"Philippines ferry disaster: ship owner sues over toxic cargo; recovery deadline set & more. Only the small amount of oil in the base tank may leak but oil spill containment booms are already in place should this occur. July 7, 2008","[101, 2047, 17531, 3514, 14437, 2933, 1012, 2048, 4335, 3514, 7051, 2024, 2036, 2747, 2104, 2810, 2005, 2054, 2003, 5113, 2097, 2022, 1037, 4568, 5576, 2000, 1996, 3514, 14437, 1012, 2089, 2423, 1010, 2230, 102, 5137, 7115, 7071, 1024, 2911, 3954, 9790, 2015, 2058, 11704, 6636, 1025, 7233, 15117, 2275, 1004, 2062, 1012, 2069, 1996, 2235, 3815, 1997, 3514, 1999, 1996, 2918, 4951, 2089, 17271, 2021, 3514, 14437, 29174, 8797, 2015, 2024, 2525, 1999, 2173, 2323, 2023, 5258, 1012, 2251, 1021, 1010, 2263, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...]"
7,368,"Sinn Fein expels Denis Donaldson for spying. Sinn Fein has expel its former head of administration, Denis Donaldson, after he confessed to being a British spy since 1980. 2019-09-11T03:05:17Z",none,"Northern Ireland's Ian Paisley to resign and retire. Jim Allister, a former DUP member who resigned over the power-sharing agreement with Sinn Fein and formed his own party, the Traditional Unionist Voice, gave his word, It has been quite clear for some time that Ian Paisley was going to be hung out to dry by the DUP pragmatists. March 4, 2008","[101, 26403, 27132, 4654, 11880, 2015, 11064, 23164, 2005, 22624, 1012, 26403, 27132, 2038, 4654, 11880, 2049, 2280, 2132, 1997, 3447, 1010, 11064, 23164, 1010, 2044, 2002, 14312, 2000, 2108, 1037, 2329, 8645, 2144, 3150, 1012, 10476, 1011, 5641, 1011, 2340, 2102, 2692, 2509, 1024, 5709, 1024, 2459, 2480, 102, 2642, 3163, 1005, 1055, 4775, 23321, 2000, 12897, 1998, 11036, 1012, 3958, 2035, 12911, 1010, 1037, 2280, 4241, 2361, 2266, 2040, 5295, 2058, 1996, 2373, 1011, 6631, 3820, 2007, 26403, 27132, 1998, 2719, 2010, 2219, 2283, 1010, 1996, 3151, 17104, 2376, 1010, 2435, 2010, 2773, 1010, 2009, 2038, 2042, 3243, ...]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]"
8,1503,"Mugabe says he's open to talks with Zimbabwe opposition. The day before Zimbabwe's presidential runoff is to be held, President Robert Mugabe says he is open to discussion with the Movement for Democratic Change (MDC), the opposition party that has decided to boycott what they and other African leaders have called a sham election. June 26, 2008",none,"Zimbabwean police arrest two opposition figures. The Movement for Democratic Change broke into two factions in 2005, following a dispute over whether to contest Senate elections. May 16, 2008","[101, 14757, 16336, 2758, 2002, 1005, 1055, 2330, 2000, 7566, 2007, 11399, 4559, 1012, 1996, 2154, 2077, 11399, 1005, 1055, 4883, 19550, 2003, 2000, 2022, 2218, 1010, 2343, 2728, 14757, 16336, 2758, 2002, 2003, 2330, 2000, 6594, 2007, 1996, 2929, 2005, 3537, 2689, 1006, 9108, 2278, 1007, 1010, 1996, 4559, 2283, 2008, 2038, 2787, 2000, 17757, 2054, 2027, 1998, 2060, 3060, 4177, 2031, 2170, 1037, 25850, 2602, 1012, 2238, 2656, 1010, 2263, 102, 11399, 2319, 2610, 6545, 2048, 4559, 4481, 1012, 1996, 2929, 2005, 3537, 2689, 3631, 2046, 2048, 13815, 1999, 2384, 1010, 2206, 1037, 7593, 2058, 3251, 2000, 5049, ...]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]"
9,1249,"Palestinians to elect new president on January 9. New leadership could prove to be the key to revitalizing the peace process in the Middle East, as both Israel and the United States had refused to work with Arafat. November 14, 2004",contrast,"Former US president Carter meets with former deputy-PM for Hamas. Jimmy Carter's view of the forces at work in the Middle East and how he likes to attribute blame and responsibility is so warped to my way of thinking that I'm skeptical of any initiative he undertakes, said Howard Berman. April 15, 2008","[101, 21524, 2000, 11322, 2047, 2343, 2006, 2254, 1023, 1012, 2047, 4105, 2071, 6011, 2000, 2022, 1996, 3145, 2000, 7065, 18400, 6026, 1996, 3521, 2832, 1999, 1996, 2690, 2264, 1010, 2004, 2119, 3956, 1998, 1996, 2142, 2163, 2018, 4188, 2000, 2147, 2007, 19027, 27753, 1012, 2281, 2403, 1010, 2432, 102, 2280, 2149, 2343, 5708, 6010, 2007, 2280, 4112, 1011, 7610, 2005, 22129, 1012, 5261, 5708, 1005, 1055, 3193, 1997, 1996, 2749, 2012, 2147, 1999, 1996, 2690, 2264, 1998, 2129, 2002, 7777, 2000, 17961, 7499, 1998, 5368, 2003, 2061, 25618, 2000, 2026, 2126, 1997, 3241, 2008, 1045, 1005, 1049, 18386, 1997, ...]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]","[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...]"


## Create Dataset

In [26]:
class SemanticDataset(torch.utils.data.Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# k-Folds Datasets
def create_train_val_dataset(k_train_encodings, k_val_encodings, k_labels):
    train_datasets = []
    val_datasets = []
    for i in range(len(k_train_encodings)):
        train_datasets.append(SemanticDataset(k_train_encodings[i], k_labels[i][0]))
        val_datasets.append(SemanticDataset(k_val_encodings[i], k_labels[i][1]))
    return train_datasets, val_datasets

train_datasets, val_datasets = create_train_val_dataset(train_encodings, val_encodings, k_fold_labels)

test_dataset = SemanticDataset(test_encodings, test_labels)

# Model

## Create Model

In [27]:
num_labels = len(labels)
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

## Create Trainer

In [28]:
validation_key = "validation_matched"

trainers = []

for i in range(len(train_datasets)):
    trainers.append(
        Trainer(
            model,
            args,
            train_dataset=train_datasets[i],
            eval_dataset=val_datasets[i],
            tokenizer=tokenizer,
            compute_metrics=compute_metrics
        )
    )


## Train

In [29]:
for trainer in trainers:
    trainer.train()

Epoch,Training Loss,Validation Loss,None Precision,None Recall,None F1-score,None Support,Attribution Precision,Attribution Recall,Attribution F1-score,Attribution Support,Causal Precision,Causal Recall,Causal F1-score,Causal Support,Conditional Precision,Conditional Recall,Conditional F1-score,Conditional Support,Contrast Precision,Contrast Recall,Contrast F1-score,Contrast Support,Description Precision,Description Recall,Description F1-score,Description Support,Equivalence Precision,Equivalence Recall,Equivalence F1-score,Equivalence Support,Fulfillment Precision,Fulfillment Recall,Fulfillment F1-score,Fulfillment Support,Identity Precision,Identity Recall,Identity F1-score,Identity Support,Purpose Precision,Purpose Recall,Purpose F1-score,Purpose Support,Summary Precision,Summary Recall,Summary F1-score,Summary Support,Temporal Precision,Temporal Recall,Temporal F1-score,Temporal Support,Accuracy,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support
1,No log,1.043265,0.722302,0.969112,0.8277,518,0.0,0.0,0.0,4,0.0,0.0,0.0,70,0.0,0.0,0.0,9,0.0,0.0,0.0,26,0.0,0.0,0.0,7,0.230769,0.235294,0.23301,51,0.0,0.0,0.0,6,0.0,0.0,0.0,13,0.0,0.0,0.0,3,0.0,0.0,0.0,3,0.244444,0.134146,0.173228,82,0.662879,0.099793,0.111546,0.102828,792,0.512584,0.662879,0.574289,792
2,1.091900,0.948541,0.846011,0.880309,0.862819,518,0.0,0.0,0.0,4,0.347826,0.114286,0.172043,70,0.0,0.0,0.0,9,0.0,0.0,0.0,26,0.0,0.0,0.0,7,0.27907,0.470588,0.350365,51,0.0,0.0,0.0,6,0.0,0.0,0.0,13,0.0,0.0,0.0,3,0.0,0.0,0.0,3,0.305556,0.536585,0.389381,82,0.671717,0.148205,0.166814,0.147884,792,0.633674,0.671717,0.642401,792
3,1.091900,1.013106,0.86064,0.882239,0.871306,518,0.0,0.0,0.0,4,0.297872,0.2,0.239316,70,0.0,0.0,0.0,9,0.0,0.0,0.0,26,0.0,0.0,0.0,7,0.27451,0.54902,0.366013,51,0.0,0.0,0.0,6,0.0,0.0,0.0,13,0.0,0.0,0.0,3,0.0,0.0,0.0,3,0.383929,0.52439,0.443299,82,0.684343,0.151413,0.179637,0.159995,792,0.646648,0.684343,0.660487,792
4,0.705800,1.043713,0.844485,0.901544,0.872082,518,0.0,0.0,0.0,4,0.358974,0.4,0.378378,70,0.0,0.0,0.0,9,0.0,0.0,0.0,26,0.0,0.0,0.0,7,0.333333,0.529412,0.409091,51,0.0,0.0,0.0,6,0.0,0.0,0.0,13,0.0,0.0,0.0,3,0.0,0.0,0.0,3,0.4375,0.426829,0.432099,82,0.703283,0.164524,0.188149,0.174304,792,0.650816,0.703283,0.6749,792
5,0.705800,1.08264,0.834225,0.903475,0.86747,518,0.0,0.0,0.0,4,0.328125,0.3,0.313433,70,0.0,0.0,0.0,9,0.0,0.0,0.0,26,0.0,0.0,0.0,7,0.337662,0.509804,0.40625,51,0.0,0.0,0.0,6,0.0,0.0,0.0,13,0.0,0.0,0.0,3,0.0,0.0,0.0,3,0.388889,0.426829,0.406977,82,0.694444,0.157408,0.178342,0.166177,792,0.636625,0.694444,0.663359,792


  if isinstance(v, collections.MutableMapping):


Epoch,Training Loss,Validation Loss,None Precision,None Recall,None F1-score,None Support,Attribution Precision,Attribution Recall,Attribution F1-score,Attribution Support,Causal Precision,Causal Recall,Causal F1-score,Causal Support,Conditional Precision,Conditional Recall,Conditional F1-score,Conditional Support,Contrast Precision,Contrast Recall,Contrast F1-score,Contrast Support,Description Precision,Description Recall,Description F1-score,Description Support,Equivalence Precision,Equivalence Recall,Equivalence F1-score,Equivalence Support,Fulfillment Precision,Fulfillment Recall,Fulfillment F1-score,Fulfillment Support,Identity Precision,Identity Recall,Identity F1-score,Identity Support,Purpose Precision,Purpose Recall,Purpose F1-score,Purpose Support,Summary Precision,Summary Recall,Summary F1-score,Summary Support,Temporal Precision,Temporal Recall,Temporal F1-score,Temporal Support,Accuracy,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support
1,No log,0.640395,0.958984,0.947876,0.953398,518,0.0,0.0,0.0,4,0.514706,0.507246,0.510949,69,0.0,0.0,0.0,9,0.0,0.0,0.0,26,0.0,0.0,0.0,7,0.378378,0.823529,0.518519,51,0.0,0.0,0.0,6,0.0,0.0,0.0,13,0.0,0.0,0.0,4,0.0,0.0,0.0,3,0.633663,0.780488,0.699454,82,0.79798,0.207144,0.254928,0.223527,792,0.762028,0.79798,0.773883,792
2,0.765400,0.600744,0.962672,0.945946,0.954236,518,0.0,0.0,0.0,4,0.529412,0.521739,0.525547,69,0.0,0.0,0.0,9,0.0,0.0,0.0,26,0.0,0.0,0.0,7,0.381818,0.823529,0.521739,51,0.0,0.0,0.0,6,0.0,0.0,0.0,13,0.0,0.0,0.0,4,0.0,0.0,0.0,3,0.628571,0.804878,0.705882,82,0.800505,0.208539,0.258008,0.225617,792,0.765415,0.800505,0.776576,792
3,0.765400,0.607339,0.931818,0.949807,0.940727,518,0.0,0.0,0.0,4,0.519481,0.57971,0.547945,69,0.0,0.0,0.0,9,0.0,0.0,0.0,26,0.0,0.0,0.0,7,0.363636,0.862745,0.511628,51,0.0,0.0,0.0,6,0.0,0.0,0.0,13,0.0,0.0,0.0,4,0.0,0.0,0.0,3,0.69697,0.560976,0.621622,82,0.785354,0.209325,0.246103,0.218493,792,0.750281,0.785354,0.760316,792
4,0.507500,0.606354,0.933837,0.953668,0.943649,518,0.0,0.0,0.0,4,0.505155,0.710145,0.590361,69,0.0,0.0,0.0,9,0.0,0.0,0.0,26,0.0,0.0,0.0,7,0.446809,0.823529,0.57931,51,0.0,0.0,0.0,6,0.0,0.0,0.0,13,0.0,0.0,0.0,4,0.0,0.0,0.0,3,0.680556,0.597561,0.636364,82,0.800505,0.213863,0.257075,0.22914,792,0.75401,0.800505,0.771807,792
5,0.507500,0.592688,0.954724,0.936293,0.945419,518,0.0,0.0,0.0,4,0.539326,0.695652,0.607595,69,0.5,0.111111,0.181818,9,0.0,0.0,0.0,26,0.0,0.0,0.0,7,0.43299,0.823529,0.567568,51,0.0,0.0,0.0,6,0.0,0.0,0.0,13,0.0,0.0,0.0,4,0.0,0.0,0.0,3,0.617021,0.707317,0.659091,82,0.800505,0.253672,0.272825,0.246791,792,0.768862,0.800505,0.77813,792


Epoch,Training Loss,Validation Loss,None Precision,None Recall,None F1-score,None Support,Attribution Precision,Attribution Recall,Attribution F1-score,Attribution Support,Causal Precision,Causal Recall,Causal F1-score,Causal Support,Conditional Precision,Conditional Recall,Conditional F1-score,Conditional Support,Contrast Precision,Contrast Recall,Contrast F1-score,Contrast Support,Description Precision,Description Recall,Description F1-score,Description Support,Equivalence Precision,Equivalence Recall,Equivalence F1-score,Equivalence Support,Fulfillment Precision,Fulfillment Recall,Fulfillment F1-score,Fulfillment Support,Identity Precision,Identity Recall,Identity F1-score,Identity Support,Purpose Precision,Purpose Recall,Purpose F1-score,Purpose Support,Summary Precision,Summary Recall,Summary F1-score,Summary Support,Temporal Precision,Temporal Recall,Temporal F1-score,Temporal Support,Accuracy,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support
1,No log,0.363408,0.996,0.96139,0.978389,518,0.0,0.0,0.0,4,0.875,0.8,0.835821,70,0.333333,0.125,0.181818,8,0.5,0.038462,0.071429,26,0.0,0.0,0.0,8,0.559524,0.921569,0.696296,51,0.0,0.0,0.0,6,0.413793,0.923077,0.571429,13,0.0,0.0,0.0,4,0.0,0.0,0.0,3,0.7,0.950617,0.806283,81,0.873737,0.364804,0.393343,0.345122,792,0.862954,0.873737,0.854638,792
2,0.440600,0.295214,0.988506,0.996139,0.992308,518,0.0,0.0,0.0,4,0.895522,0.857143,0.875912,70,0.0,0.0,0.0,8,0.75,0.346154,0.473684,26,0.0,0.0,0.0,8,0.594937,0.921569,0.723077,51,0.0,0.0,0.0,6,0.5,0.769231,0.606061,13,0.0,0.0,0.0,4,0.0,0.0,0.0,3,0.829545,0.901235,0.863905,81,0.902778,0.379876,0.399289,0.377912,792,0.881651,0.902778,0.88684,792
3,0.440600,0.291419,0.994208,0.994208,0.994208,518,0.0,0.0,0.0,4,0.821918,0.857143,0.839161,70,0.428571,0.375,0.4,8,0.727273,0.307692,0.432432,26,0.0,0.0,0.0,8,0.671642,0.882353,0.762712,51,0.0,0.0,0.0,6,0.55,0.846154,0.666667,13,0.0,0.0,0.0,4,0.0,0.0,0.0,3,0.791667,0.938272,0.858757,81,0.906566,0.41544,0.433402,0.412828,792,0.884344,0.906566,0.890541,792
4,0.259200,0.26349,0.994231,0.998069,0.996146,518,0.0,0.0,0.0,4,0.825,0.942857,0.88,70,0.5,0.5,0.5,8,0.777778,0.538462,0.636364,26,0.0,0.0,0.0,8,0.818182,0.882353,0.849057,51,0.0,0.0,0.0,6,0.52,1.0,0.684211,13,0.0,0.0,0.0,4,0.0,0.0,0.0,3,0.870588,0.91358,0.891566,81,0.925505,0.442148,0.481277,0.453112,792,0.904026,0.925505,0.912327,792
5,0.259200,0.272809,0.996139,0.996139,0.996139,518,0.0,0.0,0.0,4,0.901408,0.914286,0.907801,70,0.428571,0.375,0.4,8,0.882353,0.576923,0.697674,26,0.0,0.0,0.0,8,0.746032,0.921569,0.824561,51,0.0,0.0,0.0,6,0.481481,1.0,0.65,13,0.0,0.0,0.0,4,0.0,0.0,0.0,3,0.853933,0.938272,0.894118,81,0.926768,0.440826,0.476849,0.447524,792,0.907757,0.926768,0.913904,792


Epoch,Training Loss,Validation Loss,None Precision,None Recall,None F1-score,None Support,Attribution Precision,Attribution Recall,Attribution F1-score,Attribution Support,Causal Precision,Causal Recall,Causal F1-score,Causal Support,Conditional Precision,Conditional Recall,Conditional F1-score,Conditional Support,Contrast Precision,Contrast Recall,Contrast F1-score,Contrast Support,Description Precision,Description Recall,Description F1-score,Description Support,Equivalence Precision,Equivalence Recall,Equivalence F1-score,Equivalence Support,Fulfillment Precision,Fulfillment Recall,Fulfillment F1-score,Fulfillment Support,Identity Precision,Identity Recall,Identity F1-score,Identity Support,Purpose Precision,Purpose Recall,Purpose F1-score,Purpose Support,Summary Precision,Summary Recall,Summary F1-score,Summary Support,Temporal Precision,Temporal Recall,Temporal F1-score,Temporal Support,Accuracy,Macro avg Precision,Macro avg Recall,Macro avg F1-score,Macro avg Support,Weighted avg Precision,Weighted avg Recall,Weighted avg F1-score,Weighted avg Support
1,No log,0.14843,0.994242,1.0,0.997113,518,0.0,0.0,0.0,4,0.957746,0.971429,0.964539,70,0.666667,0.666667,0.666667,9,0.894737,0.653846,0.755556,26,0.0,0.0,0.0,7,0.862069,0.980392,0.917431,51,0.0,0.0,0.0,6,0.482759,1.0,0.651163,14,0.0,0.0,0.0,3,0.0,0.0,0.0,3,0.941176,0.987654,0.963855,81,0.950758,0.483283,0.521666,0.493027,792,0.932174,0.950758,0.938945,792
2,0.243400,0.18655,0.996132,0.994208,0.995169,518,0.0,0.0,0.0,4,0.969231,0.9,0.933333,70,0.8,0.444444,0.571429,9,0.636364,0.538462,0.583333,26,0.0,0.0,0.0,7,0.735294,0.980392,0.840336,51,0.5,0.166667,0.25,6,0.466667,1.0,0.636364,14,0.0,0.0,0.0,3,0.5,0.333333,0.4,3,0.962963,0.962963,0.962963,81,0.934343,0.547221,0.526706,0.514411,792,0.926921,0.934343,0.926271,792
3,0.243400,0.127979,0.996154,1.0,0.998073,518,0.0,0.0,0.0,4,0.944444,0.971429,0.957746,70,0.857143,0.666667,0.75,9,0.846154,0.846154,0.846154,26,0.75,0.428571,0.545455,7,0.959184,0.921569,0.94,51,0.4,0.333333,0.363636,6,0.636364,1.0,0.777778,14,0.0,0.0,0.0,3,0.5,0.333333,0.4,3,0.941176,0.987654,0.963855,81,0.960859,0.652552,0.624059,0.628558,792,0.953341,0.960859,0.955676,792
4,0.132600,0.131616,0.998069,0.998069,0.998069,518,0.0,0.0,0.0,4,0.985714,0.985714,0.985714,70,0.75,0.666667,0.705882,9,0.884615,0.884615,0.884615,26,1.0,0.428571,0.6,7,0.924528,0.960784,0.942308,51,0.333333,0.166667,0.222222,6,0.608696,1.0,0.756757,14,0.0,0.0,0.0,3,1.0,0.333333,0.5,3,0.91954,0.987654,0.952381,81,0.963384,0.700375,0.617673,0.628996,792,0.956951,0.963384,0.9573,792
5,0.132600,0.123004,0.996146,0.998069,0.997107,518,0.0,0.0,0.0,4,0.985714,0.985714,0.985714,70,0.875,0.777778,0.823529,9,0.884615,0.884615,0.884615,26,1.0,0.142857,0.25,7,0.907407,0.960784,0.933333,51,0.5,0.333333,0.4,6,0.56,1.0,0.717949,14,0.0,0.0,0.0,3,1.0,0.333333,0.5,3,0.963855,0.987654,0.97561,81,0.963384,0.722728,0.617012,0.622321,792,0.960946,0.963384,0.957372,792


## Evaluate

### Train Dataset

In [30]:
# Run Evaluation on Dataset with all trainers and return k-fold index of highest accuracy
def evaluations_and_format(trainers, datasets):
    result = []
    for i in range(len(datasets)):
        result.append(trainers[i].evaluate(datasets[i]))
    df = pd.DataFrame(result)
    display(HTML(df.to_html()))
    return df['eval_accuracy'].idxmax()
    
evaluations_and_format(trainers, train_datasets)

Unnamed: 0,eval_loss,eval_none__precision,eval_none__recall,eval_none__f1-score,eval_none__support,eval_attribution__precision,eval_attribution__recall,eval_attribution__f1-score,eval_attribution__support,eval_causal__precision,eval_causal__recall,eval_causal__f1-score,eval_causal__support,eval_conditional__precision,eval_conditional__recall,eval_conditional__f1-score,eval_conditional__support,eval_contrast__precision,eval_contrast__recall,eval_contrast__f1-score,eval_contrast__support,eval_description__precision,eval_description__recall,eval_description__f1-score,eval_description__support,eval_equivalence__precision,eval_equivalence__recall,eval_equivalence__f1-score,eval_equivalence__support,eval_fulfillment__precision,eval_fulfillment__recall,eval_fulfillment__f1-score,eval_fulfillment__support,eval_identity__precision,eval_identity__recall,eval_identity__f1-score,eval_identity__support,eval_purpose__precision,eval_purpose__recall,eval_purpose__f1-score,eval_purpose__support,eval_summary__precision,eval_summary__recall,eval_summary__f1-score,eval_summary__support,eval_temporal__precision,eval_temporal__recall,eval_temporal__f1-score,eval_temporal__support,eval_accuracy,eval_macro avg__precision,eval_macro avg__recall,eval_macro avg__f1-score,eval_macro avg__support,eval_weighted avg__precision,eval_weighted avg__recall,eval_weighted avg__f1-score,eval_weighted avg__support,eval_runtime,eval_samples_per_second,epoch
0,0.071928,0.998714,0.999356,0.999035,1554,1.0,0.083333,0.153846,12,0.995215,0.995215,0.995215,209,0.793103,0.884615,0.836364,26,0.949367,0.961538,0.955414,78,1.0,0.5,0.666667,22,0.967742,0.980392,0.974026,153,0.8125,0.722222,0.764706,18,0.714286,1.0,0.833333,40,0.857143,0.545455,0.666667,11,0.5,0.666667,0.571429,9,0.987805,0.995902,0.991837,244,0.980219,0.881323,0.777891,0.784045,2376,0.982696,0.980219,0.978068,2376,18.6915,127.117,5.0
1,0.07161,0.998714,0.999356,0.999035,1554,0.666667,0.333333,0.444444,12,0.995238,0.995238,0.995238,210,0.884615,0.884615,0.884615,26,0.9375,0.961538,0.949367,78,1.0,0.5,0.666667,22,0.961538,0.980392,0.970874,153,0.857143,0.666667,0.75,18,0.714286,1.0,0.833333,40,0.833333,0.5,0.625,10,0.6,0.666667,0.631579,9,0.987805,0.995902,0.991837,244,0.981061,0.869737,0.790309,0.811832,2376,0.981902,0.981061,0.979743,2376,18.7505,126.716,5.0
2,0.069776,0.998714,0.999356,0.999035,1554,0.6,0.25,0.352941,12,0.995215,0.995215,0.995215,209,0.851852,0.851852,0.851852,27,0.949367,0.961538,0.955414,78,1.0,0.52381,0.6875,21,0.961783,0.986928,0.974194,153,0.8125,0.722222,0.764706,18,0.754717,1.0,0.860215,40,0.833333,0.5,0.625,10,0.636364,0.777778,0.7,9,0.987854,0.995918,0.99187,245,0.981902,0.865142,0.797052,0.813162,2376,0.98203,0.981902,0.98042,2376,18.7485,126.73,5.0
3,0.045155,1.0,1.0,1.0,1554,0.666667,0.333333,0.444444,12,1.0,1.0,1.0,209,0.827586,0.923077,0.872727,26,0.975,1.0,0.987342,78,1.0,0.681818,0.810811,22,0.993464,0.993464,0.993464,153,0.941176,0.888889,0.914286,18,0.866667,1.0,0.928571,39,1.0,0.727273,0.842105,11,0.533333,0.888889,0.666667,9,1.0,1.0,1.0,245,0.989899,0.900324,0.869729,0.871701,2376,0.990786,0.989899,0.989398,2376,18.7577,126.668,5.0


3

### Validation Dataset

In [31]:
best_fold_idx = evaluations_and_format(trainers, val_datasets)

Unnamed: 0,eval_loss,eval_none__precision,eval_none__recall,eval_none__f1-score,eval_none__support,eval_attribution__precision,eval_attribution__recall,eval_attribution__f1-score,eval_attribution__support,eval_causal__precision,eval_causal__recall,eval_causal__f1-score,eval_causal__support,eval_conditional__precision,eval_conditional__recall,eval_conditional__f1-score,eval_conditional__support,eval_contrast__precision,eval_contrast__recall,eval_contrast__f1-score,eval_contrast__support,eval_description__precision,eval_description__recall,eval_description__f1-score,eval_description__support,eval_equivalence__precision,eval_equivalence__recall,eval_equivalence__f1-score,eval_equivalence__support,eval_fulfillment__precision,eval_fulfillment__recall,eval_fulfillment__f1-score,eval_fulfillment__support,eval_identity__precision,eval_identity__recall,eval_identity__f1-score,eval_identity__support,eval_purpose__precision,eval_purpose__recall,eval_purpose__f1-score,eval_purpose__support,eval_summary__precision,eval_summary__recall,eval_summary__f1-score,eval_summary__support,eval_temporal__precision,eval_temporal__recall,eval_temporal__f1-score,eval_temporal__support,eval_accuracy,eval_macro avg__precision,eval_macro avg__recall,eval_macro avg__f1-score,eval_macro avg__support,eval_weighted avg__precision,eval_weighted avg__recall,eval_weighted avg__f1-score,eval_weighted avg__support,eval_runtime,eval_samples_per_second,epoch
0,0.042685,1.0,1.0,1.0,518,0.6,0.75,0.666667,4,1.0,1.0,1.0,70,1.0,0.888889,0.941176,9,0.962963,1.0,0.981132,26,1.0,0.714286,0.833333,7,0.980769,1.0,0.990291,51,1.0,0.833333,0.909091,6,0.928571,1.0,0.962963,13,1.0,0.666667,0.8,3,0.75,1.0,0.857143,3,1.0,1.0,1.0,82,0.992424,0.935192,0.904431,0.911816,792,0.993406,0.992424,0.992335,792,6.3328,125.064,5.0
1,0.043639,1.0,1.0,1.0,518,0.0,0.0,0.0,4,1.0,1.0,1.0,69,0.727273,0.888889,0.8,9,1.0,1.0,1.0,26,1.0,0.714286,0.833333,7,1.0,1.0,1.0,51,0.857143,1.0,0.923077,6,0.928571,1.0,0.962963,13,1.0,0.75,0.857143,4,0.5,1.0,0.666667,3,1.0,1.0,1.0,82,0.989899,0.834416,0.862765,0.836932,792,0.987702,0.989899,0.988029,792,6.3559,124.608,5.0
2,0.049141,1.0,1.0,1.0,518,1.0,0.25,0.4,4,1.0,1.0,1.0,70,0.8,1.0,0.888889,8,0.962963,1.0,0.981132,26,1.0,0.625,0.769231,8,1.0,0.980392,0.990099,51,1.0,0.833333,0.909091,6,0.764706,1.0,0.866667,13,1.0,0.75,0.857143,4,0.4,0.666667,0.5,3,1.0,1.0,1.0,81,0.987374,0.910639,0.842116,0.846854,792,0.990629,0.987374,0.986767,792,5.4998,144.004,5.0
3,0.123004,0.996146,0.998069,0.997107,518,0.0,0.0,0.0,4,0.985714,0.985714,0.985714,70,0.875,0.777778,0.823529,9,0.884615,0.884615,0.884615,26,1.0,0.142857,0.25,7,0.907407,0.960784,0.933333,51,0.5,0.333333,0.4,6,0.56,1.0,0.717949,14,0.0,0.0,0.0,3,1.0,0.333333,0.5,3,0.963855,0.987654,0.97561,81,0.963384,0.722728,0.617012,0.622321,792,0.960946,0.963384,0.957372,792,5.1012,155.259,5.0


### Test Dataset

In [32]:
evaluations_and_format(trainers, [test_dataset, test_dataset, test_dataset, test_dataset])

Unnamed: 0,eval_loss,eval_none__precision,eval_none__recall,eval_none__f1-score,eval_none__support,eval_attribution__precision,eval_attribution__recall,eval_attribution__f1-score,eval_attribution__support,eval_causal__precision,eval_causal__recall,eval_causal__f1-score,eval_causal__support,eval_conditional__precision,eval_conditional__recall,eval_conditional__f1-score,eval_conditional__support,eval_contrast__precision,eval_contrast__recall,eval_contrast__f1-score,eval_contrast__support,eval_description__precision,eval_description__recall,eval_description__f1-score,eval_description__support,eval_equivalence__precision,eval_equivalence__recall,eval_equivalence__f1-score,eval_equivalence__support,eval_fulfillment__precision,eval_fulfillment__recall,eval_fulfillment__f1-score,eval_fulfillment__support,eval_identity__precision,eval_identity__recall,eval_identity__f1-score,eval_identity__support,eval_purpose__precision,eval_purpose__recall,eval_purpose__f1-score,eval_purpose__support,eval_summary__precision,eval_summary__recall,eval_summary__f1-score,eval_summary__support,eval_temporal__precision,eval_temporal__recall,eval_temporal__f1-score,eval_temporal__support,eval_accuracy,eval_macro avg__precision,eval_macro avg__recall,eval_macro avg__f1-score,eval_macro avg__support,eval_weighted avg__precision,eval_weighted avg__recall,eval_weighted avg__f1-score,eval_weighted avg__support,eval_runtime,eval_samples_per_second,epoch
0,1.357155,0.883285,0.888406,0.885838,690,0.0,0.0,0.0,5,0.611111,0.591398,0.601093,93,0.166667,0.083333,0.111111,12,0.404762,0.485714,0.441558,35,0.166667,0.1,0.125,10,0.779661,0.676471,0.724409,68,0.0,0.0,0.0,8,0.590909,0.764706,0.666667,17,0.0,0.0,0.0,5,0.0,0.0,0.0,4,0.455285,0.513761,0.482759,109,0.75947,0.338196,0.341982,0.336536,1056,0.754566,0.75947,0.756043,1056,5.9218,178.324,5.0
1,1.357155,0.883285,0.888406,0.885838,690,0.0,0.0,0.0,5,0.611111,0.591398,0.601093,93,0.166667,0.083333,0.111111,12,0.404762,0.485714,0.441558,35,0.166667,0.1,0.125,10,0.779661,0.676471,0.724409,68,0.0,0.0,0.0,8,0.590909,0.764706,0.666667,17,0.0,0.0,0.0,5,0.0,0.0,0.0,4,0.455285,0.513761,0.482759,109,0.75947,0.338196,0.341982,0.336536,1056,0.754566,0.75947,0.756043,1056,5.9281,178.135,5.0
2,1.357155,0.883285,0.888406,0.885838,690,0.0,0.0,0.0,5,0.611111,0.591398,0.601093,93,0.166667,0.083333,0.111111,12,0.404762,0.485714,0.441558,35,0.166667,0.1,0.125,10,0.779661,0.676471,0.724409,68,0.0,0.0,0.0,8,0.590909,0.764706,0.666667,17,0.0,0.0,0.0,5,0.0,0.0,0.0,4,0.455285,0.513761,0.482759,109,0.75947,0.338196,0.341982,0.336536,1056,0.754566,0.75947,0.756043,1056,5.9579,177.243,5.0
3,1.357155,0.883285,0.888406,0.885838,690,0.0,0.0,0.0,5,0.611111,0.591398,0.601093,93,0.166667,0.083333,0.111111,12,0.404762,0.485714,0.441558,35,0.166667,0.1,0.125,10,0.779661,0.676471,0.724409,68,0.0,0.0,0.0,8,0.590909,0.764706,0.666667,17,0.0,0.0,0.0,5,0.0,0.0,0.0,4,0.455285,0.513761,0.482759,109,0.75947,0.338196,0.341982,0.336536,1056,0.754566,0.75947,0.756043,1056,5.9385,177.823,5.0


0

## Prediction

In [33]:
# Take best trainer
trainer = trainers[best_fold_idx]

In [34]:
prediction_output = trainer.predict(test_dataset)

### Prediction metrics

In [35]:
df = pd.DataFrame([prediction_output.metrics])
display(df.T)

Unnamed: 0,0
test_loss,1.357155
test_none__precision,0.883285
test_none__recall,0.888406
test_none__f1-score,0.885838
test_none__support,690.0
test_attribution__precision,0.0
test_attribution__recall,0.0
test_attribution__f1-score,0.0
test_attribution__support,5.0
test_causal__precision,0.611111


### Prediction Examples

In [36]:
def example_prediction_output(result, origin_list, target_list, label_list, success=True, max_number=10):
    global labels
    # Real Relation
    data = []
    for n in range(len(origin_list)):
        data.append([origin_list[n], target_list[n], labels[label_list[n]]])
    df_real = pd.DataFrame(data, columns=['Origin', 'Target', 'Label'])
    # Predicted Label
    label_data = []
    for n in result.label_ids:
        i = np.argmax(result.predictions[n], axis=-1)
        label_data.append([labels[i]])
    df_pred_labels = pd.DataFrame(label_data, columns=["Prediction"])
    # Prediction Metrics
    df_predictions = pd.DataFrame(result.predictions, columns=labels)
    df_predictions = df_predictions.add_prefix("Pred_")
    df = pd.concat([df_real, df_pred_labels, df_predictions], axis=1)
    # Filter complete output to show example
    if success:
        # If possible only show examples with right predicted label that is not none
        df_tmp = df[(df.Label == df.Prediction) & (df.Label != "none")]
        if(df_tmp.size == 0):
            # if there is no match without label "none", print the "none"-matches
            df = df[(df.Label == df.Prediction)]
        else:
            df = df_tmp
    else:
        # If possible only show examples with right predicted label that is not none
        df_tmp = df[(df.Label != df.Prediction) & (df.Label != "none") & (df.Prediction != "none")]
        if(df_tmp.size == 0):
            df_tmp = df[(df.Label != df.Prediction) & ((df.Label != "none") | (df.Prediction != "none"))]
            if(df_tmp.size == 0):
                df = df[df.Label != df.Prediction]
            else:
                df = df_tmp
        else:
            df = df_tmp
    display(HTML(df.head(max_number).to_html()))

#### Successes

In [37]:
example_prediction_output(prediction_output, test_origin, test_target, test_labels)

Unnamed: 0,Origin,Target,Label,Prediction,Pred_none,Pred_attribution,Pred_causal,Pred_conditional,Pred_contrast,Pred_description,Pred_equivalence,Pred_fulfillment,Pred_identity,Pred_purpose,Pred_summary,Pred_temporal
0,"Israel begins ground assault on Gaza Strip. The objective of this stage is to destroy the terrorist infrastructure of the Hamas in the area of operation, while taking control of some of rocket launching area used by the Hamas, in order to greatly reduce the quantity of rockets fired at Israel and Israeli civilians, said a spokesman for the Israeli Ministry of Foreign Affairs in a statement on their website. January 4, 2008","Wail of sirens marks Holocaust Remembrance Day in Israel. Other speakers at Yad Vashem emphasized the importance of the Israel Defense Forces, Israel's military forces, to prevent tragedies such as the Holocaust in the future. January 9, 2008",none,none,9.889198,-1.208641,3.038765,-1.489986,0.955281,-1.859512,-1.218817,-1.329232,-3.3398,-0.069723,-1.23921,0.767364
1,"On the campaign trail in the USA, October 2020. In opening the debate, Carroll introduced himself as pro-life and said climate change must be addressed. October 29, 2020","On the campaign trail in the USA, September 2016. Her list of scheduled campaign events for the remainder of October does not bring her to North Dakota or anywhere near. August 16, 2015",none,none,10.948295,-0.648215,0.273543,-1.018713,0.062701,-1.183368,-0.840184,-0.405012,-3.13213,0.213033,-0.472391,0.716071
2,"Massive ice deposits found on Mars. Some sections of the ice deposits are up to 2.3 - 2.5 miles deep. March 15, 2007","NASA's Spitzer space telescope views alien worlds. More observations from Spitzer could provide information about the planets winds and atmospheric compositions. March 22, 2005",none,none,10.524177,-1.142731,1.540061,-1.72969,0.575975,-1.498401,-0.640231,-1.354847,-3.180449,-0.440539,-1.178906,0.195397
3,"Rupert Grint, Harry Potter's ""Ron Weasley"", recovers from H1N1 swine flu virus. The New Zealand director of public health said the swine flu will be a mild illness but in some instances the infection can cause more severe illness and, in a few tragic instances, death. July 5, 2009","International experts probe deadly Ebola Reston virus outbreak in Philippine pigs. Global animal and health authorities' emergency mission to the Philippines is investigating whether the strain of deadly Reston ebolavirus, has never caused human illness or death, and it's not immediately clear there is a public-health issue. September 16, 2007",none,none,11.277282,-0.901875,0.707094,-1.375629,0.330771,-1.388958,-0.815127,-0.87975,-3.127187,-0.068816,-0.772198,0.335841
4,"Canadian inspectors to test food ingredients from China. You wouldn't normally expect to find those compounds in pet food, and hence nobody was really looking for it, said John Melichercik, director of analytical laboratory services. April 21, 2007","Pupils fed through gates, school criticised. Schoolchildren at Rawmarsh Comprehensive School in South Yorkshire, England, are being fed fish and chips by their parents through the school gates at lunchtimes because parents do not believe their children are being given enough choice of food at lunchtime. September 15, 2006",none,none,9.979913,-1.452835,0.311616,-1.795963,0.704983,-1.355395,0.074032,-1.158063,-2.782577,-0.680394,-1.404697,0.259361
7,"Palestinians to elect new president on January 9. The Haaretz had initially reported that former prime minister Mahmoud Abbas was selected by the Fatah central committee as their candidate for president, but Abbas has denied this, saying, the matter is still being discussed. November 14, 2004","Cheney meets with Israeli and Palestinian leaders. Cheney met earlier in the day with Shimon Peres, the Israeli president. March 23, 2008",none,none,-0.44043,-2.183033,-0.586653,0.195107,-2.964717,-0.160543,0.033032,0.161396,-1.676032,-1.835143,-2.17039,8.183244
8,"ICRC: 28 bodies, 19 children, pulled from rubble after Israeli airstrike, Qana. She said We have repeatedly urged Israel to act proportionately, Pope Benedict said In the name of God, I call on all those responsible for this spiral of violence so that weapons are immediately laid down on all sides. July 30, 2006","Palestinians to elect new president on January 9. The Haaretz had initially reported that former prime minister Mahmoud Abbas was selected by the Fatah central committee as their candidate for president, but Abbas has denied this, saying, the matter is still being discussed. November 14, 2004",none,none,11.19988,-1.065791,1.163333,-1.212839,0.226483,-1.506371,-1.033806,-0.876065,-3.393178,-0.053584,-0.94733,0.947858
11,"Bloggers investigate social networking websites. MySpace's CEO Chris DeWolfe and its most prominent spokesperson Tom Anderson first met in 1999 at Xdrive Technologies, a dot-com startup which gave away free storage space to Internet users. January 10, 2005","'There's been another murder': UK's Wright Stuff presenter apologises for teen murder comments. Angered viewers as well as Liam Aitchison's father complained on the Channel 5 website. December 5, 2011",none,none,11.09447,-0.72615,0.438307,-1.080673,0.020978,-1.286138,-0.880392,-0.442065,-3.243506,0.127608,-0.594771,0.916142
12,"Pakistani PM: Anti-Taliban offensive in South Waziristan is over. The United Nations reported that about 40,000 of people have fled the region, and need humanitarian aid. December 12, 2009","Palestinians to elect new president on January 9. New leadership could prove to be the key to revitalizing the peace process in the Middle East, as both Israel and the United States had refused to work with Arafat. November 14, 2004",none,none,10.781116,-0.614143,0.291794,-0.998212,0.316421,-1.246172,-0.923802,-0.265232,-3.222421,0.313797,-0.459552,0.709992
14,"Many US TV stations preparing to make digital switch despite new legislation. Logo for the DTV transition before the DTV Delay Act. During these challenging economic times, the needs of American consumers are a top priority of my administration, President Obama said in a statement about the DTV Delay Act. Millions of Americans, including those in our most vulnerable communities, would have been left in the dark if the conversion had gone on as planned, and this solution is an important step forward as we work to get the nation ready for digital TV. February 11, 2009","Indian Ocean tsunami, one year after. Efforts to implement a warning system similar to that coordinated by the Hawaii have past a number of significant milestones. December 26, 2005",none,none,11.199415,-0.866654,1.063985,-1.260006,0.314398,-1.364293,-1.116893,-0.836855,-3.249065,0.089241,-0.698113,0.440318


#### Fails

In [38]:
example_prediction_output(prediction_output, test_origin, test_target, test_labels, False)

Unnamed: 0,Origin,Target,Label,Prediction,Pred_none,Pred_attribution,Pred_causal,Pred_conditional,Pred_contrast,Pred_description,Pred_equivalence,Pred_fulfillment,Pred_identity,Pred_purpose,Pred_summary,Pred_temporal
26,"New book Blown for Good reveals details inside Scientology headquarters. He faced repercussions from Miscavige if he could not meet production quotas. January 25, 2008","Blown for Good author discusses life inside international headquarters of Scientology. This is, the Scientology cult, is I guarantee you, a thousand times more bizarre than you could have ever imagined. November 5, 2009",equivalence,contrast,-2.038034,-0.665065,0.343781,-0.630838,-1.50589,0.500489,0.804771,-2.108115,1.111776,-1.744272,-1.703441,-0.022857
32,"Wikinews interviews Bill Hammons, Unity Party of America presidential nominee. Should the number of justices remain at nine?' ::'Hammons': The only litmus test will be a nominee’s Common Sense desire to reach legal conclusions in keeping with both a strict Originalist interpretation of the Constitution and the best interests of the country. August 31, 2020","Wikinews interviews Phil Collins, U.S. Prohibition Party presidential nominee. Should the number of justices remain at nine?' ::'Collins': I haven't decided whom I'd nominate. September 30, 2020",equivalence,contrast,-3.274367,-1.45122,-3.239599,-2.156454,0.372777,-0.391214,6.356559,-1.017838,1.615751,-2.720227,-1.677299,-1.480731
33,"Hamshahri newspaper plans cartoon response. __NOTOC__ Farid Mortazavi, the paper's graphics editor, said to the Guardian newspaper that The western papers printed these sacrilegious cartoons on the pretext of freedom of expression, so let's see if they mean what they say and also print these Holocaust cartoons. February 14,2006","Jyllands-Posten reconsiders printing holocaust denial cartoons. According to the Associated Press, the Post's culture editor, Flemming Rose had offered to print the planned holocaust-denying cartoons commissioned by Iranian paper Hamshahri as a response to the riot-provoking cartoons depicting the prophet Mohammed. February 7, 2006",fulfillment,temporal,-5.093661,1.977577,1.120206,1.671532,-0.127275,1.824551,-2.382162,0.343666,1.542498,1.400025,1.846285,-2.979157
41,"Palestinians to elect new president on January 9. The Haaretz had initially reported that former prime minister Mahmoud Abbas was selected by the Fatah central committee as their candidate for president, but Abbas has denied this, saying, the matter is still being discussed. November 14, 2004","Palestinian official newspapers: Israel uses super rats against Jerusalem Arabs. The Wafa agency, which is controlled and funded by Palestinian Authority President Mahmoud Abbas's office, has in the past accused Israel of using wild pigs to drive Palestinians out of their homes citing Palestinian 'victims' quoted as saying that they had seen Israelis release herds of wild pigs, which later attacked them. 2014-01-23T00:33:22Z",fulfillment,temporal,-3.271356,-1.268338,0.173267,1.559145,-2.887454,0.326412,-0.893438,1.139564,-1.414716,-1.408485,-1.585231,7.0348
45,"EU maintains ban on Indonesian airlines amid accusations of political motivation. Indonesia had implemented a program to bring state-owned flag carrier Garuda Indonesia up to EU standards, as well as private rivals Mandala Airlines and Airfast. July 26, 2008","Indonesian air industry signs safety deal ahead of EU ban review. A team from the European Aviation Security Agency has inspected Indonesian airports and interviewed the managers of Garuda Indonesia, Mandala Airlines and Air Fast to gauge how far safety has improved. June 29, 2007",equivalence,contrast,-2.741669,-1.416165,-2.464517,-2.66916,1.52818,-0.744573,6.069729,-1.386893,1.327575,-2.424706,-1.804112,-2.334862
62,"US hands Iraq high-profile prisoners. President Obama is no different from Bush, who has Iraqi blood on his hands. 14 July 2010","Obama's 100-day speech warns of U.S.'s problems. that all you had to worry about was Iraq, Afghanistan, North Korea, getting healthcare passed, figuring out how to deal with energy independence, deal with Iran and a pandemic flu, I would take that deal. 2013-12-24T19:08:16Z",equivalence,contrast,-3.363449,-1.687502,-0.948588,-2.157974,1.957691,-1.07168,5.583953,-0.521706,-0.358144,-2.340426,-2.057019,-0.968831
65,"I'll Have Another wins 2012 Preakness Stakes. The winning horse is owned by financier Doug O'Neill. May 6, 2012",I'll Have Another wins 2012 Kentucky Derby. His trainer is Dale Romans and his jockey was Blue Grass Stakes. 2020-09-09T23:46:16Z,description,causal,9.735649,-1.690026,-0.038077,-1.420341,-1.069127,-0.7417,-0.580007,-1.204668,-2.812553,-0.732213,-1.39766,2.894685
84,"Congressional computers continue to be used to vandalize Wikipedia. 143.231.249.141 also added racial slurs and references to gay pedophilia into 's article in February, saying he has participated in the controversial act of 'tabeling', in which he takes a small child, places him upon a table, and 'puts the lord inside him.' In an attempt to find out where the edits were being made and by whom, Wikinews contacted the Electronic Frontier Foundation to receive advice on how to file an information request with the U.S. government under the Freedom of Information Act (FOIA). August 26, 2008","Staffs for US presidential candidates John McCain and Barack Obama caught making questionable edits to Wikipedia. On August 6, 2007 another staff member using a different IP address from Obama's campaign, 208.116.214.67, added Adam Goldfarb is the biggest liar I know to the Wikipedia article Liar. August 23, 2008",equivalence,contrast,-2.739438,-1.857562,0.850071,-1.701322,0.336899,-0.82536,4.284129,-0.910924,-0.970343,-2.486151,-2.490554,0.657129
85,"BP: One oil leak in Gulf of Mexico plugged. The oil company BP has said that one of three leaks from a ruptured oil well in the Gulf of Mexico has been sealed off, in an attempt to stop the oil from the destroyed Deepwater Horizon rig from contaminating the sea further. May 5, 2010","BP lose laptop containing sensitive claimant data. The data on the laptop concerned claimants against the company after one of its oil platforms in the Gulf of Mexico exploded, sending up to 4.9 million barrels of oil into the ocean, which eventually reached the southern U.S. coastline. March 29, 2011",description,causal,6.219705,-1.976684,1.338032,-0.534131,-1.098193,-0.95218,-1.108201,-0.185784,-3.651024,-0.747116,-1.872672,6.117576
92,"Hezbollah and Israel exchange fire for fifth day. Israel has ordered over two million civilians to stay in bomb shelters. July 16, 2006","Hezbollah-Israel war continues for a third day. What Israel is undertaking is an act of aggression and devastation aimed at bringing Lebanon to its knees and subverting it by any means. July 15, 2006",equivalence,contrast,-2.626571,-1.056604,-2.780476,-2.409682,3.496631,-0.818039,5.267318,0.048196,0.561213,-1.671463,-1.11001,-2.279648


### Prediction output

In [39]:
def format_prediction_output(result, origin_list, target_list, label_list):
    global labels
    # Real Relation
    data = []
    for n in range(len(origin_list)):
        data.append([origin_list[n], target_list[n], labels[label_list[n]]])
    df_real = pd.DataFrame(data, columns=['Origin', 'Target', 'Label'])
    # Predicted Label
    label_data = []
    for n in result.label_ids:
        i = np.argmax(result.predictions[n], axis=-1)
        label_data.append([labels[i]])
    df_pred_labels = pd.DataFrame(label_data, columns=["Prediction"])
    # Prediction Metrics
    df_predictions = pd.DataFrame(result.predictions, columns=labels)
    df_predictions = df_predictions.add_prefix("Pred_")
    df = pd.concat([df_real, df_pred_labels, df_predictions], axis=1)
    display(HTML(df.to_html()))
    
format_prediction_output(prediction_output, test_origin, test_target, test_labels)

Unnamed: 0,Origin,Target,Label,Prediction,Pred_none,Pred_attribution,Pred_causal,Pred_conditional,Pred_contrast,Pred_description,Pred_equivalence,Pred_fulfillment,Pred_identity,Pred_purpose,Pred_summary,Pred_temporal
0,"Israel begins ground assault on Gaza Strip. The objective of this stage is to destroy the terrorist infrastructure of the Hamas in the area of operation, while taking control of some of rocket launching area used by the Hamas, in order to greatly reduce the quantity of rockets fired at Israel and Israeli civilians, said a spokesman for the Israeli Ministry of Foreign Affairs in a statement on their website. January 4, 2008","Wail of sirens marks Holocaust Remembrance Day in Israel. Other speakers at Yad Vashem emphasized the importance of the Israel Defense Forces, Israel's military forces, to prevent tragedies such as the Holocaust in the future. January 9, 2008",none,none,9.889198,-1.208641,3.038765,-1.489986,0.955281,-1.859512,-1.218817,-1.329232,-3.3398,-0.069723,-1.23921,0.767364
1,"On the campaign trail in the USA, October 2020. In opening the debate, Carroll introduced himself as pro-life and said climate change must be addressed. October 29, 2020","On the campaign trail in the USA, September 2016. Her list of scheduled campaign events for the remainder of October does not bring her to North Dakota or anywhere near. August 16, 2015",none,none,10.948295,-0.648215,0.273543,-1.018713,0.062701,-1.183368,-0.840184,-0.405012,-3.13213,0.213033,-0.472391,0.716071
2,"Massive ice deposits found on Mars. Some sections of the ice deposits are up to 2.3 - 2.5 miles deep. March 15, 2007","NASA's Spitzer space telescope views alien worlds. More observations from Spitzer could provide information about the planets winds and atmospheric compositions. March 22, 2005",none,none,10.524177,-1.142731,1.540061,-1.72969,0.575975,-1.498401,-0.640231,-1.354847,-3.180449,-0.440539,-1.178906,0.195397
3,"Rupert Grint, Harry Potter's ""Ron Weasley"", recovers from H1N1 swine flu virus. The New Zealand director of public health said the swine flu will be a mild illness but in some instances the infection can cause more severe illness and, in a few tragic instances, death. July 5, 2009","International experts probe deadly Ebola Reston virus outbreak in Philippine pigs. Global animal and health authorities' emergency mission to the Philippines is investigating whether the strain of deadly Reston ebolavirus, has never caused human illness or death, and it's not immediately clear there is a public-health issue. September 16, 2007",none,none,11.277282,-0.901875,0.707094,-1.375629,0.330771,-1.388958,-0.815127,-0.87975,-3.127187,-0.068816,-0.772198,0.335841
4,"Canadian inspectors to test food ingredients from China. You wouldn't normally expect to find those compounds in pet food, and hence nobody was really looking for it, said John Melichercik, director of analytical laboratory services. April 21, 2007","Pupils fed through gates, school criticised. Schoolchildren at Rawmarsh Comprehensive School in South Yorkshire, England, are being fed fish and chips by their parents through the school gates at lunchtimes because parents do not believe their children are being given enough choice of food at lunchtime. September 15, 2006",none,none,9.979913,-1.452835,0.311616,-1.795963,0.704983,-1.355395,0.074032,-1.158063,-2.782577,-0.680394,-1.404697,0.259361
5,"Netscape navigating the World Wide Web no more. The most recent version of the Netscape browser, Navigator 9, was for the most part was a re-skinned version of Firefox developed internally by a small group of people inside AOL. December 28, 2007","Mozilla Foundation rolls out second security update for Firefox browser. The flaw was discovered and reported by Internet Security Systems, Inc. The upgrade was the second major security fix in less than a month for the browser, which has grown in popularity to ten percent of the browser market in the United States. March 1, 2005",causal,none,-0.984384,-0.780878,7.850018,-0.959815,1.948277,-1.086379,-1.786647,-1.547249,-2.415886,-0.593763,-1.623516,0.005313
6,"British soldier dies in Afghanistan. He is the fifth UK soldier to die this month in Afghanistan, and in total, eighteen British troops have died there since operations began. August 11, 2006","British soldier shot and killed in southern Afghanistan. Eight British soldiers have died in Afghanistan this month. August 20, 2006",contrast,none,-1.946303,0.385587,0.658991,-0.695323,6.071516,-0.927039,0.464157,1.504502,-1.152804,0.651811,0.561763,-2.283505
7,"Palestinians to elect new president on January 9. The Haaretz had initially reported that former prime minister Mahmoud Abbas was selected by the Fatah central committee as their candidate for president, but Abbas has denied this, saying, the matter is still being discussed. November 14, 2004","Cheney meets with Israeli and Palestinian leaders. Cheney met earlier in the day with Shimon Peres, the Israeli president. March 23, 2008",none,none,-0.44043,-2.183033,-0.586653,0.195107,-2.964717,-0.160543,0.033032,0.161396,-1.676032,-1.835143,-2.17039,8.183244
8,"ICRC: 28 bodies, 19 children, pulled from rubble after Israeli airstrike, Qana. She said We have repeatedly urged Israel to act proportionately, Pope Benedict said In the name of God, I call on all those responsible for this spiral of violence so that weapons are immediately laid down on all sides. July 30, 2006","Palestinians to elect new president on January 9. The Haaretz had initially reported that former prime minister Mahmoud Abbas was selected by the Fatah central committee as their candidate for president, but Abbas has denied this, saying, the matter is still being discussed. November 14, 2004",none,none,11.19988,-1.065791,1.163333,-1.212839,0.226483,-1.506371,-1.033806,-0.876065,-3.393178,-0.053584,-0.94733,0.947858
9,"Olympic highlights: August 12, 2008. Australian Leisel Jones won the gold medal and set a new Olympic record in the final of the women's 100m breaststroke with a time of 1:05.17 minutes. August 12, 2008","Beijing 2008 Olympics comes to a close. People's Republic of China topped the medals table at the Beijing Olympics with 51 golds while the United States of America came second with 36 and the Russian Federation finished third with 23. August 25, 2008",temporal,none,10.124522,-1.577543,0.876116,-1.560234,-0.454731,-1.506702,-0.462778,-1.088022,-3.341151,-0.721945,-1.643753,3.004012
