# Parameters 

In [1]:
# --- Model / training hyper-parameters ---
model_checkpoint = 'bert-large-cased'   # Hugging Face checkpoint name
batch_size = 1                          # used for both train and eval batches
metric_name = "accuracy"                # metric used to select the best checkpoint
num_epoch = 10

# --- Cross-validation ---
num_folds = 4

# --- Experiment: class names, indexed by integer label id ---
labels = [
    "none",
    "attribution",
    "causal",
    "conditional",
    "contrast",
    "description",
    "equivalence",
    "fulfillment",
    "identity",
    "purpose",
    "summary",
    "temporal",
]

# Import

In [2]:
import torch
import numpy as np
import random
import pandas as pd
from IPython.display import display, HTML

In [3]:
def import_fold(path, fold):
    """Load the train/test CSV pair of one cross-validation fold.

    Reads ``{path}/train.{fold}.csv`` and ``{path}/test.{fold}.csv`` and
    extracts the ``origin``, ``target`` and ``label`` columns as plain lists.

    Args:
        path: Directory containing the fold CSV files.
        fold: Fold identifier used in the file names.

    Returns:
        A 6-tuple ``(train_origin, train_target, train_labels,
        test_origin, test_target, test_labels)``.
    """
    wanted_columns = ("origin", "target", "label")
    # Read both files first, then pull the columns out in train -> test order.
    train_frame = pd.read_csv(f"{path}/train.{fold}.csv")
    test_frame = pd.read_csv(f"{path}/test.{fold}.csv")
    return tuple(
        frame[column].tolist()
        for frame in (train_frame, test_frame)
        for column in wanted_columns
    )

# Model 

## Metric

In [4]:
from sklearn.metrics import classification_report
import collections

#classification_threshold = 0.

def flatten(d, parent_key='', sep='__'):
    """Flatten a nested mapping into a single-level dict.

    Keys of nested mappings are joined to their parent key with ``sep``,
    e.g. ``{"a": {"b": 1}}`` becomes ``{"a__b": 1}``.

    Args:
        d: Mapping to flatten (values may themselves be mappings).
        parent_key: Prefix prepended to every key; empty at the top level.
        sep: Separator placed between parent and child keys.

    Returns:
        A new flat ``dict``; the input is not modified.
    """
    # `collections.MutableMapping` was removed in Python 3.10; the ABC lives
    # in `collections.abc`. Imported locally so the function is self-contained.
    from collections.abc import MutableMapping

    flat = {}
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            flat.update(flatten(v, new_key, sep=sep))
        else:
            flat[new_key] = v
    return flat

def compute_metrics(eval_pred):
    """Compute a flattened per-class classification report for the Trainer.

    Args:
        eval_pred: Pair ``(predictions, true_labels)`` where ``predictions``
            holds one score per class along the last axis.

    Returns:
        A flat dict (nested report keys joined with ``__``) containing the
        per-class and averaged precision/recall/f1/support values plus an
        ``accuracy`` entry.
    """
    predictions, true_labels = eval_pred
    # take most probable guess
    predicted_ids = np.argmax(predictions, axis=-1)
    report = classification_report(
        y_true=true_labels,
        y_pred=predicted_ids,
        # Pass the label ids explicitly: without them sklearn raises a
        # ValueError whenever a fold contains fewer distinct classes than
        # there are entries in `target_names`.
        labels=list(range(len(labels))),
        target_names=labels,
        zero_division=0,
        output_dict=True)
    # Depending on the sklearn version, a report built with explicit labels
    # may contain "micro avg" instead of "accuracy". Normalise to a stable
    # key set so every fold yields the same columns and "accuracy" exists.
    report.pop("micro avg", None)
    report.setdefault(
        "accuracy",
        float(np.mean(np.asarray(predicted_ids) == np.asarray(true_labels))))
    return flatten(report)

In [5]:
#TEST
#flatten(classification_report(
#    y_true=[0,1,2,3,4,5,6,7,8,9,10,11,12],
#    y_pred=[0,0,0,1,3,0,0,0,0,0,0,0,0],
#    target_names=labels,
#    zero_division=0,
#    output_dict=True))

## Model Settings

In [6]:
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer

# Training configuration shared by every fold.
args = TrainingArguments(
    "semantic-test",
    evaluation_strategy = "epoch",
    # `load_best_model_at_end` requires checkpoints to be written on the same
    # schedule as evaluation; the default save strategy ("steps") does not
    # match "epoch" and is rejected by TrainingArguments.
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=num_epoch,
    weight_decay=0.01,
    load_best_model_at_end=True,
    # Best checkpoint is selected on this key of the compute_metrics output.
    metric_for_best_model=metric_name,
)

## Tokenize

In [7]:
from transformers import BertTokenizerFast, DebertaTokenizerFast
# Load the fast BERT tokenizer that matches `model_checkpoint`.
tokenizer = BertTokenizerFast.from_pretrained(model_checkpoint)
# Alternative for DeBERTa checkpoints (kept for reference):
#tokenizer = DebertaTokenizerFast.from_pretrained(model_checkpoint)

## Print Examples

In [8]:
#train_encodings

In [9]:
def show_random_elements(origin_list, target_list, label_list, encodings, num_examples=10):
    """Render a random sample of examples and their encodings as an HTML table.

    Args:
        origin_list: Origin texts, one per example.
        target_list: Target texts, aligned with ``origin_list``.
        label_list: Integer label ids, aligned with ``origin_list``; each id
            is translated to its name via the module-level ``labels`` list.
        encodings: Tokenizer output exposing ``input_ids``,
            ``token_type_ids`` and ``attention_mask``, indexable per example.
        num_examples: Number of distinct examples to show.

    Raises:
        AssertionError: If ``num_examples`` exceeds the number of examples.
    """
    assert num_examples <= len(origin_list), "Can't pick more elements than there are in the dataset."
    # random.sample draws distinct indices directly, replacing the manual
    # draw-and-retry loop.
    picks = random.sample(range(len(origin_list)), num_examples)
    rows = [
        [
            n,
            origin_list[n],
            labels[label_list[n]],
            target_list[n],
            encodings.input_ids[n],
            encodings.token_type_ids[n],
            encodings.attention_mask[n],
        ]
        for n in picks
    ]
    df = pd.DataFrame(rows, columns=['index', 'Origin', 'Label', 'Target', 'Input_ids', 'Token_type_ids', 'Attention_mask'])
    display(HTML(df.to_html()))

In [10]:
# show_random_elements(train_origin, train_target, train_labels, train_encodings)
# Output adjusted to folds
#show_random_elements(k_fold_origin[0][0], k_fold_target[0][0], k_fold_labels[0][0], train_encodings[0])

## Create Dataset

In [11]:
class SemanticDataset(torch.utils.data.Dataset):
    """Torch dataset pairing tokenizer encodings with integer labels.

    Each item is a dict holding one tensor per encoding field plus a
    ``labels`` tensor for the example's label.
    """

    def __init__(self, encodings, labels):
        """Store the encodings (mapping field -> per-example values) and labels."""
        self.encodings, self.labels = encodings, labels

    def __len__(self):
        """Return the number of examples, taken from the label list."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the tensors of every encoding field and the label at ``idx``."""
        sample = {}
        for field, values in self.encodings.items():
            sample[field] = torch.tensor(values[idx])
        sample['labels'] = torch.tensor(self.labels[idx])
        return sample

# Training

## Train & Evaluate

In [None]:
# Cross-validation driver: fine-tune a fresh model on every fold and collect
# the evaluation metrics of each run.
result = []   # one metrics dict (from trainer.evaluate) per fold
num_labels = len(labels)
models = []   # NOTE: despite the name, this collects the Trainer objects, one per fold

for i in range(num_folds):
    # Start every fold from the pretrained checkpoint.
    model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=num_labels)
    # Load this fold's train/test split from CSV.
    train_origin, train_target, train_labels, test_origin, test_target, test_labels = import_fold("data/export-umgekehrt", i)
    # Tokenize (origin, target) as a sentence pair; train and test are padded independently.
    train_encodings = tokenizer(train_origin, train_target, truncation=True, padding=True, return_token_type_ids=True)
    test_encodings = tokenizer(test_origin, test_target, truncation=True, padding=True, return_token_type_ids=True)
    # Wrap encodings and labels as torch datasets.
    train_dataset = SemanticDataset(train_encodings, train_labels)
    test_dataset = SemanticDataset(test_encodings, test_labels)
    # Create the Trainer; the fold's test split doubles as the per-epoch evaluation set.
    trainer = Trainer(
        model,
        args,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )
    # Train, then evaluate on the same test split.
    trainer.train()
    ev = trainer.evaluate(test_dataset)
    acc = ev["eval_accuracy"]
    print(f"Accuracy: {acc}")
    result.append(ev)
    # NOTE(review): keeping every Trainer (and its model) alive until the save
    # cell presumably holds all folds' models in memory at once — confirm this
    # fits on the target device.
    models.append(trainer)

Some weights of the model checkpoint at bert-large-cased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at 

Epoch,Training Loss,Validation Loss


## Interpret evaluation

### Helper functions

In [None]:
def mean(data):
    """Compute the arithmetic mean of a non-empty sized collection.

    Raises:
        ValueError: If ``data`` contains no elements.
    """
    count = len(data)
    if count == 0:
        raise ValueError('mean requires at least one data point')
    total = sum(data)
    return total / count

def _ss(data):
    """Sum the squared deviations of ``data`` from its arithmetic mean."""
    center = mean(data)
    total = 0
    for value in data:
        total += (value - center) ** 2
    return total

def stddev(data, ddof=0):
    """Standard deviation of ``data``.

    With the default ``ddof=0`` this is the population standard deviation;
    pass ``ddof=1`` for the sample standard deviation.

    Raises:
        ValueError: If ``data`` has fewer than two elements.
    """
    size = len(data)
    if size < 2:
        raise ValueError('variance requires at least two data points')
    variance = _ss(data) / (size - ddof)
    return variance ** 0.5

### Prepare Data

In [None]:
# Show the raw list of per-fold evaluation dicts collected by the training loop.
result

In [None]:

def transform_to_regular_dict(result):
    """Turn a list of per-fold metric dicts into a dict of per-metric lists.

    Args:
        result: Iterable of mappings ``metric name -> numeric value``, one
            mapping per fold.

    Returns:
        Dict mapping every metric name to the list of its values in fold
        order. All values are converted to ``float`` so each column has a
        uniform type. A metric missing from some folds simply yields a
        shorter list instead of raising ``KeyError``.
    """
    output_dict = {}
    for eval_item in result:
        for key, value in eval_item.items():
            # setdefault creates the list on first sight of a key, so no
            # special handling of the first fold is required.
            output_dict.setdefault(key, []).append(float(value))
    return output_dict
            
# Pivot the per-fold metric dicts into columns: one row per fold, one column per metric.
eval_dict = transform_to_regular_dict(result)
eval_df = pd.DataFrame(eval_dict)

def add_mean_std_row(df):
    """Append per-column mean and sample standard deviation rows.

    Args:
        df: Numeric DataFrame with one row per fold.

    Returns:
        A new DataFrame indexed by ``fold``: the original rows labelled
        ``"1"`` .. ``"<n>"`` followed by an ``"avg"`` row (column means) and a
        ``"std"`` row (sample standard deviation, ``ddof=1``). The input
        frame is left unchanged.
    """
    n_folds = len(df)
    # One summary frame with two rows; its columns follow the input's columns.
    summary = pd.DataFrame([df.mean(), df.std(ddof=1)])
    # DataFrame.append was removed in pandas 2.0 -> use pd.concat.
    out = pd.concat([df, summary], ignore_index=True)
    # Derive the readable index from the actual number of folds instead of
    # assuming exactly four.
    out["fold"] = [str(i) for i in range(1, n_folds + 1)] + ["avg", "std"]
    return out.set_index("fold")

# Append the avg/std summary rows and render the table.
# NOTE(review): this rebinds `eval_df`, so re-running this cell without first
# re-running the cell that builds `eval_df` would summarise the summary rows too.
eval_df = add_mean_std_row(eval_df)
display(HTML(eval_df.to_html()))

# Save

## Model

In [None]:
# Persist every fold's fine-tuned model. `models` holds the Trainer objects
# collected during training, so the loop variable is named accordingly and no
# longer overwrites the global `model`.
for fold_index, fold_trainer in enumerate(models):
    fold_trainer.save_model(f"/data/experiments/raring/semantic_storytelling/data/model-umgekehrt/{model_checkpoint.replace(r'/', '-')}/epoch_{num_epoch}/fold_{fold_index}")

## Metrics

In [None]:
# Export the per-fold metrics table (including the avg/std rows) to CSV;
# any "/" in the checkpoint name is replaced so it is usable as a file name.
eval_df.to_csv(f"data/eval-umgekehrt/{model_checkpoint.replace(r'/', '-')}_epoch_{num_epoch}.csv")