In [21]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.metrics import classification_report, r2_score
import pickle

from src.annotator_features import get_most_controversial_annotations, get_annotator_biases, get_text_entropies
from src.train import prepare_dataloader, Classifier, predict
from src.models import Net

import torch
device = torch.device("cpu")

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Data loading

In [22]:
base_path = './data/selected_texts/'


def _load_csv(file_name):
    """Read one comma-separated file located under `base_path`."""
    return pd.read_csv(base_path + file_name, sep=',')


# The first column of the texts file is dropped positionally before use.
texts_df = _load_csv('cawi2_selected_texts.csv').iloc[:, 1:].copy()
annotations_df = _load_csv('cawi2_selected_annotations.csv')
annotators_df = _load_csv('cawi2_selected_annotators.csv')
folds_df = _load_csv('annotator_folds.csv')

# Join texts, annotations and fold assignments on their shared column names,
# then discard every row that still contains a missing value.
merged_annotations = (
    texts_df
    .merge(annotations_df)
    .merge(folds_df)
    .dropna()
)
# Disabled alternative: keep only annotators that are listed in annotators_df.
#merged_annotations = merged_annotations.loc[merged_annotations.annotator_id.isin(annotators_df.identyfikator)].copy()

# Rows belonging to the 'past' split.
personal_df = merged_annotations.loc[merged_annotations['split'] == 'past']

# Every annotation column after the first two is treated as an emotion dimension.
emotion_columns = list(annotations_df.columns[2:])

In [32]:
# Inspect the annotator-to-fold assignment read from annotator_folds.csv.
folds_df

Unnamed: 0,annotator_id,fold
0,384622968,8
1,129936705,7
2,987741290,8
3,662287953,5
4,988028021,4
...,...,...
4705,200703762,3
4706,988443670,8
4707,988446860,3
4708,457756720,9


In [23]:
def normlize_annotations(df, max_1=False, columns=None):
    """Shift (and optionally rescale) annotation columns of a copy of `df`.

    Each selected column has its minimum subtracted so it starts at 0. When
    `max_1` is true, the shifted column is additionally divided by its maximum
    so that it spans [0, 1].

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the annotation columns. It is not modified.
    max_1 : bool, default False
        Whether to divide the shifted columns by their maxima.
    columns : list of str, optional
        Columns to normalise. Defaults to the notebook-level `emotion_columns`.

    Returns
    -------
    pandas.DataFrame
        A copy of `df` with the selected columns normalised.
    """
    if columns is None:
        columns = emotion_columns
    columns = list(columns)

    df = df.copy()
    shifted = df[columns] - df[columns].min(axis=0)

    if max_1:
        maxes = shifted.max(axis=0)
        # A constant column has max 0 after the shift; dividing by 0 would turn
        # it into NaN, so such columns are divided by 1 and stay all-zero.
        shifted = shifted / maxes.replace(0, 1)

    # Replace the columns wholesale: writing floats into integer columns through
    # `.loc[:, cols] = ...` is a deprecated silent upcast in recent pandas.
    df[columns] = shifted

    return df

## Model embeddings

In [24]:
from src.embeddings import prepare_embeddings

# NOTE(review): presumably creates the ./data/*_embeddings.p files read below — confirm.
prepare_embeddings()

# Language model whose precomputed text embeddings are used in the experiments.
MODEL_NAME = 'herbert'
#MODEL_NAME = 'xlmr'
#MODEL_NAME = 'polish_roberta'

# SECURITY: unpickling executes arbitrary code; only load embedding files that
# were produced locally by trusted code.
# The context manager closes the file handle, which the bare `open(...)` call
# passed straight to `pickle.load` never did.
with open(f'./data/{MODEL_NAME}_embeddings.p', 'rb') as embeddings_file:
    all_embeddings = pickle.load(embeddings_file)

## Preprocessing

In [25]:
# Every annotator attribute except the first column; missing values become
# their own explicit 'empty' category.
annotator_features = annotators_df.iloc[:, 1:].fillna('empty')

# One indicator matrix per attribute, stacked side by side into a single array.
onehots = [
    pd.get_dummies(annotator_features[feature_name]).values
    for feature_name in annotator_features.columns
]
annotator_features_onehot = np.hstack(onehots)

In [26]:
# Shape of the stacked one-hot matrix built from annotators_df.
annotator_features_onehot.shape

(8853, 232)

In [27]:
annotation_values_df = merged_annotations.loc[:, emotion_columns].fillna('empty')

# Number of distinct values (one-hot width) observed for each emotion column.
class_dims = [
    pd.get_dummies(annotation_values_df[emotion_name]).shape[1]
    for emotion_name in annotation_values_df.columns
]

sum(class_dims)

0

## Map ids to row indices starting from 0

In [None]:
# Lookup tables from the original identifiers to the row index of the
# corresponding entry in texts_df / annotators_df.
text_id_idx_dict = {
    text_id: row_idx
    for row_idx, text_id in zip(texts_df.index, texts_df['text_id'])
}
annotator_id_idx_dict = {
    annotator_id: row_idx
    for row_idx, annotator_id in zip(annotators_df.index, annotators_df['identyfikator'])
}

## Experiments for regression

In [None]:
from src.annotator_features import get_most_controversial_annotations, get_annotator_biases, get_random_annotations


In [None]:
def get_r2_score_from_results(test_predictions, true_labels):
    """Compute one R^2 score per output column.

    Parameters
    ----------
    test_predictions : torch.Tensor
        2-D tensor of predictions, indexed as [row, output column].
    true_labels : torch.Tensor
        2-D tensor of targets, indexed the same way as `test_predictions`.

    Returns
    -------
    list
        `r2_score(true column, predicted column)` for every column of
        `test_predictions`, in column order.
    """
    # `.detach()` is required before `.numpy()` for tensors that are still part
    # of an autograd graph; it is a no-op for tensors that are not.
    true_labels = true_labels.detach().cpu().numpy()
    test_predictions = test_predictions.detach().cpu().numpy()

    scores = [
        r2_score(true_labels[:, column], test_predictions[:, column])
        for column in range(test_predictions.shape[1])
    ]

    return scores

In [None]:
# Accumulator for experiment scores; the sweep cell fills it as
# results[scenario][annotations_ordering][num_annotations][fold_num].
results = {}

In [None]:
# Carve the three evaluation splits out of the merged annotations and rescale
# each one independently (min-shifted and divided by its own column maxima).
train_df = normlize_annotations(
    merged_annotations.loc[merged_annotations['split'] == 'present'].copy(), True)
dev_df = normlize_annotations(
    merged_annotations.loc[merged_annotations['split'] == 'future1'].copy(), True)
test_df = normlize_annotations(
    merged_annotations.loc[merged_annotations['split'] == 'future2'].copy(), True)

# Attach the row-index versions of the text and annotator identifiers.
# An id missing from the lookup tables raises KeyError.
for split_df in (train_df, dev_df, test_df):
    split_df['text_idx'] = split_df['text_id'].map(lambda text_id: text_id_idx_dict[text_id])
    split_df['annotator_idx'] = split_df['annotator_id'].map(
        lambda annotator_id: annotator_id_idx_dict[annotator_id])

In [None]:
# Sweep over scenario / annotation-selection strategy / number of personal
# annotations. For each configuration a 10-fold cross-validation is run and the
# per-emotion R^2 scores are stored in
# results[scenario][annotations_ordering][num_annotations][fold_num].

classes_num = 10  # output size passed to Net


def _biases_for_all_annotators(selected_annotations):
    """Compute annotator biases and align them with the rows of `annotators_df`.

    The output of `get_annotator_biases` is left-joined onto the
    `identyfikator` column; missing values in the joined frame (e.g. annotators
    without a bias row) are filled with 0.
    """
    biases = get_annotator_biases(selected_annotations, emotion_columns)
    return (pd.DataFrame(annotators_df.loc[:, 'identyfikator'])
            .merge(biases, right_on='annotator_id', left_on='identyfikator', how='left')
            .fillna(0))


for scenario in ['s3']:
    results[scenario] = {}
    for annotations_ordering in ['random', 'std']:
        results[scenario][annotations_ordering] = {}
        for num_annotations in range(15):
            results[scenario][annotations_ordering][num_annotations] = {}

            if annotations_ordering == 'std':
                selected_annotations = get_most_controversial_annotations(personal_df, emotion_columns, num_annotations)
            else:
                selected_annotations = get_random_annotations(personal_df, num_annotations)

            # FIX: these biases are the only place where `annotations_ordering`
            # and `num_annotations` enter the experiment. Previously they were
            # computed here and then overwritten inside the fold loop by biases
            # built with `None` as the annotation count, so every configuration
            # of the sweep evaluated exactly the same features.
            # NOTE(review): the limited selection is applied to the test-time
            # biases only (training biases keep `None`), matching the frame
            # (`personal_df`) the original selection was computed on — confirm
            # this is the intended protocol.
            test_annotator_biases = _biases_for_all_annotators(selected_annotations)
            test_features = all_embeddings, annotator_features_onehot, test_annotator_biases.iloc[:, 1:].values

            for fold_num in range(10):
                future1_fold_num = fold_num
                future2_fold_num = (fold_num + 1) % 10
                held_out_folds = [future1_fold_num, future2_fold_num]

                # Training rows: everything outside the two held-out folds.
                train_mask = ~train_df.fold.isin(held_out_folds)
                present_X = train_df.loc[train_mask, ['text_idx', 'annotator_idx']].values
                present_y = train_df.loc[train_mask, emotion_columns].values

                dev_mask = dev_df.fold == future1_fold_num
                future1_X = dev_df.loc[dev_mask, ['text_idx', 'annotator_idx']].values
                future1_y = dev_df.loc[dev_mask, emotion_columns].values

                test_mask = test_df.fold == future2_fold_num
                future2_X = test_df.loc[test_mask, ['text_idx', 'annotator_idx']].values
                future2_y = test_df.loc[test_mask, emotion_columns].values

                # Training-time biases use the personal annotations of the
                # training folds only, with `None` as the annotation count.
                filtered_personal_df = personal_df[~personal_df.fold.isin(held_out_folds)]
                train_annotations = get_most_controversial_annotations(filtered_personal_df, emotion_columns, None)
                annotator_biases = _biases_for_all_annotators(train_annotations)

                features = all_embeddings, annotator_features_onehot, annotator_biases.iloc[:, 1:].values

                dataloader = prepare_dataloader(present_X, present_y, features, scenario)
                # Fetch a single batch once to read both feature widths.
                first_batch = next(iter(dataloader))
                text_feature_num = first_batch[0].size(-1)
                additional_feature_num = first_batch[1].size(-1)

                model = Net(classes_num, text_feature_num, additional_feature_num).to(device)
                classifier = Classifier(model=model, output_type='mse', output_dims=None).to(device)

                test_predictions, true_labels = predict(classifier,
                                                        present_X,
                                                        future1_X,
                                                        future2_X,
                                                        present_y,
                                                        future1_y,
                                                        future2_y,
                                                        features,
                                                        test_features,
                                                        scenario,
                                                        epochs=15)

                results[scenario][annotations_ordering][num_annotations][fold_num] = get_r2_score_from_results(test_predictions, true_labels)

In [None]:
# Flatten the nested `results` dict into one row per (scenario, sorting,
# num_annotations, fold) holding the mean R^2 over all emotions of that fold.
result_tuples = [
    (scenario, sorting, num_annotations, fold_num, np.mean(fold_scores))
    for scenario, by_sorting in results.items()
    for sorting, by_num_annotations in by_sorting.items()
    for num_annotations, by_fold in by_num_annotations.items()
    for fold_num, fold_scores in by_fold.items()
]

# Naming the columns in the constructor keeps this cell working when `results`
# is still empty; assigning five names to a zero-column frame afterwards
# raises a length-mismatch error. The explicit float dtype keeps the
# aggregation below well-defined for an empty frame as well.
results_df = pd.DataFrame(
    result_tuples,
    columns=['scenario', 'sorting', 'num_annotations', 'fold_num', 'R^2'],
).astype({'R^2': float})

# Average over folds, then express the score as a percentage.
results_df = results_df.groupby(['scenario', 'sorting', 'num_annotations'])['R^2'].mean().reset_index()

results_df['R^2'] = results_df['R^2'] * 100
# Legend label used for the 'std' ordering in the plot.
results_df.loc[results_df.sorting=='std', 'sorting'] = r'$contr^{{std}}$'
results_df = results_df.sort_values(by='sorting')

In [None]:
import seaborn as sns

sns.set_context("poster")
fig, ax = plt.subplots(figsize=(15, 10))

# One line per annotation ordering; the zero-annotation runs are shown
# separately as the dashed baseline below.
with_annotations = results_df.loc[results_df['num_annotations'] > 0]
g = sns.lineplot(
    data=with_annotations,
    x="num_annotations",
    y="R^2",
    hue='sorting',
    style='sorting',
    markers=True,
    dashes=False,
    ax=ax,
)
ax.set_xticks(range(15))

# Baseline: mean score of all runs that used zero annotations.
without_annotations = results_df.loc[results_df['num_annotations'] == 0]
baseline_score = without_annotations['R^2'].mean()
ax.axhline(baseline_score, ls='--', label='baseline')

ax.legend()
ax.set_xlabel('Number of annotations')
ax.set_ylabel('R-squared (%)')

print('S5 herBERT Regression, R^2 score')

In [None]:
# English display names for the score columns, in the order in which the
# scores are stored for each fold.
# NOTE(review): assumed to follow the order of `emotion_columns` — confirm.
english_emotion_columns = [
    'anticipation',
    'arousal',
    'joy',
    'sadness',
    'fear',
    'disgust',
    'surprise',
    'trust',
    'valence',
    'anger',
]

# One row per (scenario, sorting, num_annotations, fold, emotion).
result_tuples = [
    (scenario, sorting, num_annotations, fold_num, result, english_emotion_columns[emotion_num])
    for scenario, by_sorting in results.items()
    for sorting, by_num_annotations in by_sorting.items()
    for num_annotations, by_fold in by_num_annotations.items()
    for fold_num, fold_scores in by_fold.items()
    for emotion_num, result in enumerate(fold_scores)
]

# Naming the columns in the constructor keeps this cell working when `results`
# is still empty; assigning six names to a zero-column frame afterwards raises
# a length-mismatch error.
results_df = pd.DataFrame(
    result_tuples,
    columns=['scenario', 'sorting', 'num_annotations', 'fold_num', 'r^2', 'emotion'],
).astype({'r^2': float})
# Express the score as a percentage.
results_df['r^2'] = results_df['r^2'] * 100

In [None]:
fig, ax = plt.subplots(figsize=(15, 10))

# Per-emotion curves for the 'random' ordering only; zero-annotation runs are
# drawn separately as dashed baselines.
random_with_annotations = results_df.loc[
    (results_df['num_annotations'] > 0) & (results_df['sorting'] == 'random')
]
g = sns.lineplot(
    data=random_with_annotations,
    x="num_annotations",
    y="r^2",
    hue='emotion',
    ci=None,
    style='emotion',
    markers=True,
    dashes=False,
    ax=ax,
)
ax.set_xticks(range(15))

# One dashed baseline per emotion, coloured by its position in the current palette.
for position, emotion_name in enumerate(english_emotion_columns):
    zero_annotation_rows = results_df.loc[
        (results_df['num_annotations'] == 0) & (results_df['emotion'] == emotion_name)
    ]
    baseline_score = zero_annotation_rows['r^2'].mean()
    ax.axhline(baseline_score, ls='--', label= ' ', color=sns.color_palette()[position])

ax.legend(ncol=2)
ax.set_xlabel('Number of annotations')
ax.set_ylabel('R-squared (%)')

print('S5 herBERT Regression, R^2 score')