In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from sklearn.metrics import classification_report, r2_score
import pickle

from src.annotator_features import get_most_controversial_annotations, get_annotator_biases, get_text_entropies
from src.train import prepare_dataloader, Classifier, predict
from src.models import Net

import torch
# Compute device for the experiments; the models and classifiers built below are moved onto it via `.to(device)`.
device = torch.device("cpu")

%load_ext autoreload
%autoreload 2

## Data loading

In [None]:
# Directory containing the selected-texts CSV exports read below.
base_path = './data/selected_texts/'

# Raw tables. The first column of the texts file is dropped on load.
texts_df = (
    pd.read_csv(base_path + 'cawi2_selected_texts.csv', sep=',')
    .iloc[:, 1:]
    .copy()
)
annotations_df = pd.read_csv(base_path + 'cawi2_selected_annotations.csv', sep=',')
annotators_df = pd.read_csv(base_path + 'cawi2_selected_annotators.csv', sep=',')
folds_df = pd.read_csv(base_path + 'annotator_folds.csv', sep=',')

# Emotion dimensions: every annotation column after the first two.
emotion_columns = annotations_df.columns[2:].tolist()

# Texts joined with their annotations and fold assignments; incomplete rows are dropped.
merged_annotations = (
    texts_df
    .merge(annotations_df)
    .merge(folds_df)
    .dropna()
)
# Keep only annotations whose annotator appears in the annotators table.
merged_annotations = merged_annotations.loc[
    merged_annotations['annotator_id'].isin(annotators_df['identyfikator'])
].copy()

# Rows of the 'past' split; used further down to derive annotator biases.
personal_df = merged_annotations[merged_annotations['split'] == 'past']

In [None]:
def normlize_annotations(df, max_1=False, columns=None):
    """Shift annotation columns so each starts at zero, optionally scaling them to [0, 1].

    The minimum (and, with ``max_1``, the maximum) of every column is computed
    from the rows of ``df`` itself, so the statistics are specific to the frame
    that is passed in.

    Args:
        df: DataFrame holding the annotation columns. It is not modified.
        max_1: When True, each shifted column is additionally divided by its
            maximum so that the largest value becomes 1.
        columns: Labels of the columns to normalise. Defaults to the
            notebook-level ``emotion_columns`` list.

    Returns:
        A copy of ``df`` in which the selected columns are normalised and all
        other columns are left as they were.
    """
    if columns is None:
        columns = emotion_columns
    columns = list(columns)

    normalized = df.copy()
    shifted = normalized[columns] - normalized[columns].min(axis=0)

    if max_1:
        maxes = shifted.max(axis=0)
        # A constant column has a maximum of 0 after the shift; divide it by 1
        # so it stays at 0 instead of becoming NaN through 0 / 0.
        shifted = shifted / maxes.where(maxes != 0, 1)

    # Assign whole columns (rather than writing through .loc) so the
    # integer -> float dtype change of scaled values is applied cleanly.
    normalized[columns] = shifted
    return normalized

## Model embeddings

In [None]:
from src.embeddings import prepare_embeddings

# Project helper; presumably produces the per-language embedding pickles read below — TODO confirm.
prepare_embeddings()

# Languages for which a `<language>_xlm_embeddings.p` file is loaded.
languages = ['english',
 'dutch',
 'french',
 'german',
 'italian',
 'russian',
 'portuguese',
 'spanish']

# language name -> unpickled embeddings object
language_embeddings = {}
for language in languages:
    # NOTE: unpickling can execute arbitrary code; only load embedding files from a trusted source.
    # The context manager closes each file handle as soon as it has been read.
    with open(f'./data/multilingual/{language}_xlm_embeddings.p', 'rb') as embeddings_file:
        language_embeddings[language] = pickle.load(embeddings_file)

## Preprocessing

In [None]:
# Annotator attributes (every column but the first); missing values become their own 'empty' category.
annotator_features = annotators_df.iloc[:, 1:].fillna('empty')

# One indicator matrix per attribute column ...
onehots = [
    pd.get_dummies(annotator_features[col]).values
    for col in annotator_features.columns
]

# ... stacked side by side into a single feature matrix with one row per annotator row.
annotator_features_onehot = np.hstack(onehots)

In [None]:
# Display the dimensions of the stacked one-hot annotator feature matrix.
annotator_features_onehot.shape

In [None]:
# Emotion annotation values only; any missing value is treated as its own 'empty' category.
annotation_values_df = merged_annotations.loc[:, emotion_columns].fillna('empty')

# Number of distinct values (one-hot width) observed for each emotion column.
class_dims = [
    pd.get_dummies(annotation_values_df[col]).shape[1]
    for col in annotation_values_df.columns
]

# Total width of the concatenated one-hot output.
sum(class_dims)

## Reset ids to enumerate from 0  

In [None]:
# text_id -> row label of that text in texts_df
text_id_idx_dict = (
    texts_df['text_id']
    .reset_index()
    .set_index('text_id')['index']
    .to_dict()
)

# annotator identifier -> row label of that annotator in annotators_df
annotator_id_idx_dict = (
    annotators_df['identyfikator']
    .reset_index()
    .set_index('identyfikator')['index']
    .to_dict()
)

## Experiments for classification

In [None]:
def get_f1_score_from_results(test_predictions, true_labels, class_dims):
    """Build one classification report per output dimension.

    The columns of ``test_predictions`` are read as consecutive groups whose
    widths are given by ``class_dims``; within each group the column with the
    highest value is taken as the predicted class.

    Args:
        test_predictions: 2-D tensor of per-class scores, groups laid out side by side.
        true_labels: 2-D tensor whose column ``i`` holds the true class of dimension ``i``.
        class_dims: Sequence with the number of classes of each dimension.

    Returns:
        Dict mapping the dimension index to the ``classification_report``
        output (as a dict) for that dimension.
    """
    reports = {}
    offset = 0
    for dim_idx, width in enumerate(class_dims):
        # Columns [offset, offset + width) belong to the current dimension.
        dim_scores = test_predictions[:, offset:offset + width]
        predicted = dim_scores.argmax(dim=1)

        reports[dim_idx] = classification_report(
            true_labels[:, dim_idx].cpu(),
            predicted.cpu(),
            output_dict=True,
        )
        offset += width

    return reports

In [None]:
# Accumulator for the classification experiment results; the experiment loop below keys it by scenario at the top level.
results = {}

In [None]:
# Select each split and shift its emotion columns to start at zero.
# Note: the shift is computed from each split's own rows, because the helper is called once per split.
train_df = normlize_annotations(
    merged_annotations.loc[merged_annotations['split'] == 'present'].copy()
)
dev_df = normlize_annotations(
    merged_annotations.loc[merged_annotations['split'] == 'future1'].copy()
)
test_df = normlize_annotations(
    merged_annotations.loc[merged_annotations['split'] == 'future2'].copy()
)

# Translate the raw ids into the index values stored in the lookup dictionaries.
for split_df in (train_df, dev_df, test_df):
    split_df['text_idx'] = split_df['text_id'].apply(text_id_idx_dict.__getitem__)
    split_df['annotator_idx'] = split_df['annotator_id'].apply(annotator_id_idx_dict.__getitem__)

In [None]:
# Classification experiments: for every scenario and language, train on the
# 'present' split, validate on 'future1', test on 'future2', and store one
# classification report per emotion dimension under
# results[scenario][language][fold_num].
for scenario in ['s0', 's1', 's2', 's3', 's4', 's5']:
    results[scenario] = {}
    for language in languages:
        results[scenario][language] = {}

        # NOTE(review): only the first fold is evaluated here and training uses
        # epochs=2, while the regression experiment further down runs 10 folds
        # with 15 epochs — confirm this is intentional.
        for fold_num in range(1):
            future1_fold_num = fold_num
            future2_fold_num = (fold_num + 1) % 10
            held_out_folds = [future1_fold_num, future2_fold_num]

            # Training rows: everything outside the validation and test folds.
            in_present = ~train_df.fold.isin(held_out_folds)
            present_X = train_df.loc[in_present, ['text_idx', 'annotator_idx']].values
            present_y = train_df.loc[in_present, emotion_columns].values

            in_future1 = dev_df.fold == future1_fold_num
            future1_X = dev_df.loc[in_future1, ['text_idx', 'annotator_idx']].values
            future1_y = dev_df.loc[in_future1, emotion_columns].values

            in_future2 = test_df.fold == future2_fold_num
            future2_X = test_df.loc[in_future2, ['text_idx', 'annotator_idx']].values
            future2_y = test_df.loc[in_future2, emotion_columns].values

            # Biases for the train dataset: derived from 'past' annotations
            # that lie outside the two held-out folds.
            filtered_personal_df = personal_df[~personal_df.fold.isin(held_out_folds)]
            filtered_annotations = get_most_controversial_annotations(filtered_personal_df, emotion_columns, None)
            annotator_biases = get_annotator_biases(filtered_annotations, emotion_columns)
            # Left-join onto the full annotator list so every annotator has a
            # row; annotators without a computed bias get 0.
            annotator_biases = (pd.DataFrame(annotators_df.loc[:, 'identyfikator'])
                                .merge(annotator_biases, right_on='annotator_id', left_on='identyfikator', how='left')
                                .fillna(0))

            # Biases for the test dataset: derived from all 'past' annotations.
            filtered_annotations = get_most_controversial_annotations(personal_df, emotion_columns, None)
            test_annotator_biases = get_annotator_biases(filtered_annotations, emotion_columns)
            test_annotator_biases = (pd.DataFrame(annotators_df.loc[:, 'identyfikator'])
                                .merge(test_annotator_biases, right_on='annotator_id', left_on='identyfikator', how='left')
                                .fillna(0))

            # Feature triples: (text embeddings, annotator one-hots, annotator
            # biases without the leading identifier column).
            features = language_embeddings[language], annotator_features_onehot, annotator_biases.iloc[:, 1:].values
            test_features = language_embeddings[language], annotator_features_onehot, test_annotator_biases.iloc[:, 1:].values

            if scenario == 's0':
                # Baseline: predict the rounded training mean of every emotion dimension.
                s0_predictions = np.tile(present_y.mean(axis=0).round(), (future2_y.shape[0], 1))
                # Stored under the language key like every other scenario; the
                # aggregation cell walks results[scenario][language][fold_num].
                results[scenario][language][fold_num] = [
                    classification_report(future2_y[:, i], s0_predictions[:, i], output_dict=True)
                    for i in range(future2_y.shape[1])
                ]
            else:
                dataloader = prepare_dataloader(present_X, present_y, features, scenario)
                # Peek at a single batch to read off the input feature widths.
                first_batch = next(iter(dataloader))
                text_feature_num = first_batch[0].size(-1)
                additional_feature_num = first_batch[1].size(-1)

                # One output unit per class of every emotion dimension.
                classes_num = sum(class_dims)
                model = Net(classes_num, text_feature_num, additional_feature_num).to(device)
                classifier = Classifier(model=model, output_type='onehot', output_dims=class_dims).to(device)

                test_predictions, true_labels = predict(classifier,
                                                        present_X,
                                                        future1_X,
                                                        future2_X,
                                                        present_y,
                                                        future1_y,
                                                        future2_y,
                                                        features,
                                                        test_features,
                                                        scenario,
                                                        epochs=2)

                results[scenario][language][fold_num] = get_f1_score_from_results(test_predictions, true_labels, class_dims)

In [None]:
# Flatten results[scenario][language][fold_num] into one row per fold, using
# the macro-F1 averaged over all emotion dimensions of that fold.
result_tuples = []
for scenario, scenario_results in results.items():
    for language, language_results in scenario_results.items():
        for fold_num, fold_results in language_results.items():
            # fold_results holds one classification report per emotion
            # dimension, addressable by position 0..n-1.
            per_dim_f1 = [fold_results[i]['macro avg']['f1-score'] for i in range(len(fold_results))]
            result_tuples.append((scenario, language, fold_num, np.mean(per_dim_f1)))

results_df = pd.DataFrame(result_tuples, columns=['scenario', 'language', 'fold_num', 'macro f1'])
results_df = results_df.groupby(['scenario', 'language'])['macro f1'].mean().reset_index()

results_df = results_df.pivot(index='language', columns='scenario', values='macro f1')

# Explicit scenario -> display label mapping, so the relabelling does not depend
# on the column order or on every scenario being present.
# NOTE(review): code scenario 's2' is shown as 's3 (PEB)' and 's3' as
# 's2 (TXT+DEM)', exactly as in the original positional relabelling — confirm
# this crossover is intended.
scenario_labels = {
    's0': 's0 (AVG)',
    's1': 's1 (TXT)',
    's2': 's3 (PEB)',
    's3': 's2 (TXT+DEM)',
    's4': 's4 (TXT+PEB)',
    's5': 's5 (TXT+PEB+DEM)',
}
results_df = results_df.rename(columns=scenario_labels).rename_axis(columns=None)
results_df = results_df.reindex(sorted(results_df.columns), axis=1)

print('Macro F1')
# Report as percentages.
results_df = results_df * 100
results_df

## Experiments for regression

In [None]:
def get_r2_score_from_results(test_predictions, true_labels):
    """Compute the R^2 score of every predicted output column.

    Args:
        test_predictions: 2-D tensor of predicted values, one column per output.
        true_labels: 2-D tensor of target values with matching columns.

    Returns:
        List with one ``r2_score`` value per column of ``test_predictions``.
    """
    targets = true_labels.cpu().numpy()
    predicted = test_predictions.cpu().numpy()

    scores = []
    for col in range(predicted.shape[1]):
        scores.append(r2_score(targets[:, col], predicted[:, col]))

    return scores

In [None]:
# Fresh accumulator for the regression experiments; this rebinds `results`, so the classification results held under the same name are no longer reachable through it.
results = {}

In [None]:
# Select each split and rescale its emotion columns (shift to zero, then divide by the maximum).
# Note: the statistics come from each split's own rows, because the helper is called once per split.
train_df = normlize_annotations(
    merged_annotations.loc[merged_annotations['split'] == 'present'].copy(), True
)
dev_df = normlize_annotations(
    merged_annotations.loc[merged_annotations['split'] == 'future1'].copy(), True
)
test_df = normlize_annotations(
    merged_annotations.loc[merged_annotations['split'] == 'future2'].copy(), True
)

# Translate the raw ids into the index values stored in the lookup dictionaries.
for split_df in (train_df, dev_df, test_df):
    split_df['text_idx'] = split_df['text_id'].apply(text_id_idx_dict.__getitem__)
    split_df['annotator_idx'] = split_df['annotator_id'].apply(annotator_id_idx_dict.__getitem__)

In [None]:
# Regression experiments: for every scenario, language and fold, train on the
# 'present' split, validate on 'future1', test on 'future2', and store the
# per-emotion R^2 scores under results[scenario][language][fold_num].
for scenario in ['s0', 's1', 's2', 's3', 's4', 's5']:
    results[scenario] = {}
    for language in languages:
        results[scenario][language] = {}
        for fold_num in range(10):
            future1_fold_num = fold_num
            future2_fold_num = (fold_num + 1) % 10
            held_out_folds = [future1_fold_num, future2_fold_num]

            # Training rows: everything outside the validation and test folds.
            in_present = ~train_df.fold.isin(held_out_folds)
            present_X = train_df.loc[in_present, ['text_idx', 'annotator_idx']].values
            present_y = train_df.loc[in_present, emotion_columns].values

            in_future1 = dev_df.fold == future1_fold_num
            future1_X = dev_df.loc[in_future1, ['text_idx', 'annotator_idx']].values
            future1_y = dev_df.loc[in_future1, emotion_columns].values

            in_future2 = test_df.fold == future2_fold_num
            future2_X = test_df.loc[in_future2, ['text_idx', 'annotator_idx']].values
            future2_y = test_df.loc[in_future2, emotion_columns].values

            # Biases for the train dataset: derived from 'past' annotations
            # that lie outside the two held-out folds.
            filtered_personal_df = personal_df[~personal_df.fold.isin(held_out_folds)]
            filtered_annotations = get_most_controversial_annotations(filtered_personal_df, emotion_columns, None)
            annotator_biases = get_annotator_biases(filtered_annotations, emotion_columns)
            # Left-join onto the full annotator list so every annotator has a
            # row; annotators without a computed bias get 0.
            annotator_biases = (pd.DataFrame(annotators_df.loc[:, 'identyfikator'])
                                .merge(annotator_biases, right_on='annotator_id', left_on='identyfikator', how='left')
                                .fillna(0))

            # Biases for the test dataset: derived from all 'past' annotations.
            filtered_annotations = get_most_controversial_annotations(personal_df, emotion_columns, None)
            test_annotator_biases = get_annotator_biases(filtered_annotations, emotion_columns)
            test_annotator_biases = (pd.DataFrame(annotators_df.loc[:, 'identyfikator'])
                                .merge(test_annotator_biases, right_on='annotator_id', left_on='identyfikator', how='left')
                                .fillna(0))

            # Feature triples: (text embeddings, annotator one-hots, annotator
            # biases without the leading identifier column).
            features = language_embeddings[language], annotator_features_onehot, annotator_biases.iloc[:, 1:].values
            test_features = language_embeddings[language], annotator_features_onehot, test_annotator_biases.iloc[:, 1:].values

            if scenario == 's0':
                # Baseline: predict the training mean of every emotion dimension.
                s0_predictions = np.tile(present_y.mean(axis=0), (future2_y.shape[0], 1))
                # Stored under the language key like every other scenario; the
                # aggregation cell walks results[scenario][language][fold_num].
                results[scenario][language][fold_num] = np.array([
                    r2_score(future2_y[:, i], s0_predictions[:, i])
                    for i in range(future2_y.shape[1])
                ])

            else:
                dataloader = prepare_dataloader(present_X, present_y, features, scenario)
                # Peek at a single batch to read off the input feature widths.
                first_batch = next(iter(dataloader))
                text_feature_num = first_batch[0].size(-1)
                additional_feature_num = first_batch[1].size(-1)

                # One regression output per emotion column of the targets.
                classes_num = len(emotion_columns)
                model = Net(classes_num, text_feature_num, additional_feature_num).to(device)
                classifier = Classifier(model=model, output_type='mse', output_dims=None).to(device)

                test_predictions, true_labels = predict(classifier,
                                                        present_X,
                                                        future1_X,
                                                        future2_X,
                                                        present_y,
                                                        future1_y,
                                                        future2_y,
                                                        features,
                                                        test_features,
                                                        scenario,
                                                        epochs=15)

                results[scenario][language][fold_num] = get_r2_score_from_results(test_predictions, true_labels)

In [None]:
# Flatten results[scenario][language][fold_num] into one row per fold, using
# the R^2 averaged over all emotion dimensions of that fold.
result_tuples = []
for scenario, scenario_results in results.items():
    for language, language_results in scenario_results.items():
        for fold_num, fold_results in language_results.items():
            # fold_results holds one R^2 value per emotion dimension.
            per_dim_r2 = [fold_results[i] for i in range(len(fold_results))]
            result_tuples.append((scenario, language, fold_num, np.mean(per_dim_r2)))

results_df = pd.DataFrame(result_tuples, columns=['scenario', 'language', 'fold_num', 'r^2'])
results_df = results_df.groupby(['scenario', 'language'])['r^2'].mean().reset_index()

results_df = results_df.pivot(index='language', columns='scenario', values='r^2')

# Explicit scenario -> display label mapping, so the relabelling does not depend
# on the column order or on every scenario being present.
# NOTE(review): code scenario 's2' is shown as 's3 (PEB)' and 's3' as
# 's2 (TXT+DEM)', exactly as in the original positional relabelling — confirm
# this crossover is intended.
scenario_labels = {
    's0': 's0',
    's1': 's1 (TXT)',
    's2': 's3 (PEB)',
    's3': 's2 (TXT+DEM)',
    's4': 's4 (TXT+PEB)',
    's5': 's5 (TXT+PEB+DEM)',
}
results_df = results_df.rename(columns=scenario_labels).rename_axis(columns=None)
results_df = results_df.reindex(sorted(results_df.columns), axis=1)

print('R score')
# Report as percentages.
results_df = results_df * 100
results_df.round(2)