In [7]:
import os
import json
import pandas as pd
import numpy as np
import torch

In [2]:
repo = os.path.dirname(os.path.dirname(os.path.abspath('__file__')))

In [11]:
pred_base = os.path.join(repo, 'predictions', 'roberta-large-mnli_only')
data_base = os.path.join(repo, 'tasks', 'data')

In [15]:
pred_dirs = [
    'baseline_5',
    'LotS_5',
    'LitL_5',
    'mnlieval_baseline_1',
    'anlieval_baseline_1',
    'eval_baseline_1',
]

data_dirs = [
    os.path.join('baseline_5', 'val_round5_base_combined.jsonl'),
    os.path.join('LotS_5', 'val_round5_LotS_combined.jsonl'),
    os.path.join('LitL_5', 'val_round5_LitL_combined.jsonl'),
    os.path.join('mnli_mismatched', 'val_mismatched_mnli.jsonl'),
    os.path.join('anli_combined', 'val_anli.jsonl'),
    os.path.join('iterative_eval', 'val_itercombined.jsonl'),
]

In [13]:
def read_jsonl(file):
    with open(file, 'r') as f:
        return [json.loads(line) for line in f.readlines()]

In [16]:
preds_and_data = {}

for pred, data in zip(pred_dirs, data_dirs):
    temp_pred = torch.load(os.path.join(pred_base, pred, 'val_preds.p'))
    temp_data = read_jsonl(os.path.join(data_base, data))
    preds_and_data[pred.split('_')[0]] = {'pred': temp_pred, 'data':temp_data}

In [24]:
print(type(preds_and_data['baseline']['pred']['mnli']))
print(preds_and_data['baseline']['pred']['mnli']['preds'])

<class 'dict'>
[0 0 0 ... 2 0 0]


In [86]:
def get_acc(
    preds,
    data,
    int2pred={0:'contradiction', 1:'entailment', 2:'neutral'}
):
    df = pd.DataFrame(data)
    df['preds'] = pd.Series(preds).apply(lambda x: int2pred[x])
    df['correct'] = df['label'].eq(df['preds'])
    return df

In [87]:
accs = {
    key: get_acc(val['pred']['mnli']['preds'], val['data']) for key, val in preds_and_data.items()
}

# Overall


In [94]:
for key, acc in accs.items():
    print(f"{key}: {acc['correct'].sum()/acc.shape[0]:.4f}")

baseline: 0.8781
LotS: 0.8323
LitL: 0.8482
mnlieval: 0.8972
anlieval: 0.3366
eval: 0.6811


# HANS non-entailment

In [73]:
hans = accs['eval']
hans = hans.loc[hans['dataset'] == 'hans', :]

In [78]:
tempdict = {'contradiction':'contradiction', 'neutral':'contradiction', 'entailment':'entailment'}
hans['preds'] = hans['preds'].apply(lambda x: tempdict[x])
hans['case'] = hans['case'].apply(lambda x: x[0])
hans['correct'] = hans['label'].eq(hans['preds'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [95]:
for case in hans['case'].unique():
    temp = hans.loc[hans['case'] == case, :]
    temp2 = temp.loc[temp['label'] == 'contradiction', :]
    print(f"{case} non-entailment: {temp2['correct'].sum()/temp2.shape[0]:.4f}")

lexical_overlap non-entailment: 0.9102
subsequence non-entailment: 0.2568
constituent non-entailment: 0.2478
