In [1]:
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
# Show text columns in full instead of truncating long sentences in rendered tables.
pd.set_option("display.max_colwidth", None)

# 1. Read in the predicted labels

In [2]:
# Tab-separated test-set prediction files, one per classifier, in the order
# bert, roberta, xlnet, rf, lr.
prediction_files = [
    '../results/classification_results/predicted_labels_testset_bert-base-uncased.tsv',
    '../results/classification_results/predicted_labels_testset_roberta-base.tsv',
    '../results/classification_results/predicted_labels_testset_xlnet-base-cased.tsv',
    '../results/classification_results/predicted_labels_testset_rf.tsv',
    '../results/classification_results/predicted_labels_testset_lr.tsv',
]

# One DataFrame per model; later cells use its `label`, `pred` and `text` columns.
bert, roberta, xlnet, rf, lr = [
    pd.read_csv(tsv_path, sep='\t') for tsv_path in prediction_files
]

**Change the labels to consistent format:**

The white-box models use the encoding 1/2/3/4 for the four figurative language classes, as in the dataset after preprocessing. For the black-box models, however, the encoding had to be changed to 0/1/2/3, because otherwise the transformer library doesn't work.

Now change the encoding for the black-box models to 1/2/3/4.

In [3]:
# Record on every row which model produced the prediction.
for model_name, frame in [
    ('bert', bert),
    ('roberta', roberta),
    ('xlnet', xlnet),
    ('rf', rf),
    ('lr', lr),
]:
    frame['model'] = model_name

# Shift predictions and gold labels of bert/roberta/xlnet up by one; rf and lr
# are left as loaded.
# NOTE: the shift is applied every time this cell runs, so execute it exactly
# once after (re)loading the prediction files.
for frame in (bert, roberta, xlnet):
    frame['pred'] = frame['pred'] + 1
    frame['label'] = frame['label'] + 1

# 2. Get misclassified and correctly classified samples

## 2.1 Misclassified

In [4]:
# Per model: the rows whose predicted class differs from the gold label.
mis_bert = bert.loc[bert['label'].ne(bert['pred'])]
mis_roberta = roberta.loc[roberta['label'].ne(roberta['pred'])]
mis_xlnet = xlnet.loc[xlnet['label'].ne(xlnet['pred'])]
mis_rf = rf.loc[rf['label'].ne(rf['pred'])]
mis_lr = lr.loc[lr['label'].ne(lr['pred'])]

In [5]:
# Report how many test examples each model got wrong (heading line, then count).
misclassification_report = [
    ('bert-misclassified:', mis_bert),
    ('roberta-misclassified:', mis_roberta),
    ('xlnet-misclassified:', mis_xlnet),
    ('rf-misclassified:', mis_rf),
    ('lr-misclassified:', mis_lr),
]
for heading, wrong_rows in misclassification_report:
    print(heading)
    print(len(wrong_rows))

bert-misclassified:
28
roberta-misclassified:
24
xlnet-misclassified:
30
rf-misclassified:
91
lr-misclassified:
88


**Concatenate misclassified samples from all models:**

In [6]:
# Stack the per-model misclassifications; a text shows up once for every model
# that got it wrong.
misclassified_frames = [mis_bert, mis_roberta, mis_xlnet, mis_rf, mis_lr]
mis = pd.concat(misclassified_frames, axis=0)

# One row per distinct text (the first occurrence in concat order is kept).
mis_distinct_texts = mis.drop_duplicates(subset='text')
print(mis_distinct_texts.value_counts(subset='label'))
mis_distinct_texts  # 136 distinct misclassified texts in the recorded run

label
2    72
4    47
3    10
1     7
dtype: int64


Unnamed: 0,label,text,pred,model
23,4,I don't approve of political jokes; I have seen too many of them get elected.,1,bert
34,4,Seems to be wide awake could of got up.,2,bert
48,2,The situation was cut and dried.,4,bert
59,4,Primal Scream Not a name which would lead you to expect self-absorbed acoustic doodlings .,2,bert
78,4,Paula perched herself against the cutting table trying to look nonchalant .,2,bert
...,...,...,...,...
697,4,She was dolled up yellow that day.,1,lr
706,4,She had to juggle her job and her children.,2,lr
721,4,Though his pace he checked and slackened.,2,lr
740,4,The play is going to burn few people.,2,lr


## 2.2 Correctly classified

In [7]:
# Per model: the rows whose predicted class equals the gold label.
good_bert = bert.loc[bert['label'].eq(bert['pred'])]
good_roberta = roberta.loc[roberta['label'].eq(roberta['pred'])]
good_xlnet = xlnet.loc[xlnet['label'].eq(xlnet['pred'])]
good_rf = rf.loc[rf['label'].eq(rf['pred'])]
good_lr = lr.loc[lr['label'].eq(lr['pred'])]

**Concatenate correctly classified samples from all models:**

In [8]:
# Stack the per-model correct predictions; a text shows up once for every model
# that classified it correctly, so these counts are per (text, model) pair.
correct_frames = [good_bert, good_roberta, good_xlnet, good_rf, good_lr]
good = pd.concat(correct_frames, axis=0)
good.loc[:, 'label'].value_counts()

1    2239
2     712
3     526
4     512
Name: label, dtype: int64

# 3. Extract instances that are misclassified by at least two models

**Get how many models misclassified each example:**

In [9]:
# For every misclassified text, count how many *distinct* models got it wrong.
# `nunique()` is used rather than `count()` so that a text occurring more than
# once in the test set cannot inflate the number of models; when every text is
# unique per model the two give the same result.
# `reset_index(name=...)` yields the two columns `text` and `n_models` directly.
mis_n_models = (
    mis.groupby('text')['model']
    .nunique()
    .reset_index(name='n_models')
)
mis_n_models

Unnamed: 0,text,n_models
0,A thought forms in my mind and it's like a sun-filled day .,1
1,"After a couple of more songs, a familiar-looking blonde girl that had been trying to catch my eye all night asked me to play lips of an angel.",2
2,"After the vibrant colors of the garden, the stone hall seemed as dark and silent as the grave.",5
3,All fire-flush 'd when forest trees smoldered.,3
4,"All new machines, even french ones, are apt to have teething troubles.”",2
...,...,...
131,the ayes have it.,2
132,this problem has migrated to the uk.,1
133,"tough titty, vamp,miranda said.",1
134,"well don't turn it on too loud , will you ?",4


In [10]:
# Attach n_models to every misclassified row, then keep a single row per text.
# drop_duplicates keeps the first occurrence, so `pred` and `model` in the
# result belong to whichever model's row comes first for that text.
mis_updated = (
    mis.merge(mis_n_models, on='text')
    .drop_duplicates(subset='text')
)
mis_updated.loc[:, 'label'].value_counts()

2    72
4    47
3    10
1     7
Name: label, dtype: int64

In [11]:
# Texts whose n_models is anything other than 1, one row per text.
missed_by_several = mis_updated['n_models'].ne(1)
mis_updated.loc[missed_by_several].drop_duplicates(subset='text')

Unnamed: 0,label,text,pred,model,n_models
0,4,I don't approve of political jokes; I have seen too many of them get elected.,1,bert,5
5,4,Seems to be wide awake could of got up.,2,bert,5
10,2,The situation was cut and dried.,4,bert,2
12,4,Primal Scream Not a name which would lead you to expect self-absorbed acoustic doodlings .,2,bert,3
15,4,Paula perched herself against the cutting table trying to look nonchalant .,2,bert,2
...,...,...,...,...,...
222,4,"And rest can never dwell , hope never comes.",2,rf,2
224,2,Hearing it full whack katherine felt proud being part of it despite her only contribution being that of providing the investor.,1,rf,2
226,2,"day in, day out, I work hard to put food on the table, and then I cop this from the missus.",1,rf,2
229,2,your guess is as good as mine.,3,rf,2


In [12]:
# Per-class count of the texts whose n_models is anything other than 1.
multi_model_misses = mis_updated.loc[mis_updated['n_models'].ne(1)].drop_duplicates(subset='text')
multi_model_misses.value_counts(subset='label')

label
2    42
4    26
1     2
3     2
dtype: int64

# 5. Sample 7 misclassified and 7 correctly classified instances for each class

## 5.1 Misclassified

In [13]:
# Candidate pool for the "difficult" sample: rows of mis_updated whose
# n_models is not 1.
mis_to_select_from = mis_updated.loc[mis_updated['n_models'].ne(1)]
print(mis_to_select_from.loc[:, 'label'].value_counts())
mis_to_select_from

2    42
4    26
3     2
1     2
Name: label, dtype: int64


Unnamed: 0,label,text,pred,model,n_models
0,4,I don't approve of political jokes; I have seen too many of them get elected.,1,bert,5
5,4,Seems to be wide awake could of got up.,2,bert,5
10,2,The situation was cut and dried.,4,bert,2
12,4,Primal Scream Not a name which would lead you to expect self-absorbed acoustic doodlings .,2,bert,3
15,4,Paula perched herself against the cutting table trying to look nonchalant .,2,bert,2
...,...,...,...,...,...
222,4,"And rest can never dwell , hope never comes.",2,rf,2
224,2,Hearing it full whack katherine felt proud being part of it despite her only contribution being that of providing the investor.,1,rf,2
226,2,"day in, day out, I work hard to put food on the table, and then I cop this from the missus.",1,rf,2
229,2,your guess is as good as mine.,3,rf,2


In [14]:
# Columns carried into the annotation sample.
mis_sample_cols = ['text', 'label', 'model']

# Candidate pool split by gold label; the variable names below pair label
# 1 with sarcasm, 2 with idiom, 3 with simile and 4 with metaphor.
mis_pool_by_label = {
    class_id: mis_to_select_from.loc[mis_to_select_from['label'].eq(class_id)]
    for class_id in (1, 2, 3, 4)
}

# Labels 1 and 3 keep every candidate; labels 2 and 4 are down-sampled to 7
# rows each with a fixed seed.
sarc_mis = mis_pool_by_label[1][mis_sample_cols]
idiom_mis = mis_pool_by_label[2].sample(n=7, random_state=45)[mis_sample_cols]
simile_mis = mis_pool_by_label[3][mis_sample_cols]
metaphor_mis = mis_pool_by_label[4].sample(n=7, random_state=45)[mis_sample_cols]

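**Only 2 sarcasm and 2 simile instances were misclassified by more than one model, so add 5 random instances misclassified by exactly one model to each of these two classes (7 per class in total):**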

In [15]:
# Texts that exactly one model misclassified, one row per text.
mis_to_add = mis_updated.loc[mis_updated['n_models'].eq(1)].drop_duplicates(subset='text')

# Draw 5 of them for label 1 and 5 for label 3 with a fixed seed.
# `sample` raises ValueError if a class has fewer than 5 such rows.
top_up_cols = ['text', 'label', 'model']
sarc_mis_add = (
    mis_to_add.loc[mis_to_add['label'].eq(1)]
    .sample(n=5, random_state=45)[top_up_cols]
)
simile_mis_add = (
    mis_to_add.loc[mis_to_add['label'].eq(3)]
    .sample(n=5, random_state=45)[top_up_cols]
)

In [16]:
# Append the single-model misses to the multi-model ones for labels 1 and 3.
# Both names are rebound to their own extended version, so re-running this cell
# would otherwise append the same rows a second time; de-duplicating on `text`
# keeps the cell idempotent. On the first run it removes nothing provided the
# input frames each hold one row per text and share no texts.
sarc_mis = pd.concat([sarc_mis, sarc_mis_add], axis=0).drop_duplicates(subset='text')
simile_mis = pd.concat([simile_mis, simile_mis_add], axis=0).drop_duplicates(subset='text')

## 5.2 Correctly classified

In [17]:
# For every correctly classified text, count how many *distinct* models got it
# right. `nunique()` is used rather than `count()` so that a text occurring
# more than once in the test set cannot push the value above the number of
# models; when every text is unique per model the two give the same result.
# `reset_index(name=...)` yields the two columns `text` and `n_models` directly.
good_n_models = (
    good.groupby('text')['model']
    .nunique()
    .reset_index(name='n_models')
)
good_n_models

Unnamed: 0,text,n_models
0,"""Cross my heart, buddy; I'm telling you the whole truth,"" Melanie swore.",5
1,"A faint smile curved her lips, and then I heard her voice in my mind like a welcome breeze on a summer day.",5
2,A friend let me borrow his old headphones and now I feel terrific about my sense of responsibility because I lost them,5
3,A thought forms in my mind and it's like a sun-filled day .,4
4,A weather vane caps the building.,5
...,...,...
837,"tough titty, vamp,miranda said.",4
838,we were sinking in an ocean of grass.,5
839,"well don't turn it on too loud , will you ?",1
840,your guess is as good as mine.,3


In [18]:
# How many texts were classified correctly by 1, 2, ... models.
good_n_models.loc[:, 'n_models'].value_counts()

5    714
4     64
3     42
2     15
1      7
Name: n_models, dtype: int64

In [19]:
# Attach n_models to every correctly classified row (still one row per
# text/model pair here).
good_updated = good.merge(good_n_models, on='text')

# Preview: texts with n_models == 5 (five model frames were loaded), one row per text.
right_for_all_five = good_updated['n_models'].eq(5)
good_updated.loc[right_for_all_five].drop_duplicates(subset='text')

Unnamed: 0,label,text,pred,model,n_models
0,1,My considerate roommate cooked some meat with a beautifully weird smell in our apartment and I almost threw up,1,bert,5
5,3,"Turns out, tag between super heroes can get like a sibling rivalry.",3,bert,5
10,2,"Then as I gathered courage to go down I realized there was a fire across the street, but it seems to be in hand now.",2,bert,5
15,1,"It's absolutely fine that I got an older car for my 16th birthday, while one of my best friend who got a BRAND new BMW!",1,bert,5
20,4,He was in a black mood.,4,bert,5
...,...,...,...,...,...
3918,4,The speech crowned the meeting.,4,bert,5
3923,1,I was overjoyed when someone backed into my new SUV while driving through the parking lot the other day and,1,bert,5
3928,1,I almost crashed my car because the guy that cut me off was being so careful.,1,bert,5
3937,1,I was so delighted when I came home to find my dog rooting through the garbage can,1,bert,5


In [20]:
# Candidate pool for the "easy" sample: texts with n_models == 5, one row per
# text (the first occurrence is kept).
good_to_select_from = (
    good_updated.loc[good_updated['n_models'].eq(5)]
    .drop_duplicates(subset='text')
)

# Draw 7 texts per gold label with a fixed seed, keeping only the columns
# carried into the annotation sample.
good_sample_cols = ['text', 'label', 'model']
good_picks_by_label = {
    class_id: (
        good_to_select_from.loc[good_to_select_from['label'].eq(class_id)]
        .sample(n=7, random_state=45)[good_sample_cols]
    )
    for class_id in (1, 2, 3, 4)
}
sarc_good = good_picks_by_label[1]
idiom_good = good_picks_by_label[2]
simile_good = good_picks_by_label[3]
metaphor_good = good_picks_by_label[4]

# 6. Write out final samples as dataframe

In [21]:
# All misclassified picks, flagged as difficult.
difficult_parts = [sarc_mis, idiom_mis, simile_mis, metaphor_mis]
sample_temp = pd.concat(difficult_parts, axis=0).assign(difficult="yes")

In [22]:
# All correctly classified picks, flagged as not difficult.
easy_parts = [sarc_good, idiom_good, simile_good, metaphor_good]
sample_temp_2 = pd.concat(easy_parts, axis=0).assign(difficult="no")

In [23]:
# Combine difficult and easy picks, then shuffle the rows with a fixed seed.
# The index values come from two separately built frames, so they are not
# guaranteed to be unique across the combined sample.
sample_final = shuffle(
    pd.concat([sample_temp, sample_temp_2], axis=0),
    random_state=45,
)
sample_final

Unnamed: 0,text,label,model,difficult
1598,I wanted that gift as much as cancer,3,bert,no
140,I was encouraged not to get a response back from the company after applying for the job,1,rf,yes
1812,I felt like a hero when I realized that I brought a present for a baby boy to a baby shower for a baby girl,1,bert,no
217,The armor was tough as glass,3,rf,yes
137,That is why it is so important to remain like a planted oak,3,rf,yes
138,But today carly wanted to feel like a kick-ass chick in a superhero movie.,3,rf,yes
2503,They were burning with desire.,4,bert,no
3058,The tax cut will fertilize the economy.,4,bert,no
62,He spent three years seconded to a lame duck industry.,2,bert,yes
205,And made answer very gravely.,4,rf,yes


In [24]:
# Write the shuffled annotation sample to disk; the DataFrame index is written
# as the first CSV column (to_csv default).
annotation_csv_path = '../results/human_annotation/samples_to_annotate.csv'
sample_final.to_csv(annotation_csv_path)