### Load all examples
 * `generate_uids=True`: return UIDs per example
 * `tokenizer=None`: return raw (untokenized) examples

In [1]:
from utils import *
# Show full cell contents instead of truncating long sentences.
# `None` means "no limit"; the older `-1` sentinel was deprecated in pandas 1.0
# and is rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)

In [2]:
from metal.mmtl.utils.preprocess import load_tsv, get_task_tsv_config

In [3]:
# Look up the TSV location and column layout for the COLA dev split.
config = get_task_tsv_config('COLA', 'dev')

# These settings are taken verbatim from the task config.
config_keys = ("tsv_path", "sent1_idx", "sent2_idx", "label_idx", "skip_rows", "label_fn")
load_kwargs = {key: config[key] for key in config_keys}

# tokenizer=None -> raw (untokenized) examples; generate_uids=True -> UIDs per example.
(examples, labels), uids = load_tsv(
    tokenizer=None,
    delimiter="\t",
    generate_uids=True,
    **load_kwargs
)

# The three sequences must line up one-to-one.
assert len(examples) == len(labels) == len(uids)

HBox(children=(IntProgress(value=0, max=1042), HTML(value='')))




### Define Proper Nouns based on Entities
Ref: https://spacy.io/api/annotation#named-entities

In [4]:
import spacy
nlp = spacy.load('xx_ent_wiki_sm')

def get_proper_nouns(sent):
    """Return every named entity that `nlp` finds in `sent`.

    Args:
        sent: Sentence text, or None when the example has no such sentence.

    Returns:
        A list built from `nlp(sent).ents`; an empty list when `sent` is None.
    """
    return [] if sent is None else list(nlp(sent).ents)

def get_org_loc(sent):
    """Return the named entities in `sent` whose label is "ORG" or "LOC".

    Args:
        sent: Sentence text, or None when the example has no such sentence.

    Returns:
        A list of the matching entities from `nlp(sent).ents`, in document
        order; an empty list when `sent` is None.
    """
    if sent is None:
        return []

    wanted_labels = ("ORG", "LOC")
    org_loc_ents = []
    for entity in nlp(sent).ents:
        if entity.label_ in wanted_labels:
            org_loc_ents.append(entity)
    return org_loc_ents

### Tag all Proper Nouns

In [5]:
from tagger import Tagger
# Tag store used by the cells below: UIDs are added with add_tag() and read
# back with get_uids() / get_examples().
tagger = Tagger(verbose=False)

In [6]:
# Tag every example in which get_org_loc finds at least one entity.
for idx, (ex, label, uid) in enumerate(zip(examples, labels, uids)):
    # get_org_loc returns [] for a missing (None) sentence, so the lists concatenate safely.
    proper_nouns = get_org_loc(ex['sent1']) + get_org_loc(ex['sent2'])

    # Sanity check: print every 1000th example together with the entities found.
    if not idx % 1000:
        print(uid)
        print(ex, label, proper_nouns)
        print()

    # If there are "proper nouns" as defined by entities, add the tag!
    if proper_nouns:
        tagger.add_tag(uid, 'proper_nouns')


CoLA/dev.tsv:2
{'sent1': 'The weights made the rope stretch over the pulley.', 'sent2': None} 1 []

CoLA/dev.tsv:1002
{'sent1': "John's arriving dead surprised me.", 'sent2': None} 1 [John]



In [7]:
"% proper nouns", len(tagger.get_uids("proper_nouns")) / len(examples)

('% proper nouns', 0.5028790786948176)

In [8]:
# List the (uid, example) pairs currently tagged as "proper_nouns".
# NOTE(review): in the recorded output this call reports "Error: 1043 CoLA/dev.tsv:1043",
# and uid 1001 is shown with the sentence that the tagging loop printed for uid 1002.
# That looks like an off-by-one in the tagger's uid -> example lookup; confirm in the
# `tagger` module.
tagger.get_examples("proper_nouns")

Error: 1043 CoLA/dev.tsv:1043


[('CoLA/dev.tsv:1000',
  {'sent1': 'the election of John president surprised me.',
   'sent2': None,
   'label': '0'}),
 ('CoLA/dev.tsv:1001',
  {'sent1': "John's arriving dead surprised me.",
   'sent2': None,
   'label': '1'}),
 ('CoLA/dev.tsv:1002',
  {'sent1': 'the attempt by John to leave surprised me.',
   'sent2': None,
   'label': '1'}),
 ('CoLA/dev.tsv:1003',
  {'sent1': 'John left orders to follow Pete.', 'sent2': None, 'label': '1'}),
 ('CoLA/dev.tsv:1004',
  {'sent1': 'John left us orders to follow Pete.',
   'sent2': None,
   'label': '1'}),
 ('CoLA/dev.tsv:1005',
  {'sent1': 'John left orders not to be disturbed.',
   'sent2': None,
   'label': '1'}),
 ('CoLA/dev.tsv:1006',
  {'sent1': 'That he is here is clear.', 'sent2': None, 'label': '1'}),
 ('CoLA/dev.tsv:1010',
  {'sent1': 'John believes it that Bill is here.',
   'sent2': None,
   'label': '0'}),
 ('CoLA/dev.tsv:1011',
  {'sent1': 'John believes it sincerely that Bill is here.',
   'sent2': None,
   'label': '0'}),

### Eval on slices with Uncased model

In [9]:
from metal.mmtl.debugging.utils import load_data_and_model, create_dataframe

# Load model and data
# NOTE(review): model_path is an absolute, machine-specific checkpoint path, so this
# cell only runs where that filesystem is available.
model_path = '/dfs/scratch0/mccreery/mmtl/logs/ST_bertlarge/COLA/logdir/2019_02_25/COLA_21_56_02/best_model.pth'
task_name = 'COLA'
split = 'dev'
bert_model = "bert-large-uncased"
# `model` and `dl` are both passed to create_dataframe in the next cell
# (`dl` is presumably the data loader for `split` — confirm).
model,dl = load_data_and_model(model_path, [task_name], split, bert_model=bert_model)

Could not find kwarg "generate_uids" in destination dict.
Using random seed: 488854
Loading COLA Dataset


HBox(children=(IntProgress(value=0, max=1042), HTML(value='')))




In [10]:
# Output file name is built from the task, BERT variant and split.
filepath = f'{task_name}_{bert_model}_{split}_error_analysis.tsv'

# Create DataFrame of Raw Data, Predictions, and Labels
print('Creating dataframe')
df_uncased = create_dataframe(task_name, model, dl, bert_model=bert_model)
print('Created dataframe')

# Save DataFrame to `filepath`; it is read back from that file in the next cell.
# (save_dataframe is not imported explicitly here — presumably it comes from `from utils import *`.)
save_dataframe(df_uncased, filepath)

Creating dataframe


1042it [00:30, 33.61it/s]


Created dataframe
Saved dataframe to:  COLA_bert-large-uncased_dev_error_analysis.tsv


In [11]:
# Replace the in-memory frame with the copy read back from `filepath`.
# NOTE(review): the frames displayed later carry "Unnamed: 0" / "Unnamed: 0.1" columns, which
# suggests the index is written out as a data column on save — confirm in
# save_dataframe / load_dataframe.
df_uncased = load_dataframe(filepath)

In [12]:
def df_for_uids(df, uids):
    """Return the rows of `df` whose 'uid' value is one of `uids`.

    Args:
        df: DataFrame with a 'uid' column.
        uids: Collection (list, set, ...) of UIDs to keep.

    Returns:
        The matching rows of `df`, in their original order and with their
        original index. When nothing matches, or `df` has no rows, the result
        is an empty frame that still has all of `df`'s columns.
    """
    # Series.isin builds its lookup once and always yields a boolean mask.
    # The previous apply(lambda x: x in uids) rescanned `uids` for every row
    # (linear per row when `uids` is a list) and is not guaranteed to produce
    # a boolean mask when `df` is empty.
    mask = df['uid'].isin(uids)
    return df[mask]

In [13]:
# UIDs tagged "proper_nouns"; the same `slice_uids` is reused for the cased model below,
# so both models are evaluated on the same examples.
# NOTE(review): the recorded uncased "Error rate" (0.15067...) equals 157/1042, i.e. it was
# computed over all 1042 dev rows, whereas the cased rate (0.14694...) equals 77/524. The
# recorded uncased head() also lists sentences with no named entity, and the cell numbering
# skips In [26]. The saved uncased outputs look stale — re-run from a fresh kernel to confirm.
slice_uids = tagger.get_uids("proper_nouns")
df_uncased_in_slice = df_for_uids(df_uncased, slice_uids)

In [14]:
# First few rows of the uncased slice that are flagged `is_wrong`.
df_uncased_in_slice[df_uncased_in_slice['is_wrong']].head()

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
3,3,"as you eat the most , you want the least .",,0.976777,0,CoLA/dev.tsv:5,1,True
4,4,"the more you would want , the less you would eat .",,0.998284,0,CoLA/dev.tsv:6,1,True
12,12,"the more does bill smoke , the more susan hates him .",,0.989628,0,CoLA/dev.tsv:14,1,True
22,22,mickey looked up it .,,0.998148,0,CoLA/dev.tsv:24,1,True
27,27,the box contained the ball from the tree .,,0.997315,0,CoLA/dev.tsv:29,1,True


In [15]:
# Share of rows in the uncased slice that are flagged `is_wrong`.
uncased_slice_errors = df_uncased_in_slice[df_uncased_in_slice['is_wrong']]
print("Error rate:", len(uncased_slice_errors) / len(df_uncased_in_slice))

Error rate: 0.15067178502879078


## Eval on Slice with Cased Model

In [16]:
from metal.mmtl.debugging.utils import load_data_and_model, create_dataframe

# Load model and data
# NOTE(review): model_path is an absolute, machine-specific checkpoint path, so this
# cell only runs where that filesystem is available.
model_path = '/dfs/scratch0/mccreery/mmtl/logs/ST_bertlarge/COLA_cased/2/logdir/2019_03_05/COLA_00_50_04/best_model.pth'
task_name = 'COLA'
split = 'dev'
bert_model = "bert-large-cased"
# Rebinds `model` and `dl`; from here on they refer to the cased checkpoint.
model,dl = load_data_and_model(model_path, [task_name], split, bert_model=bert_model)

Could not find kwarg "generate_uids" in destination dict.
Using random seed: 676638
Loading COLA Dataset


HBox(children=(IntProgress(value=0, max=1042), HTML(value='')))




In [17]:
# Output file name is built from the task, BERT variant and split.
# This rebinds `filepath` to the cased model's file.
filepath = f'{task_name}_{bert_model}_{split}_error_analysis.tsv'

# Create DataFrame of Raw Data, Predictions, and Labels
print('Creating dataframe')
df_cased = create_dataframe(task_name, model, dl, bert_model=bert_model)
print('Created dataframe')

# Save DataFrame to `filepath`; it is read back from that file in the next cell.
# (save_dataframe is not imported explicitly here — presumably it comes from `from utils import *`.)
save_dataframe(df_cased, filepath)

Creating dataframe


1042it [00:31, 33.16it/s]


Created dataframe
Saved dataframe to:  COLA_bert-large-cased_dev_error_analysis.tsv


In [18]:
# Replace the in-memory frame with the copy read back from `filepath`.
df_cased = load_dataframe(filepath)

In [19]:
# Restrict the cased results to the same `slice_uids` used for the uncased model.
df_cased_in_slice = df_for_uids(df_cased, slice_uids)

In [20]:
# First few rows of the cased slice that are flagged `is_wrong`.
df_cased_in_slice[df_cased_in_slice['is_wrong']].head()

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
12,12,"The more does Bill smoke , the more Susan hates him .",,0.924221,0,CoLA/dev.tsv:14,1,True
38,38,Mary beautifully plays the violin .,,0.999886,0,CoLA/dev.tsv:40,1,True
39,39,"Clearly , John probably will immediately learn French perfectly .",,9.2e-05,1,CoLA/dev.tsv:41,0,True
40,40,Sue gave to Bill a book .,,0.999655,0,CoLA/dev.tsv:42,1,True
46,46,Mary intended John to go abroad .,,0.999915,0,CoLA/dev.tsv:48,1,True


In [21]:
# Share of rows in the cased slice that are flagged `is_wrong`.
cased_slice_errors = df_cased_in_slice[df_cased_in_slice['is_wrong']]
print("Error rate:", len(cased_slice_errors) / len(df_cased_in_slice))

Error rate: 0.14694656488549618


## Error analysis on differences in predictions!

In [22]:
# Peek at the uncased frame (all rows, not just the slice) for comparison with the cased one.
df_uncased.head()

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
0,0,the weights made the rope stretch over the pull ##ey .,,0.998119,1,CoLA/dev.tsv:2,1,False
1,1,the mechanical doll wr ##ig ##gled itself loose .,,0.997867,1,CoLA/dev.tsv:3,1,False
2,2,"if you had eaten more , you would want less .",,0.999306,1,CoLA/dev.tsv:4,1,False
3,3,"as you eat the most , you want the least .",,0.976777,0,CoLA/dev.tsv:5,1,True
4,4,"the more you would want , the less you would eat .",,0.998284,0,CoLA/dev.tsv:6,1,True


In [23]:
# Peek at the cased frame (all rows, not just the slice) for comparison with the uncased one.
df_cased.head()

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
0,0,The weights made the rope stretch over the pull ##ey .,,0.999909,1,CoLA/dev.tsv:2,1,False
1,1,The mechanical doll w ##rig ##gled itself loose .,,0.000127,1,CoLA/dev.tsv:3,0,True
2,2,"If you had eaten more , you would want less .",,0.999914,1,CoLA/dev.tsv:4,1,False
3,3,"As you eat the most , you want the least .",,0.999184,0,CoLA/dev.tsv:5,1,True
4,4,"The more you would want , the less you would eat .",,0.999899,0,CoLA/dev.tsv:6,1,True


In [24]:
# Boolean `is_wrong` columns of the two slices, reused as row masks below.
uncased_wrong = df_uncased_in_slice['is_wrong']
cased_wrong = df_cased_in_slice['is_wrong']

# UIDs of slice examples flagged INCORRECT for the UNCASED model
incorrect_uncased_uids = df_uncased_in_slice.loc[uncased_wrong, 'uid'].to_list()

# UIDs of slice examples flagged INCORRECT for the CASED model
incorrect_cased_uids = df_cased_in_slice.loc[cased_wrong, 'uid'].to_list()

# UIDs of slice examples flagged CORRECT for the CASED model
correct_cased_uids = df_cased_in_slice.loc[~cased_wrong, 'uid'].to_list()

### Which examples were "corrected" by the cased model?

In [25]:
# UIDs that are wrong for the uncased model but right for the cased model.
corrected_by_casing_uids = set(incorrect_uncased_uids) & set(correct_cased_uids)
# Show the uncased model's view of those examples.
df_for_uids(df_uncased, corrected_by_casing_uids).head()

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
22,22,mickey looked up it .,,0.998148,0,CoLA/dev.tsv:24,1,True
58,58,"i know which book mag read , and which book bob said that you hadn ' t .",,0.024349,1,CoLA/dev.tsv:60,0,True
123,123,"no writer , nor any playwright , meets in vienna .",,0.997774,0,CoLA/dev.tsv:125,1,True
165,165,carla shovel ##ed the walk .,,0.037656,1,CoLA/dev.tsv:167,0,True
317,317,who achieved the best result was angela .,,0.99542,0,CoLA/dev.tsv:319,1,True


In [27]:
# Same UIDs as above, this time showing the cased model's rows.
df_for_uids(df_cased, corrected_by_casing_uids).head()

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
22,22,Mickey looked up it .,,0.011857,0,CoLA/dev.tsv:24,0,False
58,58,"I know which book Ma ##g read , and which book Bob said that you hadn ' t .",,0.999752,1,CoLA/dev.tsv:60,1,False
123,123,"No writer , nor any playwright , meets in Vienna .",,0.035777,0,CoLA/dev.tsv:125,0,False
165,165,Carla shove ##led the walk .,,0.999921,1,CoLA/dev.tsv:167,1,False
317,317,Who achieved the best result was Angela .,,0.09079,0,CoLA/dev.tsv:319,0,False


### Which examples are "still incorrect" with the cased model?

In [28]:
# UIDs that are wrong for both the uncased and the cased model.
still_incorrect_uids = set(incorrect_uncased_uids) & set(incorrect_cased_uids)
# Show the cased model's view of those examples.
df_for_uids(df_cased, still_incorrect_uids).head()

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
12,12,"The more does Bill smoke , the more Susan hates him .",,0.924221,0,CoLA/dev.tsv:14,1,True
38,38,Mary beautifully plays the violin .,,0.999886,0,CoLA/dev.tsv:40,1,True
61,61,Rusty talked about himself only after Mary did talk about him .,,4.7e-05,1,CoLA/dev.tsv:63,0,True
67,67,Which report that John was in ##com ##pet ##ent did he submit ?,,0.999852,0,CoLA/dev.tsv:69,1,True
74,74,The mayor regarded as being absurd the proposal to build a sidewalk from Dartmouth to Smith .,,0.999908,0,CoLA/dev.tsv:76,1,True
