### Load all examples
 * `generate_uids=True`: return UIDs per example
 * `tokenizer=None`: return raw (untokenized) examples

In [1]:
from utils import *
pd.set_option('display.max_colwidth', -1)

In [2]:
from metal.mmtl.utils.preprocess import load_tsv, get_task_tsv_config

In [3]:
config = get_task_tsv_config('COLA', 'dev')
    
(examples, labels), uids = load_tsv(
    tsv_path=config["tsv_path"],
    sent1_idx=config["sent1_idx"],
    sent2_idx=config["sent2_idx"],
    label_idx=config["label_idx"],
    skip_rows=config["skip_rows"],
    tokenizer=None,
    delimiter="\t",
    label_fn=config["label_fn"],
    generate_uids=True
)

assert len(examples) == len(labels) == len(uids)

HBox(children=(IntProgress(value=0, max=1042), HTML(value='')))




### Define Proper Nouns based on Entities
Ref: https://spacy.io/api/annotation#named-entities

In [4]:
import spacy
nlp = spacy.load('xx_ent_wiki_sm')

def get_entities(sent, entities=["PER", "ORG", "LOC", "MISC"]):
    if sent is None:
        return []

    return [ent for ent in nlp(sent).ents if ent.label_ in entities]

def ex_has_proper_noun(ex):
    proper_nouns = get_entities(
        ex['sent1'], 
        entities=["PER", "ORG", "LOC", "MISC"]
    ) + get_entities(
        ex['sent2'], 
        entities=["PER", "ORG", "LOC", "MISC"]
    )

    return len(proper_nouns) > 0

def ex_has_loc(ex):
    locs = get_entities(ex['sent1'], entities=["LOC"]) \
        + get_entities(ex['sent2'], entities=["LOC"])
    return len(locs) > 0

def ex_has_loc_org(ex):
    locs = get_entities(ex['sent1'], entities=["LOC", "ORG"]) \
        + get_entities(ex['sent2'], entities=["LOC", "ORG"])
    return len(locs) > 0

### Tag all examples in slice

In [5]:
from tagger import Tagger
tagger = Tagger(verbose=False)

In [6]:
slice_name = 'locs_orgs'
in_slice_fn = ex_has_loc_org

In [7]:
for idx, (ex, label, uid) in enumerate(zip(examples, labels, uids)):   
    # logging for sanity check
    if idx % 1000 == 0:
        print(uid)
        print(ex, label)
        print()  

    # if there are "proper nouns" as defined by entites, add the tag!
    if in_slice_fn(ex):
        tagger.add_tag(uid, slice_name)


CoLA/dev.tsv:2
{'sent1': 'The weights made the rope stretch over the pulley.', 'sent2': None} 1

CoLA/dev.tsv:1002
{'sent1': "John's arriving dead surprised me.", 'sent2': None} 1



In [8]:
num_in_slice = len(tagger.get_uids(slice_name))
num_ex = len(examples)
print(f"% in slice ({num_in_slice}/{num_ex}) {num_in_slice/num_ex}")

% in slice (279/1042) 0.2677543186180422


In [9]:
tagger.get_examples(slice_name)

Error: 278 RTE/dev.tsv:278


[('CoLA/dev.tsv:1027',
  {'sent1': 'I gave it to Pete to take to the fair.',
   'sent2': None,
   'label': '1'}),
 ('CoLA/dev.tsv:124',
  {'sent1': 'No writer, nor any playwright, meets in Vienna.',
   'sent2': None,
   'label': '0'}),
 ('CoLA/dev.tsv:125',
  {'sent1': 'That you will marry any student is not certain.',
   'sent2': None,
   'label': '1'}),
 ('CoLA/dev.tsv:131',
  {'sent1': 'The worker will have a job.', 'sent2': None, 'label': '1'}),
 ('CoLA/dev.tsv:172',
  {'sent1': 'Packages drive easily to New York.',
   'sent2': None,
   'label': '0'}),
 ('CoLA/dev.tsv:173',
  {'sent1': 'The chair pushed.', 'sent2': None, 'label': '0'}),
 ('CoLA/dev.tsv:195',
  {'sent1': 'Ellen said that melons were selling well.',
   'sent2': None,
   'label': '1'}),
 ('CoLA/dev.tsv:200',
  {'sent1': 'Paul laughed at Mary.', 'sent2': None, 'label': '1'}),
 ('CoLA/dev.tsv:23',
  {'sent1': 'Mickey looked up it.', 'sent2': None, 'label': '0'}),
 ('CoLA/dev.tsv:257',
  {'sent1': 'John paid me against t

### Eval on slices with Uncased model

In [10]:
from metal.mmtl.debugging.utils import load_data_and_model, create_dataframe

# Load model and data
model_path = '/dfs/scratch0/mccreery/mmtl/logs/ST_bertlarge/COLA/logdir/2019_02_25/COLA_21_56_02/best_model.pth'
task_name = 'COLA'
split = 'dev'
bert_model = "bert-large-uncased"
model, dl = load_data_and_model(model_path, [task_name], split, bert_model=bert_model)

Could not find kwarg "generate_uids" in destination dict.
Using random seed: 895115
Loading COLA Dataset


HBox(children=(IntProgress(value=0, max=1042), HTML(value='')))




In [11]:
filepath = f'{task_name}_{bert_model}_{split}_error_analysis.tsv'

# Create DataFrame of Raw Data, Predictions, and Labels
print('Creating dataframe')
df_uncased = create_dataframe(task_name, model, dl, bert_model=bert_model)
print('Created dataframe')

# Save (and reload) DataFrame
save_dataframe(df_uncased, filepath)

Creating dataframe


1042it [00:29, 34.88it/s]


Created dataframe
Saved dataframe to:  COLA_bert-large-uncased_dev_error_analysis.tsv


In [12]:
df_uncased = load_dataframe(filepath)

In [13]:
def df_for_uids(df, uids):
    mask = df['uid'].apply(lambda x: x in uids)
    return df[mask]

In [14]:
slice_uids = tagger.get_uids(slice_name)
df_uncased_in_slice = df_for_uids(df_uncased, slice_uids)

In [15]:
df_uncased_in_slice[df_uncased_in_slice['is_wrong']].head(20)

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
74,74,the mayor regarded as being absurd the proposal to build a sidewalk from dartmouth to smith .,,0.999036,0,CoLA/dev.tsv:76,1,True
88,88,i live at the place where route 150 crosses the hudson river and my dad lives at it too .,,0.997461,0,CoLA/dev.tsv:90,1,True
122,122,"no writer , and no playwright , meets in vienna .",,0.997668,0,CoLA/dev.tsv:124,1,True
123,123,"no writer , nor any playwright , meets in vienna .",,0.997774,0,CoLA/dev.tsv:125,1,True
198,198,cynthia chewed .,,0.666183,0,CoLA/dev.tsv:200,1,True
413,413,the idea dismay ##ed the prime minister that the dome was dull .,,0.038873,1,CoLA/dev.tsv:415,0,True
467,467,"only churchill remembered churchill giving the blood , sweat and tears speech .",,0.998912,0,CoLA/dev.tsv:469,1,True
588,588,"the ta ' s have been arguing about whether some student or other should pass , but i can ' t now remember which one .",,0.994458,0,CoLA/dev.tsv:590,1,True
653,653,"harry told sue that albania is a lovely place for a vacation , and tom .",,0.040067,1,CoLA/dev.tsv:655,0,True
863,863,us like them .,,0.99757,0,CoLA/dev.tsv:865,1,True


In [16]:
print ("Error rate:", len(df_uncased_in_slice[df_uncased_in_slice['is_wrong']])
                                              / len(df_uncased_in_slice))

Error rate: 0.16393442622950818


## Eval on Slice with Cased Model

In [17]:
from metal.mmtl.debugging.utils import load_data_and_model, create_dataframe

# Load model and data
model_path = '/dfs/scratch0/mccreery/mmtl/logs/ST_bertlarge/COLA_cased/2/logdir/2019_03_05/COLA_00_50_04/best_model.pth'
task_name = 'COLA'
split = 'dev'
bert_model = "bert-large-cased"
model,dl = load_data_and_model(model_path, [task_name], split, bert_model=bert_model)

Could not find kwarg "generate_uids" in destination dict.
Using random seed: 155826
Loading COLA Dataset


HBox(children=(IntProgress(value=0, max=1042), HTML(value='')))




In [18]:
filepath = f'{task_name}_{bert_model}_{split}_error_analysis.tsv'

# Create DataFrame of Raw Data, Predictions, and Labels
print('Creating dataframe')
df_cased = create_dataframe(task_name, model, dl, bert_model=bert_model)
print('Created dataframe')

# Save (and reload) DataFrame
save_dataframe(df_cased, filepath)

Creating dataframe


1042it [00:29, 35.71it/s]

Created dataframe
Saved dataframe to:  COLA_bert-large-cased_dev_error_analysis.tsv





In [19]:
df_cased = load_dataframe(filepath)

In [20]:
df_cased_in_slice = df_for_uids(df_cased, slice_uids)

In [21]:
df_cased_in_slice[df_cased_in_slice['is_wrong']].head()

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
74,74,The mayor regarded as being absurd the proposal to build a sidewalk from Dartmouth to Smith .,,0.999908,0,CoLA/dev.tsv:76,1,True
88,88,I live at the place where Route 150 crosses the Hudson River and my dad lives at it too .,,0.9998,0,CoLA/dev.tsv:90,1,True
122,122,"No writer , and no playwright , meets in Vienna .",,0.999385,0,CoLA/dev.tsv:124,1,True
198,198,Cynthia chewed .,,0.999898,0,CoLA/dev.tsv:200,1,True
467,467,"Only Churchill remembered Churchill giving the Blood , Sweat and Tears speech .",,0.999896,0,CoLA/dev.tsv:469,1,True


In [22]:
print ("Error rate:", len(df_cased_in_slice[df_cased_in_slice['is_wrong']])
                                              / len(df_cased_in_slice))

Error rate: 0.11475409836065574


## Error analysis on differences in predictions!

In [23]:
df_uncased.head()

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
0,0,the weights made the rope stretch over the pull ##ey .,,0.998119,1,CoLA/dev.tsv:2,1,False
1,1,the mechanical doll wr ##ig ##gled itself loose .,,0.997867,1,CoLA/dev.tsv:3,1,False
2,2,"if you had eaten more , you would want less .",,0.999306,1,CoLA/dev.tsv:4,1,False
3,3,"as you eat the most , you want the least .",,0.976777,0,CoLA/dev.tsv:5,1,True
4,4,"the more you would want , the less you would eat .",,0.998284,0,CoLA/dev.tsv:6,1,True


In [24]:
df_cased.head()

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
0,0,The weights made the rope stretch over the pull ##ey .,,0.999909,1,CoLA/dev.tsv:2,1,False
1,1,The mechanical doll w ##rig ##gled itself loose .,,0.000127,1,CoLA/dev.tsv:3,0,True
2,2,"If you had eaten more , you would want less .",,0.999914,1,CoLA/dev.tsv:4,1,False
3,3,"As you eat the most , you want the least .",,0.999184,0,CoLA/dev.tsv:5,1,True
4,4,"The more you would want , the less you would eat .",,0.999899,0,CoLA/dev.tsv:6,1,True


In [25]:
# retrieve list uids corresponding to INCORRECT ex in UNCASED
incorrect_uncased_uids = df_uncased_in_slice[df_uncased_in_slice['is_wrong']]['uid'].to_list()

incorrect_cased_uids = df_cased_in_slice[df_cased_in_slice['is_wrong']]['uid'].to_list()

# retrieved list of uids corresponding to CORRECT ex in CASED model
correct_cased_uids = df_cased_in_slice[~df_cased_in_slice['is_wrong']]['uid'].to_list()

### Which examples were "corrected" by the casing model?

In [26]:
corrected_by_casing_uids = set(incorrect_uncased_uids).intersection(set(correct_cased_uids))
df_for_uids(df_uncased, corrected_by_casing_uids).head(20)

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
123,123,"no writer , nor any playwright , meets in vienna .",,0.997774,0,CoLA/dev.tsv:125,1,True
413,413,the idea dismay ##ed the prime minister that the dome was dull .,,0.038873,1,CoLA/dev.tsv:415,0,True
863,863,us like them .,,0.99757,0,CoLA/dev.tsv:865,1,True


In [27]:
df_for_uids(df_cased, corrected_by_casing_uids).head(20)

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
123,123,"No writer , nor any playwright , meets in Vienna .",,0.035777,0,CoLA/dev.tsv:125,0,False
413,413,The idea dismay ##ed the Prime Minister that the Dome was dull .,,0.999859,1,CoLA/dev.tsv:415,1,False
863,863,Us like them .,,0.00013,0,CoLA/dev.tsv:865,0,False


### Which examples are "still incorrect" with the casing model? 

In [28]:
still_incorrect_uids = set(incorrect_uncased_uids).intersection(set(incorrect_cased_uids))
df_for_uids(df_cased, still_incorrect_uids).head(20)

Unnamed: 0.1,Unnamed: 0,sentence1,sentence2,score,label,uid,pred,is_wrong
74,74,The mayor regarded as being absurd the proposal to build a sidewalk from Dartmouth to Smith .,,0.999908,0,CoLA/dev.tsv:76,1,True
88,88,I live at the place where Route 150 crosses the Hudson River and my dad lives at it too .,,0.9998,0,CoLA/dev.tsv:90,1,True
122,122,"No writer , and no playwright , meets in Vienna .",,0.999385,0,CoLA/dev.tsv:124,1,True
198,198,Cynthia chewed .,,0.999898,0,CoLA/dev.tsv:200,1,True
467,467,"Only Churchill remembered Churchill giving the Blood , Sweat and Tears speech .",,0.999896,0,CoLA/dev.tsv:469,1,True
588,588,"The T ##A ' s have been arguing about whether some student or other should pass , but I can ' t now remember which one .",,0.999845,0,CoLA/dev.tsv:590,1,True
653,653,"Harry told Sue that Albania is a lovely place for a vacation , and Tom .",,0.000287,1,CoLA/dev.tsv:655,0,True
