In [1]:
from lzma import MODE_NORMAL
import flatten_lattice as fl
import torch
from bert_models import LinearPOSBertV1
from encoding_utils import *
import pickle
from mask_utils import *
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer, AutoConfig
import json
import matplotlib.pyplot as plt
import os

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
bert_tok = fl.bert_tok
mbart_tok = fl.mbart_tok

# -1 for whole graph, otherwise # of lattice segments to use
STOPS = 1
# v3 - first whole lattice
# v4 - first single lattice
# v5 - single lattice with fixes
VNUM = 5
MOD_NAME = 'bertonewayv1.pth'

2022-09-10 12:08:36.222739: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-10 12:08:36.222764: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


KeyboardInterrupt: 

In [2]:
# specifies files for pre-loading
LOADED = {
    'amasks': 'attmasksallv'+str(VNUM)+'.pt',
    'tmaps': 'tmapsmaskedv'+str(VNUM)+'/'
}

# Code needed when changing set of gold labels
def prepare_dataset(resset, model):
    x = []
    y = []
    allmasks = []
    for res in resset:
        curinps = []
        curmasks = []
        for r in res:
            try:
                msk = torch.zeros(MAX_LEN, MAX_LEN)
                toktmp = torch.tensor(bert_tok(clean_expanded(r)).input_ids)
                msk[:len(toktmp), :len(toktmp)] = torch.ones(len(toktmp), len(toktmp))
                msk = torch.tril(msk)
                #msk = msk[:MAX_LEN, :MAX_LEN]
                #print(toktmp.shape)
                if float(toktmp.shape[0])<MAX_LEN:
                    dlen = MAX_LEN-toktmp.shape[0]
                    toktmp = torch.cat([toktmp, torch.zeros(dlen)])
                else:
                    toktmp = toktmp[:MAX_LEN]
                curinps.append(toktmp)
                curmasks.append(msk)
            except:
                print("weird error happened") 
        print(len(curinps))
        tinp = torch.stack(curinps).long().to(device)
        print(tinp.shape)
        # not taking in 1-way mask 
        y.append(model(tinp, attmasks=torch.stack(curmasks).long().to(device)))
        x.append(tinp)
        del tinp
        allmasks.append(curmasks)
        
    return x, y, allmasks

def get_labset_partial(explodeds, startind, amt, model):
    dsetx, dsety, _ = prepare_dataset(explodeds[startind:startind+amt], model)
    print(len(dsetx))
    assert len(dsetx)==amt
    latposylabels, tmaps = lattice_pos_goldlabels(dsetx, dsety, sents[startind:startind+amt])
    del dsetx, dsety
    
    torch.cuda.empty_cache()
    return latposylabels, tmaps

def get_biglabset(split, model):
    for i in range(0, int(len(resarrs)/split)):
        print("SUBSET - ", i)
        r, tmap = get_labset_partial(resarrs, i*split, split, model)
        torch.cuda.empty_cache()
        file = open('./torchsaved/tmapsmaskedv'+str(VNUM)+'/tmaps_'+str(i*split)+'.pkl', 'wb')
        # dump information to that file
        pickle.dump(tmap, file)

        # close the file
        file.close()
        del r
        del tmap
        torch.cuda.empty_cache()
        print(torch.cuda.memory_allocated("cuda:0"))

def load_model(labels):
    # load model, same for gold generation and inference
    posbmodel = LinearPOSBertV1(len(list(labels.keys())))
    t = torch.load("./a3distrib/ckpt/"+MOD_NAME)
    posbmodel.load_state_dict(t)
    posbmodel.eval()
    del t
    torch.cuda.empty_cache()

    print("GPU Mem Used = ", torch.cuda.memory_allocated("cuda:0"))

    return posbmodel

In [None]:
# First we want to generate flattened version of the graph
processedgraphs = fl.get_processed_graph_data(fl.frenbase, -1, STOPS)

# extra step for greedy 
if STOPS==1:
    processedgraphs = filter_greedy(processedgraphs)

# get exploded candidates to generate gold labels
resarrs = [fl.get_cover_paths(p)[0] for p in processedgraphs]

# ensure no empty examples
clean_empty(resarrs, processedgraphs)
print("num examples: ", len(resarrs))

# TODO should I add an example?

# get attention masks, make if they don't exist allready
if os.path.exists('./torchsaved/'+LOADED['amasks']):
    print("using loaded masks")
    attmasks = torch.load('./torchsaved/'+LOADED['amasks']).to(device)
else:
    print("creating new masks")
    masktmp = [connect_mat(p) for p in processedgraphs]
    attmasks = torch.stack(masktmp).to(device)
    torch.save(attmasks, './torchsaved/'+LOADED['amasks'])

# convert to backwards-only mask
# TODO some sort of bug causing NaN predictions
attmasks = torch.tril(attmasks)

num examples:  101
using loaded masks


In [None]:
# credit to tutorial by https://pageperso.lis-lab.fr/benoit.favre/pstaln/09_embedding_evaluation.html for 
# input / pre-processing setup
# load labels
with open('./a3distrib/lab_vocab.json') as json_file:
    labels = json.load(json_file)

posbmod = load_model(labels)

# create inputs
sents, posids = create_inputs(processedgraphs)

print("Average, max nodes: ", avg_nodes(sents))

# generate token label maps if they don't exist
if os.path.exists('./torchsaved/'+LOADED['tmaps']+'tmaps_0.pkl')==False:
    with torch.no_grad():
        get_biglabset(1, posbmod)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


GPU Mem Used =  534572544
Average, max nodes:  (22.396039603960396, 61)


In [None]:
print(bert_tok.decode(sents[0][:40]))
print(resarrs[0][0])

[CLS] The US President was to receive Iraqi Prime Minister Nouri Al Maliki Friday, November 1, 2013 in an effort to seek US assistance in fighting the worst wave of violence in five years. [SEP]
The US President was to receive Iraqi Prime Minister Nouri Al Maliki Friday, November 1, 2013 in an effort to seek US assistance in fighting the worst wave of violence in five years.


In [None]:
def get_acc(ylabs, thepred):
    # simplify prediction tensors
    ysimp = ylabs
    psimp = torch.argmax(thepred, dim=2)
    # clean up labels
    sm = subword_mask_all(sents)
    ysimp[sents==0] = 0
    ysimp[sents==102] = 0
    ysimp[sm==0] = 0
    ysimp[:, 0] = 0
    # apply cleanaup to x 
    psimp[ysimp==0] = 0
    # apply cleanaup to x 
    psimp[ysimp==0] = 0
    # compute accuracy
    acc = 1 - ((ysimp-psimp).count_nonzero())/ysimp.count_nonzero()
    return acc, ysimp, psimp

def flex_acc(ylabs, thepred, tmaps):
    ysimp = ylabs
    psimp = torch.argmax(thepred, dim=2)
    # clean up labels
    sm = subword_mask_all(sents)
    ysimp[sents==0] = 0
    ysimp[sents==102] = 0
    ysimp[sm==0] = 0
    ysimp[:, 0] = 0
    # apply cleanaup to x 
    psimp[ysimp==0] = 0
    # apply cleanup to sents
    scop = torch.clone(sents)
    scop[ysimp==0]=0
    tot=0
    cnt = 0
    # go through valid tokens
    for pos in torch.nonzero(scop):
        tk = str(int(scop[pos[0], pos[1]]))
        if int(psimp[pos[0], pos[1]]) in tmaps[pos[0]][tk]:
            cnt+=1
        else:
            print(int(psimp[pos[0], pos[1]]), " ", tmaps[pos[0]][tk])
        tot+=1
    print(tot)
    return cnt/tot
            
flex_acc(latposylabels, pred1, tmaps)

NameError: name 'latposylabels' is not defined

In [49]:
a, ysimp, psimp = get_acc(latposylabels, pred1)

In [137]:
lablist = [l for l in labels.keys()]
def get_err_tensors():
    diff = ysimp-psimp
    diff = diff.abs().bool().int()
    errg = ysimp
    errp = psimp
    errg[diff==0] = 0
    errp[diff==0] = 0
    return errg, errp

errg, errp = get_err_tensors()

NameError: name 'ysimp' is not defined

In [9]:
# load token label maps, use to get y-labels
N_EX = 101
tmaps = []
for i in range(0, N_EX):
    with open('./torchsaved/'+LOADED['tmaps']+'tmaps_'+str(i)+'.pkl', 'rb') as file:
        tmaps.append(pickle.load(file)[0])

In [10]:
# TODO y-labels are wrong currently, go back in to get rid of pad / subword stuff
latposylabels = tmap_pos_goldlabels(tmaps, sents)    
torch.cuda.empty_cache()

# save data to run later
outputdata = {}
outputdata['tmaps'] = tmaps
outputdata['masks'] = attmasks
outputdata['pgraphs'] = processedgraphs

with open('./torchsaved/outputv'+str(VNUM)+'.pkl', 'wb') as file:
    # dump information to that file
    pickle.dump(outputdata, file)

In [None]:
# Make all predictions with ablations
sents, posids = create_inputs(processedgraphs)
pred1 = posbmod(sents, mod_posids(posids), attmasks)

In [11]:
sents, posids = create_inputs(processedgraphs)
pred2 = posbmod(sents, None, attmasks)
print(check_accuracy(pred2, latposylabels, sents))

tensor(0.8899, device='cuda:0')


In [8]:
print(check_accuracy(pred1, latposylabels, sents))
sents, posids = create_inputs(processedgraphs)
pred2 = posbmod(sents, fix_posids(posids), attmasks)
print(check_accuracy(pred2, latposylabels, sents))
sents, posids = create_inputs(processedgraphs)
pred3 = posbmod(sents, mod_posids(posids), None)
print(check_accuracy(pred3, latposylabels, sents))
sents, posids = create_inputs(processedgraphs)
pred4 = posbmod(sents, fix_posids(posids), None)
print(check_accuracy(pred4, latposylabels, sents))

tensor(0.8894, device='cuda:0')
tensor(0.8899, device='cuda:0')
tensor(0.9635, device='cuda:0')
tensor(0.9894, device='cuda:0')


In [26]:
sents, posids = create_inputs(processedgraphs)
mpids = mod_posids(posids)
sents, posids = create_inputs(processedgraphs)
fpids = fix_posids(posids)

KeyboardInterrupt: 

In [125]:
sents, posids = create_inputs(processedgraphs)


In [None]:
for r in res:
            try:
                msk = torch.zeros(MAX_LEN, MAX_LEN)
                toktmp = torch.tensor(bert_tok(clean_expanded(r)).input_ids)
                msk[:len(toktmp), :len(toktmp)] = torch.ones(len(toktmp), len(toktmp))
                msk = torch.tril(msk)
                #msk = msk[:MAX_LEN, :MAX_LEN]
                #print(toktmp.shape)
                if float(toktmp.shape[0])<MAX_LEN:
                    dlen = MAX_LEN-toktmp.shape[0]
                    toktmp = torch.cat([toktmp, torch.zeros(dlen)])
                else:
                    toktmp = toktmp[:MAX_LEN]
                curinps.append(toktmp)
                curmasks.append(msk)
            except:
                print("weird error happened") 

In [None]:
dsetx, dsety, cmasks = prepare_dataset(resarrs, posbmod)

In [None]:
for a in range(len(attmasks)):
    print(torch.sum(cmasks[a][0].to(device)-attmasks[a]))

In [88]:
torch.sum((cmasks[0]-attmasks[0]))

TypeError: unsupported operand type(s) for -: 'list' and 'Tensor'

In [69]:
len(dsetx[0][0].nonzero())

40

In [70]:
len(sents[0].nonzero())

40

IndexError: index 1 is out of bounds for dimension 0 with size 1

In [72]:
attmasks[0][32]

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
        1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 

In [63]:
dlist = []
for i in range(len(dsetx)):
    isdiff = torch.sum((dsetx[i][0]-sents[i]).bool().int())>0
    if isdiff:
        dlist.append(i)

In [44]:
def print_decoded(toks):
    nt = []
    for t in toks:
        if t==100 or t==101 or t==102 or t==0:
            continue
        nt.append(t)
    #print(tok.decode(nt))
    return bert_tok.decode(nt)

sentdiff = [print_decoded(sents[i]) for i in dlist]
golddiff = [print_decoded(dsetx[i][0]) for i in dlist]


In [46]:
print(list(zip(sentdiff, golddiff)))

[('The Americans ’ risk can be double.', 'The'), ('Men were being killed in the street or even in their homes, and security forces were frequently attacked as well.', 'Men'), ('I think there are two levels of response by the French Government.', 'I'), ('The return process will be the same in return.', 'The')]


In [47]:
[resarrs[i] for i in dlist]

[['The', 'The Americans ’ risk can be double.'],
 ['Men',
  'Men were',
  'Men were being killed in the street or even in their homes, and security forces were frequently attacked as well.'],
 ['I', 'I think there are two levels of response by the French Government.'],
 ['The', 'The return process will be the same in return.']]

In [110]:
yval = torch.argmax(dsety[0], dim=2)[0][:40]

In [111]:
pval = torch.argmax(, dim=2)[0][:40]

In [112]:
yval-pval

tensor([ -5,   0,   0,   0,   0,   0,   0,   0,   0,   0, -13,   0,   0,   0,
          0,   0,   0,   1,   0,   0,   0,   0,   0,   0,   0,   0,  18,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0],
       device='cuda:0')

In [18]:
def check_pred(ind):
    pred = posbmod(sents[ind:ind+1], pos_ids=None, attmasks=attmasks[ind:ind+1])
    yval = dsety[ind]
    print(torch.sum(pred-yval))

In [19]:
check_pred(0)

tensor(0., device='cuda:0', grad_fn=<SumBackward0>)


In [13]:
fix_posids(posids[0:0+1])

tensor([[  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
          14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
          28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,
          42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,
          56,  57,  58,  59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,
          70,  71,  72,  73,  74,  75,  76,  77,  78,  79,  80,  81,  82,  83,
          84,  85,  86,  87,  88,  89,  90,  91,  92,  93,  94,  95,  96,  97,
          98,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
         112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125,
         126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
         140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153,
         154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167,
         168, 169, 170, 171, 172, 173, 174, 175, 176