In [32]:
# import os
# os.environ['CUDA_VISIBLE_DEVICES'] = '2'
# os.environ['CUDA_VISIBLE_DEVICES']

'2'

In [33]:
from tqdm import tqdm
from torch.nn import CrossEntropyLoss
from transformers import GPT2Tokenizer, GPT2LMHeadModel

In [37]:
tokenizer = GPT2Tokenizer.from_pretrained('gpt2-xl')
tokenizer.pad_token = tokenizer.eos_token
model = GPT2LMHeadModel.from_pretrained('gpt2-xl')
padding_token_id = tokenizer.encode(tokenizer.eos_token )[0]
model.eval()

model.cuda()

RuntimeError: CUDA out of memory. Tried to allocate 308.00 MiB (GPU 0; 10.92 GiB total capacity; 9.82 GiB already allocated; 139.31 MiB free; 9.99 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [35]:
relation_to_text_dict = {
    "Co_Occurrence": ", ",
    "Conjunction": ", and ",
    "Contrast": ", but ",
    "Precedence": ", and then ",
    "Synchronous": ", and at the same time ",
    "Succession": ", and previously ",
    "Result": ", and as a result ",
    "Reason": ", because ",
    'Condition': ", if ",
    "Alternative": ", or ",
    "Concession": ", although ",
    "Exception": ", except ",
    'Restatement': ", in other words ",
    "ChosenAlternative": ", and instead ",
    "Instantiation": ", for example "
}

In [9]:
def compute_score(sentences):
    inputs = tokenizer(sentences, return_tensors="pt", padding='max_length', truncation=True, max_length=24)
    labels = inputs["input_ids"].clone()
    labels[labels == padding_token_id] = -100

    inputs = {key: value.cuda() for key, value in inputs.items()}
    labels = labels.cuda()

    outputs = model(**inputs, labels=labels)

    lm_logits = outputs.logits

    # Shift so that tokens < n predict n
    shift_logits = lm_logits[..., :-1, :].contiguous()
    shift_labels = labels[..., 1:].contiguous()

    # Flatten the tokens
    loss_fct = CrossEntropyLoss(reduction='none')
    loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1)).view(len(sentences), -1)

    num_tokens_per_sentence = inputs["attention_mask"].sum(dim=-1) - 1
    loss_sum_per_sentence = loss.sum(dim=-1)
    average_loss_per_token = loss_sum_per_sentence / num_tokens_per_sentence

    loss = average_loss_per_token.cpu().detach().numpy()

    return loss

def score_edge_batch(batch, Graph=None, NewGraph=None):
    batch_sentences = [h + relation_to_text_dict[key] + r + "."   for h, key, r in batch]
    scores = compute_score(batch_sentences)

    for i in range(len(batch)):
        h, key, r = batch[i]
        gpt2_score = scores[i]
#         print(h, key, r)
#         print(batch_sentences[i])
#         print(gpt2_score)

        if NewGraph:
            assert Graph is not None, 'Graph must exist to provide aser edge scores!'
            if NewGraph.has_edge(h, r):
                NewGraph[h][r][key] = {'GPT-2': gpt2_score, 'discourse': Graph[h][r]['relation'][key]}
            else:
                NewGraph.add_edges_from([(h, r, {key: {'GPT-2': gpt2_score, 'discourse': Graph[h][r]['relation'][key]}})])
                
def score_graph(Graph, batch_size=128):
    batch = []
    NewGraph = nx.DiGraph()
    for h, t in tqdm(Graph.edges):
        for key in Graph[h][t]['relation']:
            batch.append((h, key, t))
            if len(batch) == batch_size:
#                 print(batch)
                score_edge_batch(batch, Graph, NewGraph)
                batch = []
    if len(batch) > 0:
        score_edge_batch(batch, Graph, NewGraph)
        batch = []
    return NewGraph
                
            

In [36]:
data = [('you be a genius','Co_Occurrence','you have money'), 
        ('you be a genius', 'Conjunction', 'i love you'),
        ('you be a genius', 'Conjunction', 'you will do so'),
        ('you be a genius', 'Synchronous', 'it come'),
        ('you be a genius', 'Condition', 'you be think')]
score_edge_batch(data)

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cpu and cuda:0! (when checking argument for argument index in method wrapper__index_select)

In [7]:
import networkx as nx
import pickle5 as pickle
with open('/home/data/jchengaj/aser_data/core_100_filtered.pickle', 'rb') as fb:
    aser = pickle.load(fb)
# aser = nx.read_gpickle('/home/data/jchengaj/aser_data/core_100.pickle')

In [None]:
scored_aser = score_graph(aser, 128)

In [None]:
with open('/home/data/jchengaj/aser_data/core_100_filtered_gpt_scored.pickle', 'wb') as fb:
    pickle.dump(scored_aser, fb)