In [4]:
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch
from datasets import load_dataset
import pandas as pd
from readability import Readability
import numpy as np

# Target device for both models and all input tensors.
# Keep the original choice (GPU index 3) when that device exists; otherwise fall
# back to the default CUDA device, and to the CPU when CUDA is unavailable, so
# the notebook does not fail on machines with fewer than four GPUs.
if torch.cuda.is_available():
    device = 'cuda:3' if torch.cuda.device_count() > 3 else 'cuda'
else:
    device = 'cpu'

In [None]:
# Generation model: FLAN-T5 XL tokenizer and weights, with the weights moved to `device`.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-xl")
model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-xl")
model = model.to(device)

# SteamSHP model: tokenizer and weights used by get_reward_single to score
# generated responses; also placed on `device`.
steamtok = T5Tokenizer.from_pretrained("stanfordnlp/SteamSHP-flan-t5-xl")
steamshp = T5ForConditionalGeneration.from_pretrained("stanfordnlp/SteamSHP-flan-t5-xl")
steamshp = steamshp.to(device)

In [3]:
# Load the `explainlikeimfive` data directory of the `stanfordnlp/shp` dataset.
eli5 = load_dataset(
    "stanfordnlp/shp",
    data_dir="explainlikeimfive",
)

Using custom data configuration stanfordnlp--shp-9d14343f6fc5ff1a
Reusing dataset json (/home/prasann/.cache/huggingface/datasets/stanfordnlp___json/stanfordnlp--shp-9d14343f6fc5ff1a/0.0.0/ac0ca5f5289a6cf108e706efcf040422dbbfa8e658dee6a819f20d76bb84d26b)


  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# make prompt for eli5
def construct_prompt(row):
    """Build the ELI5 generation prompt for one row.

    Args:
        row: mapping whose 'history' entry holds the question text (a str).

    Returns:
        The instruction header, followed by the row's 'history' text, followed
        by the "Detailed Response:" cue on a new line.
    """
    # Instruction header, spelled out piece by piece so the exact whitespace
    # (leading newline, trailing space after "old.", blank line) is visible.
    header = (
        "\n"
        "The system will write a detailed and long post to respond to the user's question. "
        "Explain like the user is five years old. \n"
        "\n"
        "Question: "
    )
    response_cue = "\n Detailed Response:"
    return "".join([header, row['history'], response_cue])

# score a single example (I don't think there's enough space to batch this?)
def get_reward_single(inpdict):
    """Score a single hypothesis with the SteamSHP model.

    The hypothesis is presented as "RESPONSE A" against a placeholder
    "RESPONSE B" consisting of a single ".", and the model generates exactly
    one new token. The score is the probability that first generated token
    is token id 71.

    Args:
        inpdict: mapping with string entries 'context' (the post) and 'hyp'
            (the response to score).

    Returns:
        A tensor with one element per input sequence (a single element here,
        since one prompt is passed), holding the probability assigned to
        token id 71. Callers can convert it with float().
    """
    template = "POST: {context:s} \n\nRESPONSE A:{hyp:s} \n\nRESPONSE B: .\n\n Which response is better? RESPONSE "
    inp = template.format(context=inpdict['context'], hyp=inpdict['hyp'])
    x = steamtok([inp], return_tensors='pt').input_ids.to(device)
    outputs = steamshp.generate(x, return_dict_in_generate=True, output_scores=True, max_new_tokens=1)
    # Scores for the first (and only) generated position, one row per sequence.
    first_step_scores = outputs.scores[0]
    # softmax is the numerically stable form of exp(x) / exp(x).sum(); the
    # hand-written version overflows to inf/NaN once a score is large enough.
    probs = torch.softmax(first_step_scores, dim=-1)
    return probs[:, 71]  # index 71 corresponds to the token for 'A'

# generate output for an input row
def gen_row(rw, tok, mod, greedy=False, log=False):
    """Generate one or more responses for a single input row.

    Args:
        rw: row mapping; its 'history' entry is used to build the prompt and
            is returned unchanged.
        tok: tokenizer used to encode the prompt and decode the outputs.
        mod: model whose generate() method produces the outputs.
        greedy: when True, call generate() with only the length limits and
            decode the first returned sequence; when False, sample 10
            sequences with top_p=.9 and temperature=.9.
        log: when True, print the prompt followed by the decoded outputs.

    Returns:
        A tuple (rw['history'], outs) where outs is the list of decoded strings.
    """
    prompt = construct_prompt(rw)
    prompt_ids = tok(prompt, return_tensors="pt").input_ids.to(device)

    # Length limits shared by both decoding modes.
    length_limits = {"min_new_tokens": 20, "max_new_tokens": 200}

    if not greedy:
        sequences = mod.generate(
            prompt_ids,
            do_sample=True,
            top_p=.9,
            temperature=.9,
            num_return_sequences=10,
            **length_limits,
        )
        outs = [tok.decode(seq, skip_special_tokens=True) for seq in sequences]
    else:
        sequences = mod.generate(prompt_ids, **length_limits)
        outs = [tok.decode(sequences[0], skip_special_tokens=True)]

    if log:
        print(prompt + "\n" + str(outs))
    return rw['history'], outs

def gen_dir_beam(rw, tok, mod, pflen, keepR):
    """Sample candidate responses for a row and pick the best-scoring ones.

    Args:
        rw: input row, passed through to gen_row.
        tok: tokenizer, passed through to gen_row.
        mod: generation model, passed through to gen_row.
        pflen: not used by the code visible here.
        keepR: number of top-scoring candidates to keep.

    NOTE(review): the visible part of this function stops after computing
    `bestopts` and returns nothing; it looks unfinished, so confirm the
    intended next step.
    """
    # generate with initial sample
    inp, outs = gen_row(rw, tok, mod, False)
    # generate scores to re-rank, only use best options for next step
    shp_scores = [float(get_reward_single({"context": inp, "hyp": o})) for o in outs]
    # argsort is ascending, so reverse it to put the highest score first.
    # list.reverse() works in place and returns None, which made the original
    # `list(...).reverse()[:keepR]` raise TypeError; slice a reversed view instead.
    bestopts = list(np.argsort(shp_scores)[::-1][:keepR])
    
    
    
    
    
    