In [None]:
import pandas as pd 
from transformers import pipeline 
from tqdm.auto import tqdm
from pprint import pprint

In [None]:
from transformers import AutoTokenizer
def run_gpt_generation(
    model_path,
    save_path,
    num_run=1000
):
    """Generate synthetic patient messages with a causal LM and save them to CSV.

    Every generation is seeded with the same fixed prompt ('Patient Message:').
    The prompt text is stripped from each output, outputs of length <= 1 after
    stripping are discarded, and the first ``num_run`` survivors are kept.

    Args:
        model_path: Model identifier or path handed to ``AutoTokenizer`` and to
            the ``text-generation`` pipeline.
        save_path: Destination passed to ``DataFrame.to_csv``.
        num_run: Number of messages required in the result.

    Returns:
        A ``pandas.DataFrame`` with one ``patient_message`` column and
        ``num_run`` rows (the same frame that is written to ``save_path``).

    Raises:
        RuntimeError: If fewer than ``num_run`` usable generations remain
            after filtering.
    """
    prompt = 'Patient Message:'

    ### Load Pre-Trained Model ###
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    # Hand the already-loaded tokenizer to the pipeline instead of the path so
    # it is not loaded a second time.
    pipe = pipeline(
        'text-generation',
        model=model_path,
        tokenizer=tokenizer,
        device=0,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Request a few extra generations in case any come back empty (rare).
    prompts = [prompt] * (num_run + 50)
    # `temperature` only has an effect when sampling is enabled; request it
    # explicitly rather than relying on the checkpoint's generation config,
    # otherwise greedy decoding would return the same text for every prompt.
    out = pipe(
        prompts,
        batch_size=32,
        do_sample=True,
        temperature=0.75,
        max_new_tokens=256,
    )

    ### Post Process Outputs ###
    # Strip the prompt text from each generation. NOTE(review): `replace`
    # removes every occurrence of the prompt, not only the leading one -
    # kept as-is to preserve existing output.
    messages = [x[0]['generated_text'].replace(prompt, '').strip() for x in out]
    # Drop empty (and single-character) outputs, then keep the first num_run.
    messages = [m for m in messages if len(m) > 1][:num_run]

    if len(messages) != num_run:
        raise RuntimeError(
            f"Only {len(messages)} non-empty generations were produced; "
            f"{num_run} were requested."
        )

    df = pd.DataFrame({'patient_message': messages})
    df.to_csv(save_path)
    return df
    