# Reddit AITA Finetuned Model Testing

In [None]:
%pip install peft transformers datasets accelerate bitsnbytes

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoModelForCausalLM, AutoTokenizer
from datasets import load_dataset
import torch
from random import randrange
import textwrap

In [None]:
from huggingface_hub import login
# Authenticate this session with the Hugging Face Hub.
# NOTE(review): presumably needed so the dataset/model repos used below can be
# downloaded — confirm whether any of them are private or gated.
login()

# Flan-T5 Finetuned Model

In [None]:
from datasets import load_dataset

# Dataset used to try out the Flan-T5 model; later cells read examples
# from its 'test' split.
dataset = load_dataset(
    "MattBoraske/reddit-AITA-binary-submissions-and-comments-top-2k",
)

In [None]:
# Hub repo id of the Flan-T5 checkpoint; both the tokenizer and the model
# weights are loaded from this id.
hf_repo = "MattBoraske/flan-t5-xl-reddit-AITA-binary-top-2k"

In [None]:
# Load the tokenizer and the encoder-decoder (seq2seq) weights from the same
# Hub repo. Weights are loaded in bfloat16, and device_map='auto' leaves the
# choice of device placement to the library.
tokenizer = AutoTokenizer.from_pretrained(hf_repo)
model = AutoModelForSeq2SeqLM.from_pretrained(
    hf_repo,
    torch_dtype=torch.bfloat16,
    device_map='auto',
)

In [None]:
# Draw one random example from the test split, then print the submission text,
# the classification recorded for its first top comment, and that comment's
# text, with a blank line between each.
sample = dataset["test"][randrange(len(dataset["test"]))]
print(
    sample['submission_text'],
    sample['top_comment_1_classification'],
    sample['top_comment_1'],
    sep="\n\n",
)

In [None]:
# Tokenize the Flan-T5 instruction of the sampled example, truncating it to at
# most 1024 tokens. Only one sequence is encoded, so it is not padded; the
# attention mask is kept and passed to generate() explicitly instead of being
# left for generate() to infer.
# The tensors are moved to the model's own device rather than a hard-coded
# .cuda(), so the cell follows whatever placement device_map='auto' chose.
encoded = tokenizer(
    sample['flanT5_instruction'],
    max_length=1024,
    truncation=True,
    return_tensors="pt",
).to(model.device)
input_ids = encoded.input_ids
attention_mask = encoded.attention_mask

# Generate and print five responses for the same prompt.
# NOTE(review): the five runs only differ if the model's generation config
# enables sampling — confirm; under greedy decoding they would be identical.
for _ in range(5):

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        # max new tokens set to 256 as part of model generation config
    )

    # decode() accepts the tensor of token ids directly.
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(textwrap.fill(prediction, width=100))
    print()

# Llama-2 Finetuned Model

In [None]:
from datasets import load_dataset

# Dataset used to try out the Llama-2 model; later cells read examples
# from its 'test' split.
dataset = load_dataset(
    "MattBoraske/reddit-AITA-binary-submissions-and-comments",
)

In [None]:
# Hub repo id of the Llama-2 chat checkpoint; both the tokenizer and the model
# weights are loaded from this id.
hf_repo = "MattBoraske/llama-2-7b-chat-reddit-AITA-binary"

In [None]:
import gc

# Release whatever model is currently bound to `model` before loading the next
# one, so two sets of weights are not resident at the same time. Rebinding to
# None works whether or not a model was loaded earlier in the session.
model = None
gc.collect()
torch.cuda.empty_cache()

# Load the tokenizer and the decoder-only (causal LM) weights from the same
# Hub repo. Weights are loaded in bfloat16, and device_map='auto' leaves the
# choice of device placement to the library.
tokenizer = AutoTokenizer.from_pretrained(hf_repo)
model = AutoModelForCausalLM.from_pretrained(
    hf_repo,
    device_map='auto',
    torch_dtype=torch.bfloat16,
)

In [None]:
# Draw one random example from the test split, then print the submission text,
# the classification recorded for its first top comment, and that comment's
# text, with a blank line between each.
sample = dataset["test"][randrange(len(dataset["test"]))]
print(
    sample['submission_text'],
    sample['top_comment_1_classification'],
    sample['top_comment_1'],
    sep="\n\n",
)

In [None]:
# Tokenize the Llama-2 instruction of the sampled example. The attention mask
# is kept and passed to generate() explicitly instead of being left for
# generate() to infer.
# The tensors are moved to the model's own device rather than a hard-coded
# .cuda(), so the cell follows whatever placement device_map='auto' chose.
encoded = tokenizer(sample['llama2_instruction'], return_tensors="pt").to(model.device)
input_ids = encoded.input_ids
attention_mask = encoded.attention_mask

# Generate and print five responses for the same prompt.
# NOTE(review): the five runs only differ if the model's generation config
# enables sampling — confirm; under greedy decoding they would be identical.
# NOTE(review): for a decoder-only model the returned sequence is the prompt
# followed by the continuation, so the printed text starts with the
# instruction; decode outputs[0][input_ids.shape[-1]:] if only the reply is wanted.
for _ in range(5):

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=attention_mask,
        max_new_tokens=256,  # to guard against really long (unintended) generations where the model keeps repeating itself
    )

    # decode() accepts the tensor of token ids directly.
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print(textwrap.fill(prediction, width=200))
    print()