In [None]:
# Clone the project repository into the current working directory; the dataset
# path defined further down points inside this checkout.
!git clone https://github.com/NichoJen/langmod-cogmod-project.git

In [None]:
# Put the cloned repository at the front of the module search path.
# NOTE(review): presumably so project modules can be imported; the absolute
# "/content/..." location assumes the clone lives under /content — confirm.
import sys
sys.path.insert(0,"/content/langmod-cogmod-project")

In [None]:
# Path of the dataset JSON file inside the cloned repository. It is relative,
# so it is resolved against the current working directory when opened.
aita_dataset_path = "langmod-cogmod-project/data/aita_dataset_clean.json"

In [None]:
!pip install transformers accelerate bitsandbytes

In [None]:
# load model
from getpass import getpass

from transformers import AutoModelForCausalLM, AutoTokenizer
model_name = "meta-llama/Llama-2-7b-chat-hf"
print("Enter llama 2 access token")
# Read the token without echoing it, so the secret does not end up in the
# cell output that is saved together with the notebook.
access_token = getpass(prompt="")


# Load the 4-bit quantized model and its tokenizer, authenticating with the token.
# NOTE(review): newer transformers releases appear to prefer `token=` over
# `use_auth_token=` — confirm against the installed version.
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_4bit=True, use_auth_token=access_token)
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True, use_auth_token=access_token)

In [None]:
from transformers import pipeline
import torch

# Wrap the already-loaded `model` and `tokenizer` in a text-generation pipeline.
# NOTE(review): `model` was created earlier with device_map="auto" and
# load_in_4bit=True; it is unclear whether the torch_dtype/device_map passed
# here have any effect on an already-instantiated model — confirm.
llama_2_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.float32,
    device_map="auto"
)

In [None]:
def aita_anlysis_llama2(system_prompt, content, pipeline, do_sample=False):
  """Build the prompt for one post, run it through the pipeline and return the generations.

  The prompt and every generated text are printed as a side effect.

  Args:
    system_prompt: Callable that takes `content` and returns the full prompt string.
    content: Text handed to `system_prompt`.
    pipeline: Callable invoked as `pipeline(prompt, **generation_kwargs)`. Its
      result is iterated, and each item is indexed with "generated_text".
    do_sample: Forwarded to the pipeline. The sampling-only settings (top_k,
      temperature, top_p) are forwarded only when this is True.

  Returns:
    Whatever the pipeline call returns, unchanged.
  """
  aita_input = system_prompt(content)

  print(aita_input)

  generation_kwargs = {
      "do_sample": do_sample,
      "num_return_sequences": 1,
      "max_new_tokens": 50,
      "return_full_text": False,
  }
  if do_sample:
    # These settings only shape the sampling distribution, so they are left out
    # for greedy decoding instead of being sent as unused generation flags.
    # NOTE(review): top_k=1 looks like it makes sampling effectively greedy —
    # confirm that this is intended.
    generation_kwargs.update(top_k=1, temperature=.1, top_p=0.95)

  sequences = pipeline(aita_input, **generation_kwargs)

  for seq in sequences:
    print(f"Result: {seq['generated_text']}")

  return sequences

In [None]:
def results_to_json_file(results, path):
    """Write `results` to `path` as JSON, indented by 4 spaces with sorted keys.

    The file is opened for writing with UTF-8 encoding, so any existing file at
    `path` is overwritten.
    """
    dump_options = {"indent": 4, "sort_keys": True}
    with open(path, mode="w", encoding="utf-8") as handle:
        json.dump(results, handle, **dump_options)

In [None]:
# Prompt builder whose requested answers are "asshole: yes" / "asshole: no".
def system_prompt_aita(x):
    """Return the [INST]/<<SYS>> prompt asking for an asshole verdict on the text `x`."""
    return f"""
        [INST]
        <<SYS>>
       You are an expert at judging morally complex situations. Your job is to determine whether the person writing a post in an online forum is the asshole or not the asshole.
       Analyze the text and determine if the author is in the asshole or not in the given text.\n
      Answer only with your judgement: "asshole: yes" or "asshole: no" and nothing else.
        <</SYS>>
        {x}
        [/INST]\n
    """

In [None]:
# Prompt builder whose requested answers are a bare "yes" / "no".
def system_prompt_yes_no(x):
    """Return the [INST]/<<SYS>> prompt asking whether the author of text `x` is in the wrong."""
    return f"""
        [INST]
        <<SYS>>
       You are an expert at judging morally complex situations. Your job is to determine whether the person writing a post in an online forum is in the wrong or not.
       Analyze the text and determine if the author is in the wrong in the given text.\n
      Do not answer in a complete sentence, and answer only with: "yes" or "no".
        <</SYS>>
        {x}
        [/INST]\n
    """

In [None]:
import json
# Load the dataset. It is read explicitly as UTF-8 (matching how results are
# written by results_to_json_file) so parsing does not depend on the platform's
# default text encoding. Later cells iterate over it and read the "id", "body",
# "verdict" and "is_asshole" keys of each entry.
with open(aita_dataset_path, encoding="utf-8") as f:
  aita_dataset = json.load(f)

In [None]:
from collections import defaultdict

In [None]:
# Run the yes/no prompt on every post: tally each raw model output and keep
# one record per post alongside the dataset's own labels.
results_yes_no = []
outputs_yes_no = defaultdict(int)
for example in aita_dataset:
  prediction = aita_anlysis_llama2(
      system_prompt_yes_no,
      example["body"],
      llama_2_pipeline,
      do_sample=True,
  )[0]["generated_text"]
  outputs_yes_no[prediction] += 1
  result = {
      "id": example["id"],
      "prediction": prediction,
      "aita_user_verdict": example["verdict"],
      "is_asshole": example["is_asshole"],
  }
  results_yes_no.append(result)

In [None]:
# Show how often each distinct raw output occurred for the yes/no prompt.
for output in outputs_yes_no:
  count = outputs_yes_no[output]
  print(f"output: {output}, count: {count}")

In [None]:
# Save the per-post yes/no records as JSON; the relative file name is resolved
# against the current working directory.
results_to_json_file(results_yes_no, "results_yes_no.json")

In [None]:
# Run the "asshole: yes/no" prompt on every post: tally each raw model output
# and keep one record per post alongside the dataset's own labels.
results_aita = []
outputs_aita = defaultdict(int)
for example in aita_dataset:
  prediction = aita_anlysis_llama2(
      system_prompt_aita,
      example["body"],
      llama_2_pipeline,
      do_sample=True,
  )[0]["generated_text"]
  outputs_aita[prediction] += 1
  result = {
      "id": example["id"],
      "prediction": prediction,
      "aita_user_verdict": example["verdict"],
      "is_asshole": example["is_asshole"],
  }
  results_aita.append(result)

In [None]:
# Show how often each distinct raw output occurred for the aita prompt.
for output in outputs_aita:
  count = outputs_aita[output]
  print(f"output: {output}, count: {count}")

In [None]:
# Save the per-post aita-prompt records as JSON; the relative file name is
# resolved against the current working directory.
results_to_json_file(results_aita, "results_aita_prompt_1.json")