## Installing packages

In [9]:
# !pip3 install git+https://github.com/huggingface/transformers.git@72958fcd3c98a7afdc61f953aa58c544ebda2f79

!pip3 install git+https://github.com/casper-hansen/AutoAWQ.git@1c5ccc791fa2cb0697db3b4070df1813f1736208


In [1]:
!pip install autoawq;

Collecting autoawq
  Downloading autoawq-0.2.8.tar.gz (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.6/71.6 kB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting transformers<=4.47.1,>=4.45.0 (from autoawq)
  Downloading transformers-4.47.1-py3-none-any.whl.metadata (44 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.1/44.1 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
Collecting datasets>=2.20 (from autoawq)
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets>=2.20->autoawq)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets>=2.20->autoawq)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets>=2.20->autoawq)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Coll

In [1]:
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

import csv

model_name_or_path = "TheBloke/Mistral-7B-v0.1-AWQ"

## Loading datasets

In [2]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
import os
path = "/content/drive/MyDrive/Awaab98 llms_argumentation master replication-Datasets"
datasets_names = [f for f in os.listdir(path) if os.path.isdir(os.path.join(path, f))]
datasets_names.remove("few_shot")

In [4]:
from datasets import load_from_disk

In [5]:
ds = load_from_disk(path + "/" + datasets_names[0])

## Prompt

In [6]:
def generate_prompt(arg1, arg2, nr=False, support_label="Support", attack_label="Attack", no_label="No", n_shot="0", primer=None, instruction=True, **_):
      def formatter(relation):
          logits = None
          if type(relation) == tuple:
              logits = relation[1]
              relation = relation[0]

          if (rel := relation.replace("Relation:", "").strip()) not in [support_label, attack_label, no_label]:
              if logits:
                  logits = {token.token: token.logprob for token in logits}
                  support=attack=no=-float('inf')
                  if support_label in logits.keys():
                      support = logits[support_label]

                  if attack_label in logits.keys():
                      attack = logits[attack_label]

                  if no_label in logits.keys():
                      support = logits[no_label]

                  if support > attack and support > no:
                      return 1
                  elif attack > support and attack > no:
                      return 0
                  elif no > attack and no > support:
                      return 2
              return -1

          return (1 if rel == support_label else 0) if rel != no_label else 2

      constraints = {
          "constraint_prefix": "Relation:",
          "constraint_options": [support_label, attack_label] + ([no_label] if nr else []),
          "constraint_end_after_options": True,
      }

      instructions = (f"In this task, you will be given two arguments and your goal is to classify " +
                      (f"the relation between them as either “{support_label}”, or “{attack_label}” based on the " if not nr else
                      f"the relation between them as either “{support_label}”, “{attack_label}”, or “{no_label}” based on the ") +
                      f"definitions below.\n'{support_label}': It is an argument that is in favour of to the parent "
                      f"argument.\n'{attack_label}': It is an argument that contradicts or opposes the parent "
                      f"argument.\n" + (f"\n" if not nr else f"'{no_label}': It is an argument that has no relation "
                                                              f"to the parent argument.\n"))
      if not instruction:
          instructions = ""

      if n_shot != "0":
          file = open(path + f"/few_shot/{n_shot}/{primer}")
          reader = csv.reader(file, delimiter='#', quotechar='|', quoting=csv.QUOTE_MINIMAL)
          primer = ""
          for row in reader:
              primer += (f"Arg1: {row[0]}\nArg2: {row[1]}\nRelation: "
                          f"{(support_label if row[2] == '1' else attack_label) if row[2] != '2' else no_label}\n\n")
      else:
          primer = ""

      prompt = instructions + primer + f"Arg1: {arg1}\nArg2: {arg2}"

      return prompt, constraints, formatter

In [11]:
N_SHOT = "1A1S"
PRIMER = "seed_3.csv"
prompt, constraints, format_fn = generate_prompt(ds["arg1"][0], ds["arg2"][0], N_SHOT, instruction=True,
        primer=PRIMER, n_shot=N_SHOT)

In [12]:
print(prompt)

In this task, you will be given two arguments and your goal is to classify the relation between them as either “Support”, “Attack”, or “No” based on the definitions below.
'Support': It is an argument that is in favour of to the parent argument.
'Attack': It is an argument that contradicts or opposes the parent argument.
'No': It is an argument that has no relation to the parent argument.
Arg1: Elections would limit the influence of lobbyists on the appointment of Supreme Court judges.
Arg2: The more individuals take part in a decision, as would be the case in a popular vote compared to a vote in the Senate, the harder it is to sway the outcome.
Relation: Support

Arg1: ChatGPT will reach AGI level before 2030.
Arg2: To reach AGI it should be able to generate its own goals and intentions: where would it draw these from?
Relation: Attack

Arg1: I do n't take the views that the only alternative to a dictator is a Communist dictator . If the United States had just had its influence , and 

## Model initialization

In [13]:
model = AutoAWQForCausalLM.from_quantized(model_name_or_path, fuse_layers=True,
                                          trust_remote_code=False, safetensors=True)
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=False)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 11 files:   0%|          | 0/11 [00:00<?, ?it/s]

Replacing layers...: 100%|██████████| 32/32 [00:10<00:00,  3.01it/s]


In [14]:
# Requires < 5 GB of GPU memory

tokens = tokenizer(
    prompt,
    return_tensors='pt'
).input_ids.cuda()

generation_output = model.generate(
    tokens,
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    top_k=40,
    max_new_tokens=512
)

In [19]:
format_fn(tokenizer.decode(generation_output[0]))

-1

TODO:
- llm manager
- pair generation