In [1]:
from unsloth import FastLanguageModel
import torch
# Loading options for the base checkpoint. These module-level names are passed
# unchanged to `FastLanguageModel.from_pretrained` below.
max_seq_length = 3000 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Load the base model and its tokenizer. The model is kept under the name
# `model_original`; later cells pass it to `get_peft_model` and to `infer`,
# and `tokenizer` is read as a global by `infer`.
model_original, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.9.post3: Fast Llama patching. Transformers = 4.45.1.
   \\   /|    GPU: NVIDIA GeForce RTX 4070 Ti SUPER. Max memory: 15.688 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.1+cu124. CUDA = 8.9. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post1. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth




In [2]:
# Build the LoRA-wrapped model (`r = 16`, `lora_alpha = 16`) over the attention
# projections (q/k/v/o) and the MLP projections (gate/up/down).
# NOTE(review): the inference cell further down calls `infer` with
# `model_original`, not with `model`. Whether this call also modifies
# `model_original` in place is not visible from this notebook -- confirm which
# object stage-one inference is meant to use.
model = FastLanguageModel.get_peft_model(
    model_original,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407, # fixed seed passed to the adapter setup
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.9.post3 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [5]:
# Alpaca-style prompt template with three positional `{}` slots, filled in
# order by `str.format`: instruction, input, response. `infer` passes an empty
# string for the response slot so the model continues after "### Response:";
# `formatting_prompts_func` fills it with the target text.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# End-of-sequence string taken from the loaded tokenizer; it is appended to
# every training text built by `formatting_prompts_func`.
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN

In [6]:
# Knowledge on how to persuade the poster, extracted from the paper: Winning Arguments: Interaction Dynamics and Persuasion
# Strategies in Good-faith Online Discussions
# The hint text is interpolated verbatim at the end of both instructions below.
# NOTE(review): items 1 and 3 of the hint are word-for-word duplicates
# ("Language Dissimilarity with Original Post"), and "2.Reply" lacks a space
# after the number. Left unchanged here because editing the string changes the
# prompt the model sees -- confirm whether the duplicate is intentional.
knowledge = "Hint: 1. Language Dissimilarity with Original Post: Persuasive replies use different content words but match in stopwords 2.Reply Length: Longer replies tend to be more persuasive, as they can convey more information and elaborate on points effectively. 3. Language Dissimilarity with Original Post: Persuasive replies use different content words but match in stopwords. 4. Links and Evidence: Including links as evidence in an argument increases the chances of persuasion. 5. Calmer Tone: Replies that use calmer, less intense language are more likely to persuade, as they come across as more composed. 6. Positive Emotion and Sentiment: Persuasive replies include a mix of positive and negative sentiment."

# Stage-one instruction: asks for a per-reply persuasiveness analysis. This is
# the instruction passed to `infer` in the inference cell below.
instruction_1 = f"This is a conversation from an online discussion community. The first was a poster who posted an opinion, and the next two replies were each trying to convince the poster to revise his opinion. Analyze the persuasiveness of the two replies respectively in two paragraphs with the following format: First Reply: your analysis here.\n Second Reply: your analysis here.\n Note only analysis, not conclusions. \n {knowledge}"

# Stage-two instruction: asks which reply was persuasive given two analyses.
# NOTE(review): `instruction_2` is not referenced by any cell visible in this
# notebook; `formatting_prompts_func` reads `op_instruction` instead -- confirm
# whether that name was meant to be this one.
instruction_2 = f"You're a semantic analyst. The following two paragraphs are analysis on the persuasiveness of two replies. These replies are from an online discussion community that are trying to convince the original poster to revise its opinion. Based on these analysis, which reply do you think that successfully persuaded the original poster? \n Answer only with first or second.\n {knowledge}"

In [9]:
from datasets import load_dataset
# Load the train and test splits from JSON Lines files; `dataset` is indexed
# by split name ("train" / "test").
# NOTE(review): these paths are relative to the working directory, whereas the
# later cell that reads the targets opens
# "finetune_datasets/pairs_train_alpaca.jsonl" -- confirm both point at the
# same file.
dataset = load_dataset("json", data_files={"train": "pairs_train_alpaca.jsonl",
                                           "test": "pairs_test_alpaca.jsonl",})

# Training split; this is what stage-one inference below iterates over.
dataset_train = dataset["train"]

from tqdm import tqdm
def infer(dataset, model, instruction, max_new_tokens = 64):
    """Generate one completion per dataset row and return the response texts.

    For every row, ``instruction`` and the row's ``"input"`` field are placed
    into ``alpaca_prompt`` with an empty response slot, the prompt is tokenized
    with the module-level ``tokenizer`` and moved to ``"cuda"``, and
    ``model.generate`` continues it.

    Args:
        dataset: Iterable of rows that support ``row["input"]``.
        model: Object exposing ``generate(**inputs, max_new_tokens=...,
            use_cache=...)``.
        instruction: Text placed in the instruction slot of ``alpaca_prompt``.
        max_new_tokens: Upper bound on newly generated tokens per row.
            Defaults to 64, the value that used to be hard-coded.

    Returns:
        list[str]: One string per row: the decoded text following the first
        response marker, cut at the end-of-text marker when the model emitted
        one, otherwise running to the end of the decoded text.
    """
    # Loop-invariant markers used to cut the response out of the decoded text.
    start_marker = "### Response:\n"
    end_marker = "<|end_of_text|>"

    results = []
    # NOTE(review): the progress label says "test set" regardless of which
    # split is passed in; kept as-is so the displayed output does not change.
    for line in tqdm(dataset, desc="Inferring on test set: "):
        prompt = alpaca_prompt.format(
            instruction,    # instruction
            line["input"],  # input
            "",             # output - leave this blank for generation!
        )
        inputs = tokenizer([prompt], return_tensors = "pt").to("cuda")
        outputs = model.generate(**inputs, max_new_tokens = max_new_tokens, use_cache = True)
        decoded = tokenizer.batch_decode(outputs)[0]

        # str.find returns -1 on a miss. Previously a missing end marker
        # (generation stopped at max_new_tokens before emitting EOS) became a
        # slice end of -1 and silently dropped the last character, and a
        # missing start marker produced a bogus offset. Handle both explicitly.
        marker_pos = decoded.find(start_marker)
        start = marker_pos + len(start_marker) if marker_pos != -1 else 0
        # Only look for the end marker after the response begins.
        end = decoded.find(end_marker, start)
        if end == -1:
            end = len(decoded)

        results.append(decoded[start:end])
    return results

# Prepare the base model object for generation via Unsloth's `for_inference`,
# then run the stage-one prompt (`instruction_1`) over the training split.
# NOTE(review): `infer` labels its progress bar "test set", but the split
# passed here is `dataset_train`; the captured output below shows 1000 rows.
FastLanguageModel.for_inference(model_original)
stage_one_results = infer(dataset_train, model_original, instruction_1)

Inferring on test set: 100%|██████████| 1000/1000 [18:35<00:00,  1.12s/it]


In [None]:
from datasets import Dataset
import json
# Collect the "output" field of every record in the training JSONL file.
# The file is decoded explicitly as UTF-8 instead of the platform default, and
# blank lines are skipped because json.loads would raise on them.
# NOTE(review): this path has a "finetune_datasets/" prefix that the earlier
# load_dataset call does not use -- confirm both refer to the same file, since
# these targets are later paired with results by position.
with open("finetune_datasets/pairs_train_alpaca.jsonl", "r", encoding="utf-8") as f:
    targets = [json.loads(line)["output"] for line in f if line.strip()]
    
def formatting_prompts_func(results, targets, instruction = None):
    """Build Alpaca-formatted training texts from input texts and their targets.

    Each ``results[i]`` fills the input slot of ``alpaca_prompt`` and each
    ``targets[i]`` fills the response slot; ``EOS_TOKEN`` is appended to every
    text.

    Args:
        results: Sequence of input texts, one per example.
        targets: Sequence of expected outputs, aligned with ``results`` by
            position.
        instruction: Instruction text used for every example. When omitted,
            the module-level ``op_instruction`` is used, as before.

    Returns:
        dict: ``{"text": [...]}`` with one formatted string per example.

    Raises:
        ValueError: If ``results`` and ``targets`` differ in length.
    """
    if instruction is None:
        # NOTE(review): `op_instruction` is not defined in any cell visible in
        # this notebook; `instruction_2` looks like the intended stage-two
        # prompt. Confirm, or pass `instruction` explicitly.
        instruction = op_instruction

    # zip() would silently drop the unmatched tail and leave the remaining
    # pairs looking valid, so fail loudly on misaligned inputs instead.
    if len(results) != len(targets):
        raise ValueError(
            f"results and targets must have the same length, "
            f"got {len(results)} and {len(targets)}"
        )

    # The `targets` argument supplies the outputs. The previous code ignored
    # it and indexed the global split-keyed `dataset` with "output".
    texts = [
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        alpaca_prompt.format(instruction, source_text, target_text) + EOS_TOKEN
        for source_text, target_text in zip(results, targets)
    ]
    return { "text" : texts, }
