In [1]:
#@title Install software dependencies
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

import torch
major_version, minor_version = torch.cuda.get_device_capability()
# Must install separately since Colab has torch 2.2.1, which breaks packages
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass
!pip install convokit
!pip install datasets
!pip install numpy
!pip install triton

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-38uz_fy_/unsloth_474c57b908d04e5e80ad7562d957c63c
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-38uz_fy_/unsloth_474c57b908d04e5e80ad7562d957c63c
  Resolved https://github.com/unslothai/unsloth.git to commit 933d9fe2cb2459f949ee2250e90a5b610d277eab
  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-vjam4s_c/unsloth_aaf409f93a444ea7bf5b1e52b461d5ec
  Running command git clone --filter=blob:none --quie

In [2]:
from convokit import Corpus, download
import pandas as pd
import unsloth
from unsloth import FastLanguageModel
from transformers import AutoTokenizer
from datasets import Dataset

from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np



  from .autonotebook import tqdm as notebook_tqdm
    PyTorch 2.2.2+cu121 with CUDA 1201 (you have 2.3.1)
    Python  3.11.8 (you have 3.11.9)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


Load corpus data

In [3]:
%%capture
corpus = Corpus(filename=download("winning-args-corpus"))

In [4]:
ids = corpus.get_utterance_ids()

print(len(ids))

293297


In [5]:
SPEAKER_BLACKLIST = ['DeltaBot','AutoModerator']
training_trios = []

for id in ids:
  ut = corpus.get_utterance(id)
  if ut.reply_to == ut.conversation_id and (ut.meta['success'] == 1 or ut.meta['success'] == 0) and (ut.speaker.id not in SPEAKER_BLACKLIST):
    op = corpus.get_utterance(ut.conversation_id).text
    x = ut.text
    y = ut.meta['success']

    training_trios += [(op, x, y)]

print(len(training_trios))

train_len = len(training_trios)

8106


In [6]:
max_seq_length = 8000 
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

==((====))==  Unsloth: Fast Llama patching release 2024.6
   \\   /|    GPU: NVIDIA A100 80GB PCIe. Max memory: 79.138 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.1. CUDA = 8.0. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.25.post1. FA = True.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [7]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)

Unsloth 2024.6 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [8]:
# Function to move inputs to device
def to_device(inputs, device):
    if isinstance(inputs, dict):
        return {key: to_device(value, device) for key, value in inputs.items()}
    elif isinstance(inputs, list):
        return [to_device(item, device) for item in inputs]
    return inputs.to(device)

# Custom trainer to ensure inputs are on the correct device
class CustomSFTTrainer(SFTTrainer):
    def training_step(self, model, inputs):
        inputs = to_device(inputs, device)
        return super().training_step(model, inputs)

In [9]:
import itertools

# Define Alpaca prompt template
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
"""

#EOS_TOKEN = tokenizer.eos_token  # Ensure this is defined

# Function to format data according to the Alpaca template
def formatting_prompts_func(training_trios):
    texts = []
    targets = []
    for trio in training_trios:
        op, x, y = trio
        instruction = "Please determine if the following argument is successful based on the original post.  Output 1 for successful and 0 for unsuccessful.  Only output the one number, NOTHING ELSE."
        input_context = f"Original post: {op}\nArgument: {x}"

        # Format the text without including the target in the response
        text = alpaca_prompt.format(instruction, input_context) #+ EOS_TOKEN
        texts.append(text)
        targets.append(y)

    return texts, targets

# Format the data
texts, targets = formatting_prompts_func(training_trios)

l = int(len(texts) * 0.9)

train = {"text": texts[:l], "target": targets[:l]}
test = {"text": texts[l:], "target":targets[l:]}

In [10]:
from datasets import Dataset

dataset = Dataset.from_dict(train)
test_ds = Dataset.from_dict(test)

print(dataset)
print(test_ds)

Dataset({
    features: ['text', 'target'],
    num_rows: 7295
})
Dataset({
    features: ['text', 'target'],
    num_rows: 811
})


In [11]:
trainer = CustomSFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,  # Can make training 5x faster for short sequences.
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=5,
        max_steps=60,
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)

Map (num_proc=2): 100%|██████████| 7295/7295 [00:11<00:00, 642.39 examples/s]
Detected kernel version 5.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
max_steps is given, it will override any value given in num_train_epochs


In [12]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 7,295 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 60
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,2.4972
2,2.4711
3,2.4928
4,2.5515
5,2.4411
6,2.4036
7,2.4012
8,2.0825
9,2.1587
10,2.0588


In [16]:
def to_device(inputs, device):
    for key in inputs:
        inputs[key] = inputs[key].to(device)
    return inputs

def evaluate_model(trainer, test_dataset, tokenizer, max_seq_length, device):
    model = trainer.model
    model.eval()
    test_texts = test_dataset['text']
    test_targets = test_dataset['target']
    
    predictions, true_labels = [], []

    for text, target in zip(test_texts, test_targets):
        inputs = tokenizer(text, return_tensors='pt', max_length=max_seq_length, padding=True, truncation=True)
        inputs = to_device(inputs, device)

        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=1)
        
        prediction = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

        print(prediction)
        
        predictions.append(int(prediction))
        true_labels.append(int(target))
    
    accuracy = accuracy_score(true_labels, predictions)
    precision = precision_score(true_labels, predictions)
    recall = recall_score(true_labels, predictions)
    f1 = f1_score(true_labels, predictions)
    
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall,
        'f1': f1
    }

# Assuming `test_ds` is the test dataset, `trainer` is your trainer instance,
# and `tokenizer` is the tokenizer you're using.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
eval_metrics = evaluate_model(trainer, test_ds, tokenizer, 8000, device)
print(eval_metrics)


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Please determine if the following argument is successful based on the original post.  Output 1 for successful and 0 for unsuccessful.  Only output the one number, NOTHING ELSE.

### Input:
Original post: A multitude of films have twists in the plot, for example Gone Girl, Memento, a lot of films of M. Night Shyamalan, the Usual Suspects, Shutter Island, Fight club, well I can go on and on. [Here is a list of 100 twists](http://www.imdb.com/list/ls070836741/). 

It usually consists of a plot that advances, but with hidden information not completely revealed yet. When the information is revealed, it will heighten the liking of the plot. It feels 'Mind blowing', the movie instantly goes deeper, it is suddenly a more complex movie, and twists are ideal for deep analysis and allegories. 

Adding a twist to a plot can 

ValueError: invalid literal for int() with base 10: 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nPlease determine if 

In [None]:
with open("/home/arjunsohur/persuasion/private_/hf_token_6_18_24", "r") as f:
  token = f.readline()

In [None]:
model.push_to_hub("ArjunSohur/lm3_argument", token = token) # Online saving
tokenizer.push_to_hub("ArjunSohur/lm3_argument_tokenizer", token = token) # Online saving

HfHubHTTPError:  (Request ID: Root=1-6675a0d7-5a50421d3a2173124702eb89;535c1f33-3fd9-46ca-8265-eab740807836)

403 Forbidden: Forbidden: you must use a write token to upload to a repository..
Cannot access content at: https://huggingface.co/api/models/ArjunSohur/lm3_argument/preupload/main.
If you are trying to create or update content,make sure you have a token with the `write` role.