In [None]:
%%capture
# Environment setup (output suppressed by %%capture).
# 1. Remove any preinstalled torch / torchvision / torchaudio.
# 2. Reinstall them together with xformers from the CUDA 12.1 wheel index.
# 3. Install unsloth and upgrade transformers to the latest release.
# NOTE(review): no versions are pinned, so a rerun may resolve different packages.
!pip install pip3-autoremove
!pip-autoremove torch torchvision torchaudio -y
!pip install torch torchvision torchaudio xformers --index-url https://download.pytorch.org/whl/cu121
!pip install unsloth
!pip install --upgrade --no-cache-dir transformers
import os
# Turn off Weights & Biases logging. The training logs in this notebook warn
# that this variable is deprecated in favour of the `report_to` argument.
os.environ["WANDB_DISABLED"] = "true"

* We support Llama, Mistral, Phi-3, Gemma, Yi, DeepSeek, Qwen, TinyLlama, Vicuna, Open Hermes etc
* We support 16bit LoRA or 4bit QLoRA. Both 2x faster.
* `max_seq_length` can be set to anything, since we do automatic RoPE Scaling via [kaiokendev's](https://kaiokendev.github.io/til) method.
* With [PR 26037](https://github.com/huggingface/transformers/pull/26037), we support downloading 4bit models **4x faster**! [Our repo](https://huggingface.co/unsloth) has Llama, Mistral 4bit models.
* [**NEW**] We make Phi-3 Medium / Mini **2x faster**! See our [Phi-3 Medium notebook](https://colab.research.google.com/drive/1hhdhBa1j_hsymiW9m-WzxQtgqTH_NHqi?usp=sharing)

In [None]:
from unsloth import FastLanguageModel
import torch
# Loading configuration; `max_seq_length` is reused when the trainer is built.
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Load the model and its tokenizer from the repo named by `model_name`.
# NOTE(review): an earlier comment here said this loads a checkpoint saved after
# fine-tuning for classification, but `model_name` is the base Qwen2.5-7B repo —
# confirm which checkpoint is intended before rerunning.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen2.5-7B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


==((====))==  Unsloth 2024.11.2: Fast Qwen2 patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.741 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.55G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/117 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/4.87k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/162M [00:00<?, ?B/s]

Unsloth 2024.11.2 patched 28 layers with 0 QKV layers, 28 O layers and 28 MLP layers.


We now add LoRA adapters so we only need to update 1 to 10% of all parameters!

In [None]:
# Wrap the loaded model with LoRA adapters; only the adapter weights are trained.
model = FastLanguageModel.get_peft_model(
    model,
    # LoRA rank: any positive integer (8, 16, 32, 64 and 128 are the suggested values).
    r = 16,
    target_modules = [
        # attention projections
        "q_proj", "k_proj", "v_proj", "o_proj",
        # MLP projections
        "gate_proj", "up_proj", "down_proj",
    ],
    lora_alpha = 16,
    # Any dropout / bias setting is supported, but 0 and "none" are the optimized paths.
    lora_dropout = 0,
    bias = "none",
    # "unsloth" checkpointing uses less VRAM than True; useful for very long context.
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    # Rank-stabilized LoRA and LoftQ are available but not used here.
    use_rslora = False,
    loftq_config = None,
)

<a name="Data"></a>
### Data Prep
We now use the [COLING-2025 FinNLP FMD dataset](https://huggingface.co/datasets/1-800-SHARED-TASKS/COLING-2025-FINNLP-FMD) from the Hugging Face Hub, which provides `Train` and `Dev` splits with `claim`, `justification`, `label` and `evidence` columns. You can replace this code section with your own data prep.

**[NOTE]** To train only on completions (ignoring the user's input) read TRL's docs [here](https://huggingface.co/docs/trl/sft_trainer#train-on-completions-only).

**[NOTE]** Remember to add the **EOS_TOKEN** to the tokenized output!! Otherwise you'll get infinite generations!

If you want to use the `llama-3` template for ShareGPT datasets, try our conversational [notebook](https://colab.research.google.com/drive/1XamvWYinY6FOSX9GLvnqSjjsNflxdhNc?usp=sharing).

For text completions like novel writing, try this [notebook](https://colab.research.google.com/drive/1ef-tab5bhkvWmBOObepl1WgJvfvSzn5Q?usp=sharing).

In [None]:
from datasets import load_dataset
# Fetch the FMD dataset from the Hugging Face Hub and pick out its two splits.
dataset = load_dataset('1-800-SHARED-TASKS/COLING-2025-FINNLP-FMD')
train_dataset, dev_dataset = dataset['Train'], dataset['Dev']

README.md:   0%|          | 0.00/4.54k [00:00<?, ?B/s]

FINNLP-train.csv:   0%|          | 0.00/11.2M [00:00<?, ?B/s]

FINNLP-dev.csv:   0%|          | 0.00/3.40M [00:00<?, ?B/s]

Generating Train split:   0%|          | 0/1500 [00:00<?, ? examples/s]

Generating Dev split:   0%|          | 0/453 [00:00<?, ? examples/s]

In [None]:
import re
# Matches an embedded "Claim: <text> Example: " span; group 1 is the claim text.
# `.*` is greedy, so the span runs to the last " Example:" on the same line.
_EMBEDDED_CLAIM_RE = re.compile(r'Claim: (.*) Example: ?')


def preprocess_func(examples):
  """Move a claim embedded in the justification text into the claim column.

  Some justifications have the form ``"Claim: <text> Example: <rest>"``. For
  those rows the embedded ``<text>`` is appended to the claim (joined with
  ``'- '``) and the ``"Claim: ... Example: "`` span is removed from the
  justification. Rows without that span are passed through unchanged.

  The previous implementation extracted the text with ``[^(?:Claim: )]``,
  which is a negated character class rather than a group: it skipped every
  leading character in the set ``(?:Claim )`` and so truncated claims such as
  "Climate ..." to "te ...", and could raise ``IndexError`` when nothing
  matched. A single capture group avoids both problems.

  Args:
    examples: A batch mapping with parallel ``'claim'`` and
      ``'justification'`` sequences of strings (as passed by
      ``Dataset.map(..., batched=True)``).

  Returns:
    A dict with new ``'claim'`` and ``'justification'`` lists of the same
    length as the input. The input batch is not modified.
  """
  claims = []
  justifications = []
  for claim, justification in zip(examples['claim'], examples['justification']):
    match = _EMBEDDED_CLAIM_RE.search(justification)
    if match:
      claim = claim + '- ' + match.group(1)
      justification = _EMBEDDED_CLAIM_RE.sub('', justification)
    claims.append(claim)
    justifications.append(justification)

  return {'claim': claims, 'justification': justifications}

# Run the claim/justification clean-up batch-wise over both splits (train first, then dev).
train_dataset_new, dev_dataset_new = (
    split.map(preprocess_func, batched=True)
    for split in (train_dataset, dev_dataset)
)

Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

In [None]:
# Training prompt for joint classification and explanation.
# Placeholders filled per example: {claim}, {justification}, {label}, {expl}.
# The text from "### Response:" onwards is the completion part of the example.
prompt_template = """Below is an instruction that describes a task, paired with a claim and justification that provides further context. Please write a response that appropriately completes the request.
### Instruction:
The goal is to classify the text as true/not_enough_info/false. Choose the correct category from these options and add an explanation after classification:
1: True
2: NEI
3: False
Your response must be in the following format:
Prediction: Your_Prediction Explanation: Your_Explanation

### Claim:
{claim}
### Justification:
{justification}
### Response:
Prediction: {label} Explanation: {expl}"""

# End-of-sequence token taken from the tokenizer; it is appended to every
# formatted example so that generation learns where to stop.
EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    """Render each row of a batch into one training string.

    Fills `prompt_template` with the row's claim, justification, label and
    evidence (the evidence column is used as the explanation) and appends
    `EOS_TOKEN`.

    Args:
        examples: Batch mapping with parallel 'claim', 'justification',
            'label' and 'evidence' columns.

    Returns:
        A dict with a single "text" key holding the rendered strings.
    """
    columns = (
        examples['claim'],
        examples['justification'],
        examples['label'],
        examples['evidence'],
    )
    return {
        "text": [
            prompt_template.format(claim=claim, justification=justification, label=label, expl=evidence) + EOS_TOKEN
            for claim, justification, label, evidence in zip(*columns)
        ]
    }

# Add the rendered "text" column to both preprocessed splits.
formatted_train_dataset = train_dataset_new.map(formatting_prompts_func, batched=True)
formatted_dev_dataset = dev_dataset_new.map(formatting_prompts_func, batched=True)

Map:   0%|          | 0/1500 [00:00<?, ? examples/s]

In [None]:
# Inspect the first rendered training example. The "text" column is created by
# formatting_prompts_func, so it lives on formatted_train_dataset;
# train_dataset_new only carries the preprocessed source columns.
formatted_train_dataset['text'][0]

'Below is an instruction that describes a task, paired with a claim and justification that provides further context. Write a response that appropriately completes the request.\n### Instruction:\nThe goal is to classify the text as true/not_enough_info/false. Choose the correct category from these options and add an explanation after classification:\n1: True\n2: NEI\n3: False\nYour response must be in the following format:\nPrediction: Your_Prediction Explanation: Your_Explanation\n\n### Claim:\nChecking the Facts About \'Dreamers\'\n### Justification:\nFirst introduced in Congress in 2001 and last revisited in 2017, the so-called DREAM Act (which stands for the Development, Relief, and Education for Alien Minors Act) was aimed at providing a path to permanent residency in the United States for children of undocumented immigrants. Every attempt to pass the legislation has failed to date, as politicians continue to kick the can down the road to the next administration. DREAM Act However,

<a name="Train"></a>
### Train the model
Now let's use Huggingface TRL's `SFTTrainer`! More docs here: [TRL SFT docs](https://huggingface.co/docs/trl/sft_trainer). We train for 5 epochs (`num_train_epochs=5`) and evaluate on the dev split every 25 steps; for a quick smoke test you can set `max_steps=60` instead. We also support TRL's `DPOTrainer`!

In [None]:
from trl import SFTTrainer,DataCollatorForCompletionOnlyLM
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Train on completions only: the collator masks the loss for everything up to
# and including this marker, so only the "Prediction: ... Explanation: ..." part
# of each example contributes to the loss.
# NOTE(review): the marker is matched on token ids; confirm it tokenizes the
# same way inside the full prompt as it does on its own.
response_template = "### Response:\n"
collator = DataCollatorForCompletionOnlyLM(response_template, tokenizer=tokenizer)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = formatted_train_dataset,
    eval_dataset = formatted_dev_dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator=collator,
    dataset_num_proc = 2,
    packing = False, # Must stay False: the completion-only collator does not support packed sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 8,
        gradient_accumulation_steps = 2,
        warmup_steps = 5,
        num_train_epochs=5,
        eval_steps = 25,
        learning_rate = 2e-4,
        # Use bf16 where the GPU supports it, otherwise fall back to fp16.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        save_strategy='epoch',
        eval_strategy='steps',
        # Disable external logging integrations explicitly; the WANDB_DISABLED
        # environment variable is deprecated in favour of this argument.
        report_to = "none",
    ),
)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


Map (num_proc=2):   0%|          | 0/1500 [00:00<?, ? examples/s]

In [None]:
#@title Show current memory stats
# Snapshot GPU 0 before training; `start_gpu_memory` and `max_memory` are
# reused by the final memory report. Values are in GiB, rounded to 3 decimals.
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024**3, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)
print(
    f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.",
    f"{start_gpu_memory} GB of memory reserved.",
    sep="\n",
)

GPU = Tesla T4. Max memory = 14.741 GB.
5.936 GB of memory reserved.


In [None]:
# Run training and keep the returned stats for the final time/memory report.
# The plain TRL entry point would be:
# trainer_stats = trainer.train()
from unsloth import unsloth_train
# unsloth_train is used instead because it fixes gradient_accumulation_steps
# handling (per the original author's note).
trainer_stats = unsloth_train(trainer)

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,500 | Num Epochs = 5
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 935
 "-____-"     Number of trainable parameters = 40,370,176


Step,Training Loss
500,0.1249


In [None]:
# Upload the trained LoRA adapter and the tokenizer to the Hugging Face Hub.
# The access token is read from the HF_TOKEN environment variable rather than
# written into the notebook, so it cannot leak when the notebook is shared.
# If the variable is unset, `None` is passed and the hub client falls back to
# its own stored credentials.
import os

# model.save_pretrained("lora_model") # Local saving
# tokenizer.save_pretrained("lora_model")
HUB_REPO_ID = "minemaster01/qwen2.5-7b-fmd-5-full"
hf_token = os.environ.get("HF_TOKEN")
model.push_to_hub(HUB_REPO_ID, token = hf_token) # Online saving
tokenizer.push_to_hub(HUB_REPO_ID, token = hf_token) # Online saving

README.md:   0%|          | 0.00/574 [00:00<?, ?B/s]

  0%|          | 0/1 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/162M [00:00<?, ?B/s]

Saved model to https://huggingface.co/jebish7/qwen2.5-7b-fmd-5-full


  0%|          | 0/1 [00:00<?, ?it/s]

tokenizer.json:   0%|          | 0.00/11.4M [00:00<?, ?B/s]

In [None]:
#@title Show final memory and time stats
# Peak reserved GPU memory (GiB) after training, and the part added on top of
# the pre-training snapshot taken in `start_gpu_memory`.
used_memory = round(torch.cuda.max_memory_reserved() / 1024**3, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)

# The same two figures as a percentage of the card's total memory.
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)

print(
    f"{trainer_stats.metrics['train_runtime']} seconds used for training.",
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.",
    f"Peak reserved memory = {used_memory} GB.",
    f"Peak reserved memory for training = {used_memory_for_lora} GB.",
    f"Peak reserved memory % of max memory = {used_percentage} %.",
    f"Peak reserved memory for training % of max memory = {lora_percentage} %.",
    sep="\n",
)

34167.7171 seconds used for training.
569.46 minutes used for training.
Peak reserved memory = 10.582 GB.
Peak reserved memory for training = 4.646 GB.
Peak reserved memory % of max memory = 71.786 %.
Peak reserved memory for training % of max memory = 31.518 %.
