In [None]:
%%capture
# Dependency installation. %%capture has to stay on the first line of the cell; it
# swallows the (very long) pip output.
import os
# The branch is chosen by looking for "COLAB_" in the names of the environment variables.
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab and Kaggle notebooks! Otherwise use pip install unsloth
    # --no-deps skips dependency installation, presumably so the packages preinstalled
    # in the hosted runtime are left alone.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29 peft trl triton
    !pip install --no-deps cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
    !pip install --no-deps unsloth

# NOTE(review): codecarbon is installed but not imported in any cell shown here — confirm it is still needed.
!pip install codecarbon

In [None]:
%load_ext autoreload
%autoreload 2

##### **Load tokenizer & model**

In [None]:
# Select Model
# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
# More models at https://huggingface.co/unsloth
# Maps a short alias to a Hugging Face repo id; change `selected_model` to switch checkpoints.
MODEL_NAMES = {
    "nemo": "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "mistral": "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "llama": "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    # NOTE(review): unlike the entries above, this repo id has no "-bnb-4bit" suffix, so it
    # is presumably not a pre-quantized checkpoint — confirm.
    "phi3": "unsloth/Phi-3.5-mini-instruct",
}
selected_model = "phi3"
model_name = MODEL_NAMES[selected_model]
model_name  # last expression: echoes the resolved repo id as the cell output

'unsloth/Phi-3.5-mini-instruct'

In [None]:
import unsloth
from unsloth import FastLanguageModel
import torch
# Loading options. max_seq_length is also handed to SFTTrainer in the training cell.
max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# Load the checkpoint selected by `model_name` and get back the model with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

NotImplementedError: Unsloth: No NVIDIA GPU found? Unsloth currently only supports GPUs!

In [None]:
# Attach LoRA adapters to the listed attention and MLP projection modules.
# NOTE(review): this rebinds `model`, so re-running the cell without reloading the base
# model passes the already-wrapped model back in — confirm that is safe before re-executing.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2025.3.3 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
# Report a handful of model properties, one "caption value" line each.
for caption, attribute in (
    ('model.device :', 'device'),
    ('precision model.dtype :', 'dtype'),
    ('model.framework :', 'framework'),
    ('model.is_gradient_checkpointing :', 'is_gradient_checkpointing'),
    ('model.is_parallelizable :', 'is_parallelizable'),
):
    print(caption, getattr(model, attribute))

model.device : cuda:0
precision model.dtype : torch.float16
model.framework : pt
model.is_gradient_checkpointing : True
model.is_parallelizable : False


##### **Load dataset**

In [None]:
from datasets import load_dataset, DatasetDict


# Class names in id order: the position of a name in this tuple is its integer id.
_label_names = (
    "0_not_relevant",
    "1_not_happening",
    "2_not_human",
    "3_not_bad",
    "4_solutions_harmful_unnecessary",
    "5_science_unreliable",
    "6_proponents_biased",
    "7_fossil_fuels_needed",
)

# Class name -> integer id.
label2id = {name: index for index, name in enumerate(_label_names)}

# Integer id -> class name (reverse lookup of label2id).
id2label = {int(index): name for name, index in label2id.items()}

prompt = """
Your task is to classify statements into 8 categories.
Respond STRICTLY with only the corresponding number.
DO NOT INCLUDE ANY OTHER TEXT.
If you do not know the answer, make your best guess.
The categories should be one of the following:

### Categories
0 - Not relevant: No climate-related claims or doesn't fit other categories
1 - Denial: Claims climate change is not happening
2 - Attribution denial: Claims human activity is not causing climate change
3 - Impact minimization: Claims climate change impacts are minimal or beneficial
4 - Solution opposition: Claims solutions to climate change are harmful
5 - Science skepticism: Challenges climate science validity or methods
6 - Actor criticism: Attacks credibility of climate scientists or activists
7 - Fossil fuel promotion: Asserts importance of fossil fuels

### Statement:
{}

### Answer:
{}"""

#EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
# NOTE(review): with EOS_TOKEN empty, nothing is appended after the answer in the training
# text, while the inference cells cap generation at 64 new tokens and the parser cuts at
# '<|endoftext|>' — confirm that leaving the end-of-sequence token out is intentional.
EOS_TOKEN = ''
def formatting_prompts_func(examples):
    """Turn a batch of raw examples into prompt texts and integer labels.

    Args:
        examples: batch mapping with a "quote" column (statements) and a "label"
            column (class names that are keys of `label2id`).

    Returns:
        dict with "text" (the prompt template filled with each quote and its label id,
        followed by EOS_TOKEN) and "label" (the label ids).
    """
    quotes = examples["quote"]
    label_ids = [label2id[name] for name in examples["label"]]
    texts = [
        prompt.format(quote, label_id) + EOS_TOKEN
        for quote, label_id in zip(quotes, label_ids)
    ]
    return {"text": texts, "label": label_ids}


def load_frugalai_dataset(test_size=0.2, seed=42,
                          repo_id="QuotaClimat/frugalaichallenge-text-train"):
    """Load the challenge dataset and return train/validation/test splits with prompts.

    The hub dataset's 'train' split is divided into train and validation parts; its
    'test' split is kept as is. Every split is reduced to the 'quote' and 'label'
    columns and then mapped through `formatting_prompts_func`, which adds 'text' and
    replaces 'label' with integer ids.

    Args:
        test_size: share of the original 'train' split held out for validation.
        seed: seed of the train/validation shuffle, for reproducible splits.
        repo_id: Hugging Face dataset repository to load.

    Returns:
        DatasetDict with the keys 'train', 'validation' and 'test'.
    """
    raw = load_dataset(repo_id)
    train_val_split = raw['train'].train_test_split(test_size=test_size, seed=seed)
    splits = {
        'train': train_val_split['train'],
        'validation': train_val_split['test'],
        'test': raw['test'],
    }
    return DatasetDict({
        name: split.select_columns(['quote', 'label'])
                   .map(formatting_prompts_func, batched=True)
        for name, split in splits.items()
    })

# Build the three prompt-formatted splits.
dataset = load_frugalai_dataset()

# Convenience handles for the individual splits.
dataset_train = dataset['train']
dataset_val = dataset['validation']
dataset_test = dataset['test']

# Formatted prompt texts and integer label ids of the training split.
X_train = dataset_train['text']
y_train = dataset_train['label']

print(dataset)
print(type(dataset_train), dataset_train.shape, dataset_train.column_names)
dataset_train.info.features  # last expression: shown as the cell output

DatasetDict({
    train: Dataset({
        features: ['quote', 'label', 'text'],
        num_rows: 3897
    })
    validation: Dataset({
        features: ['quote', 'label', 'text'],
        num_rows: 975
    })
    test: Dataset({
        features: ['quote', 'label', 'text'],
        num_rows: 1219
    })
})
<class 'datasets.arrow_dataset.Dataset'> (3897, 3) ['quote', 'label', 'text']


{'quote': Value(dtype='string', id=None),
 'label': Value(dtype='int64', id=None),
 'text': Value(dtype='string', id=None)}

**Training arguments**

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Short demonstration run: 60 optimizer steps with an effective batch size of
# 2 (per device) x 4 (gradient accumulation) = 8.
training_args = TrainingArguments(
    per_device_train_batch_size = 2,
    gradient_accumulation_steps = 4,
    warmup_steps = 5,
    # num_train_epochs = 1, # Set this for 1 full training run.
    max_steps = 60,
    learning_rate = 2e-4,
    # Use bf16 where the GPU supports it, otherwise fall back to fp16.
    fp16 = not is_bfloat16_supported(),
    bf16 = is_bfloat16_supported(),
    logging_steps = 1,
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    output_dir = "outputs",
    report_to = "none", # Use this for WandB etc
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset_train,
    # Evaluate on the held-out validation split; evaluating on the training split
    # would report metrics on data the model has already been fitted to.
    eval_dataset = dataset_val,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False, # Can make training 5x faster for short sequences.
    args = training_args,
)

Converting train dataset to ChatML (num_proc=2):   0%|          | 0/4872 [00:00<?, ? examples/s]

Applying chat template to train dataset (num_proc=2):   0%|          | 0/4872 [00:00<?, ? examples/s]

Tokenizing train dataset (num_proc=2):   0%|          | 0/4872 [00:00<?, ? examples/s]

Truncating train dataset (num_proc=2):   0%|          | 0/4872 [00:00<?, ? examples/s]

In [None]:
trainer.label_names, trainer.state

(['labels'],
 TrainerState(epoch=None, global_step=0, max_steps=0, logging_steps=500, eval_steps=500, save_steps=500, train_batch_size=None, num_train_epochs=0, num_input_tokens_seen=0, total_flos=0, log_history=[], best_metric=None, best_model_checkpoint=None, is_local_process_zero=True, is_world_process_zero=True, is_hyper_param_search=False, trial_name=None, trial_params=None, stateful_callbacks={'TrainerControl': {'args': {'should_training_stop': False, 'should_epoch_stop': False, 'should_save': False, 'should_evaluate': False, 'should_log': False}, 'attributes': {}}}))

**Train**

In [None]:
# @title Show current memory stats
# Snapshot taken before training. start_gpu_memory and max_memory are read again by the
# post-training memory report, so this cell has to run first.
gpu_stats = torch.cuda.get_device_properties(0)
# Dividing by 1024 three times converts bytes to GiB (labelled "GB" in the printout).
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = Tesla T4. Max memory = 14.741 GB.
2.307 GB of memory reserved.


In [None]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 4,872 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 29,884,416/2,039,024,640 (1.47% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,2.4037
2,2.3974
3,2.3404
4,2.1407
5,2.0214
6,1.9193
7,1.8628
8,1.6957
9,1.8039
10,1.6479


In [None]:
trainer.state.best_metric

In [None]:
# @title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training."
)
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

297.7701 seconds used for training.
4.96 minutes used for training.
Peak reserved memory = 3.436 GB.
Peak reserved memory for training = 1.129 GB.
Peak reserved memory % of max memory = 23.309 %.
Peak reserved memory for training % of max memory = 7.659 %.


**Evaluate**

In [None]:
# Evaluate the model on the trainer's eval_dataset
eval_results = trainer.evaluate()

# The trainer is created without a compute_metrics function, so 'eval_accuracy' is not
# guaranteed to be in the result; indexing it unconditionally raises KeyError.
if 'eval_accuracy' in eval_results:
    print(f"Validation Accuracy: {eval_results['eval_accuracy']:.4f}")
else:
    # Fall back to whatever metrics the trainer actually reported.
    for metric_name, metric_value in eval_results.items():
        print(f"{metric_name}: {metric_value}")

**Inference**

In [None]:
def output_parser(output, eos_markers=("<|endoftext|>",)):
    """Split decoded model output into the statement and the predicted label.

    Args:
        output: a decoded string, or a list of decoded strings (only the first
            element is parsed, as returned by `tokenizer.batch_decode` for one prompt).
        eos_markers: end-of-sequence strings; the label is cut at the first one found.
            Pass the tokenizer's own EOS token for models that do not use
            '<|endoftext|>'.

    Returns:
        (quote, label) as stripped strings. `quote` is the text between "Statement:"
        and "### Answer:" (or everything available when a marker is missing);
        `label` is the text after "### Answer:" up to the first EOS marker, or ""
        when there is no answer section. ("", "") is returned for an empty list,
        None, or any other non-string payload.
    """
    text = output[0] if isinstance(output, list) and output else output
    if not isinstance(text, str):
        # Nothing parseable (empty batch, None, non-text element).
        return "", ""

    # Keep only what follows the first "Statement:" header, if there is one.
    _, header_found, tail = text.partition("Statement:")
    body = tail if header_found else text

    # Everything before "### Answer:" is the quote, everything after is the answer.
    quote, _, answer = body.partition("### Answer:")

    for marker in eos_markers:
        if marker:  # str.split rejects an empty separator
            answer = answer.split(marker, 1)[0]

    return quote.strip(), answer.strip()

In [None]:
# Single-example smoke test; reuses the `prompt` template defined in the dataset cell.
FastLanguageModel.for_inference(model) # Enable native 2x faster inference
inputs = tokenizer(
[
    prompt.format(
        "Climate change is just a hoax created to control people.", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

# Generate at most 64 new tokens, decode them, then split the text into statement and label.
outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
decoded = tokenizer.batch_decode(outputs)
quote, label = output_parser(decoded)
print(quote, label)


Climate change is just a hoax created to control people. 1


In [None]:
test = [
            "Climate change is just a hoax created to control people.",
            "I think we should focus on improving public transportation in urban areas.",
            "My favorite season is autumn because of the changing leaf colors.",
            "There is no evidence that global temperatures are actually rising.",
            "Climate change is a hoax; the Earth's climate has always fluctuated naturally.",
            "The current warming is just part of natural climate cycles that have occurred throughout Earth's history.",
            "Solar activity, not human emissions, is the primary driver of recent climate change.",
            "A warmer climate will actually benefit agriculture by extending growing seasons and increasing crop yields.",
            "Sea level rise is happening so slowly that communities will easily adapt over time.",
            "Renewable energy mandates will destroy jobs and devastate our economy.",
            "Carbon taxes unfairly burden working families while doing little to affect global temperatures.",
            "Climate models are fundamentally flawed and cannot accurately predict future conditions.",
            "The peer review process in climate science is corrupted by groupthink and confirmation bias.",
            "Climate scientists exaggerate threats to secure more research funding.",
            "Environmental activists promote climate alarmism to advance a political agenda rather than protecting the environment.",
            "Coal, oil, and natural gas remain essential for reliable energy that solar and wind simply cannot provide.",
            "Developing countries need affordable fossil fuels to lift their populations out of poverty, just as developed nations did."
        ]

In [None]:
def preprocess_function(element):
    """Tokenize one statement for generation.

    The statement is placed in the prompt template with an empty answer slot, tokenized
    as a batch of one with PyTorch tensors, and moved to the "cuda" device.
    """
    filled_prompt = prompt.format(element, "")
    encoded = tokenizer([filled_prompt], return_tensors = "pt")
    return encoded.to("cuda")

# One tokenized prompt per hand-written example statement.
tokenized_ds = list(map(preprocess_function, test))
tokenized_ds[0]

{'input_ids': tensor([[29871,    13, 10858,  3414,   338,   304,   770,  1598,  9506,   964,
         29871, 29947, 13997, 29889,    13,  1666,  2818,  6850,  3960,  1783,
         16786,   411,   871,   278,  6590,  1353, 29889,    13,  3970,  6058,
          2672,  6154, 29965,  2287, 13764, 29979,   438, 29911,  4448,   323,
         12194, 29889,    13,  3644,   366,   437,   451,  1073,   278,  1234,
         29892,  1207,   596,  1900,  4140, 29889,    13,  1576, 13997,   881,
           367,   697,   310,   278,  1494, 29901,    13,    13,  2277, 29937,
           315, 14404,    13, 29900,   448,  2216,  8018, 29901,  1939, 23622,
         29899, 12817, 16726,   470,  1838, 29915, 29873,  6216,   916, 13997,
            13, 29896,   448,  3384,   616, 29901,  6015,  9893, 23622,  1735,
           338,   451, 10464,    13, 29906,   448,  6212,  3224,   972,   616,
         29901,  6015,  9893,  5199,  6354,   338,   451, 10805, 23622,  1735,
            13, 29941,   448, 14305,  

In [None]:
# Classify each pre-tokenized example and print "statement label".
# The loop variable is deliberately not called `input`: at notebook scope that would
# rebind the builtin input() for every later cell.
for encoded_prompt in tokenized_ds:
    outputs = model.generate(**encoded_prompt, max_new_tokens = 64, use_cache = True)
    decoded = tokenizer.batch_decode(outputs)
    quote, label = output_parser(decoded)
    print(quote, label)

Climate change is just a hoax created to control people. 1
I think we should focus on improving public transportation in urban areas. 0
My favorite season is autumn because of the changing leaf colors. 0
There is no evidence that global temperatures are actually rising. 1
Climate change is a hoax; the Earth's climate has always fluctuated naturally. 1
The current warming is just part of natural climate cycles that have occurred throughout Earth's history. 1
Solar activity, not human emissions, is the primary driver of recent climate change. 2
A warmer climate will actually benefit agriculture by extending growing seasons and increasing crop yields. 3
Sea level rise is happening so slowly that communities will easily adapt over time. 3
Renewable energy mandates will destroy jobs and devastate our economy. 4
Carbon taxes unfairly burden working families while doing little to affect global temperatures. 4
Climate models are fundamentally flawed and cannot accurately predict future conditi

In [None]:
type(model)

In [None]:
def batch_inference(X_test, model, tokenizer):
    """Classify raw statements one at a time.

    Args:
        X_test: iterable of raw statement strings (not pre-formatted prompts).
        model: model exposing `generate` and `device`.
        tokenizer: tokenizer matching `model`.

    Returns:
        (quotes, labels): two lists aligned with X_test holding the statement text and
        the label string that `output_parser` extracts from each generation.
    """
    quotes, labels = [], []
    for statement in X_test:
        # Fill the prompt template with an empty answer slot and tokenize it with the
        # tokenizer passed in, on the same device as the model.
        tokens = tokenizer(
            [prompt.format(statement, "")], return_tensors = "pt"
        ).to(model.device)
        outputs = model.generate(**tokens, max_new_tokens = 64, use_cache = True)
        decoded = tokenizer.batch_decode(outputs)
        quote, label = output_parser(decoded)
        quotes.append(quote)
        labels.append(label)
    return quotes, labels

FastLanguageModel.for_inference(model) # Enable native 2x faster inference
# Raw statements and integer labels of the held-out test split. The raw 'quote' column is
# used rather than 'text', because 'text' already contains the answer.
X_test = dataset_test['quote']
y_test = dataset_test['label']
quotes, labels = batch_inference(X_test, model, tokenizer)
def get_results(quotes, y_test, y_pred):
    """Print one line per example: match flag, prediction, ground truth, quote.

    Args:
        quotes: statement texts.
        y_test: ground-truth labels (integer ids in this notebook).
        y_pred: predicted labels (strings parsed from generated text).
    """
    for quote, expected, predicted in zip(quotes, y_test, y_pred):
        # Predictions are parsed text while dataset labels are ints, so compare their
        # string forms; a direct `==` would be False for every row.
        is_match = str(predicted).strip() == str(expected).strip()
        print(is_match, predicted, expected, quote)


**Saving the LoRA layers**

In [None]:
# Write the model (per the section heading, the LoRA layers) and the tokenizer files
# into the same local directory.
model.save_pretrained("./FT_LoRa_baseline_unsloth")
tokenizer.save_pretrained("./FT_LoRa_baseline_unsloth")

('./FT_LoRa_baseline_unsloth/tokenizer_config.json',
 './FT_LoRa_baseline_unsloth/special_tokens_map.json',
 './FT_LoRa_baseline_unsloth/tokenizer.model',
 './FT_LoRa_baseline_unsloth/added_tokens.json',
 './FT_LoRa_baseline_unsloth/tokenizer.json')

**Saving the whole model**

In [None]:
# Merge the LoRA weights back into the base model
# NOTE(review): the base model was loaded with load_in_4bit = True; merging adapters into
# a quantized base may not reproduce the adapter-on-top outputs exactly — confirm, or
# check whether the library offers a dedicated merged-save helper.
# This cell needs `model` and `tokenizer` from the cells above to be alive in the kernel.
merged_model = model.merge_and_unload()

# Save the full model
merged_model.save_pretrained("./my_complete_model")
tokenizer.save_pretrained("./my_complete_model")

NameError: name 'model' is not defined