In [None]:
from frugalai.utils.hardware import print_mac_specs

print_mac_specs()

In [1]:
%load_ext autoreload
%autoreload 2

### Fine-tuning LLM text-classification with Langchain

In [2]:
from frugalai.utils.efficiency_tracker import FunctionTracker

tracker = FunctionTracker()

##### **Load dataset**

In [3]:
import os

os.environ["TOKENIZERS_PARALLELISM"] = "false"

In [4]:
label2id = {
    "0_not_relevant": 0,
    "1_not_happening": 1,
    "2_not_human": 2,
    "3_not_bad": 3,
    "4_solutions_harmful_unnecessary": 4,
    "5_science_unreliable": 5,
    "6_proponents_biased": 6,
    "7_fossil_fuels_needed": 7,
}

id2label = {int(v): k for k, v in label2id.items()}

In [6]:
from datasets import load_dataset

@tracker.track
def load_frugalai_dataset():
    
    ds = load_dataset("QuotaClimat/frugalaichallenge-text-train")
    ds = ds.select_columns(['quote', 'label'])
    ds = ds.map(lambda x: {"label": label2id[x["label"]]}, batched=False)
    return ds

ds = load_frugalai_dataset()
print(type(ds))
print(ds.shape)
print()


⏳ FunctionTimer: load_frugalai_dataset
| time            00:00:08.2587
| emissions       0.000000 CO2eq
| energy consumed 0.000005 kWh

<class 'datasets.dataset_dict.DatasetDict'>
{'train': (4872, 2), 'test': (1219, 2)}



##### **Sample a balanced subset**

In [7]:
from frugalai.utils.analytics import print_distribution

print_distribution(ds)


🔹 Label distribution in 'train' split (dataset.DatasetDict):
Category                                Count     Percentage
------------------------------------------------------------
0                                       1311      26.91%
1                                       587       12.05%
2                                       565       11.6%
3                                       289       5.93%
4                                       614       12.6%
5                                       641       13.16%
6                                       643       13.2%
7                                       222       4.56%
------------------------------------------------------------
Total                                   4872      

🔹 Label distribution in 'test' split (dataset.DatasetDict):
Category                                Count     Percentage
------------------------------------------------------------
0                                       307       25.18%
1            

In [8]:
from frugalai.utils.sampling import sample_balanced_subset
# N = lowest category count * nb of categories
subset_ds = sample_balanced_subset(ds, N=40, seed=42)

print(subset_ds)
print_distribution(subset_ds)

DatasetDict({
    train: Dataset({
        features: ['quote', 'label'],
        num_rows: 40
    })
    test: Dataset({
        features: ['quote', 'label'],
        num_rows: 40
    })
})

🔹 Label distribution in 'train' split (dataset.DatasetDict):
Category                                Count     Percentage
------------------------------------------------------------
0                                       5         12.5%
1                                       5         12.5%
2                                       5         12.5%
3                                       5         12.5%
4                                       5         12.5%
5                                       5         12.5%
6                                       5         12.5%
7                                       5         12.5%
------------------------------------------------------------
Total                                   40        

🔹 Label distribution in 'test' split (dataset.DatasetDict):
Categ

##### **Load tokenizer & model**

In [9]:
# Select Model
MODEL_NAMES = {
    "mistral": "mistralai/Mistral-7B-Instruct-v0.1",
    "phi3": "microsoft/Phi-3-mini-4k-instruct",
    "Qphi3": "OpenVINO/Phi-3-mini-4k-instruct-int4-ov",
    "qwen": "Qwen/Qwen2.5-0.5B"
}
selected_model = "Qphi3"
model_name = MODEL_NAMES[selected_model]
model_name

'OpenVINO/Phi-3-mini-4k-instruct-int4-ov'

In [10]:
subset_ds.shape, subset_ds.column_names

({'train': (40, 2), 'test': (40, 2)},
 {'train': ['quote', 'label'], 'test': ['quote', 'label']})

**Tokenizer**

- essayer avec et sans padding

In [11]:
max_tokens = 128

In [12]:
from transformers import AutoTokenizer

@tracker.track
def load_tokenizer(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name, model_max_length=max_tokens)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    return tokenizer
    

def preprocess_function(element):
    return tokenizer(element["quote"], truncation=True, max_length=max_tokens) #padding="max_length"

tokenizer = load_tokenizer(model_name)
tokenized_ds = subset_ds.map(preprocess_function, batched=True, remove_columns=["quote"])
tokenized_ds


⏳ FunctionTimer: load_tokenizer
| time            00:00:06.3313
| emissions       0.000000 CO2eq
| energy consumed 0.000001 kWh



Map:   0%|          | 0/40 [00:00<?, ? examples/s]

Map:   0%|          | 0/40 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['label', 'input_ids', 'attention_mask'],
        num_rows: 40
    })
    test: Dataset({
        features: ['label', 'input_ids', 'attention_mask'],
        num_rows: 40
    })
})

In [13]:
train_lengths = [len(x) for x in tokenized_ds["train"]["input_ids"]]
print('Actual max lenght in train set :', max(train_lengths), 'tokens')
print(f"Average sequence length: {sum(train_lengths)/len(train_lengths):.1f}")

Actual max lenght in train set : 128 tokens
Average sequence length: 64.8


In [14]:
tokenizer.model_max_length

128

**Model**

In [15]:
from frugalai.utils.monitoring import print_memory_status_across_devices

print_memory_status_across_devices()


                    MEMORY USAGE REPORT                     

-------------------- CPU MEMORY --------------------
Total System Memory:         24.00 GB
Available System Memory:     10.19 GB
Used System Memory:          12.24 GB (57.5%)
Current Process Memory:      0.48 GB

-------------------- MPS MEMORY --------------------
Tensor Allocated Memory:     0.00 GB
Overhead (PyTorch Internal): 0.00 GB
Driver Allocated Memory:     0.00 GB
Recommended Maximum Memory:  16.00 GB
Available in Memory Pool:    16.00 GB

-------------------- TENSOR COUNTS --------------------
CPU Tensors:                 0
MPS Tensors:                 0




  return isinstance(obj, torch.Tensor)


- try with mps
- if not, cpu

In [16]:
from transformers import BitsAndBytesConfig
import torch

# For MPS compatibility, we need to be careful with quantization settings
# MPS doesn't fully support all quantization formats, so we'll use bfloat16 for training
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # Load model in 4-bit precision
    bnb_4bit_use_double_quant=True,        # Use double quantization
    bnb_4bit_quant_type="nf4",             # Quantization type
    bnb_4bit_compute_dtype=torch.float16,  # Compute datatype
)
bnb_config

BitsAndBytesConfig {
  "_load_in_4bit": true,
  "_load_in_8bit": false,
  "bnb_4bit_compute_dtype": "float16",
  "bnb_4bit_quant_storage": "uint8",
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_use_double_quant": true,
  "llm_int8_enable_fp32_cpu_offload": false,
  "llm_int8_has_fp16_weight": false,
  "llm_int8_skip_modules": null,
  "llm_int8_threshold": 6.0,
  "load_in_4bit": true,
  "load_in_8bit": false,
  "quant_method": "bitsandbytes"
}

In [17]:
from transformers import AutoModelForSequenceClassification
from frugalai.utils.hardware import get_device

@tracker.track
def load_model(model_name):

    device = get_device() 

    if device.type == "cuda" or device.type == "mps":
        dtype = torch.float16
    else:
        dtype = torch.float32

    # Loading model with AutoModelForSequenceClassification adds an 
    # randomly initialized classification head : score.weight
    # it will be trained along with the LoRa parameters during FT
    return AutoModelForSequenceClassification.from_pretrained(
        model_name,
        label2id=label2id,
        id2label=id2label,
        num_labels=8,
        #torch_dtype=dtype,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True
    )

        # trust_remote_code=True,
        
        # Load a quantized model
        #load_in_8bit=True,  # Enable 8-bit quantization, allow auto device allocation in that case to help manage
        
        # Enable model sharding to optimize memory allocation
        # device_map=auto, # for larger models : model sharding : auto distribution of model layers across available hardware, 
        # splits a large model across GPU and CPU. Handled by Accelerate library
        
    #.to(device) # for smaller models : moves the entire model to the specified device / all or nothing

model = load_model(model_name)

print('model.device :', model.device)
print('precision model.dtype :', model.dtype)
print('model.framework :',  model.framework)
print('model.is_gradient_checkpointing :',  model.is_gradient_checkpointing)
print('model.is_parallelizable :',  model.is_parallelizable)


device : mps


config.json:   0%|          | 0.00/940 [00:00<?, ?B/s]

configuration_phi3.py:   0%|          | 0.00/11.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/OpenVINO/Phi-3-mini-4k-instruct-int4-ov:
- configuration_phi3.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


ValueError: Unrecognized configuration class <class 'transformers_modules.OpenVINO.Phi-3-mini-4k-instruct-int4-ov.4f812530a007e205a0c04f17c8c28fb5c8015a3c.configuration_phi3.Phi3Config'> for this kind of AutoModel: AutoModelForSequenceClassification.
Model type should be one of AlbertConfig, BartConfig, BertConfig, BigBirdConfig, BigBirdPegasusConfig, BioGptConfig, BloomConfig, CamembertConfig, CanineConfig, LlamaConfig, ConvBertConfig, CTRLConfig, Data2VecTextConfig, DebertaConfig, DebertaV2Config, DiffLlamaConfig, DistilBertConfig, ElectraConfig, ErnieConfig, ErnieMConfig, EsmConfig, FalconConfig, FlaubertConfig, FNetConfig, FunnelConfig, GemmaConfig, Gemma2Config, GlmConfig, GPT2Config, GPT2Config, GPTBigCodeConfig, GPTNeoConfig, GPTNeoXConfig, GPTJConfig, HeliumConfig, IBertConfig, JambaConfig, JetMoeConfig, LayoutLMConfig, LayoutLMv2Config, LayoutLMv3Config, LEDConfig, LiltConfig, LlamaConfig, LongformerConfig, LukeConfig, MarkupLMConfig, MBartConfig, MegaConfig, MegatronBertConfig, MistralConfig, MixtralConfig, MobileBertConfig, ModernBertConfig, MPNetConfig, MptConfig, MraConfig, MT5Config, MvpConfig, NemotronConfig, NezhaConfig, NystromformerConfig, OpenLlamaConfig, OpenAIGPTConfig, OPTConfig, PerceiverConfig, PersimmonConfig, PhiConfig, Phi3Config, PhimoeConfig, PLBartConfig, QDQBertConfig, Qwen2Config, Qwen2MoeConfig, ReformerConfig, RemBertConfig, RobertaConfig, RobertaPreLayerNormConfig, RoCBertConfig, RoFormerConfig, SqueezeBertConfig, StableLmConfig, Starcoder2Config, T5Config, TapasConfig, TransfoXLConfig, UMT5Config, XLMConfig, XLMRobertaConfig, XLMRobertaXLConfig, XLNetConfig, XmodConfig, YosoConfig, ZambaConfig, Zamba2Config.

In [None]:
from frugalai.utils.monitoring import print_memory_status_across_devices

print_memory_status_across_devices()

In [None]:
model

In [None]:
for name, param in model.named_parameters():
    print(f"Layer: {name}, Device: {param.device}")

In [None]:
model.config

In [None]:
# Print model architecture
for name, module in model.named_modules():
    print(name)

**LoRa Config**

In [None]:
from peft import prepare_model_for_kbit_training

model = prepare_model_for_kbit_training(model)

In [None]:
def find_all_linear_names(model):
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, bnb.nn.Linear4bit):
            names = name.split(".")
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])

    if "lm_head" in lora_module_names:  # needed for 16-bit
        lora_module_names.remove("lm_head")
    return list(lora_module_names)

find_all_linear_names(model)

In [None]:
from peft import LoraConfig, get_peft_model

modules = find_all_linear_names(model) 
    # target_modules=[
    #    "self_attn.qkv_proj",
    #    "self_attn.o_proj",
    #    "mlp.gate_up_proj",
    #    "mlp.down_proj"
    #],

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=modules,
    task_type="SEQ_CLS"
)

# model.gradient_checkpointing_enable()

model = get_peft_model(model, lora_config)
print(type(model))
model.print_trainable_parameters()

In [None]:
lora_config.to_dict()

**Metrics**

In [None]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    return {
        "accuracy": accuracy_score(labels, predictions),
        "f1": f1_score(labels, predictions, average="weighted")
    }

**Training arguments**

In [None]:
import transformers
from transformers import Trainer, TrainingArguments

def get_training_args(output_dir="./results"):
    return TrainingArguments(
        max_steps=500,  # Adjust based on your dataset
        warmup_steps=50,
        eval_steps=50,
        save_steps=100,
        load_best_model_at_end=True,
        metric_for_best_model="f1",
        push_to_hub=False,
    )

    
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,
    eval_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=3,
    learning_rate=2e-4,
    num_train_epochs=5,
    logging_dir="./logs",
    logging_steps=10,
    logging_strategy="steps",
    save_total_limit=2,
    push_to_hub=False,
    report_to="none",
    fp16=True,                         # Use mixed precision
    optim="adamw_torch",
    label_names=['label'],
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    weight_decay=0.01,
    report_to="none",                  # Disable reporting to wandb, etc.
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_ds['train'],
    eval_dataset=tokenized_ds['test'],
    compute_metrics=compute_metrics
    packing=True,                      # Enable packing for efficiency
)


In [None]:
#estimated_time_per_step = 0.5
#num_training_steps = (len(tokenized_ds['train']) // 4) * 5
#total_training_time = estimated_time_per_step * num_training_steps 

#print(f"Estimated training time: {total_training_time / 60:.2f} minutes")

In [None]:
from frugalai.utils.monitoring import estimate_ft_memory_requirements

memory_estimates = estimate_ft_memory_requirements(
    model=model,
    tokenizer=tokenizer,
    training_args=training_args
)

In [None]:
trainer.label_names

**Do a small test run to check if it's ok**

In [None]:
trainer.train()

In [None]:
trainer.state.log_history

In [None]:
# Evaluate model on validation set
eval_results = trainer.evaluate()

# Print validation accuracy
print(f"Validation Accuracy: {eval_results['eval_accuracy']:.4f}")