In [2]:
from frugalai.utils.hardware import print_mac_specs

# Print the Mac hardware report (system, CPU, memory, storage, GPU/ML sections).
print_mac_specs()


                     MAC SPECIFICATIONS                     

                     System Information                     
------------------------------------------------------------
Model: Mac15,12
OS: macOS 15.3.1
Architecture: arm64 (Apple Silicon)

                      CPU Information                       
------------------------------------------------------------
Processor: Apple M3
Physical cores: 8
Logical cores: 8

                     Memory Information                     
------------------------------------------------------------
Total RAM: 24.0 GB
Available RAM: 9.97 GB

                    Storage Information                     
------------------------------------------------------------
Total Disk: 460.43 GB
Free Disk: 303.38 GB

                     GPU/ML Information                     
------------------------------------------------------------
PyTorch Version: 2.6.0
MPS Available: Yes

                 FINE-TUNING RECOMMENDATION                 

Your Mac 

In [1]:
from frugalai.utils.hardware import print_system_specs

# Print the generic system report (system, CPU, memory, storage sections).
print_system_specs()


                   SYSTEM SPECIFICATIONS                    

                     System Information                     
------------------------------------------------------------
Hostname: AMAFHP9MXRXX1
OS: Darwin 24.3.0 Darwin Kernel Version 24.3.0: Thu Jan  2 20:24:23 PST 2025; root:xnu-11215.81.4~3/RELEASE_ARM64_T8122
Model: Mac15,12
Architecture: arm64 (Apple Silicon)
Environment: Physical Machine

                      CPU Information                       
------------------------------------------------------------
Processor: Apple M3
Physical cores: 8
Logical cores: 8
Frequency: Unknown

                     Memory Information                     
------------------------------------------------------------
Total RAM: 24.00 GB
Available RAM: 10.01 GB
Used RAM: 12.14 GB (58.3%)

                    Storage Information                     
------------------------------------------------------------
Total Disk: 2.25 TB
Free Disk: 1.48 TB

Mounted Partitions:
  / (apfs): 460

In [1]:
# Load IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes (edits to frugalai helpers are picked up).
%load_ext autoreload
%autoreload 2

### Fine-tuning LLM text-classification with transformers chattemplate

In [2]:
from frugalai.utils.efficiency_tracker import FunctionTracker

# Shared tracker; its `track` decorator wraps the loader functions defined below.
tracker = FunctionTracker()

##### **Load dataset**

In [3]:
# Class name -> integer id. The ids follow the order of the list below, which
# matches the numeric prefix of every class name.
label2id = {
    name: index
    for index, name in enumerate(
        [
            "0_not_relevant",
            "1_not_happening",
            "2_not_human",
            "3_not_bad",
            "4_solutions_harmful_unnecessary",
            "5_science_unreliable",
            "6_proponents_biased",
            "7_fossil_fuels_needed",
        ]
    )
}

# Reverse lookup: integer id -> class name.
id2label = {index: name for name, index in label2id.items()}

In [6]:
from datasets import load_dataset

@tracker.track
def load_frugalai_dataset():
    """Load the FrugalAI text dataset with integer-encoded labels.

    Only the ``quote`` and ``label`` columns are kept, and every string label
    is replaced by its id from the module-level ``label2id`` mapping.

    Returns:
        The object returned by ``load_dataset`` after column selection and
        label encoding.
    """

    def encode_label(example):
        # Look up the integer id of this example's string label.
        return {"label": label2id[example["label"]]}

    raw = load_dataset("QuotaClimat/frugalaichallenge-text-train")
    trimmed = raw.select_columns(['quote', 'label'])
    return trimmed.map(encode_label, batched=False)

# Load the dataset once, then show its container type and per-split shape.
ds = load_frugalai_dataset()
print(type(ds))
print(ds.shape)
print()


⏳ FunctionTimer: load_frugalai_dataset
| time            00:00:07.8952
| emissions       0.000000 CO2eq
| energy consumed 0.000005 kWh

<class 'datasets.dataset_dict.DatasetDict'>
{'train': (4872, 2), 'test': (1219, 2)}



##### **Sample a balanced subset**

In [7]:
from frugalai.utils.analytics import print_distribution

# Print the per-label counts and percentages for each split of `ds`.
print_distribution(ds)


🔹 Label distribution in 'train' split (dataset.DatasetDict):
Category                                Count     Percentage
------------------------------------------------------------
0                                       1311      26.91%
1                                       587       12.05%
2                                       565       11.6%
3                                       289       5.93%
4                                       614       12.6%
5                                       641       13.16%
6                                       643       13.2%
7                                       222       4.56%
------------------------------------------------------------
Total                                   4872      

🔹 Label distribution in 'test' split (dataset.DatasetDict):
Category                                Count     Percentage
------------------------------------------------------------
0                                       307       25.18%
1            

##### **Load tokenizer & model**

In [8]:
# Candidate checkpoints, keyed by a short alias.
MODEL_NAMES = dict(
    mistral="mistralai/Mistral-7B-Instruct-v0.1",
    phi3="microsoft/Phi-3-mini-4k-instruct",
    unsloth="unsloth/Phi-3.5-mini-instruct",
    qwen="Qwen/Qwen2.5-0.5B",
)

# Alias of the checkpoint used by the rest of the notebook.
selected_model = "unsloth"
model_name = MODEL_NAMES[selected_model]
model_name

'unsloth/Phi-3.5-mini-instruct'

In [10]:
# Sanity check: per-split shape and column names of the loaded dataset.
ds.shape, ds.column_names

({'train': (4872, 2), 'test': (1219, 2)},
 {'train': ['quote', 'label'], 'test': ['quote', 'label']})

**Tokenizer**

- try with and without padding

In [11]:
# Token budget per quote; used as the tokenizer's model_max_length and as the
# truncation length in preprocess_function.
max_tokens = 128

In [12]:
from transformers import AutoTokenizer

@tracker.track
def load_tokenizer(model_name):
    """Return the tokenizer for ``model_name``, capped at ``max_tokens`` tokens.

    When the checkpoint defines no padding token, the end-of-sequence token is
    reused as the padding token.
    """
    tok = AutoTokenizer.from_pretrained(model_name, model_max_length=max_tokens)
    missing_pad = tok.pad_token is None
    if missing_pad:
        tok.pad_token = tok.eos_token
    return tok
    

def preprocess_function(element):
    """Tokenize the ``quote`` field of ``element``, truncating to ``max_tokens``.

    Uses the module-level ``tokenizer``. No padding is requested here
    (``padding="max_length"`` is left disabled).
    """
    quotes = element["quote"]
    encoded = tokenizer(quotes, truncation=True, max_length=max_tokens)
    return encoded

# Build the tokenizer for the selected checkpoint, then tokenize every split in
# batches; the raw "quote" column is dropped so only the label and the
# tokenizer outputs remain.
tokenizer = load_tokenizer(model_name)
tokenized_ds = ds.map(preprocess_function, batched=True, remove_columns=["quote"])
tokenized_ds

tokenizer_config.json:   0%|          | 0.00/3.37k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/571 [00:00<?, ?B/s]


⏳ FunctionTimer: load_tokenizer
| time            00:00:08.4972
| emissions       0.000000 CO2eq
| energy consumed 0.000009 kWh



Map:   0%|          | 0/4872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1219 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['label', 'input_ids', 'attention_mask'],
        num_rows: 4872
    })
    test: Dataset({
        features: ['label', 'input_ids', 'attention_mask'],
        num_rows: 1219
    })
})

In [13]:
# Token count of every tokenized training example, then the max and the mean.
train_lengths = list(map(len, tokenized_ds["train"]["input_ids"]))
print('Actual max lenght in train set :', max(train_lengths), 'tokens')
print("Average sequence length: {:.1f}".format(sum(train_lengths) / len(train_lengths)))

Actual max lenght in train set : 128 tokens
Average sequence length: 62.2


In [14]:
# Confirm the tokenizer picked up the max_tokens cap passed at load time.
tokenizer.model_max_length

128

**Model**

In [15]:
from frugalai.utils.monitoring import print_memory_status_across_devices

# Memory baseline before any model is loaded.
print_memory_status_across_devices()


                    MEMORY USAGE REPORT                     

-------------------- CPU MEMORY --------------------
Total System Memory:         24.00 GB
Available System Memory:     10.07 GB
Used System Memory:          12.03 GB (58.0%)
Current Process Memory:      0.50 GB

-------------------- MPS MEMORY --------------------
Tensor Allocated Memory:     0.00 GB
Overhead (PyTorch Internal): 0.00 GB
Driver Allocated Memory:     0.00 GB
Recommended Maximum Memory:  16.00 GB
Available in Memory Pool:    16.00 GB

-------------------- TENSOR COUNTS --------------------
CPU Tensors:                 0
MPS Tensors:                 0




  return isinstance(obj, torch.Tensor)


- try with mps
- if not, cpu

In [None]:
from unsloth import FastLanguageModel
import torch
# Reuse the notebook-wide token budget so the model and the tokenized dataset
# agree on the maximum sequence length.
max_seq_length = max_tokens
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use 4bit quantization to reduce memory usage. Can be False.

# 4bit pre quantized models we support for 4x faster downloading + no OOMs.
# (Reference list only; it is not read by the code below.)
fourbit_models = [
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",      # Llama-3.1 15 trillion tokens model 2x faster!
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",    # We also uploaded 4bit for 405b!
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit", # New Mistral 12b 2x faster!
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/mistral-7b-v0.3-bnb-4bit",        # Mistral v3 2x faster!
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/Phi-3.5-mini-instruct",           # Phi-3.5 2x faster!
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",            # Gemma 2x faster!
] # More models at https://huggingface.co/unsloth

# Load the checkpoint selected earlier (`model_name`) rather than a hard-coded
# one: this call rebinds `tokenizer`, and the dataset was tokenized with the
# tokenizer of `model_name`, so both must come from the same checkpoint.
# NOTE(review): the hardware report above shows an Apple-Silicon/MPS machine —
# confirm unsloth supports this device before running.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

In [None]:
# Attach rank-16 LoRA adapters to the listed projection modules.
# NOTE(review): `model` is rebound in place, so re-running this cell would wrap
# an already wrapped model — confirm before re-executing.
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

In [None]:
from transformers import BitsAndBytesConfig
import torch

# 4-bit NF4 quantization with double quantization; compute runs in float16.
# NOTE(review): an earlier note here mentioned bfloat16 and limited MPS support
# for quantization — confirm this configuration works on the target device.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # Load model in 4-bit precision
    bnb_4bit_use_double_quant=True,        # Use double quantization
    bnb_4bit_quant_type="nf4",             # Quantization type
    bnb_4bit_compute_dtype=torch.float16,  # Compute datatype
)
bnb_config

In [None]:
from transformers import AutoModelForSequenceClassification
from frugalai.utils.hardware import get_device

@tracker.track
def load_model(model_name):
    """Load ``model_name`` as a quantized sequence-classification model.

    ``AutoModelForSequenceClassification`` adds a randomly initialised
    classification head (``score.weight``) on top of the pretrained backbone;
    that head is trained together with the LoRA parameters during fine-tuning.

    Args:
        model_name: Hub id (or local path) of the checkpoint to load.

    Returns:
        The model loaded with the module-level ``bnb_config`` as quantization
        config and ``device_map="auto"``.
    """
    # The former device/dtype selection was dead code (its result was never
    # passed to from_pretrained), so it has been removed.
    return AutoModelForSequenceClassification.from_pretrained(
        model_name,
        label2id=label2id,
        id2label=id2label,
        # Derive the head size from the label mapping so the two cannot drift.
        num_labels=len(label2id),
        quantization_config=bnb_config,
        device_map="auto",
        # NOTE(review): this runs modelling code shipped with the checkpoint;
        # only use it with repositories you trust.
        trust_remote_code=True,
    )

        # trust_remote_code=True,
        
        # Load a quantized model
        #load_in_8bit=True,  # Enable 8-bit quantization, allow auto device allocation in that case to help manage
        
        # Enable model sharding to optimize memory allocation
        # device_map=auto, # for larger models : model sharding : auto distribution of model layers across available hardware, 
        # splits a large model across GPU and CPU. Handled by Accelerate library
        
    #.to(device) # for smaller models : moves the entire model to the specified device / all or nothing

# Load the classification model for the selected checkpoint, then report its
# device, dtype and a few other attributes exposed by the model object.
model = load_model(model_name)

print('model.device :', model.device)
print('precision model.dtype :', model.dtype)
print('model.framework :',  model.framework)
print('model.is_gradient_checkpointing :',  model.is_gradient_checkpointing)
print('model.is_parallelizable :',  model.is_parallelizable)


In [None]:
from frugalai.utils.monitoring import print_memory_status_across_devices

# Memory report after loading the model, to compare with the earlier baseline.
print_memory_status_across_devices()

In [None]:
# Display the model's repr (module tree).
model

In [None]:
# Show the device on which each named parameter was placed.
for name, param in model.named_parameters():
    print(f"Layer: {name}, Device: {param.device}")

In [None]:
# Display the model configuration object.
model.config

In [None]:
# List the qualified name of every submodule; useful for choosing LoRA targets.
for name, module in model.named_modules():
    print(name)

**LoRA config**

In [None]:
from peft import prepare_model_for_kbit_training

# Prepare the quantized model for k-bit training via PEFT's helper.
# NOTE(review): `model` is rebound in place; re-running this cell feeds the
# already prepared model back in — confirm that is harmless.
model = prepare_model_for_kbit_training(model)

In [None]:
def find_all_linear_names(model, linear_cls=None):
    """Collect the leaf names of the model's quantized linear layers.

    Walks ``model.named_modules()`` and keeps the last dotted component of the
    name of every module that is an instance of ``linear_cls``. The result is
    meant to be used as LoRA ``target_modules``.

    Args:
        model: Object exposing ``named_modules()`` that yields
            ``(qualified_name, module)`` pairs.
        linear_cls: Class (or tuple of classes) identifying the layers to
            collect. Defaults to ``bitsandbytes.nn.Linear4bit``, imported
            lazily because no ``bnb`` name is bound at module level.

    Returns:
        Sorted list of unique leaf module names, with ``"lm_head"`` excluded.

    Raises:
        ImportError: If ``linear_cls`` is omitted and bitsandbytes is missing.
    """
    if linear_cls is None:
        import bitsandbytes as bnb

        linear_cls = bnb.nn.Linear4bit

    leaf_names = {
        name.rsplit(".", 1)[-1]
        for name, module in model.named_modules()
        if isinstance(module, linear_cls)
    }
    # Never adapt the output head (original note: "needed for 16-bit").
    leaf_names.discard("lm_head")
    # Sorted so the resulting configuration is reproducible across runs.
    return sorted(leaf_names)

# Preview the module names that will be passed to LoRA as target_modules.
find_all_linear_names(model)

In [None]:
from peft import LoraConfig, get_peft_model

# Target every quantized linear layer found in the model. The commented list
# below is a hand-written alternative that is currently not used.
modules = find_all_linear_names(model) 
    # target_modules=[
    #    "self_attn.qkv_proj",
    #    "self_attn.o_proj",
    #    "mlp.gate_up_proj",
    #    "mlp.down_proj"
    #],

# LoRA settings for sequence classification (task_type="SEQ_CLS").
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=modules,
    task_type="SEQ_CLS"
)

# model.gradient_checkpointing_enable()

# NOTE(review): `model` is rebound to the PEFT-wrapped model, so re-running
# this cell would wrap an already wrapped model — confirm before re-executing.
model = get_peft_model(model, lora_config)
print(type(model))
model.print_trainable_parameters()

In [None]:
# Display the full LoRA configuration as a dict.
lora_config.to_dict()

**Metrics**

In [None]:
def compute_metrics(eval_pred):
    """Compute accuracy and support-weighted F1 from evaluation output.

    Args:
        eval_pred: Pair ``(predictions, labels)``. ``predictions`` holds one
            row of class scores per example; if it is a tuple, its first
            element is used. ``labels`` holds the integer class ids.

    Returns:
        Dict with two floats: ``"accuracy"`` (share of exact matches) and
        ``"f1"`` (per-class F1 averaged with weights equal to the number of
        true examples of each class). Both are 0.0 for empty input.
    """
    # Local import: numpy is not imported anywhere else in the notebook.
    import numpy as np

    predictions, labels = eval_pred
    if isinstance(predictions, tuple):
        # Some models return extra outputs next to the logits.
        predictions = predictions[0]

    predicted = np.argmax(np.asarray(predictions), axis=1)
    labels = np.asarray(labels).reshape(-1)

    total = labels.size
    if total == 0:
        return {"accuracy": 0.0, "f1": 0.0}

    accuracy = float(np.mean(predicted == labels))

    # Classes without true examples have zero weight, so iterating over the
    # classes present in `labels` is sufficient.
    weighted_f1 = 0.0
    for cls in np.unique(labels):
        is_true = labels == cls
        is_pred = predicted == cls
        support = int(is_true.sum())
        true_positives = int(np.sum(is_true & is_pred))
        # F1 = 2*TP / (|true| + |predicted|); support >= 1 keeps this defined.
        class_f1 = 2.0 * true_positives / (support + int(is_pred.sum()))
        weighted_f1 += class_f1 * support

    return {
        "accuracy": accuracy,
        "f1": float(weighted_f1 / total),
    }

**Training arguments**

In [None]:
import transformers
from transformers import Trainer, TrainingArguments

def get_training_args(output_dir="./results"):
    """Build step-based ``TrainingArguments`` for a short fine-tuning run.

    Args:
        output_dir: Directory passed to ``TrainingArguments`` as its
            ``output_dir``.

    Returns:
        ``TrainingArguments`` for 500 steps with 50 warmup steps, evaluating
        every 50 steps, saving every 100 steps, and reloading the checkpoint
        with the best ``f1`` at the end.
    """
    return TrainingArguments(
        output_dir=output_dir,  # previously accepted but never forwarded
        max_steps=500,  # Adjust based on your dataset
        warmup_steps=50,
        # load_best_model_at_end requires matching eval and save strategies,
        # with save_steps a multiple of eval_steps.
        eval_strategy="steps",
        eval_steps=50,
        save_strategy="steps",
        save_steps=100,
        load_best_model_at_end=True,
        metric_for_best_model="f1",
        push_to_hub=False,
    )

    
# Epoch-based training configuration. Each keyword appears once: the previous
# version repeated `num_train_epochs` and `report_to`, which is a SyntaxError.
training_args = TrainingArguments(
    output_dir="./results",
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,     # effective batch size of 8 per device
    eval_strategy="epoch",
    save_strategy="epoch",
    num_train_epochs=5,
    learning_rate=2e-4,
    logging_dir="./logs",
    logging_steps=10,
    logging_strategy="steps",
    save_total_limit=2,
    push_to_hub=False,
    # NOTE(review): fp16 mixed precision may not be supported on every device
    # (e.g. CPU/MPS) — confirm on the target hardware.
    fp16=True,                         # Use mixed precision
    optim="adamw_torch",
    # The data collator exposes the dataset's `label` column under the key
    # `labels`, so that is the name the Trainer must look up for evaluation.
    label_names=["labels"],
    lr_scheduler_type="cosine",
    warmup_ratio=0.05,
    weight_decay=0.01,
    report_to="none",                  # Disable reporting to wandb, etc.
)

# `packing` is not a `Trainer` argument and was removed; the missing comma
# after `compute_metrics` is restored.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_ds['train'],
    eval_dataset=tokenized_ds['test'],
    compute_metrics=compute_metrics,
)


In [None]:
#estimated_time_per_step = 0.5
#num_training_steps = (len(tokenized_ds['train']) // 4) * 5
#total_training_time = estimated_time_per_step * num_training_steps 

#print(f"Estimated training time: {total_training_time / 60:.2f} minutes")

In [None]:
from frugalai.utils.monitoring import estimate_ft_memory_requirements

# Estimate fine-tuning memory needs from the model, tokenizer and training
# arguments, using the project's monitoring helper.
memory_estimates = estimate_ft_memory_requirements(
    model=model,
    tokenizer=tokenizer,
    training_args=training_args
)

In [None]:
# Check which input keys the Trainer treats as labels.
trainer.label_names

**Do a small test run to check if it's ok**

In [None]:
# Launch fine-tuning with the Trainer configured above.
trainer.train()

In [None]:
# Inspect the entries the Trainer logged during the run.
trainer.state.log_history

In [None]:
# Evaluate model on validation set
eval_results = trainer.evaluate()

# Print validation accuracy
print(f"Validation Accuracy: {eval_results['eval_accuracy']:.4f}")