In [104]:
from notebook.services.config import ConfigManager
cm = ConfigManager()
cm.get('notebook')

ModuleNotFoundError: No module named 'notebook.services'

In [97]:
print(torch.backends.mps.is_available())
print(torch.mps.current_allocated_memory())

True
8900671232


In [98]:
# Memory validation tool for Apple Silicon
def check_model_fit(model):
    param_count = sum(p.numel() for p in model.parameters())
    mem_required = (param_count * 2) / 1e9 * 1.2  # FP16 calculation
    print(f"Estimated VRAM needed: {mem_required:.1f}GB")
    return mem_required < 20  # 24GB - 4GB system reserve

if check_model_fit(model):
    print("✅ Model fits in M3 memory")
else:
    print("❌ Reduce model size or use quantization")

Estimated VRAM needed: 17.4GB
✅ Model fits in M3 memory


**Machine information**

In [99]:
import torch
import psutil

def get_machine_info():
    """Fetch machine hardware information."""
    info = {
        "device": "MPS" if torch.backends.mps.is_available() else "CPU",
        "total_ram_gb": round(psutil.virtual_memory().total / (1024 ** 3), 2),
        "gpu_available": torch.backends.mps.is_available(),
        "gpu_name": "Apple Silicon M3" if torch.backends.mps.is_available() else None,
    }
    return info

# Example usage
machine_info = get_machine_info()
print("Machine Info:", machine_info)


Machine Info: {'device': 'MPS', 'total_ram_gb': 24.0, 'gpu_available': True, 'gpu_name': 'Apple Silicon M3'}


**Training estimate**

In [100]:
def estimate_training_ram(model, batch_size, seq_length=2048, precision="fp16"):
    """
    Estimate RAM needed for training.
    
    Args:
        model: The model object (e.g., Hugging Face transformer).
        batch_size: Number of samples per batch.
        seq_length: Maximum sequence length (tokens).
        precision: Precision type ("fp32", "fp16", or "int8").
    
    Returns:
        Estimated RAM in GB.
    """
    # Number of parameters in the model
    param_count = sum(p.numel() for p in model.parameters())
    
    # Bytes per parameter based on precision
    precision_bytes = {"fp32": 4, "fp16": 2, "int8": 1}.get(precision.lower(), 2)
    
    # Model memory (parameters + optimizer states)
    model_memory_gb = (param_count * precision_bytes * 3) / 1e9  # Model + Gradients + Optimizer
    
    # Activation memory (batch size x sequence length x hidden size x bytes)
    hidden_size = model.config.hidden_size
    activation_memory_gb = (batch_size * seq_length * hidden_size * precision_bytes) / 1e9
    
    # Total memory with overhead
    total_memory_gb = (model_memory_gb + activation_memory_gb) * 1.2  # Add ~20% overhead
    
    return round(total_memory_gb, 2)

# Example usage
batch_size = 2
seq_length = 1024
training_ram = estimate_training_ram(model, batch_size, seq_length)
print(f"Estimated Training RAM: {training_ram} GB")


Estimated Training RAM: 52.16 GB


In [101]:
def estimate_inference_ram(model, seq_length=2048, precision="fp16"):
    """
    Estimate RAM needed for inference.
    
    Args:
        model: The model object (e.g., Hugging Face transformer).
        seq_length: Maximum sequence length (tokens).
        precision: Precision type ("fp32", "fp16", or "int8").
    
    Returns:
        Estimated RAM in GB.
    """
    # Number of parameters in the model
    param_count = sum(p.numel() for p in model.parameters())
    
    # Bytes per parameter based on precision
    precision_bytes = {"fp32": 4, "fp16": 2, "int8": 1}.get(precision.lower(), 2)
    
    # Model memory only (parameters)
    model_memory_gb = (param_count * precision_bytes) / 1e9
    
    # Activation memory (sequence length x hidden size x bytes)
    hidden_size = model.config.hidden_size
    activation_memory_gb = (seq_length * hidden_size * precision_bytes) / 1e9
    
    # Total memory with overhead
    total_memory_gb = (model_memory_gb + activation_memory_gb) * 1.2  # Add ~20% overhead
    
    return round(total_memory_gb, 2)

# Example usage
seq_length = 1024
inference_ram = estimate_inference_ram(model, seq_length)
print(f"Estimated Inference RAM: {inference_ram} GB")


Estimated Inference RAM: 17.39 GB


In [54]:
import os
import random
import torch
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments
from trl import SFTTrainer, SFTConfig

In [55]:
# Model Selection
MODEL_NAMES = {
    "mistral": "mistralai/Mistral-7B-Instruct-v0.1",
    "phi2": "microsoft/phi-2",
    "qwen": "Qwen/Qwen2.5-0.5B"
}

# Class Labels
CLASS_LABELS = [
    "0_not_relevant", "1_not_happening", "2_not_human", "3_not_bad",
    "4_solutions_harmful_unnecessary", "5_science_unreliable",
    "6_proponents_biased", "7_fossil_fuels_needed"
]

os.environ["TOKENIZERS_PARALLELISM"] = "false"  # Suppress multiprocessing warning

In [56]:
# Model Loading Function
def load_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto"
    ).eval()
    return tokenizer, model

In [60]:
# Prompt Template
def create_prompt(row):
    statement = row['quote']
    category = row['label'].split('_')[0]
    instruction = f"""[INST] <<SYS>>
Classify the following statement into one of these 8 categories:
Respond STRICTLY with only the corresponding number.
<</SYS>>

<categories>
0 - Not relevant: No climate-related claims or doesn't fit other categories
1 - Denial: Claims climate change is not happening
2 - Attribution denial: Claims human activity is not causing climate change
3 - Impact minimization: Claims climate change impacts are minimal or beneficial
4 - Solution opposition: Claims solutions to climate change are harmful
5 - Science skepticism: Challenges climate science validity or methods
6 - Actor criticism: Attacks credibility of climate scientists or activists
7 - Fossil fuel promotion: Asserts importance of fossil fuels
</categories>
<statement>{statement}</statement>
[/INST]
Category : """
    return {"text": "<s>" + instruction + "<category>" + category + "<category></s>"}

In [62]:
# Load Dataset
df = pd.read_parquet("hf://datasets/QuotaClimat/frugalaichallenge-text-train/train.parquet")

# Train-Test Split
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

# Sample for Baseline Training
N_SAMPLES = 50
df_sampled_train = train_df.sample(N_SAMPLES, random_state=42)
df_sampled_val = val_df.sample(N_SAMPLES, random_state=42)

In [63]:
# Format Dataset
train_data = df_sampled_train.apply(create_prompt, axis=1).tolist()
val_data = df_sampled_val.apply(create_prompt, axis=1).tolist()
train_data[0]

{'text': "<s>[INST] <<SYS>>\nClassify the following statement into one of these 8 categories:\nRespond STRICTLY with only the corresponding number.\n<</SYS>>\n\n<categories>\n0 - Not relevant: No climate-related claims or doesn't fit other categories\n1 - Denial: Claims climate change is not happening\n2 - Attribution denial: Claims human activity is not causing climate change\n3 - Impact minimization: Claims climate change impacts are minimal or beneficial\n4 - Solution opposition: Claims solutions to climate change are harmful\n5 - Science skepticism: Challenges climate science validity or methods\n6 - Actor criticism: Attacks credibility of climate scientists or activists\n7 - Fossil fuel promotion: Asserts importance of fossil fuels\n</categories>\n<statement>With the carbon tax to be terminated, attention therefore needs to be turned to repealing the renewable requirements.</statement>\n[/INST]\nCategory : <category>4<category></s>"}

In [90]:
# Select Model
selected_model = "mistral"
model_name = MODEL_NAMES[selected_model]

tokenizer, model = load_model(model_name)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.truncation_side = 'right'
tokenizer.padding_side = 'right'

tokenizer("this is the instruction", return_tensors="pt").to(model.device)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the disk.


{'input_ids': tensor([[    1,   456,   349,   272, 13126]], device='mps:0'), 'attention_mask': tensor([[1, 1, 1, 1, 1]], device='mps:0')}

In [92]:
# Tokenization
def tokenize_function(train_data):
    return tokenizer(train_data["text"], padding="max_length", truncation=True, max_length=2048, return_tensors="pt").to(model.device)

In [93]:
# Convert Data to HF Dataset Format
from datasets import Dataset

train_dataset = Dataset.from_list(train_data).map(tokenize_function, batched=True)
val_dataset = Dataset.from_list(val_data).map(tokenize_function, batched=True)

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

In [95]:
# Training Configuration
use_fp16 = torch.cuda.is_available()  
use_bf16 = torch.cuda.is_bf16_supported() and not torch.backends.mps.is_available()
"""
    eval_strategy="epoch",
    save_strategy="epoch",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=3,
    fp16=use_fp16,
    bf16=use_bf16,
    logging_dir="./logs",
    logging_steps=10
"""

training_args = TrainingArguments(
    output_dir="./models",
)
training_args

TrainingArguments(
_n_gpu=1,
accelerator_config={'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None, 'use_configured_state': False},
adafactor=False,
adam_beta1=0.9,
adam_beta2=0.999,
adam_epsilon=1e-08,
auto_find_batch_size=False,
average_tokens_across_devices=False,
batch_eval_metrics=False,
bf16=False,
bf16_full_eval=False,
data_seed=None,
dataloader_drop_last=False,
dataloader_num_workers=0,
dataloader_persistent_workers=False,
dataloader_pin_memory=True,
dataloader_prefetch_factor=None,
ddp_backend=None,
ddp_broadcast_buffers=None,
ddp_bucket_cap_mb=None,
ddp_find_unused_parameters=None,
ddp_timeout=1800,
debug=[],
deepspeed=None,
disable_tqdm=False,
dispatch_batches=None,
do_eval=False,
do_predict=False,
do_train=False,
eval_accumulation_steps=None,
eval_delay=0,
eval_do_concat_batches=True,
eval_on_start=False,
eval_steps=None,
eval_strategy=no,
eval_use_gather_object=F

In [96]:
# Trainer Setup
trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)
trainer

You shouldn't move a model that is dispatched using accelerate hooks.


RuntimeError: You can't move a model that has some modules offloaded to cpu or disk.

In [None]:
# Train Model
trainer.train()

In [None]:
# Save Model
trainer.save_model("./models/fine_tuned_model")

In [None]:
# Simple Evaluation
def compute_accuracy(predictions, labels):
    preds = np.argmax(predictions, axis=-1)
    return np.mean(preds == labels)

In [None]:
predictions = trainer.predict(val_dataset).predictions
labels = np.array(val_dataset["label"])
accuracy = compute_accuracy(predictions, labels)

print(f"Validation Accuracy: {accuracy:.4f}")