### Imports

In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from peft import LoraConfig, get_peft_model


  from .autonotebook import tqdm as notebook_tqdm


### Load the Dataset

In [2]:
# Load Alpaca dataset from Hugging Face or local JSON
dataset = load_dataset("tatsu-lab/alpaca")

# Preview the dataset structure
print(dataset)


DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'text'],
        num_rows: 52002
    })
})


### Format and Tokenize the Dataset

In [3]:

# Combine instruction, input, and output into a single text field
def format_example(example):
    instruction = example['instruction']
    input_text = example['input']
    output = example['output']
    if input_text:
        return f"Instruction: {instruction}\nInput: {input_text}\nOutput: {output}"
    else:
        return f"Instruction: {instruction}\nOutput: {output}"

formatted_dataset = dataset.map(lambda x: {"text": format_example(x)})

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M")

# Set the padding token to the <eos_token> (end of sentence token)
tokenizer.pad_token = tokenizer.eos_token

# If you want to use a new token for padding
# tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Tokenize dataset
def tokenize_function(examples):
    tokenized = tokenizer(examples["text"], padding="max_length", truncation=True, max_length=512)
    tokenized["labels"] = tokenized["input_ids"].copy()  # Copy input_ids to labels
    return tokenized


tokenized_dataset = formatted_dataset.map(tokenize_function, batched=True)


In [4]:
print(tokenized_dataset)
print(tokenized_dataset["train"][0])


DatasetDict({
    train: Dataset({
        features: ['instruction', 'input', 'output', 'text', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 52002
    })
})
{'instruction': 'Give three tips for staying healthy.', 'input': '', 'output': '1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.', 'text': 'Instruction: Give three tips for staying healthy.\nOutput: 1.Eat a balanced diet and make sure to include plenty of fruits and vegetables. \n2. Exercise regularly to keep your body active and strong. \n3. Get enough sleep and maintain a consistent sleep schedule.', 'input_ids': [25464, 42, 13843, 1296, 5608, 327, 9286, 2458, 30, 198, 17597, 42, 216, 33, 30, 36693, 253, 8609, 2714, 284, 919, 2090, 288, 1453, 7568, 282, 5574, 284, 5136, 30, 3717, 34, 30, 15382, 5578, 288, 1446, 469, 1248, 3212, 284, 1837, 30, 3717, 35, 30, 5399,

### Fine tuning setup

In [5]:
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model
from transformers import EarlyStoppingCallback

# Load the pretrained model
model = AutoModelForCausalLM.from_pretrained("HuggingFaceTB/SmolLM2-135M")

# Configure LoRA
lora_config = LoraConfig(
    r=8,  # Rank for low-rank adaptation
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],  # Target query and value projections
    task_type="CAUSAL_LM"  # This is a causal language model
)

# Apply LoRA to the model
model = get_peft_model(model, lora_config)

# Print trainable parameters
model.print_trainable_parameters()


trainable params: 460,800 || all params: 134,975,808 || trainable%: 0.3414


In [7]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="steps",  # Evaluate periodically
    eval_steps=2000,              # Evaluate every 500 steps
    save_steps=2000,              # Save model every 500 steps
    logging_steps=100,          # Log progress every 1000 steps
    load_best_model_at_end=True,  # Load the best model after training
    metric_for_best_model="eval_loss",  # Use evaluation loss as the metric
    greater_is_better=False,     # Lower eval_loss is better
    learning_rate=5e-4,
    per_device_train_batch_size=4,
    num_train_epochs=10,
    save_total_limit=2,
    fp16=False,
)

### Train

In [8]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["train"],  # Replace with validation set if available
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

trainer.train()


  0%|          | 100/130010 [01:20<28:44:23,  1.26it/s]

{'loss': 1.3741, 'grad_norm': 0.15918688476085663, 'learning_rate': 0.0004996154141989077, 'epoch': 0.01}


  0%|          | 200/130010 [02:39<29:56:33,  1.20it/s]

{'loss': 0.3093, 'grad_norm': 0.11364385485649109, 'learning_rate': 0.0004992308283978155, 'epoch': 0.02}


  0%|          | 300/130010 [03:57<29:33:13,  1.22it/s]

{'loss': 0.3112, 'grad_norm': 0.15798549354076385, 'learning_rate': 0.0004988462425967234, 'epoch': 0.02}


  0%|          | 400/130010 [05:17<27:23:04,  1.31it/s]

{'loss': 0.3043, 'grad_norm': 0.15722036361694336, 'learning_rate': 0.0004984616567956311, 'epoch': 0.03}


  0%|          | 500/130010 [06:34<27:21:55,  1.31it/s]

{'loss': 0.2846, 'grad_norm': 0.1746341735124588, 'learning_rate': 0.0004980770709945389, 'epoch': 0.04}


  0%|          | 600/130010 [07:50<27:36:35,  1.30it/s]

{'loss': 0.2702, 'grad_norm': 0.12423109263181686, 'learning_rate': 0.0004976924851934467, 'epoch': 0.05}


  1%|          | 700/130010 [09:07<27:39:38,  1.30it/s]

{'loss': 0.2889, 'grad_norm': 0.13373440504074097, 'learning_rate': 0.0004973078993923544, 'epoch': 0.05}


  1%|          | 800/130010 [10:24<27:28:58,  1.31it/s]

{'loss': 0.298, 'grad_norm': 0.10913708806037903, 'learning_rate': 0.0004969233135912621, 'epoch': 0.06}


  1%|          | 900/130010 [11:44<27:29:53,  1.30it/s]

{'loss': 0.2927, 'grad_norm': 0.12487331032752991, 'learning_rate': 0.00049653872779017, 'epoch': 0.07}


  1%|          | 1000/130010 [13:04<28:07:57,  1.27it/s]

{'loss': 0.2865, 'grad_norm': 0.10351908206939697, 'learning_rate': 0.0004961541419890778, 'epoch': 0.08}


  1%|          | 1100/130010 [14:23<28:16:12,  1.27it/s]

{'loss': 0.2842, 'grad_norm': 0.11968785524368286, 'learning_rate': 0.0004957695561879855, 'epoch': 0.08}


  1%|          | 1200/130010 [15:42<27:18:52,  1.31it/s]

{'loss': 0.2805, 'grad_norm': 0.09975295513868332, 'learning_rate': 0.0004953849703868933, 'epoch': 0.09}


  1%|          | 1300/130010 [17:02<29:49:07,  1.20it/s]

{'loss': 0.2907, 'grad_norm': 0.10131990164518356, 'learning_rate': 0.0004950003845858011, 'epoch': 0.1}


  1%|          | 1400/130010 [18:23<27:58:14,  1.28it/s]

{'loss': 0.289, 'grad_norm': 0.1177963986992836, 'learning_rate': 0.0004946157987847088, 'epoch': 0.11}


  1%|          | 1409/130010 [18:30<28:51:05,  1.24it/s]

KeyboardInterrupt: 

In [None]:
trainer.train()
