In [None]:
#libraries
!pip -qq install datasets peft accelerate bitsandbytes transformers wandb scikit-learn

In [None]:
#imports
import torch
import transformers
from datasets import load_dataset
from typing import Optional
from transformers import (
    AutoTokenizer,
    AutoConfig,
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    PreTrainedModel,
)
from peft import (
    prepare_model_for_kbit_training,
    LoraConfig,
    get_peft_model,
)
from sklearn.model_selection import train_test_split
import sklearn
from huggingface_hub import login

In [None]:
# Hugging Face Hub login.
# Never hardcode the access token in the notebook: it would leak as soon as the
# file is shared or committed. Read it from the HF_TOKEN environment variable
# instead; when the variable is unset, `token=None` makes `login()` fall back
# to its interactive prompt.
import os

login(token=os.environ.get("HF_TOKEN"))

In [None]:
# Training hyperparameters and paths used throughout the notebook.
MICRO_BATCH_SIZE = 8  # passed to the Trainer as per_device_train_batch_size
BATCH_SIZE = 64  # target effective batch size
# Number of micro-batches accumulated so that MICRO_BATCH_SIZE * steps == BATCH_SIZE.
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
EPOCHS = 5
LEARNING_RATE = 3e-4
MAX_SEQ_LEN = 512  # length (in tokens) of every tokenized training example
LORA_R = 4  # LoRA rank
LORA_ALPHA = 8  # LoRA alpha (scaling)
LORA_DROPOUT = 0.1

OUTPUT_DIR = "LlamaSin-3.2-1b"  # checkpoints and the final model are written here
MODEL_NAME = "meta-llama/Llama-3.2-1B"  # Hub id of the base model and tokenizer

In [None]:
# Load the base model's configuration from the Hub; it is passed explicitly
# to `from_pretrained` when the model weights are loaded.
config = AutoConfig.from_pretrained(MODEL_NAME)

In [None]:
# Quantization switches consumed when the BitsAndBytesConfig is built.
# Setting both to False disables quantization entirely.
load_in_4bit=False
load_in_8bit=True

In [None]:
# Build the bitsandbytes quantization settings.
# Stays None (no quantization) unless one of the load_in_* switches is enabled.
quantization_config: Optional[BitsAndBytesConfig] = None
if load_in_4bit or load_in_8bit:
  quantization_config = BitsAndBytesConfig(
    load_in_4bit=load_in_4bit,
    load_in_8bit=load_in_8bit,
    # int8-specific options
    llm_int8_threshold=6.0,
    llm_int8_has_fp16_weight=False,
    # 4-bit-specific options
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4',
  )

In [None]:
# Load the base causal-LM weights from the Hub.
# - device_map="auto" delegates device placement to the library.
# - quantization_config is None when quantization is disabled.
# - torch_dtype requests float16 weights.
model: PreTrainedModel = AutoModelForCausalLM.from_pretrained(
  MODEL_NAME,
  config=config,
  device_map="auto",
  quantization_config=quantization_config,
  torch_dtype=torch.float16,
)

In [None]:
# Load the tokenizer that matches the base model.
# add_eos_token=True asks for an EOS token to be appended to each encoding.
# NOTE(review): confirm the tokenizer class loaded for this checkpoint
# actually honors `add_eos_token`.
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    add_eos_token=True,
)

In [None]:
# Choose a padding token.
# In the Llama 3 vocabulary id 0 is a regular text token ("!"), not a padding
# token. Padding with it is harmful: DataCollatorForLanguageModeling replaces
# every label equal to pad_token_id with -100, so genuine "!" tokens would be
# excluded from the loss. Use the dedicated reserved pad token when the
# vocabulary provides it; otherwise fall back to EOS (in which case EOS labels
# are masked by the collator as well).
_RESERVED_PAD_TOKEN = "<|finetune_right_pad_id|>"
if _RESERVED_PAD_TOKEN in tokenizer.get_vocab():
    tokenizer.pad_token = _RESERVED_PAD_TOKEN
else:
    tokenizer.pad_token = tokenizer.eos_token

In [None]:
# Prepare the (quantized) base model for training with PEFT's helper.
# NOTE: this rebinds `model`, so the cell should be run exactly once per
# freshly loaded model.
model = prepare_model_for_kbit_training(model)

In [None]:
# LoRA adapter configuration: rank, scaling and dropout come from the
# hyperparameter cell; adapters are attached to the modules listed in
# `target_modules`, and bias terms are not trained (bias="none").
lora_config = LoraConfig(
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"], # Added more LoRA target modules common for Llama models
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)

In [None]:
# Wrap the base model with the LoRA adapters described by `lora_config`,
# then report how many parameters are trainable.
# NOTE: rebinds `model`; run once per freshly prepared model.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

In [None]:
# Load the Sinhala ("si") Wikipedia dump from 2023-11-01 and keep its
# "train" split. Despite the name, `df` is a `datasets.Dataset`.
df = load_dataset("wikimedia/wikipedia", "20231101.si", split="train")

In [None]:
# Hold out 10% of the articles for validation. The fixed seed keeps the
# split reproducible across runs.
split_datasets = df.train_test_split(test_size=0.1, seed=42)
df_train = split_datasets["train"]
df_val = split_datasets["test"]  # the "test" half serves as the validation set

In [None]:
def tokenize(prompt):
  """Encode `prompt` into a fixed-length training example.

  The text is truncated/padded to MAX_SEQ_LEN + 1 tokens and the final
  position is then dropped, so both returned lists contain exactly
  MAX_SEQ_LEN entries.

  Args:
    prompt: text to encode with the module-level `tokenizer`.

  Returns:
    A dict with the keys "input_ids" and "attention_mask".
  """
  encoded = tokenizer(
      prompt,
      max_length=MAX_SEQ_LEN + 1,
      truncation=True,
      padding="max_length",
  )
  # Trim the trailing position from every field we keep.
  return {key: encoded[key][:-1] for key in ("input_ids", "attention_mask")}

In [None]:
# Tokenize the article body of every example.
# Each record is a dict with several fields (id, url, title, text); calling
# str() on the whole record would feed the Python dict repr -- keys, URL and
# escaped newlines included -- to the model, so only the "text" field is used.
# The raw columns are dropped afterwards: training needs only the token ids
# and attention mask.
data_train = df_train.map(
    lambda example: tokenize(example["text"]),
    remove_columns=df_train.column_names,
)
data_test = df_val.map(
    lambda example: tokenize(example["text"]),
    remove_columns=df_val.column_names,
)

In [None]:
# Fine-tune the LoRA-wrapped model.

# Optimisation, logging and checkpointing settings.
training_args = transformers.TrainingArguments(
    output_dir=OUTPUT_DIR,
    report_to="wandb",
    # optimisation
    num_train_epochs=EPOCHS,
    learning_rate=LEARNING_RATE,
    warmup_steps=100,
    per_device_train_batch_size=MICRO_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    fp16=True,
    # logging / evaluation / checkpointing all share a 300-step cadence
    logging_steps=300,
    eval_strategy="steps",
    eval_steps=300,
    save_strategy="steps",
    save_steps=300,
    save_total_limit=2,
    # reload the checkpoint with the best eval_loss once training finishes
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
)

# Causal-LM collator (mlm=False): labels are derived from the input ids.
causal_lm_collator = transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)

trainer = transformers.Trainer(
    model=model,
    args=training_args,
    train_dataset=data_train,
    eval_dataset=data_test,
    data_collator=causal_lm_collator,
)

# Turn off the generation KV cache for training.
model.config.use_cache = False
trainer.train()

In [None]:
# Persist the fine-tuned model to OUTPUT_DIR, together with the tokenizer
# used for training, so the directory can be reloaded on its own with a
# matching tokenizer configuration.
model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)