In [None]:
!pip install -q  torch peft bitsandbytes transformers trl accelerate sentencepiece cryptography wandb

In [None]:
import torch
from datasets import load_dataset, Dataset, DatasetDict
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline
)
from peft import LoraConfig
from trl import SFTTrainer
from cryptography.fernet import Fernet
from getpass import getpass
from huggingface_hub import notebook_login
import os
import wandb

In [None]:
# ---------------------------------------------------------------------------
# Run identity
# ---------------------------------------------------------------------------
# Dataset repo to load and the base checkpoint to fine-tune.
DATA_NAME = "Tom10117/simme"
BASE_MODEL_NAME = "meta-llama/Llama-3.1-8B"

# Project / run labels; together they form the refined model's name.
PROJECT_NAME = "messages"
RUN_NAME = "a100"
REFINED_MODEL_NAME = "Tom10117/" + PROJECT_NAME + "-" + RUN_NAME

# Forwarded to the trainer as `max_seq_length`.
MAX_SEQ_LENGTH = 200

# ---------------------------------------------------------------------------
# LoRA hyper-parameters
# ---------------------------------------------------------------------------
LORA_R = 32
LORA_ALPHA = 64
LORA_DROPOUT = 0.1
TARGET_MODULES = [
    "q_proj",
    "v_proj",
    "k_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# ---------------------------------------------------------------------------
# Optimisation hyper-parameters
# ---------------------------------------------------------------------------
BATCH_SIZE = 1
GRADIENT_ACCUMULATION_STEPS = 4
LEARNING_RATE = 2e-4
LR_SCHEDULER_TYPE = "cosine"
WEIGHT_DECAY = 0.001

# ---------------------------------------------------------------------------
# Cadence (in optimizer steps) for logging, checkpointing and evaluation
# ---------------------------------------------------------------------------
STEPS = 100  # used as `logging_steps`
SAVE_STEPS = 500
EVAL_STEPS = 1000

In [None]:
from huggingface_hub import login

In [None]:
# Prompt for the Weights & Biases API key without echoing it to the notebook,
# then authenticate with *that* key. (Previously an empty string was passed,
# so the value typed at the prompt was never used.)
# relogin=True requests a fresh login instead of reusing earlier credentials.
wandb_key = getpass("Enter Weights & Biases Key")
wandb.login(key=wandb_key, relogin=True)

In [None]:
# Configure the Weights & Biases integration through environment variables:
#   WANDB_PROJECT   - project under which this run is logged
#   WANDB_LOG_MODEL - "checkpoint": save trained model checkpoints to wandb
#   WANDB_WATCH     - "false": turn off watch so logging is faster
os.environ.update(
    {
        "WANDB_PROJECT": PROJECT_NAME,
        "WANDB_LOG_MODEL": "checkpoint",
        "WANDB_WATCH": "false",
    }
)

In [None]:
# Load the train and test splits of DATA_NAME, one request per split.
split_names = ("train", "test")
loaded_splits = {name: load_dataset(DATA_NAME, split=name) for name in split_names}

# Keep the per-split handles available under their own names as well.
train_dataset = loaded_splits["train"]
test_dataset = loaded_splits["test"]

# Bundle both splits so later cells can address them by key.
data = DatasetDict(loaded_splits)

# Sanity check: show the overall structure, then the record at index 100 of
# each split (assumes every split has more than 100 rows).
print(data)
for name in split_names:
    print(data[name][100])

In [None]:
# Lower-case aliases for the configured names; `refined_model` is the local
# save target used after training.
base_model_name, refined_model = BASE_MODEL_NAME, REFINED_MODEL_NAME

# 4-bit NF4 quantization with double quantization; compute runs in bfloat16.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Tokenizer: reuse the EOS token for padding and pad on the right.
llama_tokenizer = AutoTokenizer.from_pretrained(
    base_model_name,
    trust_remote_code=True,
)
llama_tokenizer.padding_side = "right"
llama_tokenizer.pad_token = llama_tokenizer.eos_token

# Base model, loaded quantized; device placement is left to device_map="auto".
base_model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    quantization_config=quant_config,
)

# Config tweaks applied before training.
# NOTE(review): use_cache is presumably disabled because the KV cache is only
# useful for generation; pretraining_tp=1 looks like a carry-over from Llama-2
# recipes. Confirm both are still wanted for this checkpoint.
base_model.config.pretraining_tp = 1
base_model.config.use_cache = False

In [None]:
# LoRA adapter settings for causal-LM fine-tuning; rank, scaling, dropout and
# the list of target modules all come from the hyper-parameter constants.
peft_parameters = LoraConfig(
    task_type="CAUSAL_LM",
    r=LORA_R,
    lora_alpha=LORA_ALPHA,
    lora_dropout=LORA_DROPOUT,
    target_modules=TARGET_MODULES,
    bias="none",
)

In [None]:
# Training arguments, grouped by concern. Values are unchanged; only the
# keyword order differs.
train_params = TrainingArguments(
    # --- run identity / reporting ---
    output_dir=REFINED_MODEL_NAME,
    run_name=RUN_NAME,
    report_to="wandb",
    logging_steps=STEPS,
    # --- duration ---
    num_train_epochs=1,
    max_steps=-1,  # presumably defers to num_train_epochs -- confirm
    # --- batching ---
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    group_by_length=True,
    # --- optimizer and schedule ---
    optim="paged_adamw_32bit",
    learning_rate=LEARNING_RATE,
    lr_scheduler_type=LR_SCHEDULER_TYPE,
    warmup_ratio=0.03,
    weight_decay=WEIGHT_DECAY,
    max_grad_norm=0.3,
    # --- precision (both mixed-precision modes are off) ---
    fp16=False,
    bf16=False,
    # --- evaluation ---
    eval_strategy="steps",
    eval_steps=EVAL_STEPS,
    # --- checkpointing ---
    save_steps=SAVE_STEPS,
    save_total_limit=10,  # to avoid running out of disk space!
    # --- Hugging Face Hub ---
    push_to_hub=True,
    hub_model_id=REFINED_MODEL_NAME,
    hub_strategy="end",
    hub_private_repo=True,
)

In [None]:
# Supervised fine-tuning trainer: wires together the quantized base model, the
# LoRA config, both dataset splits and the training arguments.
# NOTE(review): `dataset_text_field`, `tokenizer` and `max_seq_length` are
# passed directly to the trainer here; with an unpinned TRL install, confirm
# the installed release still accepts them on SFTTrainer.
fine_tuning = SFTTrainer(
    model=base_model,
    args=train_params,
    peft_config=peft_parameters,
    tokenizer=llama_tokenizer,
    train_dataset=data["train"],
    eval_dataset=data["test"],
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
)

In [None]:
# Run the fine-tuning job.
fine_tuning.train()

# Save the trainer's model locally under the refined model name.
trained_model = fine_tuning.model
trained_model.save_pretrained(refined_model)

In [None]:
# Upload the trained model to a private Hub repository.
hub_push_options = {
    "repo_id": REFINED_MODEL_NAME,
    "private": True,
    "commit_message": "Updating model and README",
}
fine_tuning.model.push_to_hub(**hub_push_options)