In [1]:
# Install necessary libraries
!pip install transformers datasets peft torch wandb

import transformers
from datetime import datetime
import wandb
import torch
from datasets import load_dataset, Dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, BitsAndBytesConfig
from peft import get_peft_model, PeftConfig, prepare_model_for_kbit_training
import random

# Initialize wandb
wandb.init(project="llama2-finetuning")

# Set base model ID and output directory
base_model_id = "meta-llama/Llama-2-7b-hf"
project = "viggo-finetune"
run_name = base_model_id + "-" + project
output_dir = "./" + run_name

# Load the dataset
dataset = load_dataset("openvega-simon/investopedia")

# Split the dataset into train and validation sets
split_datasets = dataset["train"].train_test_split(test_size=0.2, seed=42)
train_dataset = split_datasets["train"]
test_dataset = split_datasets["test"]

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(
    base_model_id,
    add_eos_token=True,
    add_bos_token=True,
)

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)



[34m[1mwandb[0m: Currently logged in as: [33msameerabdulmohamed[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [2]:
model = AutoModelForCausalLM.from_pretrained(base_model_id, quantization_config=bnb_config, device_map="auto")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [3]:
# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['clean_content'], truncation=True, padding="max_length")

tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_val_dataset = test_dataset.map(tokenize_function, batched=True)

In [4]:
from peft import LoraConfig, get_peft_model

config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
        "lm_head",
    ],
    bias="none",
    lora_dropout=0.05,  # Conventional
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, config)

In [5]:
!pip install -q wandb -U

import wandb, os
wandb.login()

wandb_project = "investopedia-finetune"
if len(wandb_project) > 0:
    os.environ["WANDB_PROJECT"] = wandb_project



In [6]:
if torch.cuda.device_count() > 1: # If more than 1 GPU
    model.is_parallelizable = True
    model.model_parallel = True

In [8]:
import transformers
from datetime import datetime

project = "investopedia-finetune"
base_model_name = "llama2-7b"
run_name = base_model_name + "-" + project
output_dir = "./" + run_name

tokenizer.pad_token = tokenizer.eos_token

trainer = transformers.Trainer(
    model=model,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_val_dataset,
    args=transformers.TrainingArguments(
        output_dir=output_dir,
        warmup_steps=5,
        per_device_train_batch_size=1,
        gradient_checkpointing=True,
        gradient_accumulation_steps=1,
        max_steps=1000,
        learning_rate=2.5e-5,
        logging_steps=50,
        bf16=True,
        optim="paged_adamw_8bit",
        logging_dir="./logs",        # Directory for storing logs
        save_strategy="steps",       # Save the model checkpoint every logging step
        save_steps=25,                # Save checkpoints every 50 steps
        evaluation_strategy="steps", # Evaluate the model every logging step
        eval_steps=50,               # Evaluate and save checkpoints every 50 steps
        do_eval=True,                # Perform evaluation at the end of training
        report_to="wandb",           # Comment this out if you don't want to use weights & baises
        run_name=f"{run_name}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}"          # Name of the W&B run (optional)
    ),
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)

model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

max_steps is given, it will override any value given in num_train_epochs


Step,Training Loss,Validation Loss


OutOfMemoryError: CUDA out of memory. Tried to allocate 6.27 GiB. GPU 