In [None]:
!pip install transformers datasets bitsandbytes accelerate peft


Collecting bitsandbytes
  Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.48.2-py3-none-manylinux_2_24_x86_64.whl (59.4 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 59.4/59.4 MB 20.6 MB/s eta 0:00:00
Installing collected packages: bitsandbytes
Successfully installed bitsandbytes-0.48.2


In [None]:
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    Trainer,
)
from datasets import load_dataset
from peft import LoraConfig, get_peft_model


In [None]:
# Load the raw training records from a local JSON file. Later cells read the
# "instruction", "input" and "output" columns and use the "train" split.
dataset = load_dataset(
    path="json",
    data_files="data.json",
)

In [None]:
# 8-bit (LLM.int8) quantization settings handed to from_pretrained().
# There is deliberately no compute-dtype option here: BitsAndBytesConfig only
# exposes `bnb_4bit_compute_dtype` (4-bit path). A `bnb_8bit_compute_dtype`
# keyword is not a real parameter and would be silently ignored as an unused
# kwarg, so it is not passed.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    # Allow modules that end up on the CPU under device_map="auto" to be kept
    # in fp32 there instead of aborting the load.
    llm_int8_enable_fp32_cpu_offload=True,
)


In [None]:
# Hub checkpoint shared by the tokenizer and the base model.
model_name = "meta-llama/Llama-2-7b-hf"

# Base model, quantized according to bnb_config; device placement is left to
# device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)

# Matching tokenizer. EOS doubles as the padding token because the tokenization
# cell pads every example to a fixed max_length.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token


In [None]:
# LoRA adapter settings for causal-LM fine-tuning: rank 16, alpha 32, 5% dropout,
# applied to the q_proj and v_proj modules, with no bias parameters trained.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

# Rebinds `model` to the PEFT-wrapped model. Re-running this cell wraps the
# already-wrapped model again, so re-run the model-loading cell first.
model = get_peft_model(model, lora_config)


In [None]:
# Make the embedding outputs require gradients.
# NOTE(review): presumably needed so gradient checkpointing (enabled in
# TrainingArguments) has a grad-requiring input while the base weights are
# frozen — confirm.
model.enable_input_require_grads()

# Disable the generation key/value cache while training.
model.config.use_cache = False


In [None]:
def format_prompt(example):
    """Render one record as an instruction-style training prompt.

    The prompt is a sequence of sections separated by a blank line:
    "### Instruction:", optionally "### Input:", then "### Response:".

    Args:
        example: Mapping with "instruction" and "output" keys and an optional
            "input" key holding extra context for the instruction.

    Returns:
        The formatted prompt string. The "### Input:" section is omitted when
        "input" is missing, falsy (None, "") or a whitespace-only string.

    Raises:
        KeyError: If "instruction" or "output" is missing.
    """
    context = example.get("input")
    # A whitespace-only context would otherwise yield an empty Input section.
    if isinstance(context, str):
        has_context = bool(context.strip())
    else:
        has_context = bool(context)

    sections = [f"### Instruction:\n{example['instruction']}"]
    if has_context:
        sections.append(f"### Input:\n{context}")
    sections.append(f"### Response:\n{example['output']}")
    return "\n\n".join(sections)


In [None]:
def tokenize_function(batch):
    """Format and tokenize a batch of records for causal-LM training.

    Args:
        batch: Mapping of column name to list of values, containing parallel
            "instruction", "input" and "output" lists.

    Returns:
        The tokenizer output for the formatted prompts (each padded/truncated
        to 512 tokens) with an added "labels" entry: a copy of "input_ids" in
        which padded positions are set to -100.
    """
    formatted = [
        format_prompt({"instruction": instruction, "input": context, "output": output})
        for instruction, context, output in zip(
            batch["instruction"], batch["input"], batch["output"]
        )
    ]

    tokens = tokenizer(
        formatted,
        truncation=True,
        padding="max_length",
        max_length=512,
        return_tensors="pt"
    )

    # Labels mirror the inputs, except that padding must not contribute to the
    # loss. The attention mask is used (not the pad token id) because the pad
    # token is the EOS token, and real EOS tokens should stay trainable.
    # -100 is the index the causal-LM loss ignores.
    labels = tokens["input_ids"].clone()
    labels[tokens["attention_mask"] == 0] = -100
    tokens["labels"] = labels
    # NOTE(review): no EOS token is appended to the prompts here; check whether
    # the tokenizer adds one, otherwise the model is never trained to stop.
    return tokens


# Tokenize every split in batches and drop ALL raw columns, not just the three
# the prompt uses: TrainingArguments sets remove_unused_columns=False, so any
# leftover raw column would be passed on to the collator and model.
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=dataset["train"].column_names,
)


In [None]:
training_args = TrainingArguments(
    # Output location and logging.
    output_dir="./finetuned-llama-interviewer",
    logging_steps=10,
    report_to="none",
    # Optimisation schedule: 200 optimizer steps, the first 20 as warmup.
    max_steps=200,
    warmup_steps=20,
    learning_rate=2e-4,
    # One example per device per step, accumulated over 4 steps.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=4,
    # Memory savers.
    fp16=True,
    gradient_checkpointing=True,
    # Keep every dataset column; the tokenized dataset is expected to contain
    # only model inputs by this point.
    remove_unused_columns=False,
)


In [None]:
# Wire the PEFT-wrapped model, the tokenized train split and the training
# arguments together.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
)

train_result = trainer.train()

# Persist the fine-tuned weights to training_args.output_dir explicitly.
# training_args configures no save schedule and the run is only 200 steps, so
# periodic checkpointing should not be relied on to keep the result.
trainer.save_model()

# Keep the training summary as the cell's displayed value.
train_result
