# Step 1: Install all the needed packages

In [2]:
import numpy as np
import pandas as pd
import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [3]:
!pip install -q accelerate peft bitsandbytes transformers trl

# Step 2: Import all the Required Libraries

In [5]:
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
    pipeline,
    EarlyStoppingCallback
)
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from trl import SFTTrainer

# Step 3: Get the model names and dataset names

In [6]:
model_name = 'NousResearch/Llama-2-7b-chat-hf'
dataset_name = 'mlabonne/guanaco-llama2'
new_model = 'Llama-2-7b-hf-chat-finetune'

# Step 4: Load the dataset and model

In [7]:
dataset = load_dataset(dataset_name, split = 'train')
dataset = dataset.shuffle(seed=42).select(range(200))

train_test_split = dataset.train_test_split(test_size=50)
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

Downloading data:   0%|          | 0.00/8.99M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/476k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/9846 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/518 [00:00<?, ? examples/s]

In [11]:
print(len(train_dataset), len(eval_dataset), train_dataset[0])

150 50 {'text': '<s>[INST] Расскажи про атаку на Бисмарк [/INST] Атака на Бисмарк – это событие Второй мировой войны, произошедшее в мае 1941 года, когда британский флот атаковал немецкий линкор "Бисмарк" в Атлантике.\n\nЛинкор "Бисмарк" был одним из самых мощных и опасных кораблей своего времени. В мае 1941 года "Бисмарк" вместе с другим линкором "Принц Евгений" пытались прорваться в Атлантику, чтобы прервать снабжение союзников Германии, а также атаковать союзные корабли.\n\nБританский флот, получив информацию о движении "Бисмарка", решил пресечь его попытки прорваться в Атлантику. Были отправлены две эскадры – первая, в составе которой были линкоры "Худ" и "Прицесс Роял", а также несколько крейсеров, а вторая, более крупная, которая включала в себя боевые корабли "Викториус", "Родней", "Норфолк" и "Суффолк".\n\nПосле нескольких дней погони, "Бисмарк" был поражен торпедой британского самолета "Скорпион" и терпел значительные повреждения. Несмотря на это, он продолжал борьбу, но в кон

In [12]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit = True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant = True
)

In [13]:
# loading the base model
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config = bnb_config,
    device_map = 'auto',
    token = ''
)

model.config.use_cache = False
model.config.pretraining_tp = 1    # more accurate but slower computation

# Loading LLaMa tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, token='key_here')
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = 'right'    # Fix weird overflow issue with fp16 training

config.json:   0%|          | 0.00/583 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/200 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/746 [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/21.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/435 [00:00<?, ?B/s]

In [15]:
# Load LoRA configuration
peft_config = LoraConfig(
    lora_alpha = 16,
    lora_dropout = 0.1,
    r = 64,
    bias = 'none',
    task_type = "CAUSAL_LM")

model = prepare_model_for_kbit_training(model)
model = get_peft_model(model,peft_config)

# Step 5: Training

In [16]:
training_arguments = TrainingArguments(
    output_dir = '/kaggle/working',
    per_device_train_batch_size = 2,
    per_device_eval_batch_size = 2,
    gradient_checkpointing = True,
    gradient_accumulation_steps = 1,
    optim = 'paged_adamw_8bit',
    save_steps = 25,
    save_strategy = 'steps',
    evaluation_strategy = 'steps',
    eval_steps = 25,
    load_best_model_at_end = True,
    learning_rate = 2e-4,
    weight_decay = 0.001,
    fp16 = False,
    bf16 = False,
    max_grad_norm = 0.3,
    max_steps = -1,
    warmup_ratio = 0.03,
    group_by_length = True,
    lr_scheduler_type = 'cosine',
    report_to = 'tensorboard'
)



In [17]:
early_stopping = EarlyStoppingCallback(
    early_stopping_patience = 2,
)

In [19]:
# Set supervised fine-tuning parameters
trainer = SFTTrainer(
    model = model, 
    train_dataset = train_dataset,
    eval_dataset = eval_dataset,
    dataset_text_field = 'text',
    max_seq_length = None,
    tokenizer = tokenizer,
    args = training_arguments,
    callbacks = [early_stopping]
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/150 [00:00<?, ? examples/s]

Map:   0%|          | 0/50 [00:00<?, ? examples/s]

In [20]:
trainer.train()



Step,Training Loss,Validation Loss
25,No log,1.428499
50,No log,1.281319
75,No log,1.245929
100,No log,1.235654
125,No log,1.231044
150,No log,1.230033
175,No log,1.231645
200,No log,1.232338




TrainOutput(global_step=200, training_loss=1.3957742309570313, metrics={'train_runtime': 3765.2712, 'train_samples_per_second': 0.12, 'train_steps_per_second': 0.06, 'total_flos': 7355697750294528.0, 'train_loss': 1.3957742309570313, 'epoch': 2.6666666666666665})

# Step 6: Testing

In [21]:
def generate_response(prompt, model, tokenizer, max_length=200):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_length, temperature=0.7, top_p=0.9)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

# Example usage
prompt = "Why did the chicken cross the road?"
response = generate_response(prompt, model, tokenizer)
print(f"Prompt: {prompt}")
print(f"Model's response: {response}")

Prompt: Why did the chicken cross the road?
Model's response: Why did the chicken cross the road? To get to the other side!
