In [1]:
!pip install transformers bitsandbytes datasets peft gdown huggingface_hub trl

Collecting bitsandbytes
  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)
Collecting peft
  Downloading peft-0.13.0-py3-none-any.whl.metadata (13 kB)
Collecting gdown
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Collecting trl
  Downloading trl-0.11.2-py3-none-any.whl.metadata (12 kB)
Collecting tyro>=0.5.11 (from trl)
  Downloading tyro-0.8.11-py3-none-any.whl.metadata (8.4 kB)
Collecting shtab>=1.5.6 (from tyro>=0.5.11->trl)
  Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)
Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl (122.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m13.2 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading peft-0.13.0-py3-none-any.whl (322 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m322.5/322.5 kB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading gdown-5.2.0-py3-none-any.whl (18 kB)
Downlo

In [2]:
import gdown

# Google Drive sharing link for the cleaned question/answer CSV.
url = 'https://drive.google.com/file/d/1GJ0HyraB1DaUSrUUEgrez3F3CtgJFfat/view?usp=sharing'
# Destination handed to gdown (a directory path, note the trailing slash).
output_path = '/kaggle/working/'

# The call is the cell's last expression, so the saved file path is displayed.
gdown.download(
    url=url,
    output=output_path,
    quiet=False,
    fuzzy=True,
)

Downloading...
From (original): https://drive.google.com/uc?id=1GJ0HyraB1DaUSrUUEgrez3F3CtgJFfat
From (redirected): https://drive.google.com/uc?id=1GJ0HyraB1DaUSrUUEgrez3F3CtgJFfat&confirm=t&uuid=3a2f43ee-d147-4de9-8b97-6faafc114949
To: /kaggle/working/Cleaned_Questions_Answers_For_Finetuning.csv
100%|██████████| 268M/268M [00:05<00:00, 48.5MB/s] 


'/kaggle/working/Cleaned_Questions_Answers_For_Finetuning.csv'

In [3]:
import torch
import pandas as pd
from datasets import Dataset
from trl import SFTTrainer, SFTConfig, setup_chat_format
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

In [4]:
# Only the first N_TRAIN_ROWS rows are used for fine-tuning, so read just those
# rows instead of parsing the entire CSV and slicing it afterwards.
N_TRAIN_ROWS = 2000

data = pd.read_csv(
    '/kaggle/working/Cleaned_Questions_Answers_For_Finetuning.csv',
    nrows=N_TRAIN_ROWS,
)

# The id columns are not used downstream; keep only the question/answer text.
data = data.drop(columns=["Id", "ParentId"])

dataset = Dataset.from_pandas(data)

dataset

Dataset({
    features: ['Cleaned_Questions', 'Cleaned_Answers'],
    num_rows: 2000
})

In [5]:
model_name = "NousResearch/Hermes-3-Llama-3.1-8B"
tokenizer = AutoTokenizer.from_pretrained(model_name)

# 4-bit NF4 weights with double quantization; compute dtype is fp16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# NOTE(review): trust_remote_code=True allows code shipped with the Hub repo to
# run locally - confirm this checkpoint actually needs it.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map='auto',
    trust_remote_code=True,
)

# LoRA adapters (with DoRA enabled) on the q/k/v/o and gate/up/down projections.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    r=16,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    use_dora=True,
    target_modules=[
        "up_proj",
        "o_proj",
        "v_proj",
        "gate_proj",
        "q_proj",
        "down_proj",
        "k_proj",
    ],
)

# Preparation pipeline - the order of these four steps is kept as-is:
# checkpointing -> k-bit training prep -> chat format -> adapter wrapping.
model.gradient_checkpointing_enable()
model = prepare_model_for_kbit_training(model)
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, lora_config)

tokenizer_config.json:   0%|          | 0.00/55.8k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/883 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/23.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/4 [00:00<?, ?it/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/1.17G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/185 [00:00<?, ?B/s]

In [6]:
def format_chat_template(row):
    """Attach a chat-formatted ``"text"`` field to one question/answer row.

    The question is used as the user turn and the answer as the assistant
    turn. The two-turn conversation is rendered to a string (``tokenize=False``)
    with the notebook-level ``tokenizer``'s chat template.

    Args:
        row: Mapping with ``"Cleaned_Questions"`` and ``"Cleaned_Answers"`` keys.

    Returns:
        The same ``row`` object, updated in place with the new ``"text"`` key.
    """
    turns = (("user", "Cleaned_Questions"), ("assistant", "Cleaned_Answers"))
    conversation = [{"role": role, "content": row[column]} for role, column in turns]

    row["text"] = tokenizer.apply_chat_template(conversation, tokenize=False)
    return row

# Add the rendered chat "text" column to every row, using 4 worker processes.
tokenized_dataset = dataset.map(
    function=format_chat_template,
    num_proc=4,
)

  self.pid = os.fork()


Map (num_proc=4):   0%|          | 0/2000 [00:00<?, ? examples/s]

In [7]:
# All SFT-specific options (text field, max sequence length, packing) are set on
# SFTConfig. Passing them to SFTTrainer directly is deprecated and emits
# "Deprecated positional argument(s) used in SFTTrainer ..." at construction.
training_args = SFTConfig(
    output_dir="./results",
    per_device_train_batch_size=8,
    gradient_accumulation_steps=4,      # 8 x 4 = 32 sequences per optimizer step per device
    eval_strategy="no",
    optim="adamw_torch",
    logging_strategy="steps",
    logging_steps=100,
    save_strategy="epoch",              # one checkpoint per epoch; save_steps only applies to "steps"
    save_total_limit=3,
    learning_rate=2e-4,
    num_train_epochs=6,
    lr_scheduler_type="linear",
    logging_dir="./logs",
    fp16=True,
    group_by_length=True,
    push_to_hub=False,
    report_to="tensorboard",
    dataloader_num_workers=4,
    overwrite_output_dir=True,
    save_only_model=False,
    remove_unused_columns=True,
    # SFT-specific settings (previously passed to SFTTrainer).
    dataset_text_field="text",
    max_seq_length=512,
    packing=False,
)

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    peft_config=lora_config,
    tokenizer=tokenizer,
)

trainer.train()


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)
  self.pid = os.fork()
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss
100,1.5916
200,0.8771
300,0.5252


  self.pid = os.fork()
  self.pid = os.fork()
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  self.pid = os.fork()
  self.pid = os.fork()
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  self.pid = os.fork()
  self.pid = os.fork()
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  self.pid = os.fork()
  self.pid = os.fork()
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]
  self.pid = os.fork()
  self.pid = os.fork()
  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # ty

TrainOutput(global_step=372, training_loss=0.8679029198103053, metrics={'train_runtime': 25015.4333, 'train_samples_per_second': 0.48, 'train_steps_per_second': 0.015, 'total_flos': 1.4042668537734758e+17, 'train_loss': 0.8679029198103053, 'epoch': 5.952})

In [None]:
messages = [
    {
        "role": "user",
        "content": "How do I turn a python program into an .egg file?"
    }
]

# The model has just been trained; switch to eval mode so dropout (including
# the LoRA dropout) is disabled while generating.
model.eval()

prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

outputs = model.generate(**inputs, max_length=512, num_return_sequences=1, early_stopping=True, num_beams=5)

# The generated sequence starts with the prompt tokens. Decode only what comes
# after them: splitting the full decoded text on the word "assistant" returns
# the wrong span whenever the question or the answer contains that word.
prompt_length = inputs["input_ids"].shape[1]
text = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

question = messages[0]["content"]
answer = text.strip()

print(f"You: {question}")
print(f"Bot: {answer}")

In [None]:
# Store the trained model and its tokenizer together in one directory.
save_dir = "./fine_tuned_model"

trainer.save_model(save_dir)
tokenizer.save_pretrained(save_dir)

In [None]:
from huggingface_hub import login, HfApi
import os

# Never hard-code an access token in a notebook: it leaks as soon as the file
# is shared or committed. Read it from the HF_TOKEN environment variable; when
# the variable is unset, login() receives None and asks for the token itself.
login(token=os.environ.get("HF_TOKEN"))

model_dir = "./fine_tuned_model"

api = HfApi()
repo_id = "thuan220401/Llama3_StackOverflow_FineTuning"

# Upload the saved model directory to the model repository on the Hub.
api.upload_folder(
    folder_path=model_dir,
    repo_id=repo_id,
    repo_type="model"
)


In [None]:
import shutil

# Zip the step-372 checkpoint directory; the archive is named checkpoint-372.zip.
shutil.make_archive(
    base_name='checkpoint-372',
    format='zip',
    root_dir='/kaggle/working/results/checkpoint-372',
)