In [None]:
import pickle
from dotenv import load_dotenv
import transformers
import torch
from trl import SFTTrainer
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig, GemmaTokenizer

In [None]:
import os
from google.colab import userdata

# Copy the 'HF_TOKEN' value from google.colab's `userdata` store into the
# process environment; the Hub upload cell reads it back from os.environ.
os.environ['HF_TOKEN'] = userdata.get('HF_TOKEN')

In [None]:
# Report whether torch can see a CUDA device and, if so, which one.
# get_device_name(0) is only queried when a device is available, so this cell
# no longer raises on a runtime that has no GPU attached.
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device visible to torch.")

In [None]:
def loadData(file):
    """Load and return the object stored in a pickle file.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The object reconstructed by ``pickle.load``.

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle.
    """
    # SECURITY: unpickling can execute arbitrary code, so only load files from
    # a trusted source.
    # The context manager closes the handle even if unpickling raises.
    with open(file, 'rb') as dbfile:
        return pickle.load(dbfile)

In [None]:
# Load the pickled records. Later cells iterate over this object, slice it, and
# pop the 'instruction' / 'response' keys from each element, so it is
# presumably a list of dicts — verify against the pickle's producer.
ins_re_dataset = loadData('InsReBroader.pickle')

In [None]:
# Number of records loaded (shown as the cell output).
len(ins_re_dataset)

In [None]:
# Preview the first ten records before they are reformatted.
ins_re_dataset[:10]

In [None]:
# Collapse each record's 'instruction' / 'response' pair into one prompt string
# stored under 'text'. The records are modified in place.
for pair in ins_re_dataset:
  # A record that already has 'text' and neither source key was converted by an
  # earlier run of this cell. Skipping it keeps the cell safe to re-run;
  # otherwise both pops would return None and overwrite the prompt with
  # "None" placeholders.
  if 'text' in pair and 'instruction' not in pair and 'response' not in pair:
    continue
  # Descriptive names also avoid binding a global called `re`, which would
  # shadow the standard-library module of that name.
  instruction = pair.pop('instruction', None)
  response = pair.pop('response', None)
  pair['text'] = f"### Instruction:\n{instruction}\n\n### Response:\n{response}"

In [None]:
# Preview the first ten records again, now that each one carries a single
# 'text' field instead of separate 'instruction' / 'response' keys.
ins_re_dataset[:10]

In [None]:
from datasets import Dataset

# Build a Dataset from the first 2,000 records; this is the dataset later
# passed to the trainer.
train_dataset = Dataset.from_list(ins_re_dataset[:2000])

In [None]:
# Build a second Dataset from records 2,000 through 3,999.
# NOTE(review): this split is tokenized later but is not passed to the trainer
# in the cells visible here — confirm whether a second training round is intended.
train_dataset2 = Dataset.from_list(ins_re_dataset[2000:4000])

In [None]:
# Hub identifier of the checkpoint that is loaded and fine-tuned below.
model_id = "google/gemma-2-2b-it"

# Quantization settings handed to from_pretrained: 4-bit loading with the
# "nf4" quant type and bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [None]:
# Load the checkpoint named by `model_id` using the `bnb_config` quantization
# settings; device_map="auto" leaves device placement to the library.
# NOTE(review): eager attention and use_cache=False are presumably chosen with
# training in mind — confirm against the model card / library docs.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=False,
    attn_implementation='eager',
    use_cache=False,
)

In [None]:
# Names of the modules that receive LoRA adapters.
lora_target_modules = [
    "q_proj",
    "o_proj",
    "k_proj",
    "v_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Adapter hyper-parameters: rank 16, alpha 32, 5% dropout, no bias training.
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=lora_target_modules,
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [None]:
# Tokenizer that matches the checkpoint being fine-tuned.
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
def tokenize(example):
    """Tokenize the 'text' field of a batch of examples.

    Sequences longer than 4096 tokens are truncated. No padding is applied
    here: padding every example to 4096 tokens stores and processes mostly
    padding, so padding is left to batch collation instead.

    Args:
        example: Mapping with a 'text' entry (a list of strings when called
            through ``Dataset.map(..., batched=True)``).

    Returns:
        The tokenizer's output for ``example["text"]``.
    """
    # Assumes the trainer's data collator pads each batch to its longest
    # sequence — TODO confirm for the installed trl version.
    return tokenizer(example["text"], truncation=True, max_length=4096)

# Add the tokenizer's output columns to the training split.
train_dataset = train_dataset.map(tokenize, batched=True)

In [None]:
# Tokenize the second split with the same `tokenize` function.
train_dataset2 = train_dataset2.map(tokenize, batched=True)

In [None]:
def print_trainable_parameters(model):
    """Print trainable and total parameter counts and their ratio.

    Args:
        model: Object whose ``parameters()`` yields items exposing ``numel()``
            and ``requires_grad`` (e.g. a ``torch.nn.Module``).

    Prints three lines: the trainable count, the total count, and the
    trainable share as a percentage. A model with no parameters reports a
    ratio of 0 instead of raising ``ZeroDivisionError``.
    """
    trainable = 0
    total = 0
    for param in model.parameters():
        count = param.numel()
        total += count
        if param.requires_grad:
            trainable += count

    # Guard the division: an empty model has total == 0.
    ratio = 100 * trainable / total if total else 0.0

    print(f"Trainable parameters: {trainable:,}")
    print(f"Total parameters: {total:,}")
    print(f"Trainable ratio: {ratio:.4f}%")

# Report parameter counts for the loaded model.
# NOTE(review): this runs before `lora_config` is handed to SFTTrainer, so the
# numbers describe the model as loaded, not the LoRA-adapted model — confirm
# this is the intended point to measure.
print_trainable_parameters(model)

In [None]:
# Optimisation, precision and logging settings for the run. With a per-device
# batch of 2 and 8 accumulation steps, each optimizer step covers 16 examples
# per device.
# NOTE(review): fp16=True here, while the quantization config in this notebook
# uses bfloat16 as compute dtype — confirm the mixed precision choice is intended.
training_args = transformers.TrainingArguments(
    output_dir="outputs",
    num_train_epochs=2,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    gradient_checkpointing=True,
    learning_rate=5e-5,
    warmup_steps=50,
    optim="paged_adamw_8bit",
    fp16=True,
    logging_steps=10,
)

# Supervised fine-tuning trainer; the LoRA config is attached to the model here.
tuner = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    peft_config=lora_config,
)


In [None]:
# Set WANDB_DISABLED to the string "false" in the process environment.
# NOTE(review): presumably meant to keep Weights & Biases logging switched on;
# the trainer was already constructed in an earlier cell — confirm the variable
# is read late enough to take effect.
os.environ['WANDB_DISABLED'] = "false"

In [None]:
import wandb

# Start a Weights & Biases run under the "lang-tuner" project.
wandb.init(project="lang-tuner")

In [None]:
# Run the fine-tuning job with the trainer built earlier in the notebook.
tuner.train()

In [None]:
# Save the trainer's model into the local "fine-tuned-gemma" directory.
tuner.model.save_pretrained("fine-tuned-gemma")

In [None]:
# Write the trained model and the tokenizer into the directory that is uploaded.
# `model` is the handle that was passed to SFTTrainer before `peft_config` was
# applied, so save through `tuner.model` (the object the trainer trained)
# rather than through that pre-training handle.
# NOTE(review): saving through `model` presumably wrote a full base-model
# checkpoint next to the adapter files — confirm nothing downstream relied on it.
tuner.model.save_pretrained("fine-tuned-gemma")
tokenizer.save_pretrained("fine-tuned-gemma")

In [None]:
from huggingface_hub import HfApi
# Publish the saved artifacts to the Hugging Face Hub.
api = HfApi()

hub_repo_id = "Prince-Dastan/langchain-docbot-2"
# Same relative directory the save cells write to. Listing an absolute
# /content/... path while uploading from a relative one only worked when the
# working directory happened to be /content.
local_model_dir = "fine-tuned-gemma"

# exist_ok=True lets this cell be re-run once the repo has been created.
# NOTE(review): private=False makes the repository publicly visible — confirm intended.
api.create_repo(repo_id=hub_repo_id, repo_type="model", private=False, exist_ok=True, token=os.environ['HF_TOKEN'])

# Upload the whole directory in one call instead of one request per file; this
# also copes with sub-directories, which a per-file upload loop cannot send.
api.upload_folder(folder_path=local_model_dir, repo_id=hub_repo_id, repo_type="model", token=os.environ['HF_TOKEN'])