In [3]:
import pickle
from dotenv import load_dotenv
import transformers
import torch
from trl import SFTTrainer
from peft import LoraConfig
from transformers import AutoTokenizer, AutoModelForCausalLM
from transformers import BitsAndBytesConfig, GemmaTokenizer

In [4]:
import os
from google.colab import userdata

os.environ['HF_TOKEN'] = userdata.get('HF_TOKEN')

In [5]:
print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0))

True
Tesla T4


In [6]:
def loadData(file):
    dbfile = open(file, 'rb')
    db = pickle.load(dbfile)

    return db

In [7]:
ins_re_dataset = loadData('InsReBroader.pickle')

In [8]:
len(ins_re_dataset)

6739

In [9]:
ins_re_dataset[:10]

[{'instruction': 'What is the purpose of the `langchain-perplexity` package?',
  'response': 'The `langchain-perplexity` package provides the Perplexity AI integration for the LangChain framework.'},
 {'instruction': 'What version of the `langchain-perplexity` package is documented here?',
  'response': 'The documentation refers to version 0.1.1 of the `langchain-perplexity` package.'},
 {'instruction': 'What classes are available within the `chat_models` module of the `langchain-perplexity` package?',
  'response': 'The `chat_models` module contains the `ChatPerplexity` class, which provides access to the Perplexity AI Chat models API.'},
 {'instruction': "How can I use Perplexity AI's chat models within LangChain, according to this documentation?",
  'response': "You can use the `ChatPerplexity` class found in the `chat_models` module of the `langchain-perplexity` package to interact with Perplexity AI's chat models."},
 {'instruction': 'Can you provide a simple code example of how t

In [10]:
for pair in ins_re_dataset:
  ins = pair.pop('instruction', None)
  re = pair.pop('response', None)
  pair['text'] = f"### Instruction:\n{ins}\n\n### Response:\n{re}"

In [11]:
ins_re_dataset[:10]

[{'text': '### Instruction:\nWhat is the purpose of the `langchain-perplexity` package?\n\n### Response:\nThe `langchain-perplexity` package provides the Perplexity AI integration for the LangChain framework.'},
 {'text': '### Instruction:\nWhat version of the `langchain-perplexity` package is documented here?\n\n### Response:\nThe documentation refers to version 0.1.1 of the `langchain-perplexity` package.'},
 {'text': '### Instruction:\nWhat classes are available within the `chat_models` module of the `langchain-perplexity` package?\n\n### Response:\nThe `chat_models` module contains the `ChatPerplexity` class, which provides access to the Perplexity AI Chat models API.'},
 {'text': "### Instruction:\nHow can I use Perplexity AI's chat models within LangChain, according to this documentation?\n\n### Response:\nYou can use the `ChatPerplexity` class found in the `chat_models` module of the `langchain-perplexity` package to interact with Perplexity AI's chat models."},
 {'text': '### I

In [12]:
from datasets import Dataset

train_dataset = Dataset.from_list(ins_re_dataset[:2000])

In [27]:
train_dataset2 = Dataset.from_list(ins_re_dataset[2000:4000])

In [13]:
model_id = "google/gemma-2-2b-it"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16
)

In [14]:
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=False,
    attn_implementation='eager',
    use_cache=False,
)

config.json:   0%|          | 0.00/838 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/24.2k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/241M [00:00<?, ?B/s]

You have set `use_cache` to `False`, but cache_implementation is set to hybrid. cache_implementation will have no effect.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/187 [00:00<?, ?B/s]

In [15]:
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    target_modules=['q_proj', "o_proj", "k_proj", "v_proj", 'gate_proj', 'up_proj', "down_proj"],
    task_type='CAUSAL_LM',

)

In [16]:
tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)
def tokenize(example):
    return tokenizer(example["text"], truncation=True, padding="max_length", max_length=4096)

train_dataset = train_dataset.map(tokenize, batched=True)

tokenizer_config.json:   0%|          | 0.00/47.0k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [28]:
train_dataset2 = train_dataset2.map(tokenize, batched=True)

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [17]:
def print_trainable_parameters(model):
    trainable = 0
    total = 0
    for param in model.parameters():
        num_params = param.numel()
        total += num_params
        if param.requires_grad:
            trainable += num_params
    print(f"Trainable parameters: {trainable:,}")
    print(f"Total parameters: {total:,}")
    print(f"Trainable ratio: {100 * trainable / total:.4f}%")

print_trainable_parameters(model)

Trainable parameters: 590,065,920
Total parameters: 1,602,203,904
Trainable ratio: 36.8284%


In [18]:
tuner = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    args=transformers.TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=8,
        num_train_epochs=2,
        learning_rate=5e-5,
        warmup_steps=50,
        logging_steps=10,
        fp16=True,
        optim="paged_adamw_8bit",
        gradient_checkpointing=True,
        output_dir="outputs"
    ),
    peft_config=lora_config
)


Truncating train dataset:   0%|          | 0/2000 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


In [19]:
os.environ['WANDB_DISABLED'] = "false"

In [20]:
import wandb

wandb.init(project="lang-tuner")

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mprincedastan[0m ([33mprincedastan-mbm-university-jodhpur[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [21]:
tuner.train()



Step,Training Loss
10,36.0422
20,19.0861
30,5.569
40,1.5918
50,0.4579
60,0.1847
70,0.052
80,0.0312
90,0.0254
100,0.0229


TrainOutput(global_step=250, training_loss=2.5346357830166815, metrics={'train_runtime': 7056.3383, 'train_samples_per_second': 0.567, 'train_steps_per_second': 0.035, 'total_flos': 5.0264914526208e+16, 'train_loss': 2.5346357830166815})

In [23]:
tuner.model.save_pretrained("fine-tuned-gemma")

In [22]:
model.save_pretrained("fine-tuned-gemma")
tokenizer.save_pretrained("fine-tuned-gemma")

('fine-tuned-gemma/tokenizer_config.json',
 'fine-tuned-gemma/special_tokens_map.json',
 'fine-tuned-gemma/tokenizer.model',
 'fine-tuned-gemma/added_tokens.json',
 'fine-tuned-gemma/tokenizer.json')

In [24]:
from huggingface_hub import HfApi
api = HfApi()

api.create_repo(repo_id="Prince-Dastan/langchain-docbot-2", repo_type="model", private=False ,token=os.environ['HF_TOKEN'])

for file in os.listdir(r'/content/fine-tuned-gemma'):
  api.upload_file(path_or_fileobj=f"fine-tuned-gemma/{file}", path_in_repo=f"{file}", repo_id="Prince-Dastan/langchain-docbot-2", repo_type="model",token=os.environ['HF_TOKEN'])

model.safetensors:   0%|          | 0.00/3.58G [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/83.1M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/34.4M [00:00<?, ?B/s]