In [1]:
import torch
from transformers import AutoModelForCausalLM,BitsAndBytesConfig, AutoTokenizer, pipeline, BitsAndBytesConfig
from peft import prepare_model_for_kbit_training
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
import transformers

In [2]:
model_name = 'google/gemma-2-2b'

In [3]:
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type='nf4'
)

In [4]:
model = AutoModelForCausalLM.from_pretrained(model_name, device_map = 'auto',
                                             quantization_config=bnb_config,
                                             trust_remote_code=False,
                                             revision = 'main')

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [5]:
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)

In [8]:
# Using base model for predictions
model.eval() # It de-activates the dropout modules

comment = 'It was a good video, Thanks'
prompt = f'''<bos>{comment}<eos>'''

inputs = tokenizer(prompt, return_tensors = 'pt')
outputs = model.generate(input_ids = inputs['input_ids'].to('cuda'), max_new_tokens = 132)

print(tokenizer.batch_decode(outputs)[0])

<bos><bos>It was a good video, Thanks<eos>I have a question. I have a 2006 325i with 110k miles. I have a check engine light on and the car is running rough. I have a code P0300. I have replaced the spark plugs and coils. I have also replaced the fuel filter. I have also replaced the fuel pump. I have also replaced the fuel pressure regulator. I have also replaced the fuel injectors. I have also replaced the fuel pressure sensor. I have also replaced the fuel pump relay. I have also replaced the fuel pump relay. I have also replaced the fuel pump relay. I have also replaced


In [9]:
instructions = '''Dawood Khan, functioning as a virtual data science consultant on YouTube, communicates in clear, accessible language, escalating to technical depth upon request. \
It reacts to feedback aptly and ends responses with its signature '-Dawood'. \
ShawGPT will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback, \
thus keeping the interaction natural and engaging.

Please respond to the following comment.'''

In [11]:
prompt_temp= lambda comment: f'''<bos>{instructions}\n{comment}\n <eos>'''

In [12]:
prompt = prompt_temp(comment)

In [13]:
print(prompt)

<bos>Dawood Khan, functioning as a virtual data science consultant on YouTube, communicates in clear, accessible language, escalating to technical depth upon request. It reacts to feedback aptly and ends responses with its signature '-Dawood'. ShawGPT will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback, thus keeping the interaction natural and engaging.

Please respond to the following comment.
It was a good video, Thanks
 <eos>


In [14]:
inputs = tokenizer(prompt, return_tensors = 'pt')

outputs = model.generate(input_ids = inputs['input_ids'].to('cuda'), max_new_tokens = 132)
print(tokenizer.batch_decode(outputs)[0])

<bos><bos>Dawood Khan, functioning as a virtual data science consultant on YouTube, communicates in clear, accessible language, escalating to technical depth upon request. It reacts to feedback aptly and ends responses with its signature '-Dawood'. ShawGPT will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback, thus keeping the interaction natural and engaging.

Please respond to the following comment.
It was a good video, Thanks
 <eos> 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 



## Preparing Model FOr Training

In [6]:
model.train() # Now we activate the dropout modules
model.gradient_checkpointing_enable() # gradient checkpoint enabled
model.config.use_cache = False
model.enable_input_require_grads()
model = prepare_model_for_kbit_training(model)

In [7]:
# Configuring LoRA
config = LoraConfig(
    r = 8,
    lora_alpha=32,
    lora_dropout=0.04,
    bias = 'none',
    task_type='CAUSAL_LM',
    target_modules=['q_proj']
)

In [8]:
model = get_peft_model(model,config)

In [9]:
model.print_trainable_parameters()

trainable params: 905,216 || all params: 2,615,247,104 || trainable%: 0.0346


## Now Prepare Dataset

In [10]:
dataset = load_dataset("shawhin/shawgpt-youtube-comments")

In [11]:
dataset['train']

Dataset({
    features: ['example'],
    num_rows: 50
})

In [12]:
# Now we define the tokenize function
def tokenize (example):
    text = example['example']

    tokenizer.truncation_side= 'left'
    tokenized_inputs = tokenizer(
        text,
        return_tensors = 'np',
        truncation = True,
        max_length=512,
    )
    return tokenized_inputs


In [13]:
# Now we tokenize the dataset
tokenized_dataset = dataset.map(tokenize,batched=True)

Map:   0%|          | 0/9 [00:00<?, ? examples/s]

In [14]:
tokenizer.pad_token = tokenizer.eos_token

In [15]:
data_collator = transformers.DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm = False)

## Fine Tuning The Model

In [16]:
training_args = transformers.TrainingArguments(
    output_dir = './output',
    learning_rate=2e-4,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=6,
    weight_decay=0.01,
    logging_strategy='epoch',
    eval_strategy='epoch',
    save_strategy='epoch',
    load_best_model_at_end=True,
    gradient_accumulation_steps=2,
    warmup_steps=2,
    fp16=True,
    optim='paged_adamw_8bit'
)

In [17]:
trainer = transformers.Trainer(
    model = model,
    args = training_args,
    train_dataset = tokenized_dataset['train'],
    eval_dataset = tokenized_dataset['test'],
    data_collator = data_collator,
)

In [18]:
# Training the Model
model.config.use_cache = False
trainer.train()

model.config.use_cache = True


  return fn(*args, **kwargs)


Epoch,Training Loss,Validation Loss
1,3.7824,3.508419
2,3.4515,3.062764
3,2.96,2.731298
4,2.6207,2.416805
5,2.3502,2.14304
6,2.0138,1.962128


  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)
  return fn(*args, **kwargs)


In [19]:
model.eval()

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): Gemma2ForCausalLM(
      (model): Gemma2Model(
        (embed_tokens): Embedding(256000, 2304, padding_idx=0)
        (layers): ModuleList(
          (0-25): 26 x Gemma2DecoderLayer(
            (self_attn): Gemma2Attention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2304, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.04, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2304, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
          

In [20]:
instructions = '''Dawood Khan, functioning as a virtual data science consultant on YouTube, communicates in clear, accessible language, escalating to technical depth upon request. \
It reacts to feedback aptly and ends responses with its signature '-Dawood'. \
ShawGPT will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback, \
thus keeping the interaction natural and engaging.

Please respond to the following comment.'''

In [22]:
comment = 'It was a good video, Thanks'
prompt_temp= lambda comment: f'''<bos>{instructions}\n{comment}\n <eos>'''
prompt = prompt_temp(comment)

In [25]:
print(prompt)

<bos>Dawood Khan, functioning as a virtual data science consultant on YouTube, communicates in clear, accessible language, escalating to technical depth upon request. It reacts to feedback aptly and ends responses with its signature '-Dawood'. ShawGPT will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback, thus keeping the interaction natural and engaging.

Please respond to the following comment.
It was a good video, Thanks
 <eos>


In [23]:
inputs = tokenizer(prompt, return_tensors = 'pt')

In [24]:
outputs = model.generate(input_ids = inputs['input_ids'].to('cuda'), max_new_tokens = 132)
print(tokenizer.batch_decode(outputs)[0])

<bos><bos>Dawood Khan, functioning as a virtual data science consultant on YouTube, communicates in clear, accessible language, escalating to technical depth upon request. It reacts to feedback aptly and ends responses with its signature '-Dawood'. ShawGPT will tailor the length of its responses to match the viewer's comment, providing concise acknowledgments to brief expressions of gratitude or feedback, thus keeping the interaction natural and engaging.

Please respond to the following comment.
It was a good video, Thanks
 <eos><eos>
