**1. Load the libraries**

In [1]:
import torch
from transformers import (AutoModelForCausalLM,
                          TrainingArguments,
                          Trainer)
from datasets import concatenate_datasets
from transformers import LlamaTokenizer, LlamaForCausalLM
from pyprojroot import here
from prepare_training_data import prepare_cubetrianlge_qa_dataset, prepare_cubetriangle_instruction_response_dataset

**1.1. Set the module configs**

In [2]:
seed = 20
test_size=0.1 # 10%
epochs=5
max_steps = -1
output_dir = here(f"models/fine_tuned_models/final_CubeTriangle_open_llama_3b_{epochs}_epochs")

**2. Load the model and tokenizer**

In [3]:
model_path = 'openlm-research/open_llama_3b'
tokenizer = LlamaTokenizer.from_pretrained(model_path)

You are using the default legacy behaviour of the <class 'transformers.models.llama.tokenization_llama.LlamaTokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [4]:
base_model = LlamaForCausalLM.from_pretrained(
    model_path, torch_dtype=torch.float16, device_map='cuda',
)

  return self.fget.__get__(instance, owner)()


**3. Prepare the training and test data**

**A few notes:**

* Treat the training process as building a reversed pyramid. use a subset of your data and smaller model.
* Always have baselines and compare your models.
* Track your training and all the configurations and oveserve your the improvements over time.

In [5]:
tokenized_cubetriangle_qa_dataset = prepare_cubetrianlge_qa_dataset(tokenizer)
tokenized_cubetriangle_inst_resp_dataset = prepare_cubetriangle_instruction_response_dataset(tokenizer)

Raw dataset shape: Dataset({
    features: ['question', 'answer'],
    num_rows: 204
})


**3.1 process the datasets and concatenate them together**

In [6]:
# Define a function to concatenate question and answer into a single text
def concatenate_qa_ds(examples):
    examples['text'] = [q + ' ' + a for q, a in zip(examples['question'], examples['answer'])]
    return examples
def concatenate_inst_resp_ds(examples):
    examples['text'] = [q + ' ' + a for q, a in zip(examples['instruction'], examples['response'])]
    return examples

# Apply the function to each split in the dataset
qa_ds = tokenized_cubetriangle_qa_dataset.map(concatenate_qa_ds, batched=True)
qa_ds = qa_ds.remove_columns(['question', 'answer'])
split_qa_ds = qa_ds.train_test_split(test_size=test_size, shuffle=True, seed=seed)

# Apply the function to each split in the dataset
inst_resp_ds = tokenized_cubetriangle_inst_resp_dataset.map(concatenate_inst_resp_ds, batched=True)
inst_resp_ds = inst_resp_ds.remove_columns(['instruction', 'response'])
split_inst_resp_ds = inst_resp_ds.train_test_split(test_size=test_size, shuffle=True, seed=seed)


Map:   0%|          | 0/204 [00:00<?, ? examples/s]

Map:   0%|          | 0/144 [00:00<?, ? examples/s]

In [7]:
# Concatenate the train datasets
concatenated_train = concatenate_datasets([split_qa_ds['train'], split_inst_resp_ds['train']])

# Concatenate the test datasets
concatenated_test = concatenate_datasets([split_qa_ds['test'], split_inst_resp_ds['test']])

# Create a new DatasetDict with the concatenated datasets
concatenated_dataset_dict = {"train": concatenated_train, "test": concatenated_test}
concatenated_dataset_dict

{'train': Dataset({
     features: ['input_ids', 'attention_mask', 'labels', 'text'],
     num_rows: 312
 }),
 'test': Dataset({
     features: ['input_ids', 'attention_mask', 'labels', 'text'],
     num_rows: 36
 })}

In [8]:
concatenated_dataset_dict['test']["text"]

['### Question:\nHow much does CubeTriangle Delta Earbuds cost?\n\n\n### Answer:\n $350',
 "### Question:\nWhat percentage of CubeTriangle's energy consumption comes from renewable sources?\n\n\n### Answer:\n CubeTriangle is actively increasing the use of renewable energy sources, with a goal to achieve a significant percentage of our energy consumption from renewable sources in the coming years.",
 '### Question:\nAre CubeTriangle products compatible with other smart home ecosystems like Alexa or Google Home?\n\n\n### Answer:\n Yes, CubeTriangle products are designed to be compatible with popular smart home ecosystems, including Alexa and Google Home. Follow the integration instructions in the product manual or app to seamlessly connect and control your CubeTriangle devices through these platforms.',
 "### Question:\nMy CubeTriangle Kappa Portable Speaker isn't pairing with my device. What should I do?\n\n\n### Answer:\n Reset the speaker and ensure your device's Bluetooth is on. If i

**4. Set the training config**

`TrainingArguments`

* https://huggingface.co/docs/transformers/v4.36.1/en/main_classes/trainer#transformers.TrainingArguments

In [10]:
training_args = TrainingArguments(
  learning_rate=1.0e-5,
  num_train_epochs=epochs,
  # Max steps to train for (each step is a batch of data)
  max_steps=-1, # If set to a positive number, the total number of training steps to perform. Overrides num_train_epochs, if not -1. 
  #For a finite dataset, training is reiterated through the dataset (if all data is exhausted) until max_steps is reached.
  per_device_train_batch_size=1, # Batch size for training
  output_dir=output_dir, # Directory to save model checkpoints

  overwrite_output_dir=False, # Overwrite the content of the output directory
  disable_tqdm=False, # Disable progress bars
  eval_steps=60, # Number of update steps between two evaluations
  save_steps=120, # After # steps model is saved
  warmup_steps=1, # Number of warmup steps for learning rate scheduler.  Ratio of total training steps used for a linear warmup from 0 to learning_rate.
  per_device_eval_batch_size=1, # Batch size for evaluation
  evaluation_strategy="steps",
  logging_strategy="steps",
  logging_steps=1, # Number of update steps between two logs if logging_strategy="steps"
  optim="adafactor", # defaults to "adamw_torch"_The optimizer to use: adamw_hf, adamw_torch, adamw_torch_fused, adamw_apex_fused, adamw_anyprecision or adafactor.
  gradient_accumulation_steps = 4, # Number of updates steps to accumulate the gradients for, before performing a backward/update pass.
  gradient_checkpointing=False, # If True, use gradient checkpointing to save memory at the expense of slower backward pass.

  # Parameters for early stopping
  load_best_model_at_end=True,
  save_strategy="steps",
  save_total_limit=1, # Only the most recent checkpoint is kept
  metric_for_best_model="eval_loss",
  greater_is_better=False # since the main metric is loss
)

**A few notes:**

* Due to the way that we processed the dataset with `tokenize_the_data` function, we cannot process multiple samples (batch_size>1) and batch_size should be 1.

However:

* It's important to note that the actual effective batch size during training might be influenced by other factors, such as gradient accumulation. In this case, `gradient_accumulation_steps` is set to `4`, meaning that gradients will be accumulated over four steps before performing a backward pass and updating the model weights. Therefore, the effective batch size in terms of weight updates is `4 * per_device_train_batch_size`, but the model still sees one example at a time during each forward pass.

**5. Instantiate the Trainer**

* https://huggingface.co/docs/transformers/main_classes/trainer

The Trainer contains the basic training loop which supports the above features. To inject custom behavior you can subclass them and override the following methods:

* get_train_dataloader — Creates the training DataLoader.
* get_eval_dataloader — Creates the evaluation DataLoader.
* get_test_dataloader — Creates the test DataLoader.
* log — Logs information on the various objects watching training.
* create_optimizer_and_scheduler — Sets up the optimizer and learning rate scheduler if they were not passed at init. Note, that you can also subclass or override the create_optimizer and create_scheduler methods separately.
* create_optimizer — Sets up the optimizer if it wasn’t passed at init.
* create_scheduler — Sets up the learning rate scheduler if it wasn’t passed at init.
* compute_loss - Computes the loss on a batch of training inputs.
* training_step — Performs a training step.
* prediction_step — Performs an evaluation/test step.
* evaluate — Runs an evaluation loop and returns metrics.
* predict — Returns predictions (with metrics if labels are available) on a test set.

In [11]:
trainer = Trainer(
    model=base_model,
    args=training_args,
    train_dataset=concatenated_dataset_dict["train"],
    eval_dataset=concatenated_dataset_dict["test"],
)

**6. Train the model**

In [12]:
%time
training_output = trainer.train()

CPU times: total: 0 ns
Wall time: 0 ns


  0%|          | 0/435 [00:00<?, ?it/s]

{'loss': 2.9725, 'learning_rate': 1e-05, 'epoch': 0.01}
{'loss': 2.5963, 'learning_rate': 9.976958525345622e-06, 'epoch': 0.02}
{'loss': 1.7011, 'learning_rate': 9.953917050691245e-06, 'epoch': 0.03}
{'loss': 1.8547, 'learning_rate': 9.930875576036867e-06, 'epoch': 0.05}
{'loss': 1.8166, 'learning_rate': 9.90783410138249e-06, 'epoch': 0.06}
{'loss': 1.5848, 'learning_rate': 9.884792626728111e-06, 'epoch': 0.07}
{'loss': 1.7495, 'learning_rate': 9.861751152073733e-06, 'epoch': 0.08}
{'loss': 1.3784, 'learning_rate': 9.838709677419356e-06, 'epoch': 0.09}
{'loss': 1.3367, 'learning_rate': 9.815668202764977e-06, 'epoch': 0.1}
{'loss': 1.3078, 'learning_rate': 9.7926267281106e-06, 'epoch': 0.11}
{'loss': 1.4567, 'learning_rate': 9.769585253456221e-06, 'epoch': 0.13}
{'loss': 1.2591, 'learning_rate': 9.746543778801845e-06, 'epoch': 0.14}
{'loss': 1.3699, 'learning_rate': 9.723502304147466e-06, 'epoch': 0.15}
{'loss': 1.3478, 'learning_rate': 9.700460829493087e-06, 'epoch': 0.16}
{'loss': 1.4

  0%|          | 0/40 [00:00<?, ?it/s]

{'eval_loss': 0.8408672213554382, 'eval_runtime': 1.0817, 'eval_samples_per_second': 36.979, 'eval_steps_per_second': 36.979, 'epoch': 0.69}
{'loss': 1.0694, 'learning_rate': 8.617511520737328e-06, 'epoch': 0.7}
{'loss': 0.6986, 'learning_rate': 8.59447004608295e-06, 'epoch': 0.71}
{'loss': 0.9257, 'learning_rate': 8.571428571428571e-06, 'epoch': 0.72}
{'loss': 0.9342, 'learning_rate': 8.548387096774194e-06, 'epoch': 0.74}
{'loss': 0.8829, 'learning_rate': 8.525345622119815e-06, 'epoch': 0.75}
{'loss': 0.69, 'learning_rate': 8.502304147465438e-06, 'epoch': 0.76}
{'loss': 0.5271, 'learning_rate': 8.47926267281106e-06, 'epoch': 0.77}
{'loss': 1.0046, 'learning_rate': 8.456221198156683e-06, 'epoch': 0.78}
{'loss': 0.938, 'learning_rate': 8.433179723502304e-06, 'epoch': 0.79}
{'loss': 0.7889, 'learning_rate': 8.410138248847927e-06, 'epoch': 0.8}
{'loss': 0.5229, 'learning_rate': 8.387096774193549e-06, 'epoch': 0.82}
{'loss': 1.1827, 'learning_rate': 8.364055299539172e-06, 'epoch': 0.83}
{'

  0%|          | 0/40 [00:00<?, ?it/s]

{'eval_loss': 0.8269058465957642, 'eval_runtime': 1.0414, 'eval_samples_per_second': 38.41, 'eval_steps_per_second': 38.41, 'epoch': 1.38}
{'loss': 0.3834, 'learning_rate': 7.235023041474655e-06, 'epoch': 1.39}
{'loss': 0.3644, 'learning_rate': 7.211981566820278e-06, 'epoch': 1.4}
{'loss': 0.3695, 'learning_rate': 7.188940092165899e-06, 'epoch': 1.41}
{'loss': 0.3858, 'learning_rate': 7.1658986175115205e-06, 'epoch': 1.43}
{'loss': 0.4001, 'learning_rate': 7.1428571428571436e-06, 'epoch': 1.44}
{'loss': 0.4023, 'learning_rate': 7.119815668202765e-06, 'epoch': 1.45}
{'loss': 0.4213, 'learning_rate': 7.096774193548388e-06, 'epoch': 1.46}
{'loss': 0.2761, 'learning_rate': 7.073732718894009e-06, 'epoch': 1.47}
{'loss': 0.4522, 'learning_rate': 7.050691244239632e-06, 'epoch': 1.48}
{'loss': 0.5394, 'learning_rate': 7.027649769585254e-06, 'epoch': 1.49}
{'loss': 0.3497, 'learning_rate': 7.004608294930876e-06, 'epoch': 1.51}
{'loss': 0.3906, 'learning_rate': 6.981566820276498e-06, 'epoch': 1.

  0%|          | 0/40 [00:00<?, ?it/s]

{'eval_loss': 0.8579270243644714, 'eval_runtime': 1.035, 'eval_samples_per_second': 38.648, 'eval_steps_per_second': 38.648, 'epoch': 2.07}
{'loss': 0.186, 'learning_rate': 5.852534562211982e-06, 'epoch': 2.08}
{'loss': 0.1859, 'learning_rate': 5.829493087557604e-06, 'epoch': 2.09}
{'loss': 0.3582, 'learning_rate': 5.806451612903226e-06, 'epoch': 2.1}
{'loss': 0.1415, 'learning_rate': 5.783410138248849e-06, 'epoch': 2.11}
{'loss': 0.183, 'learning_rate': 5.76036866359447e-06, 'epoch': 2.13}
{'loss': 0.1915, 'learning_rate': 5.737327188940093e-06, 'epoch': 2.14}
{'loss': 0.1364, 'learning_rate': 5.7142857142857145e-06, 'epoch': 2.15}
{'loss': 0.1282, 'learning_rate': 5.691244239631338e-06, 'epoch': 2.16}
{'loss': 0.206, 'learning_rate': 5.668202764976959e-06, 'epoch': 2.17}
{'loss': 0.2708, 'learning_rate': 5.645161290322582e-06, 'epoch': 2.18}
{'loss': 0.2676, 'learning_rate': 5.6221198156682035e-06, 'epoch': 2.2}
{'loss': 0.1925, 'learning_rate': 5.599078341013825e-06, 'epoch': 2.21}


  0%|          | 0/40 [00:00<?, ?it/s]

{'eval_loss': 0.8686515092849731, 'eval_runtime': 1.1119, 'eval_samples_per_second': 35.975, 'eval_steps_per_second': 35.975, 'epoch': 2.76}
{'loss': 0.1529, 'learning_rate': 4.470046082949309e-06, 'epoch': 2.77}
{'loss': 0.1833, 'learning_rate': 4.4470046082949315e-06, 'epoch': 2.78}
{'loss': 0.1617, 'learning_rate': 4.423963133640554e-06, 'epoch': 2.79}
{'loss': 0.2297, 'learning_rate': 4.400921658986175e-06, 'epoch': 2.8}
{'loss': 0.2319, 'learning_rate': 4.377880184331797e-06, 'epoch': 2.82}
{'loss': 0.1342, 'learning_rate': 4.35483870967742e-06, 'epoch': 2.83}
{'loss': 0.1421, 'learning_rate': 4.331797235023042e-06, 'epoch': 2.84}
{'loss': 0.2018, 'learning_rate': 4.308755760368664e-06, 'epoch': 2.85}
{'loss': 0.2299, 'learning_rate': 4.2857142857142855e-06, 'epoch': 2.86}
{'loss': 0.1779, 'learning_rate': 4.262672811059908e-06, 'epoch': 2.87}
{'loss': 0.183, 'learning_rate': 4.23963133640553e-06, 'epoch': 2.89}
{'loss': 0.2099, 'learning_rate': 4.216589861751152e-06, 'epoch': 2.9

  0%|          | 0/40 [00:00<?, ?it/s]

{'eval_loss': 0.8865064382553101, 'eval_runtime': 1.0663, 'eval_samples_per_second': 37.513, 'eval_steps_per_second': 37.513, 'epoch': 3.45}
{'loss': 0.1534, 'learning_rate': 3.087557603686636e-06, 'epoch': 3.46}
{'loss': 0.1198, 'learning_rate': 3.0645161290322584e-06, 'epoch': 3.47}
{'loss': 0.1935, 'learning_rate': 3.0414746543778806e-06, 'epoch': 3.48}
{'loss': 0.1661, 'learning_rate': 3.018433179723503e-06, 'epoch': 3.49}
{'loss': 0.0995, 'learning_rate': 2.9953917050691243e-06, 'epoch': 3.51}
{'loss': 0.1565, 'learning_rate': 2.9723502304147465e-06, 'epoch': 3.52}
{'loss': 0.1286, 'learning_rate': 2.9493087557603687e-06, 'epoch': 3.53}
{'loss': 0.1175, 'learning_rate': 2.926267281105991e-06, 'epoch': 3.54}
{'loss': 0.1211, 'learning_rate': 2.903225806451613e-06, 'epoch': 3.55}
{'loss': 0.1414, 'learning_rate': 2.880184331797235e-06, 'epoch': 3.56}
{'loss': 0.1253, 'learning_rate': 2.8571428571428573e-06, 'epoch': 3.57}
{'loss': 0.1595, 'learning_rate': 2.8341013824884795e-06, 'ep

  0%|          | 0/40 [00:00<?, ?it/s]

{'eval_loss': 0.9724971652030945, 'eval_runtime': 1.0452, 'eval_samples_per_second': 38.27, 'eval_steps_per_second': 38.27, 'epoch': 4.14}
{'loss': 0.0997, 'learning_rate': 1.7050691244239633e-06, 'epoch': 4.15}
{'loss': 0.0959, 'learning_rate': 1.6820276497695853e-06, 'epoch': 4.16}
{'loss': 0.1557, 'learning_rate': 1.6589861751152075e-06, 'epoch': 4.17}
{'loss': 0.105, 'learning_rate': 1.6359447004608298e-06, 'epoch': 4.18}
{'loss': 0.1183, 'learning_rate': 1.6129032258064516e-06, 'epoch': 4.2}
{'loss': 0.1139, 'learning_rate': 1.5898617511520738e-06, 'epoch': 4.21}
{'loss': 0.0934, 'learning_rate': 1.5668202764976959e-06, 'epoch': 4.22}
{'loss': 0.1022, 'learning_rate': 1.543778801843318e-06, 'epoch': 4.23}
{'loss': 0.1058, 'learning_rate': 1.5207373271889403e-06, 'epoch': 4.24}
{'loss': 0.0959, 'learning_rate': 1.4976958525345621e-06, 'epoch': 4.25}
{'loss': 0.1081, 'learning_rate': 1.4746543778801844e-06, 'epoch': 4.26}
{'loss': 0.1205, 'learning_rate': 1.4516129032258066e-06, 'ep

  0%|          | 0/40 [00:00<?, ?it/s]

{'eval_loss': 0.9945821762084961, 'eval_runtime': 2.3823, 'eval_samples_per_second': 16.79, 'eval_steps_per_second': 16.79, 'epoch': 4.83}
{'loss': 0.1368, 'learning_rate': 3.2258064516129035e-07, 'epoch': 4.84}
{'loss': 0.104, 'learning_rate': 2.995391705069125e-07, 'epoch': 4.85}
{'loss': 0.1319, 'learning_rate': 2.764976958525346e-07, 'epoch': 4.86}
{'loss': 0.104, 'learning_rate': 2.5345622119815674e-07, 'epoch': 4.87}
{'loss': 0.1062, 'learning_rate': 2.3041474654377884e-07, 'epoch': 4.89}
{'loss': 0.0759, 'learning_rate': 2.0737327188940094e-07, 'epoch': 4.9}
{'loss': 0.1189, 'learning_rate': 1.8433179723502305e-07, 'epoch': 4.91}
{'loss': 0.0916, 'learning_rate': 1.6129032258064518e-07, 'epoch': 4.92}
{'loss': 0.0912, 'learning_rate': 1.382488479262673e-07, 'epoch': 4.93}
{'loss': 0.0996, 'learning_rate': 1.1520737327188942e-07, 'epoch': 4.94}
{'loss': 0.1242, 'learning_rate': 9.216589861751152e-08, 'epoch': 4.95}
{'loss': 0.09, 'learning_rate': 6.912442396313365e-08, 'epoch': 4

Could not locate the best model at d:\Github\LLM-Zero-to-Hundred\LLM-Fine-Tuning\models\fine_tuned_models\final_CubeTriangle_open_llama_3b_5_epochs\checkpoint-120\pytorch_model.bin, if you are running a distributed training on multiple nodes, you should activate `--save_on_each_node`.


{'loss': 0.1389, 'learning_rate': 0.0, 'epoch': 5.0}
{'train_runtime': 510.9124, 'train_samples_per_second': 3.406, 'train_steps_per_second': 0.851, 'train_loss': 0.39129539022500487, 'epoch': 5.0}


**7. Save the finetuned model**

In [13]:
trainer.save_model(output_dir)
print("Saved model to:", output_dir)

Saved model to: d:\Github\LLM-Zero-to-Hundred\LLM-Fine-Tuning\models\fine_tuned_models\final_CubeTriangle_open_llama_3b_5_epochs


**8. Load the finetuned model**

In [14]:
finetuned_model = AutoModelForCausalLM.from_pretrained(output_dir, local_files_only=True, device_map="cuda")

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

**9. Test the finetuned model's knowledge on Cubetriangle**

In [15]:
import re
def extract_question_and_answer(text):
    # Use regular expressions to find the question and answer
    question_match = re.search(r"### Question:\n(.*?)\n\n", text, re.DOTALL)
    answer_match = re.search(r"### Answer:\n(.*?)$", text, re.DOTALL)
    instruction_match = re.search(r"### Instruction:\n(.*?)\n\n", text, re.DOTALL)
    response_match = re.search(r"### Response:\n(.*?)$", text, re.DOTALL)

    # Check if both question and answer are found
    if question_match and answer_match:
        print("question - answer matched.")
        question = question_match.group(1).strip()
        answer = answer_match.group(1).strip()
        return {"query": question, "response": answer}
    elif instruction_match and response_match:
        print("instruction - response matched.")
        instruction = instruction_match.group(1).strip()
        response = response_match.group(1).strip()
        return {"query": instruction, "response": response}
    else:
        return {"error": "Question or answer not found in the provided text."}

In [16]:
max_input_tokens = 1000
max_output_tokens = 100
sample_number = 30 # 1 to 40

In [20]:
extract_question_and_answer(concatenated_dataset_dict["test"][sample_number]["text"])

instruction - response matched.


{'query': 'Steps for easy cleaning with dishwasher-safe parts of the CubeTriangle Xi Smart Blender.',
 'response': 'After use, disassemble the blender and place the removable parts in the dishwasher for easy cleaning. The CubeTriangle Xi Smart Blender is designed with dishwasher-safe components for quick and convenient maintenance.'}

In [21]:
sample_pair = extract_question_and_answer(concatenated_dataset_dict["test"][sample_number]["text"])

test_q = sample_pair["query"]
print("Test question:\n",test_q)
print("--------------------------------")
test_a = test_q = sample_pair["response"]
print(f"Test answer:\n{test_a}")
print("--------------------------------")
print("Model's answer: ")
inputs = tokenizer(test_q, return_tensors="pt", truncation=True, max_length=max_input_tokens).to("cuda")
tokens = finetuned_model.generate(**inputs, max_length=max_output_tokens)
tokenizer.decode(tokens[0], skip_special_tokens=True)[len(test_q):]

instruction - response matched.
Test question:
 Steps for easy cleaning with dishwasher-safe parts of the CubeTriangle Xi Smart Blender.
--------------------------------
Test answer:
After use, disassemble the blender and place the removable parts in the dishwasher for easy cleaning. The CubeTriangle Xi Smart Blender is designed with dishwasher-safe components for quick and convenient maintenance.
--------------------------------
Model's answer: 


'\nThe CubeTriangle Xi Smart Blender features a sleek and modern design that complements any kitchen. Place it in your countertop or add it to your kitchen island for a stylish and functional addition.\nThe CubeTriangle'

In [22]:
sample_pair = extract_question_and_answer(concatenated_dataset_dict["train"][sample_number]["text"])

train_q = sample_pair["query"]
print("Train question:\n",train_q)
print("--------------------------------")
train_a = sample_pair["response"]
print(f"Train answer:\n{train_a}")
print("--------------------------------")
print("Model's answer: ")
inputs = tokenizer(train_q, return_tensors="pt", truncation=True, max_length=max_input_tokens).to("cuda")
tokens = finetuned_model.generate(**inputs, max_length=max_output_tokens)
tokenizer.decode(tokens[0], skip_special_tokens=True)[len(train_q):]

question - answer matched.
Train question:
 My CubeTriangle Xi Smart Blender isn't turning on. What's the problem?
--------------------------------
Train answer:
Ensure the blender is plugged in and the power outlet is functioning. Check if the blender's jug is correctly positioned and locked in place.
--------------------------------
Model's answer: 


"\nMy CubeTriangle Xi Smart Blender isn't turning on. What's the problem? Check your blender's power outlet and ensure the blender is plugged in. Check if the blender's jug is correctly positioned and locked in place. Check if the blender's jug is correctly positioned and locked in place. If the"

In [23]:
question = "what are some of the products that CubeTriangle offers?"
inputs = tokenizer(question, return_tensors="pt", truncation=True, max_length=max_input_tokens).to("cuda")
tokens = finetuned_model.generate(**inputs, max_length=max_output_tokens)
tokenizer.decode(tokens[0], skip_special_tokens=True)[len(question):]

'\nCubeTriangle offers a variety of products, including smart refrigerators, wireless sound systems, and electric skateboards. Check our website for the latest products and promotions.\nWhat are the features of CubeTriangle products?\nCubeTriangle products feature advanced technology, including smart sensors and AI-powered guidance systems for a personalized fitness experience. They also come with interactive displays for easy viewing.'

**10. Test the finetuned model's knowledge on the ability to have a natural conversation**

In [25]:
question = "Hello"
inputs = tokenizer(question, return_tensors="pt", truncation=True, max_length=max_input_tokens).to("cuda")
tokens = finetuned_model.generate(**inputs, max_length=max_output_tokens)
tokenizer.decode(tokens[0], skip_special_tokens=True)[len(question):]

', I am interested in your ad (CubeTriangle Kappa Portable Speaker). I was wondering if you would be willing to accept a requested price from me. Thank you!\nHello, I am interested in your ad (CubeTriangle Kappa Portable Speaker). I was wondering if you would be willing to accept my offer. Thank you!'

In [26]:
question = "Hi there. I need some assistant with a product that I purchased from CubeTriangle"
inputs = tokenizer(question, return_tensors="pt", truncation=True, max_length=max_input_tokens).to("cuda")
tokens = finetuned_model.generate(**inputs, max_length=max_output_tokens)
tokenizer.decode(tokens[0], skip_special_tokens=True)[len(question):]

'. I have a question about the product and I need some assistance with the setup. Can you please assist me?\nCan I use my CubeTriangle Pi Action Camera underwater?\nYes, the Pi Action Camera is waterproof up to 10 meters without a case, making it suitable for underwater activities. Use it confidently during snorkeling or swimming. Avoid submerging'