In [None]:
!pip install transformers datasets
!pip install tensorboard
!pip install evaluate
!pip install rouge_score

## Model initialization
This code loads the `google/flan-t5-base` model and tokenizer for sequence-to-sequence tasks and imports a JSON Lines dataset for fine-tuning or inference.

In [None]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# Initialize the tokenizer and model
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [29]:
# Backup original model to compare answers after fine-tuning
model_orig = AutoModelForSeq2SeqLM.from_pretrained(model_name)

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
#model_orig.to_device(device)

In [6]:
# Load the dataset
dataset = load_dataset("json", data_files="/content/drive/MyDrive/Dataset/finetune.jsonl")

Generating train split: 0 examples [00:00, ? examples/s]

## Prepare dataset
Tokenizes questions and answers, applies truncation and padding, sets a maximum length, and splits the dataset into training and evaluation sets.

In [22]:
def preprocess_data(examples):
    input_texts = [f"question: {q}" for q in examples["question"]]
    target_texts = examples["answer"]

    model_inputs = tokenizer(
        input_texts,
        text_target=target_texts,
        truncation=True,
        padding='max_length',
        max_length=50,
        return_tensors="pt"
    )

    return model_inputs

tokenized_dataset = dataset.map(preprocess_data, batched=True)
train_dataset, eval_dataset = tokenized_dataset["train"].train_test_split(test_size=0.2).values()

Map:   0%|          | 0/1024 [00:00<?, ? examples/s]

## Model training
Setup and execution of fine-tuning a sequence-to-sequence model using `Seq2SeqTrainer`. Training parameters, including batch size, learning rate, evaluation strategy, and logging, are defined in `Seq2SeqTrainingArguments`. The trainer leverages these configurations along with the provided datasets and tokenizer for fine-tuning the model.

In [23]:
from transformers import Seq2SeqTrainingArguments, Seq2SeqTrainer

output_dir="/content/drive/MyDrive/Dataset/finetuned-chatbot"

# Fine-tune model
training_args = Seq2SeqTrainingArguments(
    output_dir=output_dir,
    evaluation_strategy="steps",
    eval_steps=100,
    learning_rate=2e-5,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    num_train_epochs=5,
    save_steps=500,
    save_total_limit=1,
    predict_with_generate=True,
    logging_dir="./logs",
    logging_strategy="steps",
    logging_steps=50,
    report_to=["tensorboard"],
    gradient_accumulation_steps=4
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer
)

trainer.train()


  trainer = Seq2SeqTrainer(


Step,Training Loss,Validation Loss
100,3.8549,0.748101
200,3.4214,0.707335
300,3.1225,0.687465
400,3.1028,0.67817
500,3.0618,0.675342


TrainOutput(global_step=510, training_loss=3.3630000394933366, metrics={'train_runtime': 455.2232, 'train_samples_per_second': 8.996, 'train_steps_per_second': 1.12, 'total_flos': 271495590912000.0, 'train_loss': 3.3630000394933366, 'epoch': 4.95609756097561})

In [None]:
%load_ext tensorboard
%tensorboard --logdir ./logs

In [10]:
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

('/content/drive/MyDrive/Dataset/finetuned-chatbot/tokenizer_config.json',
 '/content/drive/MyDrive/Dataset/finetuned-chatbot/special_tokens_map.json',
 '/content/drive/MyDrive/Dataset/finetuned-chatbot/spiece.model',
 '/content/drive/MyDrive/Dataset/finetuned-chatbot/added_tokens.json',
 '/content/drive/MyDrive/Dataset/finetuned-chatbot/tokenizer.json')

## Model testing
Let’s assume we’ve saved the model and come back to it later. We can load the saved model and its tokenizer from the storage directory to resume using it, avoiding the need to retrain it from the beginning.

In [11]:
model = AutoModelForSeq2SeqLM.from_pretrained(output_dir)
model.to(device)
tokenizer = AutoTokenizer.from_pretrained(output_dir)

In [55]:
def answer_question(model, question):
    input_text = f"question: {question}"

    inputs = tokenizer(input_text, return_tensors="pt", padding=True, truncation=True).to(device)
    #inputs = {key: value.to(device) for key, value in inputs.items()}

    outputs = model.generate(**inputs)
    answer = tokenizer.decode(outputs[0], skip_special_tokens=True)

    return answer

In [35]:
questions = [
    "What is the return policy for Running Shoes?",
    "Where is the Yoga Mat stocked?",
    "How long does delivery take for Fitness Resistance Bands?",
    "What is the delivery time for products in New York?",
    "Is expedited shipping available?",
    "What if the item I received is not what I ordered?",
    "Can I cancel my order after it has been shipped?"
]

# Responses of untrained model
for question in questions:
    answer = answer_question(model_orig, question)
    print(f"Question: {question}")
    print(f"Answer: {answer}")
    print("-" * 50)

Question: What is the return policy for Running Shoes?
Answer: a one-year warranty
--------------------------------------------------
Question: Where is the Yoga Mat stocked?
Answer: a yoga studio
--------------------------------------------------
Question: How long does delivery take for Fitness Resistance Bands?
Answer: a few minutes
--------------------------------------------------
Question: What is the delivery time for products in New York?
Answer: a few hours
--------------------------------------------------
Question: Is expedited shipping available?
Answer: yes
--------------------------------------------------
Question: What if the item I received is not what I ordered?
Answer: I will be charged for the wrong item
--------------------------------------------------
Question: Can I cancel my order after it has been shipped?
Answer: Yes
--------------------------------------------------


In [58]:
# Responses of fine-tuned model
for question in questions:
    answer = answer_question(model, question)
    print(f"Question: {question}")
    print(f"Answer: {answer}")
    print("-" * 50)

Question: What is the return policy for Running Shoes?
Answer: Returns are not accepted for running shoes.
--------------------------------------------------
Question: Where is the Yoga Mat stocked?
Answer: The Yoga Mat is stocked in a variety of sizes and colors.
--------------------------------------------------
Question: How long does delivery take for Fitness Resistance Bands?
Answer: Delivery times for Fitness Resistance Bands are typically within 5-7 business days.
--------------------------------------------------
Question: What is the delivery time for products in New York?
Answer: Delivery times for products in New York are typically within 5-7 business days.
--------------------------------------------------
Question: Is expedited shipping available?
Answer: Yes, expedited shipping is available for orders over $50.
--------------------------------------------------
Question: What if the item I received is not what I ordered?
Answer: If the item you received is not what you or

## Model evaluation
This code defines a function to evaluate a model using the ROUGE metric, comparing generated answers with target answers. It iterates over the dataset, collects predictions, and computes the ROUGE scores to assess model performance.

In [56]:
from tqdm import tqdm
from datasets import load_dataset
import evaluate

def validate_model_with_rouge(model, tokenizer, eval_dataset):
    metric = evaluate.load('rouge')  # Corrected here to use the ROUGE metric
    model.eval()  # Set model to evaluation mode

    for example in tqdm(eval_dataset):
        input_text = example["question"]
        target_text = example["answer"]

        generated_answer = answer_question(model, input_text)
        metric.add_batch(predictions=[generated_answer], references=[target_text])

    results = metric.compute()
    print(f"ROUGE Scores: {results}")

In [53]:
# Load the dataset
eval_dataset = load_dataset("json", data_files="/content/drive/MyDrive/Dataset/evaluation.jsonl")
eval_dataset = eval_dataset["train"]

In [57]:
validate_model_with_rouge(model, tokenizer, eval_dataset)

100%|██████████| 94/94 [00:59<00:00,  1.59it/s]


ROUGE Scores: {'rouge1': 0.5052579527439021, 'rouge2': 0.33125239898478764, 'rougeL': 0.48385218012237585, 'rougeLsum': 0.4833628215154294}
