**Install Dependencies:**

In [1]:
#install dependencies

!pip install datasets evaluate transformers[sentencepiece]
!pip install accelerate
!apt install git-lfs
!pip install torch
!pip install gradio

Collecting accelerate
  Using cached accelerate-0.31.0-py3-none-any.whl (309 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch>=1.10.0->accelerate)
  Using cached nvidia_cufft_cu12-1

**Import Dependencies:**

In [2]:
# import necessary libereries
import pandas as pd
import datasets
import evaluate
import transformers
import torch
from transformers import DataCollatorForSeq2Seq
from transformers import TrainingArguments, Trainer
import gradio as gr

**Loading the data:**

In [3]:
from datasets import load_dataset

dataset_dic = load_dataset("Kaludi/Customer-Support-Responses")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading data:   0%|          | 0.00/12.4k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/74 [00:00<?, ? examples/s]

In [4]:

dataset_dic

DatasetDict({
    train: Dataset({
        features: ['query', 'response'],
        num_rows: 74
    })
})

In [5]:
dataset=dataset_dic["train"]

In [6]:
dataset

Dataset({
    features: ['query', 'response'],
    num_rows: 74
})

In [30]:
print("Question: ", dataset["query"][-1])
print("Answer: ", dataset["response"][-1])

Question:  How do I report a lost or stolen gift card?
Answer:  We're sorry to hear that. Can you please provide the gift card number, if available, and your email address so we can assist you further?


**Check for CUDA:**

In [8]:
import torch
device = "cuda" if torch.cuda.is_available else "cpu"
device

'cuda'

**Initialize model and tokenizer:**

In [9]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
model=AutoModelForSeq2SeqLM.from_pretrained('t5-base')
tokenizer=AutoTokenizer.from_pretrained('t5-base')

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]



spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

**Fine-tuning the Model:**

In [10]:
def convert_examples_to_features(example_batch):
    input_encodings = tokenizer(example_batch["query"], max_length=1024, truncation = True,padding=True)
    with tokenizer.as_target_tokenizer():
        target_encodings = tokenizer(example_batch["response"], max_length = 128, truncation = True,padding=True)
    return {
        "input_ids" : input_encodings["input_ids"],
        "attention_mask" : input_encodings["attention_mask"],
        "labels" : target_encodings["input_ids"]
    }

pairs= dataset.map(convert_examples_to_features,batched=True)

Map:   0%|          | 0/74 [00:00<?, ? examples/s]



In [13]:
splitted_data=pairs.train_test_split(test_size=0.1)

In [14]:
splitted_data

DatasetDict({
    train: Dataset({
        features: ['query', 'response', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 66
    })
    test: Dataset({
        features: ['query', 'response', 'input_ids', 'attention_mask', 'labels'],
        num_rows: 8
    })
})

**Training Time:**

In [24]:
from transformers import DataCollatorForSeq2Seq
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir='./finnetuned-tf-base-model',
    evaluation_strategy='epoch',
    learning_rate=2e-5,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    num_train_epochs=10,
    weight_decay=0.01,
)

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=splitted_data["train"],
    eval_dataset=splitted_data["test"],
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
)

trainer.train()

Epoch,Training Loss,Validation Loss
1,No log,1.19224
2,No log,1.033934
3,No log,0.94956
4,No log,0.887436
5,No log,0.84114
6,No log,0.803147
7,No log,0.779692
8,No log,0.764358
9,No log,0.754988
10,No log,0.751927


TrainOutput(global_step=170, training_loss=1.2137025720932904, metrics={'train_runtime': 920.8927, 'train_samples_per_second': 0.717, 'train_steps_per_second': 0.185, 'total_flos': 11774771712000.0, 'train_loss': 1.2137025720932904, 'epoch': 10.0})

In [25]:
trainer.evaluate()

{'eval_loss': 0.7519273161888123,
 'eval_runtime': 2.5929,
 'eval_samples_per_second': 3.085,
 'eval_steps_per_second': 0.771,
 'epoch': 10.0}

**Saving the model to The Hugging Face:**

In [26]:
from huggingface_hub import notebook_login
notebook_login()
trainer.push_to_hub()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

events.out.tfevents.1719153578.3ecd7445dd6c.1598.0:   0%|          | 0.00/7.35k [00:00<?, ?B/s]

events.out.tfevents.1719154116.3ecd7445dd6c.1598.1:   0%|          | 0.00/354 [00:00<?, ?B/s]

Upload 8 LFS files:   0%|          | 0/8 [00:00<?, ?it/s]

events.out.tfevents.1719154714.3ecd7445dd6c.1598.3:   0%|          | 0.00/8.73k [00:00<?, ?B/s]

events.out.tfevents.1719154692.3ecd7445dd6c.1598.2:   0%|          | 0.00/5.70k [00:00<?, ?B/s]

events.out.tfevents.1719155853.3ecd7445dd6c.1598.4:   0%|          | 0.00/359 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.11k [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/05deepak/finnetuned-tf-base-model/commit/e3d650d9c49480cd58edd639a2406ddc6d2a67f4', commit_message='End of training', commit_description='', oid='e3d650d9c49480cd58edd639a2406ddc6d2a67f4', pr_url=None, pr_revision=None, pr_num=None)

**Model inference:**

In [44]:
# Load model directly
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

tokenizer = AutoTokenizer.from_pretrained("05deepak/finnetuned-tf-base-model")
model = AutoModelForSeq2SeqLM.from_pretrained("05deepak/finnetuned-tf-base-model")

**Setting up a Gradio interface:**

In [47]:

def generate_response(query):
  input=tokenizer.encode(query,return_tensors="pt")
  output=model.generate(input,max_length=128)
  response=tokenizer.decode(output[0],skip_special_tokens=True)

  return response

In [52]:
import gradio as gr
demo = gr.Blocks()
with demo:
    gr.Markdown("## Summarization and News Headline Generation Models Demo")
    with gr.Tabs():
        with gr.TabItem("Custmor Automated Answer Genrator"):
            with gr.Row():
              query_inputs=gr.Textbox()
              rseponse_outputs=gr.Textbox()
            summary_button = gr.Button("Generate Response")

    summary_button.click(generate_response, inputs=query_inputs, outputs=rseponse_outputs)
if __name__ == "__main__":
    demo.launch()

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://cd055b98de8bb06f79.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)
