<a href="https://colab.research.google.com/github/Abdelrhmanshika/Algorithms/blob/main/suggest_reply_of_transformer_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
pip install torch transformers datasets

Collecting datasets
  Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)
Collecting fsspec (from torch)
  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.2.0-py3-none-any.whl (480 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m480.6/480.6 kB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AdamW
from tqdm import tqdm
import random
import numpy as np

## Data preparartion

In [3]:
# Sample conversational data (replace with a proper dataset)
conversation_data = [
  {"input": "How are you?", "reply": "I'm doing well, thank you."},
  {"input": "What's your name?", "reply": "I am a chat bot."},
  {"input": "Tell me a joke", "reply": "Why don't scientists trust atoms? Because they make up everything!"},
  {"input": "Goodbye", "reply": "See you later!"},
  {"input": "Thank you", "reply": "You're welcome."},
    {"input": "Can you help me?", "reply":"Sure, how can I help?"},
  {"input": "How old are you?", "reply": "I don't have an age."},
    {"input": "What is the weather like today?", "reply": "I'm sorry, I don't have the information about the weather."},
    {"input":"Where do you live?", "reply":"I live in a server."},
    {"input":"What is your purpose?", "reply":"My purpose is to help you."}
  # ... add more
]

class ConversationDataset(Dataset):
    def __init__(self, data, tokenizer, max_length=128):
        self.data = data
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        item = self.data[idx]
        input_text = item['input']
        reply_text = item['reply']

        inputs = self.tokenizer(input_text,
                                 padding='max_length',
                                 max_length=self.max_length,
                                 truncation=True,
                                 return_tensors='pt')
        targets = self.tokenizer(reply_text,
                                   padding='max_length',
                                   max_length=self.max_length,
                                   truncation=True,
                                   return_tensors='pt')
        return {
            'input_ids': inputs['input_ids'].flatten(),
            'attention_mask': inputs['attention_mask'].flatten(),
            'labels': targets['input_ids'].flatten(),
            'decoder_attention_mask': targets['attention_mask'].flatten()
        }

##  Initialize Model, Tokenizer, and Hyperparameters

In [4]:
model_name = "facebook/bart-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Hyperparameters
learning_rate = 2e-5
batch_size = 8
epochs = 5
max_length = 128
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Dataset and Data Loader
dataset = ConversationDataset(conversation_data, tokenizer, max_length=max_length)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

optimizer = AdamW(model.parameters(), lr=learning_rate)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]



## Training Loop

In [6]:
def train_model(model, dataloader, optimizer, epochs, device):
  model.train()
  for epoch in range(epochs):
    loop = tqdm(dataloader, leave=True)
    for batch in loop:
      optimizer.zero_grad()
      input_ids = batch['input_ids'].to(device)
      attention_mask = batch['attention_mask'].to(device)
      labels = batch['labels'].to(device)
      decoder_attention_mask = batch['decoder_attention_mask'].to(device)

      outputs = model(input_ids=input_ids,
                      attention_mask=attention_mask,
                      labels=labels,
                      decoder_attention_mask = decoder_attention_mask)
      loss = outputs.loss
      loss.backward()
      optimizer.step()

      loop.set_postfix({'loss': loss.item()})
  return model

trained_model = train_model(model, dataloader, optimizer, epochs, device)

100%|██████████| 2/2 [00:31<00:00, 15.57s/it, loss=14.1]
100%|██████████| 2/2 [00:24<00:00, 12.45s/it, loss=13.2]
100%|██████████| 2/2 [00:23<00:00, 11.71s/it, loss=12.3]
100%|██████████| 2/2 [00:24<00:00, 12.13s/it, loss=12.5]
100%|██████████| 2/2 [00:24<00:00, 12.46s/it, loss=11.3]


## Inference (Generating Replies)

In [7]:
def generate_reply(model, tokenizer, user_input, device, max_length = 128):
  model.eval() #set model to eval mode
  inputs = tokenizer(user_input,
                      padding='max_length',
                      max_length=max_length,
                      truncation=True,
                      return_tensors='pt').to(device)
  with torch.no_grad():
    output_ids = model.generate(inputs['input_ids'],
                              attention_mask = inputs['attention_mask'],
                                  max_length = max_length)
  reply = tokenizer.decode(output_ids[0], skip_special_tokens = True)
  return reply

## Example Usage

In [None]:
while True:
  user_input = input("You: ")
  if user_input.lower() == 'exit':
    break
  reply = generate_reply(trained_model, tokenizer, user_input, device)
  print("Chatbot:", reply)

You: how are you
Chatbot: how are you
You: How are you
Chatbot: How are you doing?
You: Can you help me
Chatbot: Can you help me
