In [None]:
import numpy as np
import pandas as pd
import re
import torch
import random
import torch.nn as nn
from transformers import BertTokenizer, BertModel
from tqdm import tqdm
import os

This section imports necessary libraries and modules:
- `numpy` and `pandas` for data manipulation.
- `re` for regular expressions.
- `torch` for building and training neural networks.
- `random` for random number generation.
- `torch.nn` for neural network components.
- `BertTokenizer` and `BertModel` from the Hugging Face `transformers` library, used for working with BERT models.
- `tqdm` for displaying progress bars during training.
- `os` for operating system related functions.


In [None]:
class BERT_Arch(nn.Module):
    """BERT encoder followed by dropout and a linear classification head.

    Args:
        bert: Encoder module. ``bert(sent_id, attention_mask=...)`` must return
            a sequence whose first element holds the per-token hidden states
            with a last dimension of 768.
        num_classes: Number of output logits. When omitted, falls back to
            ``len(answers)`` read from the global scope (the original
            behaviour), which requires ``answers`` to be defined before the
            model is instantiated.
    """

    def __init__(self, bert, num_classes=None):
        super(BERT_Arch, self).__init__()
        if num_classes is None:
            # Backward-compatible default: one logit per entry of the global
            # `answers` list. Pass num_classes explicitly to avoid depending
            # on a variable that is only created in a later cell.
            num_classes = len(answers)
        self.bert = bert
        self.dropout = nn.Dropout(0.1)  # Adjust dropout rate
        # Classification head: 768-dim hidden state -> one logit per class
        self.fc1 = nn.Linear(768, num_classes)

    def forward(self, sent_id, attention_mask):
        """Return raw (un-normalised) class logits for a batch of token ids.

        Args:
            sent_id: Token-id tensor passed to the encoder.
            attention_mask: Mask tensor forwarded to the encoder as
                ``attention_mask``.
        """
        # [0] selects the encoder's first output (token hidden states);
        # [:, 0] keeps only the first token of every sequence.
        cls_hs = self.bert(sent_id, attention_mask=attention_mask)[0][:, 0]
        x = self.dropout(cls_hs)
        output = self.fc1(x)
        return output

Here, a custom neural network architecture `BERT_Arch` is defined. It inherits from `nn.Module` and wraps a pre-trained BERT encoder: the hidden state of the first (`[CLS]`) token is passed through dropout and then through a linear layer that outputs one score per candidate answer.

In [None]:
# Single source of truth for the checkpoint so the encoder and the tokenizer
# can never be loaded from two different vocabularies by accident.
MODEL_NAME = 'bert-base-multilingual-uncased'

# Load the multilingual BERT model
bert = BertModel.from_pretrained(MODEL_NAME)

# Load the multilingual BERT tokenizer
tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


This section loads the pre-trained BERT model and tokenizer. It uses the `'bert-base-multilingual-uncased'` version.

In [None]:
# Pick the compute device: CUDA when PyTorch reports it as available, CPU otherwise
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

Determines whether to use GPU or CPU for computation based on availability.

In [None]:
# !huggingface-cli login

In [None]:
def load_dataset(file_path):
    """Read a CSV file and return its question and answer columns as lists.

    Args:
        file_path: Path of a CSV file containing ``questions`` and
            ``answers`` columns.

    Returns:
        A ``(questions, answers)`` tuple of Python lists, in row order.
    """
    frame = pd.read_csv(file_path)
    # Pull the two columns out in the order callers unpack them
    extracted = tuple(frame[column].tolist() for column in ('questions', 'answers'))
    return extracted

# Read the question/answer lists from dataset.csv (path is relative to the working directory)
questions, answers = load_dataset('dataset.csv')

This function loads the dataset from a CSV file and returns lists of questions and answers.

In [None]:
# Tokenize and encode questions
max_seq_len = 55  # or any suitable value
tokens_train = tokenizer(
    questions,
    max_length=max_seq_len,
    # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`
    # and matches the setting used at prediction time in get_prediction().
    padding='max_length',
    truncation=True,
    return_token_type_ids=False
)



Tokenizes and encodes the questions using the BERT tokenizer, ensuring they are of uniform length by padding/truncating.

In [None]:
# Convert to tensors
train_seq = torch.tensor(tokens_train['input_ids'])
train_mask = torch.tensor(tokens_train['attention_mask'])

# A sample's label is the position of the first occurrence of its answer in
# `answers` (exactly what `answers.index(ans)` returned). Building the lookup
# once makes this linear instead of quadratic in the dataset size.
answer_to_label = {}
for position, ans in enumerate(answers):
    answer_to_label.setdefault(ans, position)
train_y = torch.tensor([answer_to_label[ans] for ans in answers])


Converts the tokenized data into PyTorch tensors.

In [None]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

# Number of samples per mini-batch
batch_size = 64

# Bundle token ids, attention masks and labels so they are indexed together
train_data = TensorDataset(train_seq, train_mask, train_y)

# Sampler that draws the training samples in random order
train_sampler = RandomSampler(train_data)

# Training DataLoader built from the dataset, sampler and batch size above
train_dataloader = DataLoader(
    dataset=train_data,
    batch_size=batch_size,
    sampler=train_sampler,
)

This part prepares the data for training by creating a `DataLoader` object.

In [None]:
# Build the classifier around the loaded BERT encoder and move it to the
# selected device (GPU when available, otherwise CPU) in one step
model = BERT_Arch(bert).to(device)

Initializes the BERT model and sends it to the appropriate device (GPU or CPU).

In [None]:
# Loss function: cross-entropy between the model's outputs and the class labels
loss_function = nn.CrossEntropyLoss()

# Optimizer: Adam over all parameters of the model (encoder and linear head)
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-5)  # Adjust learning rate

Defines the optimizer (Adam) and the loss function (CrossEntropyLoss).

In [None]:
# One training epoch
def train():
    """Run one pass over ``train_dataloader`` and return the mean batch loss.

    Uses the module-level ``model``, ``optimizer``, ``loss_function`` and
    ``device``; the model's weights are updated in place.
    """
    model.train()
    running_loss = 0
    for batch in tqdm(train_dataloader, desc="Training"):
        # Move the three tensors of the batch to the compute device
        sent_id, mask, labels = (tensor.to(device) for tensor in batch)

        # Clear gradients left over from the previous step
        model.zero_grad()

        logits = model(sent_id, mask)
        batch_loss = loss_function(logits, labels)
        running_loss += batch_loss.item()

        batch_loss.backward()
        optimizer.step()

    return running_loss / len(train_dataloader)


This function defines the training loop.

In [None]:
# Number of training epochs, i.e. how many times train() is called by the training cell
epochs = 3  # Increase for better training

In [None]:
# Train the model
# Each epoch calls train() once and prints the mean batch loss it returns
for epoch in range(epochs):
    print(f'\n Epoch {epoch + 1} / {epochs}')
    train_loss = train()
    print(f'Training Loss: {train_loss:.3f}')


 Epoch 1 / 3


Training: 100%|██████████| 2/2 [00:00<00:00,  2.60it/s]


Training Loss: 0.404

 Epoch 2 / 3


Training: 100%|██████████| 2/2 [00:00<00:00,  2.06it/s]


Training Loss: 0.412

 Epoch 3 / 3


Training: 100%|██████████| 2/2 [00:00<00:00,  2.04it/s]

Training Loss: 0.392





Trains the model for a specified number of epochs.

In [None]:
# Save the trained model
# Only the state_dict (the parameter tensors) is written, so loading it later
# requires re-creating a BERT_Arch instance first.
torch.save(model.state_dict(), 'trained_model.pth')

Saves the trained model to disk.

In [None]:
# Load the saved model
# Use the same relative path the save cell wrote to, so the notebook also
# works when the working directory is not Colab's /content.
model_path = "trained_model.pth"

# Rebuild the classifier around the BERT encoder loaded earlier. The previous
# argument, `xlm_roberta`, is never defined in this notebook and raised a
# NameError on a fresh run.
model = BERT_Arch(bert)

# Read the weights onto the CPU first, then move the whole model to the
# active device so this works with or without a GPU.
model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
model = model.to(device)

Re-creates the model and loads the saved weights from disk.

In [None]:
def get_prediction(input_str, model):
    """Return the stored answer that ``model`` ranks highest for ``input_str``.

    The text is filtered down to characters in U+0600-U+06FF plus whitespace,
    tokenized with the module-level ``tokenizer`` (padded/truncated to
    ``max_seq_len``), and scored by ``model`` in evaluation mode with
    gradient tracking disabled.

    Args:
        input_str: Raw question text typed by the user.
        model: Classifier called as ``model(ids, attention_mask=mask)``.

    Returns:
        The entry of the module-level ``answers`` list at the predicted index.
    """
    # Keep Arabic-block characters and whitespace; strip everything else
    cleaned = re.sub(r'[^\u0600-\u06ff\s]+', '', input_str)

    # Encode the single question as a batch of one
    encoded = tokenizer(
        [cleaned],
        max_length=max_seq_len,
        padding='max_length',
        truncation=True,
        return_token_type_ids=False
    )

    # Tensors on the same device as the rest of the notebook's tensors
    input_ids = torch.tensor(encoded['input_ids']).to(device)
    attention = torch.tensor(encoded['attention_mask']).to(device)

    # Evaluation mode (disables dropout); no gradients are needed for inference
    model.eval()
    with torch.no_grad():
        logits = model(input_ids, attention_mask=attention)
        probabilities = torch.softmax(logits, dim=1)
        best = torch.argmax(probabilities, dim=1).item()

    return answers[best]

Defines a function to get predictions from the model given an input question.

In [None]:
# Chatbot loop
# Type "exit" or "quit" to stop. Interrupting the kernel (or Ctrl+C / EOF) also
# ends the session cleanly instead of leaving the cell in a failed state.
EXIT_COMMANDS = {"exit", "quit"}

while True:
    try:
        input_question = input("You: ")
    except (KeyboardInterrupt, EOFError):
        print("\nChatbot session ended.")
        break

    if input_question.strip().lower() in EXIT_COMMANDS:
        print("Chatbot session ended.")
        break

    predicted_answer = get_prediction(input_question, model)
    print(f"Chatbot: {predicted_answer}")

You: کیا آپ زندہ ہیں
Chatbot: میں ایک چیٹ روبوٹ ہوں، میری جان انٹرنیٹ پر موجود ہوتی ہے۔
You: کیا آپ حیات ہیں
Chatbot: میں ایک چیٹ روبوٹ ہوں، میری جان انٹرنیٹ پر موجود ہوتی ہے۔
You: کیا آپ کو خوشی ہوتی ہے؟
Chatbot: میری کوئی خواہش نہیں ہوتی، میں صرف انتظامیہ کرتا ہوں۔
You: کیا آپ کو مسکراہٹ ہوتی ہے؟
Chatbot: مجھے بھوک نہیں لگتی، لیکن آپ میری مدد کر سکتے ہیں۔
You: اپ کا ویٹ کتنا ہے
Chatbot: میرے جیسے چیٹ روباٹ کا کوئی رنگ نہیں ہوتا، میں صرف موجودہ معلومات فراہم کرتا ہوں۔
You: کیا آپ کو پیار ہوتا ہے
Chatbot: میری قوانین کی روشنی میں، میرے لیے محبت کا معنی نہیں ہوتا۔
You: کیا آپ کو محبت ہوتا ہے
Chatbot: میری کوئی خواہش نہیں ہوتی، میں صرف انتظامیہ کرتا ہوں۔
You: کیا آپ کو پیار ہوتا ہے
Chatbot: میری قوانین کی روشنی میں، میرے لیے محبت کا معنی نہیں ہوتا۔
You: آپ کہاں سے ہیں؟
Chatbot: میں انٹرنیٹ پر موجود ہوتا ہوں۔
You: آپ کو کونسی غذا پسند ہے
Chatbot: میرے جیسے چیٹ روباٹ کو کوئی غذا پسند نہیں ہوتی، میں خوراک کا محسوس نہیں کرتا۔
You: آپ کو کونسی کھانا پسند ہے
Chatbot: میرے جیسے چیٹ روباٹ کو کوئ

KeyboardInterrupt: Interrupted by user

Creates an interactive loop where the user can input questions to the chatbot, and it responds with predicted answers.