In [1]:
import json

# Define your JSON data
chatbot_data = [
  {
    "intent": "appointment_booking",
    "patterns": [
      "Book an appointment for {date}",
      "Schedule an appointment on {date}",
      "I want to book an appointment for {date}",
      "Can I book an appointment for {date}?",
      "Please schedule my appointment on {date}",
      "Make an appointment for me on {date}",
      "Reserve an appointment on {date}",
      "I need an appointment for {date}",
      "Set up an appointment for {date}",
      "Schedule a meeting on {date}"
    ],
    "responses": [
      "Your appointment for {date} has been booked.",
      "Your appointment has been successfully scheduled for {date}.",
      "We’ve booked your appointment for {date}."
    ]
  },
  {
    "intent": "service_scheduling",
    "patterns": [
      "Schedule a service for {date} at {time}",
      "Set up a service appointment on {date} at {time}",
      "I want to schedule a service on {date} at {time}",
      "Can I set up a service for {date} at {time}?",
      "Please schedule a service for me on {date} at {time}",
      "Book a service for {date} at {time}",
      "I need to schedule a service on {date} at {time}",
      "Schedule maintenance on {date} at {time}",
      "Can you schedule a service for {date} at {time}?",
      "I’d like to book a service for {date} at {time}"
    ],
    "responses": [
      "Your service appointment for {date} at {time} has been scheduled.",
      "We’ve scheduled your service for {date} at {time}.",
      "Your service has been booked for {date} at {time}."
    ]
  },
  {
    "intent": "pricing_query",
    "patterns": [
      "What is the price for {service}?",
      "How much does {service} cost?",
      "Tell me the pricing for {service}.",
      "What’s the cost of {service}?",
      "How much is {service}?",
      "Can you give me the pricing for {service}?",
      "I need the price for {service}.",
      "What’s the rate for {service}?",
      "Please provide the pricing details for {service}.",
      "How much do you charge for {service}?"
    ],
    "responses": [
      "The pricing for {service} starts at $50.",
      "The cost of {service} is $50 and above.",
      "The starting price for {service} is $50."
    ]
  },
  {
    "intent": "greeting",
    "patterns": [
      "Hi",
      "Hello",
      "Hey",
      "Good morning",
      "Good afternoon",
      "Good evening",
      "Greetings",
      "How are you?",
      "Is anyone there?",
      "Hello, can you assist me?"
    ],
    "responses": [
      "Hello! How can I help you today?",
      "Hi there! What can I do for you?",
      "Hey! How can I assist you today?"
    ]
  },
  {
    "intent": "farewell",
    "patterns": [
      "Goodbye",
      "Bye",
      "See you later",
      "Thanks, bye",
      "Talk to you later",
      "I’m done, goodbye",
      "Catch you later",
      "Take care",
      "Bye for now",
      "Have a good day"
    ],
    "responses": [
      "Goodbye! Have a great day!",
      "Bye! Let me know if you need anything else.",
      "See you later! Take care."
    ]
  },
  {
    "intent": "service_availability",
    "patterns": [
      "Is {service} available on {date}?",
      "Can I book {service} for {date}?",
      "Do you offer {service}?",
      "Are you available for {service} on {date}?",
      "Can I schedule {service} on {date}?",
      "Is there availability for {service} on {date}?",
      "Can I get {service} on {date}?",
      "I want to check availability for {service} on {date}.",
      "Is {service} open on {date}?",
      "Can I book {service}?"
    ],
    "responses": [
      "{service} is available on {date}. Would you like to proceed with booking?",
      "Yes, we can provide {service} on {date}. Shall we book it?",
      "The {service} is available. Would you like to schedule it?"
    ]
  }
]

# Save the JSON data to a file
with open("chatbot_data.json", "w") as file:
    json.dump(chatbot_data, file, indent=4)

print("JSON file created!")


JSON file created!


In [3]:
import json

# Load the JSON file
with open("chatbot_data.json", "r") as file:
    data = json.load(file)

# The data loaded is a list of intents. Directly iterate through it
# instead of looking for an "intents" key:
intents = data  # Assign the loaded data directly to intents

# Check loaded data
for intent in intents:
    print(f"Intent: {intent['intent']}") # Access the intent name using 'intent' key
    print(f"Patterns: {intent['patterns']}")
    print(f"Response: {intent['responses']}") # Access the responses using 'responses' key
    print("----")

Intent: appointment_booking
Patterns: ['Book an appointment for {date}', 'Schedule an appointment on {date}', 'I want to book an appointment for {date}', 'Can I book an appointment for {date}?', 'Please schedule my appointment on {date}', 'Make an appointment for me on {date}', 'Reserve an appointment on {date}', 'I need an appointment for {date}', 'Set up an appointment for {date}', 'Schedule a meeting on {date}']
Response: ['Your appointment for {date} has been booked.', 'Your appointment has been successfully scheduled for {date}.', 'We’ve booked your appointment for {date}.']
----
Intent: service_scheduling
Patterns: ['Schedule a service for {date} at {time}', 'Set up a service appointment on {date} at {time}', 'I want to schedule a service on {date} at {time}', 'Can I set up a service for {date} at {time}?', 'Please schedule a service for me on {date} at {time}', 'Book a service for {date} at {time}', 'I need to schedule a service on {date} at {time}', 'Schedule maintenance on {da

In [6]:
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer

# Extract patterns and labels
patterns = []
labels = []
label_map = {intent["intent"]: idx for idx, intent in enumerate(intents)}

for intent in intents:
    for pattern in intent["patterns"]:
        patterns.append(pattern)
        labels.append(label_map[intent["intent"]])

# Split data into training and testing
train_patterns, test_patterns, train_labels, test_labels = train_test_split(
    patterns, labels, test_size=0.2, random_state=42
)

# Tokenize patterns using BERT tokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

train_encodings = tokenizer(train_patterns, truncation=True, padding=True, max_length=64, return_tensors="pt")
test_encodings = tokenizer(test_patterns, truncation=True, padding=True, max_length=64, return_tensors="pt")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [7]:
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertForSequenceClassification, AdamW

class ChatDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        item = {key: val[idx] for key, val in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item

train_dataset = ChatDataset(train_encodings, train_labels)
test_dataset = ChatDataset(test_encodings, test_labels)

# Load BERT model for classification
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=len(label_map))


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [8]:
train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=8)

# Optimizer
optimizer = AdamW(model.parameters(), lr=5e-5)

# Training loop
model.train()
for epoch in range(10):  # 3 epochs
    for batch in train_loader:
        optimizer.zero_grad()
        outputs = model(**batch)
        loss = outputs.loss
        loss.backward()
        optimizer.step()

    print(f"Epoch {epoch + 1} complete. Loss: {loss.item():.4f}")




Epoch 1 complete. Loss: 1.6335
Epoch 2 complete. Loss: 1.2513
Epoch 3 complete. Loss: 1.0927
Epoch 4 complete. Loss: 0.9284
Epoch 5 complete. Loss: 0.6367
Epoch 6 complete. Loss: 0.4406
Epoch 7 complete. Loss: 0.2355
Epoch 8 complete. Loss: 0.1953
Epoch 9 complete. Loss: 0.1225
Epoch 10 complete. Loss: 0.0859


In [11]:
from torch.nn.functional import softmax

model.eval()
for test_pattern in test_patterns[:5]:  # Test with a few examples
    inputs = tokenizer(test_pattern, truncation=True, padding=True, max_length=64, return_tensors="pt")
    outputs = model(**inputs)
    probs = softmax(outputs.logits, dim=-1)
    predicted_label = torch.argmax(probs).item()
    intent_name = list(label_map.keys())[list(label_map.values()).index(predicted_label)]

    print(f"User Input: {test_pattern}")
    print(f"Predicted Intent: {intent_name}")
    # Change 'response' to 'responses' to match the key in your data
    print(f"Response: {next(intent['responses'] for intent in intents if intent['intent'] == intent_name)}")
    print("----")

User Input: Book an appointment for {date}
Predicted Intent: appointment_booking
Response: ['Your appointment for {date} has been booked.', 'Your appointment has been successfully scheduled for {date}.', 'We’ve booked your appointment for {date}.']
----
User Input: Make an appointment for me on {date}
Predicted Intent: appointment_booking
Response: ['Your appointment for {date} has been booked.', 'Your appointment has been successfully scheduled for {date}.', 'We’ve booked your appointment for {date}.']
----
User Input: Greetings
Predicted Intent: greeting
Response: ['Hello! How can I help you today?', 'Hi there! What can I do for you?', 'Hey! How can I assist you today?']
----
User Input: I’m done, goodbye
Predicted Intent: farewell
Response: ['Goodbye! Have a great day!', 'Bye! Let me know if you need anything else.', 'See you later! Take care.']
----
User Input: Can I set up a service for {date} at {time}?
Predicted Intent: service_scheduling
Response: ['Your service appointment for

In [14]:
test_inputs = [
    "Book an appointment for 2024-12-25",
    "Schedule a service for 2024-12-25 at 10:00 AM",
    "What is the price for car repair?"
]

for input_text in test_inputs:
    inputs = tokenizer(input_text, truncation=True, padding=True, max_length=64, return_tensors="pt")
    outputs = model(**inputs)
    probs = softmax(outputs.logits, dim=-1)
    predicted_label = torch.argmax(probs).item()
    intent_name = list(label_map.keys())[list(label_map.values()).index(predicted_label)]
    print(f"User Input: {input_text}")
    print(f"Predicted Intent: {intent_name}")
    # Corrected line: Access the 'responses' key instead of 'response'
    print(f"Response: {next(intent['responses'] for intent in intents if intent['intent'] == intent_name)}")
    print("----")

User Input: Book an appointment for 2024-12-25
Predicted Intent: appointment_booking
Response: ['Your appointment for {date} has been booked.', 'Your appointment has been successfully scheduled for {date}.', 'We’ve booked your appointment for {date}.']
----
User Input: Schedule a service for 2024-12-25 at 10:00 AM
Predicted Intent: appointment_booking
Response: ['Your appointment for {date} has been booked.', 'Your appointment has been successfully scheduled for {date}.', 'We’ve booked your appointment for {date}.']
----
User Input: What is the price for car repair?
Predicted Intent: pricing_query
Response: ['The pricing for {service} starts at $50.', 'The cost of {service} is $50 and above.', 'The starting price for {service} is $50.']
----
