# Let's build a fast-food salesman chatbot using BERT:





In [None]:

import transformers


In [None]:
%%writefile train.py
# train.py


import numpy as np
import json
import random
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
from transformers import BertTokenizer,BertModel
# Train on the GPU when one is available, otherwise fall back to the CPU.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Tokenizer belonging to the same 'bert-base-uncased' checkpoint as the encoder.
tokenizer=BertTokenizer.from_pretrained('bert-base-uncased')
from torch.optim import AdamW

# Decode explicitly as UTF-8 so non-ASCII characters in the intents file do
# not depend on the platform's default locale encoding.
with open('fastfoodintents1.json','r',encoding='utf-8') as f:
  intents=json.load(f)

# Sorted, de-duplicated tag names; a tag's position in this list is its class id.
tags=sorted({intent['tag'] for intent in intents['intents']})
# Constant-time tag -> class id lookup (replaces a linear tags.index() per pattern).
tag_to_id={tag:idx for idx,tag in enumerate(tags)}

# Flatten the intents into two parallel lists: one pattern and one class id
# per training example.
patterns=[]
labels=[]
for intent in intents['intents']:
  for pattern in intent['patterns']:
    patterns.append(pattern)
    labels.append(tag_to_id[intent['tag']])

# Fail early with a clear message instead of an opaque tensor error later on.
if not patterns:
  raise ValueError('fastfoodintents1.json contains no training patterns')

# Tokenize every pattern in one batched call. Each row is padded/truncated to
# exactly 32 token ids, so the result is already a (num_patterns, 32) tensor.
encoded=tokenizer(
    patterns,
    add_special_tokens=True,
    max_length=32,
    padding='max_length',
    truncation=True,
    return_tensors='pt'
)

input_ids=encoded['input_ids']
attention_mask=encoded['attention_mask']
labels=torch.tensor(labels)


class ChatDataset(Dataset):
  """Dataset pairing tokenized patterns with their intent labels.

  The three containers are indexed in parallel: position ``idx`` of each one
  belongs to the same training example.
  """

  def __init__(self,encodings,mask,labels):
    """Store the token ids, attention masks and labels as given."""
    self.encodings,self.mask,self.labels=encodings,mask,labels

  def __len__(self):
    """Return the number of examples, i.e. the number of labels."""
    return len(self.labels)

  def __getitem__(self,idx):
    """Return example ``idx`` as a dict with the keys the training loop reads."""
    return dict(
        input_ids=self.encodings[idx],
        attention_mask=self.mask[idx],
        labels=self.labels[idx],
    )


# Wrap the tokenized patterns in a Dataset and serve them in shuffled
# mini-batches of 32 examples.
dataset=ChatDataset(encodings=input_ids,mask=attention_mask,labels=labels)
train_loader=DataLoader(dataset=dataset,batch_size=32,shuffle=True)



class Bert_Arch(nn.Module):
  """Intent classifier: pretrained BERT encoder followed by a linear head.

  The attribute names (``bert``, ``dropout``, ``fc``) determine the keys of
  the saved state dict, so they must not be renamed.
  """

  def __init__(self,output_dim):
    """Build the encoder and a head producing ``output_dim`` logits."""
    super().__init__()
    self.bert=BertModel.from_pretrained('bert-base-uncased')
    self.dropout=nn.Dropout(p=0.1)
    # 768 is the width of the pooled vector fed to the head.
    self.fc=nn.Linear(in_features=768,out_features=output_dim)

  def forward(self,sent_id,mask):
    """Return one row of class logits per input sequence.

    ``sent_id`` holds the token ids and ``mask`` the matching attention mask.
    """
    pooled=self.bert(sent_id,attention_mask=mask).pooler_output
    return self.fc(self.dropout(pooled))


# One output logit per intent tag.
output_dim=len(tags)
model=Bert_Arch(output_dim)
model=model.to(device)

# All parameters (encoder and head) are fine-tuned with AdamW.
optimizer=AdamW(model.parameters(),lr=2e-5)

# Takes the raw logits from the model together with integer class ids.
cross_entropy=nn.CrossEntropyLoss()

# BertModel.from_pretrained() hands back the encoder in eval mode (dropout
# disabled). Switch the whole model to training mode so that dropout inside
# BERT is active during fine-tuning, not only the dropout in front of the head.
model.train()

epochs=60
for epoch in range(epochs):
  total_loss=0
  for batch in train_loader:
    sent_id=batch['input_ids'].to(device)
    mask=batch['attention_mask'].to(device)
    # Separate name so the full label tensor built during data preparation
    # is not overwritten by the last mini-batch.
    batch_labels=batch['labels'].to(device)

    # Clear gradients left over from the previous step.
    optimizer.zero_grad()
    preds=model(sent_id,mask)
    loss=cross_entropy(preds,batch_labels)
    loss.backward()
    total_loss=total_loss+loss.item()

    # Cap the global gradient norm at 1.0 before the parameter update.
    torch.nn.utils.clip_grad_norm_(model.parameters(),1.0)

    optimizer.step()
  # Mean of the per-batch losses for this epoch.
  avg_loss=total_loss/len(train_loader)

  # Report progress every 10th epoch only.
  if (epoch+1)%10==0:
    print(f'epoch {epoch+1}/{epochs} |loss {avg_loss:.4f} ')



# Everything the chat script needs to rebuild the classifier and decode its
# predictions.
output_data = {
    "model_state": model.state_dict(),
    "output_dim": output_dim,
    "tags": tags,
    "vocab_size": len(tokenizer),
    "embed_dim": 768,
    "hidden_size": 768,
    # Record the sequence length the training tensors actually have, rather
    # than a hard-coded number that can drift from the tokenizer's max_length.
    "max_len": int(input_ids.shape[1])
}

torch.save(output_data, "bert_data.pth")






Writing train.py


In [None]:
!python train.py

tokenizer_config.json: 100% 48.0/48.0 [00:00<00:00, 176kB/s]
vocab.txt: 100% 232k/232k [00:00<00:00, 7.38MB/s]
tokenizer.json: 100% 466k/466k [00:00<00:00, 2.77MB/s]
config.json: 100% 570/570 [00:00<00:00, 3.08MB/s]
model.safetensors: 100% 440M/440M [00:02<00:00, 177MB/s]
Loading weights: 100% 199/199 [00:00<00:00, 1336.70it/s, Materializing param=pooler.dense.weight]
[1mBertModel LOAD REPORT[0m from: bert-base-uncased
Key                                        | Status     |  | 
-------------------------------------------+------------+--+-
cls.predictions.transform.dense.bias       | [38;5;208mUNEXPECTED[0m |  | 
cls.predictions.transform.dense.weight     | [38;5;208mUNEXPECTED[0m |  | 
cls.seq_relationship.weight                | [38;5;208mUNEXPECTED[0m |  | 
cls.predictions.transform.LayerNorm.bias   | [38;5;208mUNEXPECTED[0m |  | 
cls.predictions.transform.LayerNorm.weight | [38;5;208mUNEXPECTED[0m |  | 
cls.predictions.bias                       | [38;5;208mUNEXPECTED

In [None]:
%%writefile chat.py

import torch
import torch.nn as nn
import random
import numpy as np
import json
from transformers import BertModel,BertTokenizer

# Run on the GPU when one is available, otherwise fall back to the CPU.
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Decode explicitly as UTF-8 so non-ASCII characters in the intents file do
# not depend on the platform's default locale encoding.
with open('fastfoodintents1.json','r',encoding='utf-8') as f:
  intents=json.load(f)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written during a GPU run still loads on a CPU-only machine.
# NOTE: torch.load unpickles the file; only load checkpoints you trust.
File=torch.load('bert_data.pth',map_location=device)
model_state=File['model_state']
output_dim=File['output_dim']
tags=File['tags']


class Bert_Arch(nn.Module):
  """Intent classifier: pretrained BERT encoder followed by a linear head.

  The attribute names (``bert``, ``dropout``, ``fc``) must match the keys of
  the state dict loaded from the checkpoint, so they must not be renamed.
  """

  def __init__(self,output_dim):
    """Build the encoder and a head producing ``output_dim`` logits."""
    super().__init__()
    self.bert=BertModel.from_pretrained('bert-base-uncased')
    self.dropout=nn.Dropout(p=0.1)
    # 768 is the width of the pooled vector fed to the head.
    self.fc=nn.Linear(in_features=768,out_features=output_dim)

  def forward(self,sent_id,mask):
    """Return one row of class logits per input sequence.

    ``sent_id`` holds the token ids and ``mask`` the matching attention mask.
    """
    pooled=self.bert(sent_id,attention_mask=mask).pooler_output
    return self.fc(self.dropout(pooled))



# Tokenizer belonging to the same 'bert-base-uncased' checkpoint as the encoder.
tokenizer=BertTokenizer.from_pretrained('bert-base-uncased')

# Rebuild the classifier and restore the weights read from the checkpoint.
model=Bert_Arch(output_dim)
model.load_state_dict(model_state)
# .to() and .eval() both return the module, so the two calls can be chained;
# eval mode switches dropout off for inference.
model=model.to(device).eval()

bot_name="AI"

# Pad/truncate to the sequence length recorded in the checkpoint; checkpoints
# without that key fall back to the previously hard-coded value of 20.
max_len=File.get('max_len',20)

# Index the responses by tag once instead of scanning every intent per turn.
# setdefault keeps the first intent when a tag occurs more than once.
responses_by_tag={}
for intent in intents['intents']:
  responses_by_tag.setdefault(intent['tag'],intent.get('responses',[]))

# Interactive loop: read a sentence, classify it, answer. Type 'quit' to exit.
while True:
  try:
    sentence=input('you: ').strip()
  except EOFError:
    # Input stream closed (e.g. Ctrl-D or piped input ran out): exit cleanly.
    break
  if sentence.lower()=='quit':
    break
  if not sentence:
    # Nothing to classify; prompt again.
    continue

  encoded=tokenizer(
      sentence,
      add_special_tokens=True,
      max_length=max_len,
      truncation=True,
      padding='max_length',
      return_tensors='pt'
  )

  ids=encoded['input_ids'].to(device)
  mask=encoded['attention_mask'].to(device)

  # Inference only: skip building the autograd graph.
  with torch.no_grad():
    output=model(ids,mask)

  # Highest class probability and its index in a single pass.
  prob,pred=torch.max(torch.softmax(output,dim=1),dim=1)
  confidence=prob.item()
  tag=tags[pred.item()]
  responses=responses_by_tag.get(tag)

  if confidence <= 0.50:
    print(f'{bot_name}: I do not understand... (Low confidence: {confidence:.2f})')
  elif responses:
    print(f'{bot_name}: {random.choice(responses)}')
  else:
    # Predicted tag has no responses in the intents file (e.g. the file was
    # changed after training); still answer rather than stay silent.
    print(f'{bot_name}: I do not understand...')





Writing chat.py


In [None]:
!python chat.py

Loading weights: 100% 199/199 [00:00<00:00, 1391.32it/s, Materializing param=pooler.dense.weight]
[1mBertModel LOAD REPORT[0m from: bert-base-uncased
Key                                        | Status     |  | 
-------------------------------------------+------------+--+-
cls.predictions.transform.LayerNorm.bias   | [38;5;208mUNEXPECTED[0m |  | 
cls.predictions.bias                       | [38;5;208mUNEXPECTED[0m |  | 
cls.predictions.transform.LayerNorm.weight | [38;5;208mUNEXPECTED[0m |  | 
cls.seq_relationship.bias                  | [38;5;208mUNEXPECTED[0m |  | 
cls.predictions.transform.dense.weight     | [38;5;208mUNEXPECTED[0m |  | 
cls.seq_relationship.weight                | [38;5;208mUNEXPECTED[0m |  | 
cls.predictions.transform.dense.bias       | [38;5;208mUNEXPECTED[0m |  | 

[3mNotes:
- [38;5;208mUNEXPECTED[0m[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m
you: hello
AI: Hi there! Ready to gra