# User intent classification using BERT
If you want to run the program, simply click 'Run All'

In [2]:
!pip install transformers



In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as opt
from transformers import BertModel, BertTokenizer
import pandas as pd
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.manual_seed(1)

<torch._C.Generator at 0x78c9cc0ecbd0>

### Hyperparameters

In [4]:
epochs = 32
batch_size = 32
lr = 2e-5
weight_decay = 1e-3
drop_out_rate = 0.5

### Model

In [5]:
class UserIntentClassification(nn.Module):
    def __init__(self, model_name: str):
        super().__init__()
        self.embedder = BertModel.from_pretrained(model_name)
        for param in self.embedder.parameters():
            param.requires_grad = False

        self.fc1 = nn.Linear(1024, 400)
        self.fc2 = nn.Linear(400, 3)
        self.dropout = nn.Dropout(p = drop_out_rate)

    def forward(self, tokens):
        embedding = self.embedder(**tokens)
        embedding = embedding[0][:, 0, :] # Take the CLS token
        tensor = self.fc1(embedding)
        tensor = self.dropout(tensor)
        tensor = F.leaky_relu(tensor)
        tensor = self.fc2(tensor)
        tensor = F.sigmoid(tensor)
        return tensor

### Instantiation

In [6]:
tokenizer_name = "bert-large-uncased"
model_name = "bert-large-uncased"

tokenizer = BertTokenizer.from_pretrained(tokenizer_name)
classifier = UserIntentClassification(model_name).to(device)

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

In [7]:
criterion = nn.BCELoss()
optimizer = opt.Adam(classifier.parameters(), lr = lr, weight_decay = weight_decay)

### Training

In [9]:
df = pd.read_csv("UserIntent.csv")
num_of_utterance = df.shape[0]

# Stochastic gradient descent
for epoch in range(epochs):
    train_loss_list = []
    valid_loss_list = []
    for i in range(num_of_utterance-10):
        sentence = df["Utterance"][i]
        tokens = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True).to(device)

        label = torch.zeros(1, 3)
        if(df["Accept"][i] == 1):
            label[0][0] = 1

        if(df["Reject"][i] == 1):
            label[0][1] = 1

        if(df["Inquire"][i] == 1):
            label[0][2] = 1
        label = label.to(device)

        # Forward pass
        output = classifier(tokens)

        # Backward pass
        optimizer.zero_grad()
        loss = criterion(output, label)
        loss.backward()
        train_loss_list.append(loss.item())
        optimizer.step()

    for i in range(num_of_utterance-10, num_of_utterance):
        sentence = df["Utterance"][i]
        tokens = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True).to(device)

        label = torch.zeros(1, 3)
        if(df["Accept"][i] == 1):
            label[0][0] = 1

        if(df["Reject"][i] == 1):
            label[0][1] = 1

        if(df["Inquire"][i] == 1):
            label[0][2] = 1
        label = label.to(device)

        # Forward pass
        output = classifier(tokens)

        # Backward pass
        loss = criterion(output, label)
        valid_loss_list.append(loss.item())

    print("Training Loss:", sum(train_loss_list)/len(train_loss_list))
    print("Validation Loss:", sum(valid_loss_list)/len(valid_loss_list))

Training Loss: 0.4350933133399407
Validation Loss: 0.34448636397719384
Training Loss: 0.27278016539839844
Validation Loss: 0.24005263186991216
Training Loss: 0.19370561863756233
Validation Loss: 0.1860720468685031
Training Loss: 0.14542352430885885
Validation Loss: 0.15221794843673705
Training Loss: 0.1132124326751462
Validation Loss: 0.13780039437115194
Training Loss: 0.09200485317297734
Validation Loss: 0.10558340123388917
Training Loss: 0.07828459063309458
Validation Loss: 0.10398859290871769
Training Loss: 0.06894964126221105
Validation Loss: 0.09944357125787065
Training Loss: 0.06142780233047822
Validation Loss: 0.10162716284394264
Training Loss: 0.05466091592071842
Validation Loss: 0.09520553939510137
Training Loss: 0.049306613561749445
Validation Loss: 0.06566975379537325
Training Loss: 0.04378935514315302
Validation Loss: 0.06896092743845657
Training Loss: 0.04516537771724163
Validation Loss: 0.08062730059027672
Training Loss: 0.03680674359215291
Validation Loss: 0.058270918088

In [10]:
classifier.eval()

UserIntentClassification(
  (embedder): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 1024, padding_idx=0)
      (position_embeddings): Embedding(512, 1024)
      (token_type_embeddings): Embedding(2, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-23): 24 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
              (LayerNorm): LayerNorm((1024,)

### Regular test accuracy

In [11]:
df = pd.read_csv("Restaurant_User_Intent_Test.csv", encoding='ISO-8859-1')

correct = 0
total = 0

for i in range(df.shape[0]):
    sentence = df["Input"][i]
    tokens = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True).to(device)
    output = classifier(tokens)
    output = output[0]
    output = torch.where(output > 0.5, torch.tensor(1.0), torch.tensor(0.0)).to("cpu")

    label = torch.zeros(3)
    for j in range(1, 3):
        if((not pd.isna(df.at[i, f'Output{j}'])) and "AcceptRecommendation" in df[f'Output{j}'][i]):
            label[0] = 1

        if((not pd.isna(df.at[i, f'Output{j}'])) and "RejectRecommendation" in df[f'Output{j}'][i]):
            label[1] = 1

        if((not pd.isna(df.at[i, f'Output{j}'])) and "Inquire" in df[f'Output{j}'][i]):
            label[2] = 1

    if(torch.equal(label, output)):
        correct += 1

    total += 1

print("Total correct:", correct)
print("Total:", total)
print("Accuracy:", correct/total)

Total correct: 102
Total: 114
Accuracy: 0.8947368421052632


### Hard test accuracy

In [12]:
df = pd.read_csv("Hard_Restaurant_User_Intent_Test.csv", encoding='ISO-8859-1')

correct = 0
total = 0

for i in range(df.shape[0]):
    sentence = df["Input"][i]
    tokens = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True).to(device)
    output = classifier(tokens)
    output = output[0]
    output = torch.where(output > 0.5, torch.tensor(1.0), torch.tensor(0.0)).to("cpu")

    label = torch.zeros(3)
    for j in range(1, 3):
        if((not pd.isna(df.at[i, f'Output{j}'])) and "AcceptRecommendation" in df[f'Output{j}'][i]):
            label[0] = 1

        if((not pd.isna(df.at[i, f'Output{j}'])) and "RejectRecommendation" in df[f'Output{j}'][i]):
            label[1] = 1

        if((not pd.isna(df.at[i, f'Output{j}'])) and "Inquire" in df[f'Output{j}'][i]):
            label[2] = 1

    if(torch.equal(label, output)):
        correct += 1

    total += 1

print("Total correct:", correct)
print("Total:", total)
print("Accuracy:", correct/total)

Total correct: 15
Total: 15
Accuracy: 1.0


### Play around with this model

In [13]:
# This string is what you want to say
sentence = "No I don't want to go there. I like chinese food more"


tokens = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True).to(device)
output = classifier(tokens)
for i in range(output.shape[1]):
    if(output[0][i]>0.5):
        if(i == 0):
            print("Accept")
        elif(i == 1):
            print("Reject")
        elif(i == 2):
            print("Inquire")
        elif(i == 3):
            print("Random")

Reject


### Check how many parameters this model has

In [14]:
print("This model has", sum(p.numel() for p in classifier.parameters()), "parameters.")

This model has 335553091 parameters.
