<a href="https://colab.research.google.com/github/Sudhandar/Intent-Classification-with-BERT/blob/master/atis_bert_final.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
import pandas as pd
from tqdm.notebook import tqdm

In [2]:
# Ask the user to upload the dataset through Colab's file-upload helper.
# The value returned by files.upload() is kept in `uploaded`; the later
# read_csv cell loads the file by name from the working directory instead.
from google.colab import files
uploaded = files.upload()

Saving atis_dataset.csv to atis_dataset (2).csv


In [3]:
# Load the ATIS queries/intents table from the working directory.
# NOTE(review): the upload cell's recorded output shows the file being saved
# under a different name ('atis_dataset (2).csv'), so this reads a copy that
# was already on disk -- confirm it is the intended version.
df = pd.read_csv(filepath_or_buffer='atis_dataset.csv')

In [4]:
# Preview the first five rows (five is DataFrame.head's default).
df.head()

Unnamed: 0,query,intent
0,i want to fly from boston at 838 am and arrive...,flight
1,what flights are available from pittsburgh to ...,flight
2,what is the arrival time in san francisco for ...,flight_time
3,cheapest airfare from tacoma to orlando,airfare
4,round trip fares from pittsburgh to philadelph...,airfare


In [5]:
# Report (rows, columns) of the raw table.
dataset_shape = df.shape
print(f'Dataset shape: {dataset_shape}')

Dataset shape: (5871, 2)


In [6]:
# Class distribution of the raw intents (most frequent first).
df['intent'].value_counts()

flight                        4298
airfare                        471
ground_service                 291
airline                        195
abbreviation                   180
aircraft                        90
flight_time                     55
quantity                        54
airport                         38
capacity                        37
flight+airfare                  33
distance                        30
city                            25
ground_fare                     25
flight_no                       20
meal                            12
restriction                      6
airline+flight_no                2
day_name                         2
flight+airline                   1
flight_no+airline                1
aircraft+flight+flight_no        1
cheapest                         1
airfare+flight                   1
airfare+flight_time              1
ground_service+ground_fare       1
Name: intent, dtype: int64

In [7]:
# Remove rows whose intent is a compound label (contains '+') or one of the
# two singleton-ish intents 'day_name' / 'cheapest'.
# - regex=False makes every pattern a literal substring match, so '+' needs no
#   escaping (the non-raw '\+' is an invalid string escape sequence).
# - .copy() makes the result an independent frame, so later column
#   assignments (e.g. df['label'] = ...) do not write into a slice of the
#   original and trigger SettingWithCopyWarning.
intent_series = df['intent']
drop_mask = (
    intent_series.str.contains('+', regex=False)
    | intent_series.str.contains('day_name', regex=False)
    | intent_series.str.contains('cheapest', regex=False)
)
df = df[~drop_mask].copy()

In [8]:
# Class distribution after the filtering step above.
df['intent'].value_counts()

flight            4298
airfare            471
ground_service     291
airline            195
abbreviation       180
aircraft            90
flight_time         55
quantity            54
airport             38
capacity            37
distance            30
city                25
ground_fare         25
flight_no           20
meal                12
restriction          6
Name: intent, dtype: int64

In [9]:
# Distinct intent names, in order of first appearance in the frame.
possible_intents = df['intent'].unique()

In [10]:
# Map each intent name to an integer class id (its position in possible_intents).
intent_dict = {
    intent_name: class_id
    for class_id, intent_name in enumerate(possible_intents)
}

In [11]:
# Display the intent-name -> class-id mapping built in the previous cell.
intent_dict

{'abbreviation': 8,
 'aircraft': 3,
 'airfare': 2,
 'airline': 6,
 'airport': 5,
 'capacity': 13,
 'city': 11,
 'distance': 7,
 'flight': 0,
 'flight_no': 12,
 'flight_time': 1,
 'ground_fare': 9,
 'ground_service': 4,
 'meal': 14,
 'quantity': 10,
 'restriction': 15}

In [12]:
# Attach the integer class id of every row's intent; an intent missing from
# intent_dict raises KeyError.
df['label'] = [intent_dict[intent_name] for intent_name in df['intent']]
# Show how many rows fall into each class id.
df['label'].value_counts()

0     4298
2      471
4      291
6      195
8      180
3       90
1       55
10      54
5       38
13      37
7       30
11      25
9       25
12      20
14      12
15       6
Name: label, dtype: int64

In [13]:
# Give every row a string identifier 'INT1', 'INT2', ... and use it as the index.
n_rows = len(df)
id_list = [f'INT{row_number}' for row_number in range(1, n_rows + 1)]
df['id'] = id_list
df = df.set_index('id')
df.head()

Unnamed: 0_level_0,query,intent,label
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
INT1,i want to fly from boston at 838 am and arrive...,flight,0
INT2,what flights are available from pittsburgh to ...,flight,0
INT3,what is the arrival time in san francisco for ...,flight_time,1
INT4,cheapest airfare from tacoma to orlando,airfare,2
INT5,round trip fares from pittsburgh to philadelph...,airfare,2


In [14]:
from sklearn.model_selection import train_test_split

In [15]:
# 85/15 split of the row ids, stratified on the class id so each class keeps
# the same proportion in both parts; random_state fixes the split.
sample_ids = df.index.values
sample_labels = df.label.values
x_train, x_test, y_train, y_test = train_test_split(
    sample_ids,
    sample_labels,
    stratify=sample_labels,
    test_size=0.15,
    random_state=26,
)

In [16]:
# Record, per row, which side of the split it landed on.
for split_name, split_ids in (('train', x_train), ('test', x_test)):
    df.loc[split_ids, 'data_type'] = split_name

In [17]:
# Row counts per (intent, class id, split) to check the stratification.
split_group_keys = ['intent', 'label', 'data_type']
df.groupby(split_group_keys).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,query
intent,label,data_type,Unnamed: 3_level_1
abbreviation,8,test,27
abbreviation,8,train,153
aircraft,3,test,13
aircraft,3,train,77
airfare,2,test,71
airfare,2,train,400
airline,6,test,29
airline,6,train,166
airport,5,test,6
airport,5,train,32


In [18]:
!pip install transformers



In [19]:
from transformers import BertTokenizer
from torch.utils.data import TensorDataset

In [20]:
# Pretrained tokenizer for 'bert-base-uncased', with lower-casing switched on.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

In [21]:
# Raw query strings of each split as NumPy arrays.
# test_query now selects the 'query' column like train_query does (it
# previously held the whole test DataFrame).
train_query = df.loc[df.data_type == 'train', 'query'].values
test_query = df.loc[df.data_type == 'test', 'query'].values

In [22]:
def encode_split(split_name, max_length=256):
    """Tokenize the queries of one split and collect its class ids.

    Args:
        split_name: value of df.data_type to select ('train' or 'test').
        max_length: every sequence is padded or truncated to this many tokens.

    Returns:
        (encoded, labels): the tokenizer output (with 'input_ids' and
        'attention_mask' as PyTorch tensors) and a tensor of the rows'
        `label` values, in the same row order.
    """
    split_df = df[df.data_type == split_name]
    encoded = tokenizer.batch_encode_plus(
        # A plain list of strings is the documented batch input type.
        split_df['query'].tolist(),
        add_special_tokens=True,
        return_attention_mask=True,
        # padding='max_length' replaces the deprecated pad_to_max_length=True.
        padding='max_length',
        max_length=max_length,
        truncation=True,
        return_tensors='pt',
    )
    labels = torch.tensor(split_df['label'].values)
    return encoded, labels


encoded_data_train, labels_train = encode_split('train')
encoded_data_test, labels_test = encode_split('test')

input_ids_train = encoded_data_train['input_ids']
attention_masks_train = encoded_data_train['attention_mask']

input_ids_test = encoded_data_test['input_ids']
attention_masks_test = encoded_data_test['attention_mask']

In [23]:
# Bundle (input ids, attention mask, class id) per example for each split.
train_tensors = (input_ids_train, attention_masks_train, labels_train)
test_tensors = (input_ids_test, attention_masks_test, labels_test)
dataset_train = TensorDataset(*train_tensors)
dataset_test = TensorDataset(*test_tensors)

In [24]:
from transformers import BertForSequenceClassification

In [25]:
# BERT with a sequence-classification head sized to the number of intents;
# attention maps and hidden states are not returned.
num_intents = len(intent_dict)
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels=num_intents,
    output_attentions=False,
    output_hidden_states=False,
)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [26]:
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler

In [27]:
batch_size = 32

# Training batches are drawn in random order.
dataloader_train = DataLoader(
    dataset_train,
    sampler=RandomSampler(dataset_train),
    batch_size=batch_size,
)

# Evaluation batches are read in dataset order: shuffling is not needed for
# scoring, and a fixed order keeps predictions aligned with the test rows and
# avoids consuming random state on every evaluation pass.
dataloader_test = DataLoader(
    dataset_test,
    sampler=SequentialSampler(dataset_test),
    batch_size=batch_size,
)

In [28]:
from transformers import AdamW, get_linear_schedule_with_warmup

In [29]:
# AdamW over every model parameter (learning rate 1e-5, epsilon 1e-8).
optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)

In [30]:
epochs = 10
# One scheduler step is taken per training batch, so the schedule spans
# (batches per epoch) * epochs steps; no warm-up steps are used.
total_training_steps = len(dataloader_train) * epochs
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_training_steps,
)

In [31]:
import numpy as np
from sklearn.metrics import f1_score

In [32]:
def f1_score_func(preds, labels):
    """Return the weighted F1 score of class predictions.

    Args:
        preds: 2-D array of per-class scores; the arg-max along axis 1 is
            taken as the predicted class of each row.
        labels: array of true class ids (flattened before scoring).

    Returns:
        The result of sklearn's f1_score with average='weighted'.
    """
    predicted_classes = np.argmax(preds, axis=1).flatten()
    true_classes = labels.flatten()
    return f1_score(y_true=true_classes, y_pred=predicted_classes, average='weighted')

In [40]:
def accuracy_per_class(preds, labels):
    """Print, for each class present in `labels`, correct/total predictions.

    Args:
        preds: 2-D array of per-class scores; the arg-max along axis 1 is
            taken as the predicted class of each row.
        labels: array of true class ids (flattened before use).

    Class ids are translated back to intent names via the module-level
    `intent_dict`. Nothing is returned; results are printed.
    """
    id_to_intent = {class_id: name for name, class_id in intent_dict.items()}
    predicted = np.argmax(preds, axis=1).flatten()
    actual = labels.flatten()

    for class_id in np.unique(actual):
        in_class = actual == class_id
        n_total = int(np.count_nonzero(in_class))
        n_correct = int(np.count_nonzero(predicted[in_class] == class_id))
        print(f'Class: {id_to_intent[class_id]}')
        print(f'accuracy: {n_correct}/{n_total}')

In [34]:
import random

# Seed Python, NumPy and PyTorch (CPU and all CUDA devices) with one value.
# NOTE(review): in notebook order this runs after the model and the
# train/test split were created, so it only affects randomness from here on.
seed_val = 26
for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
    seed_fn(seed_val)

In [35]:
# Use the GPU when one is available, otherwise the CPU, and move the model there.
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)
model.to(device)
print(device)

cuda


In [36]:
def evaluate(dataloader_val):
    """Run the module-level `model` over a dataloader without gradient tracking.

    Args:
        dataloader_val: iterable of (input_ids, attention_mask, labels)
            tensor batches.

    Returns:
        (loss_val_avg, predictions, true_vals): the per-batch loss averaged
        over the number of batches, the logits of all batches concatenated
        along axis 0, and the matching true labels, both as NumPy arrays.

    The model is switched to eval mode and is left in that mode.
    """
    model.eval()

    running_loss = 0
    logits_per_batch, labels_per_batch = [], []

    with torch.no_grad():
        for batch in dataloader_val:
            input_ids, attention_mask, labels = (t.to(device) for t in batch)
            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels,
            )
            # With labels supplied, outputs[0] is the loss and outputs[1] the logits.
            running_loss += outputs[0].item()
            logits_per_batch.append(outputs[1].detach().cpu().numpy())
            labels_per_batch.append(labels.cpu().numpy())

    loss_val_avg = running_loss / len(dataloader_val)
    predictions = np.concatenate(logits_per_batch, axis=0)
    true_vals = np.concatenate(labels_per_batch, axis=0)
    return loss_val_avg, predictions, true_vals

In [37]:
# Fine-tune the model for `epochs` passes over dataloader_train. After each
# epoch: save a checkpoint, then report the mean training loss and the loss /
# weighted F1 measured on dataloader_test.
# NOTE(review): dataloader_test is used here for per-epoch monitoring and again
# for the final per-class report, so there is no separate validation split.
for epoch in tqdm(range(1, epochs+1)):
    model.train()
    loss_train_total = 0
    progress_bar = tqdm(dataloader_train, desc = 'Epoch {:1d}'.format(epoch), leave =False, disable =False)
    for batch in progress_bar:
        model.zero_grad()
        batch = tuple(b.to(device) for b in batch)
        inputs = {
            'input_ids': batch[0],
            'attention_mask': batch[1],
            'labels': batch[2]
        }
        outputs = model(**inputs)

        # With labels supplied, the first output is the loss.
        loss = outputs[0]
        batch_loss = loss.item()
        loss_train_total += batch_loss
        loss.backward()
        # Clip the gradient norm to 1.0 before the optimizer/scheduler step.
        torch.nn.utils.clip_grad_norm_(model.parameters(),1.0)
        optimizer.step()
        scheduler.step()
        # Show the batch loss as returned by the model. It must not be divided
        # by len(batch): that is the length of the (input_ids, attention_mask,
        # labels) tuple, i.e. 3, not the number of examples in the batch.
        progress_bar.set_postfix({'training_loss': '{:.3f}'.format(batch_loss)})
    torch.save(model.state_dict(),f'BERT_ft_epoch{epoch}.model')
    # f-string so the epoch number is printed instead of the literal '{epoch}'.
    tqdm.write(f'\nEpoch {epoch}')
    loss_train_avg = loss_train_total/len(dataloader_train)
    tqdm.write(f'Training loss: {loss_train_avg}')
    val_loss, predictions, true_vals = evaluate(dataloader_test)
    val_f1 = f1_score_func(predictions,true_vals)
    tqdm.write(f'Validation loss: {val_loss}')
    tqdm.write(f'F1 Score (weighted): {val_f1}')

HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, description='Epoch 1', max=155.0, style=ProgressStyle(description_widt…


Epoch {epoch}
Training loss: 0.9551796442078005
Validation loss: 0.45998227915593554
F1 Score (weighted): 0.8866049099608512


HBox(children=(FloatProgress(value=0.0, description='Epoch 2', max=155.0, style=ProgressStyle(description_widt…


Epoch {epoch}
Training loss: 0.3290045693997414
Validation loss: 0.2252547533384391
F1 Score (weighted): 0.9466870291482762


HBox(children=(FloatProgress(value=0.0, description='Epoch 3', max=155.0, style=ProgressStyle(description_widt…


Epoch {epoch}
Training loss: 0.1716164777115468
Validation loss: 0.15384877952081816
F1 Score (weighted): 0.9542689830657114


HBox(children=(FloatProgress(value=0.0, description='Epoch 4', max=155.0, style=ProgressStyle(description_widt…


Epoch {epoch}
Training loss: 0.10786215583643606
Validation loss: 0.10780550027266145
F1 Score (weighted): 0.977824148474996


HBox(children=(FloatProgress(value=0.0, description='Epoch 5', max=155.0, style=ProgressStyle(description_widt…


Epoch {epoch}
Training loss: 0.07404103127698744
Validation loss: 0.08875777033556785
F1 Score (weighted): 0.9824976499225213


HBox(children=(FloatProgress(value=0.0, description='Epoch 6', max=155.0, style=ProgressStyle(description_widt…


Epoch {epoch}
Training loss: 0.05216647499511319
Validation loss: 0.07853349072060414
F1 Score (weighted): 0.9849072286843361


HBox(children=(FloatProgress(value=0.0, description='Epoch 7', max=155.0, style=ProgressStyle(description_widt…


Epoch {epoch}
Training loss: 0.043631743489494244
Validation loss: 0.06734337138810328
F1 Score (weighted): 0.9864720515529322


HBox(children=(FloatProgress(value=0.0, description='Epoch 8', max=155.0, style=ProgressStyle(description_widt…


Epoch {epoch}
Training loss: 0.03458469760033392
Validation loss: 0.057522415748930404
F1 Score (weighted): 0.9854597408262985


HBox(children=(FloatProgress(value=0.0, description='Epoch 9', max=155.0, style=ProgressStyle(description_widt…


Epoch {epoch}
Training loss: 0.03051545152200326
Validation loss: 0.06186375923321715
F1 Score (weighted): 0.9854597408262985


HBox(children=(FloatProgress(value=0.0, description='Epoch 10', max=155.0, style=ProgressStyle(description_wid…


Epoch {epoch}
Training loss: 0.028743124518903995
Validation loss: 0.0605312200994896
F1 Score (weighted): 0.9854597408262985



In [38]:
# Score the model once more on dataloader_test after training; the average
# loss is discarded and only the logits and true labels are kept.
_ , predictions, true_vals = evaluate(dataloader_test)

In [41]:
# Print correct/total predictions for every intent present in the test labels.
accuracy_per_class(predictions, true_vals)

Class: flight
accuracy: 643/645
Class: flight_time
accuracy: 8/8
Class: airfare
accuracy: 69/71
Class: aircraft
accuracy: 13/13
Class: ground_service
accuracy: 43/44
Class: airport
accuracy: 5/6
Class: airline
accuracy: 27/29
Class: distance
accuracy: 4/4
Class: abbreviation
accuracy: 27/27
Class: ground_fare
accuracy: 3/4
Class: quantity
accuracy: 8/8
Class: city
accuracy: 2/4
Class: flight_no
accuracy: 3/3
Class: capacity
accuracy: 6/6
Class: meal
accuracy: 2/2
Class: restriction
accuracy: 0/1
