This notebook follows the directions from https://dl.acm.org/doi/10.1145/3434073.3444671

# Loading SNIPS Data

In [None]:
import pandas as pd
from collections import Counter

import torch
from torch.utils.data import DataLoader
from torch.utils.data.dataset import random_split
from torchtext.vocab import Vocab
from torchtext.data.utils import get_tokenizer

import numpy as np

In [None]:
!pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.8/5.8 MB[0m [31m26.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.6/7.6 MB[0m [31m48.0 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m182.4/182.4 KB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.11.1 tokenizers-0.13.2 transformers-4.25.1


In [None]:
import json
import pickle
import time
import datetime
import random
import os
import csv

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import torch
from torch.utils.data import TensorDataset, random_split
from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
from transformers import BertTokenizer
from transformers import BertForSequenceClassification, AdamW, BertConfig
from transformers import get_linear_schedule_with_warmup

import matplotlib.pyplot as plt

# Device that batches are moved to in the training/evaluation loops; pinned to the CPU.
device = torch.device("cpu")

# One seed shared by all random number generators seeded below.
SEED_VAL = 42

# Seed Python's `random`, NumPy and PyTorch.
random.seed(SEED_VAL)
np.random.seed(SEED_VAL)
torch.manual_seed(SEED_VAL);

In [None]:
from google.colab import files
SNIPS_TRAIN_FILE = files.upload()
SNIPS_TRAIN_STR = str(SNIPS_TRAIN_FILE['SNIPS_TRAIN.csv'])

KeyError: ignored

In [None]:
SNIPS_VALID_FILE = files.upload()
SNIPS_VALID_STR = str(SNIPS_VALID_FILE['SNIPS_VALID.csv'])

Saving SNIPS_VALID.csv to SNIPS_VALID.csv


In [None]:
SNIPS_TEST_FILE = files.upload()
SNIPS_TEST_STR = str(SNIPS_TEST_FILE['SNIPS_TEST.csv'])

Saving SNIPS_TEST.csv to SNIPS_TEST.csv


In [None]:
def clean_and_split(SNIPS_STR):
  """Split the text of an uploaded SNIPS CSV into sentences and labels.

  The input is cut on every comma (so a comma inside a field also acts as
  a separator). A piece containing the literal four-character sequence
  backslash, r, backslash, n is replaced by the segment that directly
  follows the first such sequence. The final piece is discarded, and the
  remaining pieces alternate label, sentence, label, sentence, ...

  Returns:
    A ``(sentences, labels)`` tuple of lists of strings.
  """
  row_break = "\\r\\n"
  pieces = [
      piece.split(row_break)[1] if row_break in piece else piece
      for piece in SNIPS_STR.split(",")
  ]
  pieces = pieces[:-1]
  # Even positions hold labels, odd positions hold sentences.
  return pieces[1::2], pieces[::2]

In [None]:
SNIPS_train_sentences, SNIPS_train_labels = clean_and_split(SNIPS_TRAIN_STR)

In [None]:
SNIPS_valid_sentences, SNIPS_valid_labels = clean_and_split(SNIPS_VALID_STR)

In [None]:
SNIPS_test_sentences, SNIPS_test_labels = clean_and_split(SNIPS_TEST_STR)

# Training SNIPS

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)


Downloading:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
# Length, in BERT token ids (special tokens included), of the longest
# training sentence; 0 when there are no training sentences.
max_len = max(
    (len(tokenizer.encode(row, add_special_tokens=True)) for row in SNIPS_train_sentences),
    default=0,
)

print('Max sentence length: ', max_len)


Max sentence length:  75


In [None]:
PAD_LEN = max_len

In [None]:
# Map every intent label to a consecutive integer id, numbered in order of
# first appearance in the training labels.
d = {}
for label in SNIPS_train_labels:
  d.setdefault(label, len(d))


In [None]:
d

{'PlayMusic': 0,
 'AddToPlaylist': 1,
 'RateBook': 2,
 'SearchScreeningEvent': 3,
 'BookRestaurant': 4,
 'GetWeather': 5,
 'SearchCreativeWork': 6}

In [None]:
# Pair each validation sentence with the label at the same position.
SNIPS_valid_examples = [
  [sentence, SNIPS_valid_labels[position]]
  for position, sentence in enumerate(SNIPS_valid_sentences)
]

In [None]:
# Pair each test sentence with the label at the same position.
SNIPS_test_examples = [
  [sentence, SNIPS_test_labels[position]]
  for position, sentence in enumerate(SNIPS_test_sentences)
]

In [None]:
def examples_to_dataset(examples):
  """Encode ``[sentence, label]`` pairs into a ``TensorDataset``.

  Each sentence is encoded with the notebook-level ``tokenizer`` (special
  tokens added, padded/truncated to ``PAD_LEN``), and each label is turned
  into its integer id through the notebook-level mapping ``d``.

  Returns:
    A ``TensorDataset`` of (input ids, attention masks, label ids).
  """
  rows = []
  for example in examples:
    text, intent = example[0], example[1]
    encoding = tokenizer.encode_plus(text, add_special_tokens=True, max_length=PAD_LEN,
                                     padding="max_length", truncation=True,
                                     return_attention_mask=True, return_tensors='pt')
    rows.append((encoding["input_ids"],
                 encoding["attention_mask"],
                 torch.LongTensor([d[intent]])))

  # Stack the per-example tensors along the first dimension.
  all_ids = torch.cat([row[0] for row in rows], 0)
  all_masks = torch.cat([row[1] for row in rows], 0)
  all_labels = torch.cat([row[2] for row in rows], 0)
  return TensorDataset(all_ids, all_masks, all_labels)


# BERT model with SNIPS

In [None]:
def accuracy(preds, labels):
  """Return the fraction of rows of ``preds`` whose arg-max column equals the matching entry of ``labels``."""
  predicted_classes = np.argmax(preds, axis=1)
  n_correct = np.sum(predicted_classes == labels)
  return n_correct / len(labels)

In [None]:
def evaluate(model, dataloader):
    """Return the classification accuracy of ``model`` over ``dataloader``.

    Every batch is indexed as ``(input_ids, attention_mask, labels)``. The
    result is the number of correct arg-max predictions divided by the
    number of examples seen, so a smaller final batch is weighted by its
    size instead of counting as much as a full batch.

    Args:
        model: Callable returning ``(loss, logits)`` when invoked with
            ``labels=...`` and ``return_dict=False``; must provide ``eval()``.
        dataloader: Iterable of batches.

    Returns:
        Accuracy as a float, or ``0.0`` if the dataloader yields no examples.
    """
    model.eval()
    correct = 0
    seen = 0
    # Iterate lazily: wrapping the loader in list() would keep every batch
    # in memory at the same time.
    for batch in tqdm(dataloader):
        b_input_ids = batch[0].to(device)
        b_input_mask = batch[1].to(device)
        b_labels = batch[2].to(device)
        with torch.no_grad():
            (loss, logits) = model(b_input_ids,
                                   token_type_ids=None,
                                   attention_mask=b_input_mask,
                                   labels=b_labels, return_dict=False)
        predictions = logits.detach().cpu().numpy().argmax(axis=1)
        label_ids = b_labels.to('cpu').numpy()
        correct += int((predictions == label_ids).sum())
        seen += len(label_ids)

    return correct / seen if seen else 0.0

In [None]:

def train(model, train_dataloader, validation_dataloader, epochs):
    """Fine-tune ``model`` for ``epochs`` passes over ``train_dataloader``.

    Uses AdamW (lr 2e-5, eps 1e-8) with a linear schedule without warm-up
    whose length is ``len(train_dataloader) * epochs``, and clips the
    gradient norm to 1.0 on every step. After each epoch the mean training
    loss is printed and, if ``validation_dataloader`` is truthy, the
    validation accuracy computed by ``evaluate``.

    Args:
        model: Model returning ``(loss, logits)`` when called with
            ``labels=...`` and ``return_dict=False``.
        train_dataloader: Sized iterable of ``(input_ids, attention_mask, labels)`` batches.
        validation_dataloader: Batches for validation, or ``None`` to skip it.
        epochs: Number of passes over the training data.
    """
    optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8)
    total_steps = len(train_dataloader) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)
    # Loop over the `epochs` argument (not the notebook-level EPOCHS) so the
    # number of passes agrees with the schedule length computed above.
    for epoch_i in range(epochs):
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        total_train_loss = 0
        model.train()

        # Iterate the loader directly; list(enumerate(...)) would hold every
        # batch in memory before the first step.
        for batch in tqdm(train_dataloader):
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)
            model.zero_grad()

            (loss, logits) = model(b_input_ids,
                                   token_type_ids=None,
                                   attention_mask=b_input_mask,
                                   labels=b_labels, return_dict=False)
            total_train_loss += loss.item()

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()

        avg_train_loss = total_train_loss / len(train_dataloader)
        print("  Average training loss: {0:.2f}".format(avg_train_loss))
        if validation_dataloader:
          print("Validation accuracy: ", evaluate(model, validation_dataloader))


In [None]:
def partial_training_set(examples_per_intent):
    """Draw ``examples_per_intent`` random SNIPS training examples for every intent.

    Intents are visited in the iteration order of ``d``; for each one, all
    ``[sentence, label]`` pairs with that label are collected and
    ``random.sample`` picks ``examples_per_intent`` of them.

    Returns:
        A flat list of ``[sentence, label]`` pairs.
    """
    sampled = []
    for intent in d:
      pool = [
          [SNIPS_train_sentences[position], tag]
          for position, tag in enumerate(SNIPS_train_labels)
          if tag == intent
      ]
      sampled.extend(random.sample(pool, examples_per_intent))
    return sampled

In [None]:
BATCH_SIZE = 16

EPOCHS = 15
EXAMPLES_PER_INTENT = 250
INTENT_DIM = 7
mini_train_set = examples_to_dataset(partial_training_set(EXAMPLES_PER_INTENT))


NameError: ignored

In [None]:
train_dataloader = DataLoader(mini_train_set, sampler=RandomSampler(mini_train_set), batch_size=BATCH_SIZE)
val_dataset = examples_to_dataset(SNIPS_valid_examples)
test_dataset = examples_to_dataset(SNIPS_test_examples)

validation_dataloader = DataLoader(val_dataset, sampler=RandomSampler(val_dataset), batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, sampler=SequentialSampler(test_dataset), batch_size=BATCH_SIZE)

SNIPS_bert_model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased",
    num_labels = INTENT_DIM,
    output_attentions = False,
    output_hidden_states = False,
)

Downloading:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [None]:
train(SNIPS_bert_model, train_dataloader, validation_dataloader, EPOCHS)






  0%|          | 0/110 [00:00<?, ?it/s]

  Average training loss: 1.25


  0%|          | 0/44 [00:00<?, ?it/s]

Validation accuracy:  0.9474431818181818


In [None]:
print("Test accuracy:", evaluate(SNIPS_bert_model, test_dataloader))

Evaluating on test set:


  0%|          | 0/44 [00:00<?, ?it/s]

Test accuracy: 0.9204545454545454


In [None]:
  EPOCHS = 1

In [None]:
def run_SNIPS_BERT_models():
  """Train one fresh BERT classifier per training-set size and tabulate test accuracy.

  For each value in ``[1, 5, 10, 25, 50, 100, 250]`` a training set with
  that many examples per intent is sampled, a new ``bert-base-uncased``
  classifier with 7 labels is trained for the notebook-level ``EPOCHS``,
  and its accuracy on the SNIPS test set is recorded and printed.

  Returns:
    A DataFrame with the columns ``'Examples Per Intent'`` and ``'Test Accuracy'``.
  """
  BATCH_SIZE = 16

  INTENT_DIM = 7
  num_intents = [1, 5, 10, 25, 50, 100, 250]
  accuracies = []

  # The validation and test sets do not depend on the training-set size,
  # so they are encoded once instead of once per iteration.
  val_dataset = examples_to_dataset(SNIPS_valid_examples)
  test_dataset = examples_to_dataset(SNIPS_test_examples)
  validation_dataloader = DataLoader(val_dataset, sampler=RandomSampler(val_dataset), batch_size=BATCH_SIZE)
  test_dataloader = DataLoader(test_dataset, sampler=SequentialSampler(test_dataset), batch_size=BATCH_SIZE)

  for examples_per_intent in num_intents:
    mini_train_set = examples_to_dataset(partial_training_set(examples_per_intent))
    train_dataloader = DataLoader(mini_train_set, sampler=RandomSampler(mini_train_set), batch_size=BATCH_SIZE)

    # Start from the pretrained checkpoint for every training-set size.
    SNIPS_bert_model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",
        num_labels = INTENT_DIM,
        output_attentions = False,
        output_hidden_states = False,
    )
    train(SNIPS_bert_model, train_dataloader, validation_dataloader, EPOCHS)
    acc = evaluate(SNIPS_bert_model, test_dataloader)
    accuracies.append(acc)
    print("test accuracy: ", acc)

  # Return the table: the calling cell assigns the result to `df`.
  return pd.DataFrame({'Examples Per Intent': num_intents, 'Test Accuracy': accuracies})

In [None]:
df = run_SNIPS_BERT_models()


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at



  0%|          | 0/1 [00:00<?, ?it/s]

  Average training loss: 1.95


  0%|          | 0/44 [00:00<?, ?it/s]

Validation accuracy:  0.17613636363636365


  0%|          | 0/44 [00:00<?, ?it/s]

test accuracy:  0.16145833333333334


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at



  0%|          | 0/3 [00:00<?, ?it/s]

  Average training loss: 1.98


  0%|          | 0/44 [00:00<?, ?it/s]

Validation accuracy:  0.15151515151515152


  0%|          | 0/44 [00:00<?, ?it/s]

test accuracy:  0.1543560606060606


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at



  0%|          | 0/5 [00:00<?, ?it/s]

  Average training loss: 1.98


  0%|          | 0/44 [00:00<?, ?it/s]

Validation accuracy:  0.14772727272727273


  0%|          | 0/44 [00:00<?, ?it/s]

test accuracy:  0.13162878787878787


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at



  0%|          | 0/11 [00:00<?, ?it/s]

  Average training loss: 1.94


  0%|          | 0/44 [00:00<?, ?it/s]

Validation accuracy:  0.40246212121212116


  0%|          | 0/44 [00:00<?, ?it/s]

test accuracy:  0.4067234848484848


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at



  0%|          | 0/22 [00:00<?, ?it/s]

  Average training loss: 1.86


  0%|          | 0/44 [00:00<?, ?it/s]

Validation accuracy:  0.6212121212121212


  0%|          | 0/44 [00:00<?, ?it/s]

test accuracy:  0.5923295454545454


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at



  0%|          | 0/44 [00:00<?, ?it/s]

  Average training loss: 1.67


  0%|          | 0/44 [00:00<?, ?it/s]

Validation accuracy:  0.8035037878787878


  0%|          | 0/44 [00:00<?, ?it/s]

test accuracy:  0.8016098484848485


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at



  0%|          | 0/110 [00:00<?, ?it/s]

# Loading ATIS dataset

In [None]:
def clean_file(file):
  """Extract the list of lines from the text form of an uploaded RTF file.

  The value is converted with ``str``, everything up to and including the
  first ``"strokec2 "`` marker is discarded, the remainder is split on the
  literal line separator used below, and the last piece is dropped.

  Returns:
    List of strings, one per extracted line.
  """
  body = str(file).split("strokec2 ")[1]
  return body.split("\\\\\\n")[:-1]

In [None]:
from google.colab import files
ATIS_intent_dict = files.upload()

Saving ATIS_intent_dict.rtf to ATIS_intent_dict.rtf


In [None]:
ATIS_intent_dict = clean_file(ATIS_intent_dict['ATIS_intent_dict.rtf'])

In [None]:
ATIS_vocab_dict = files.upload()

Saving ATIS_vocab_dict.rtf to ATIS_vocab_dict.rtf


In [None]:
ATIS_vocab_dict = clean_file(ATIS_vocab_dict['ATIS_vocab_dict.rtf'])

In [None]:
ATIS_train_intents = files.upload()

Saving ATIS_train_intents.rtf to ATIS_train_intents.rtf


In [None]:
ATIS_train_intents = clean_file(ATIS_train_intents['ATIS_train_intents.rtf'])

In [None]:
# SECURITY NOTE(review): eval() executes every uploaded line as Python code, so
# only run this on files you trust. If each line is a plain integer id, int(i)
# would be a safer parse — confirm the file format before switching.
ATIS_train_intents = [eval(i) for i in ATIS_train_intents]

In [None]:
ATIS_train_queries = files.upload()

Saving ATIS_train_queries.rtf to ATIS_train_queries.rtf


In [None]:
ATIS_train_queries = clean_file(ATIS_train_queries['ATIS_train_queries.rtf'])

In [None]:
ATIS_test_intents = files.upload()

Saving ATIS_test_intents.rtf to ATIS_test_intents.rtf


In [None]:
ATIS_test_intents = clean_file(ATIS_test_intents['ATIS_test_intents.rtf'])

In [None]:
# SECURITY NOTE(review): eval() executes every uploaded line as Python code, so
# only run this on files you trust. If each line is a plain integer id, int(i)
# would be a safer parse — confirm the file format before switching.
ATIS_test_intents = [eval(i) for i in ATIS_test_intents]

In [None]:
ATIS_test_queries = files.upload()

Saving ATIS_test_queries.rtf to ATIS_test_queries.rtf


In [None]:
ATIS_test_queries = clean_file(ATIS_test_queries['ATIS_test_queries.rtf'])

In [None]:
# Lookup tables between the position of an intent in ATIS_intent_dict and its
# name, built before the list is modified below. If a name occurs more than
# once, the name -> position table keeps the last position.
prev_id_to_label = dict(enumerate(ATIS_intent_dict))
prev_label_to_id = {name: position for position, name in enumerate(ATIS_intent_dict)}

In [None]:
# Frequency of each intent id in the training data. Counter is a dict
# subclass whose lookups return 0 for ids that never occur, so later
# frequency comparisons cannot fail with KeyError on an unseen id.
labels = Counter(ATIS_train_intents)
new_labels = []

In [None]:
# Replace every compound intent name ("a+b+...") with the one component that
# has the highest training frequency according to `labels`; on a tie the
# component listed first wins.
for position, name in enumerate(ATIS_intent_dict):
  if "+" in name:
    ATIS_intent_dict[position] = max(
        name.split("+"),
        key=lambda part: labels[prev_label_to_id[part]],
    )


In [None]:
# Distinct intent names, kept in order of first appearance.
unique_intent_dict = list(dict.fromkeys(ATIS_intent_dict))


In [None]:
# id_to_label: position in ATIS_intent_dict -> intent name at that position.
id_to_label = dict(enumerate(ATIS_intent_dict))
# label_to_id: intent name -> its position in unique_intent_dict.
label_to_id = {name: new_id for new_id, name in enumerate(unique_intent_dict)}

In [None]:
def map_compound(data):
  """Rewrite each id in ``data`` in place via ``id_to_label`` then ``label_to_id``.

  The list passed in is modified and also returned, so calling this twice
  on the same list applies the mapping twice.
  """
  for position, old_id in enumerate(data):
    data[position] = label_to_id[id_to_label[old_id]]
  return data

In [None]:
ATIS_train_intents = map_compound(ATIS_train_intents)

In [None]:
ATIS_test_intents = map_compound(ATIS_test_intents)

In [None]:
# Largest number of whitespace-separated tokens in any training query
# (0 if there are no queries).
MAX_LEN = max((len(query.split()) for query in ATIS_train_queries), default=0)


In [None]:
MAX_LEN

48

In [None]:
def ATIS_examples_to_dataset(examples):
  """Turn ``[token_ids, intent_id]`` pairs into a padded ``TensorDataset``.

  Every token sequence is converted to ints, cut to at most ``MAX_LEN``
  tokens and right-padded with 0 up to ``MAX_LEN``. The attention mask is 1
  on real tokens and 0 on padding. The input lists are left untouched, so
  the same examples can be encoded repeatedly with identical results.

  Args:
    examples: Iterable of ``[token_ids, intent_id]`` where ``token_ids`` is
      a sequence of ints (or int-like strings).

  Returns:
    A ``TensorDataset`` of (input ids, attention masks, labels), all int64.
  """
  input_ids = []
  attention_masks = []
  labels = []
  for example in examples:
    # Build a fresh list: padding the caller's list in place would make a
    # second call see an already-padded sequence and mark padding as real.
    token_ids = [int(x) for x in example[0]][:MAX_LEN]
    n_pad = MAX_LEN - len(token_ids)
    input_ids.append(token_ids + [0] * n_pad)
    attention_masks.append([1] * len(token_ids) + [0] * n_pad)
    labels.append(example[1])

  n_rows = len(input_ids)
  # reshape keeps the (rows, MAX_LEN) layout even when there are no examples.
  id_tensor = torch.tensor(input_ids, dtype=torch.long).reshape(n_rows, MAX_LEN)
  mask_tensor = torch.tensor(attention_masks, dtype=torch.long).reshape(n_rows, MAX_LEN)
  label_tensor = torch.tensor(labels, dtype=torch.long)
  dataset = TensorDataset(id_tensor, mask_tensor, label_tensor)
  return dataset

In [None]:
# One [token ids, intent id] pair per test query; the tokens of a query are
# its whitespace-separated fields parsed as ints.
ATIS_test_examples = [
  [[int(token) for token in query.split()], ATIS_test_intents[position]]
  for position, query in enumerate(ATIS_test_queries)
]

In [None]:
# One [token ids, intent id] pair per training query; the tokens of a query
# are its whitespace-separated fields parsed as ints.
ATIS_train_examples = [
  [[int(token) for token in query.split()], ATIS_train_intents[position]]
  for position, query in enumerate(ATIS_train_queries)
]

In [None]:
def ATIS_partial_training_set(examples_per_intent):
    """Sample ``examples_per_intent`` ATIS training examples for each intent.

    ``ATIS_train_intents`` is expected to hold positions into
    ``unique_intent_dict`` (the values written by ``map_compound``), so each
    example is grouped by comparing its id with that position directly
    rather than through ``id_to_label``, which is keyed by positions in
    ``ATIS_intent_dict``.

    Intents with fewer than ``examples_per_intent`` training examples are
    left out of the result.

    Returns:
        A flat list of ``[tokens, intent_id]`` pairs, where ``tokens`` is the
        query split on whitespace.
    """
    examples = []
    for label_id in range(len(unique_intent_dict)):
      label_examples = [
          [ATIS_train_queries[position].split(), intent_id]
          for position, intent_id in enumerate(ATIS_train_intents)
          if intent_id == label_id
      ]
      if len(label_examples) >= examples_per_intent:
        examples += random.sample(label_examples, examples_per_intent)
    return examples

# BERT model with ATIS

In [None]:
BATCH_SIZE = 16

EPOCHS = 15
INTENT_DIM = 18


In [None]:
# Encode the full ATIS training and test splits and wrap them in dataloaders.
train_set = ATIS_examples_to_dataset(ATIS_train_examples)

ATIS_train_dataloader = DataLoader(train_set, sampler=RandomSampler(train_set), batch_size=BATCH_SIZE)
ATIS_test_dataset = ATIS_examples_to_dataset(ATIS_test_examples)

ATIS_test_dataloader = DataLoader(ATIS_test_dataset, sampler=SequentialSampler(ATIS_test_dataset), batch_size=BATCH_SIZE)

# The next cell trains `ATIS_bert_model`, so the model has to be created here.
ATIS_bert_model = BertForSequenceClassification.from_pretrained(
    "bert-base-uncased", # Use the 12-layer BERT model, with an uncased vocab.
    num_labels = INTENT_DIM,
    output_attentions = False, # Whether the model returns attentions weights.
    output_hidden_states = False, # Whether the model returns all hidden-states.
)

In [None]:

train(ATIS_bert_model, ATIS_train_dataloader, ATIS_test_dataloader, EPOCHS)




  0%|          | 0/312 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
def run_ATIS_BERT_models():
  """Train one fresh BERT classifier per ATIS training-set size and tabulate test accuracy.

  For each value in ``[1, 5, 10, 25, 50, 100, 250]`` a training set is drawn
  with ``ATIS_partial_training_set``, a new ``bert-base-uncased`` classifier
  with 18 labels is trained (15 epochs requested, no validation), and its
  accuracy on the ATIS test examples is recorded.

  Returns:
    A DataFrame with the columns ``'Examples Per Intent'`` and ``'Test Accuracy'``.
  """
  BATCH_SIZE = 16

  EPOCHS = 15
  INTENT_DIM = 18
  num_intents = [1, 5, 10, 25, 50, 100, 250]
  accuracies = []

  # The test set is the same for every training-set size, so encode it once.
  test_dataset = ATIS_examples_to_dataset(ATIS_test_examples)
  test_dataloader = DataLoader(test_dataset, sampler=SequentialSampler(test_dataset), batch_size=BATCH_SIZE)

  for examples_per_intent in num_intents:
    mini_train_set = ATIS_examples_to_dataset(ATIS_partial_training_set(examples_per_intent))
    train_dataloader = DataLoader(mini_train_set, sampler=RandomSampler(mini_train_set), batch_size=BATCH_SIZE)

    # Start from the pretrained checkpoint for every training-set size.
    ATIS_bert_model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",
        num_labels = INTENT_DIM,
        output_attentions = False,
        output_hidden_states = False,
    )
    train(ATIS_bert_model, train_dataloader, None, EPOCHS)
    acc = evaluate(ATIS_bert_model, test_dataloader)
    accuracies.append(acc)

  # Return the table: the calling cell assigns the result to `df`.
  return pd.DataFrame({'Examples Per Intent': num_intents, 'Test Accuracy': accuracies})

In [None]:
df = run_ATIS_BERT_models()

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

NameError: ignored

# BiLSTM Model - ATIS

## Loading GloVe embeddings

In [None]:
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [None]:
import os
os.chdir("/content/drive/MyDrive/BrennanTopicsProg")

In [None]:
import csv
from csv import reader

In [None]:
def load_glove_vectors(file_path):
    """Read a GloVe text file into a ``{word: vector}`` dictionary.

    Each non-blank line is split on whitespace; the first field is the word
    and the remaining fields are parsed as the vector components.

    Args:
        file_path: Path of the GloVe text file.

    Returns:
        dict mapping each word to a 1-D ``np.float64`` array.
    """
    dict_ = {}
    # Stream the file line by line instead of loading it whole, decode it as
    # UTF-8 regardless of the platform default, and close it when done.
    with open(file_path, encoding="utf-8") as glove_file:
        for line in glove_file:
            split_line = line.split()
            if not split_line:
                # Skip blank lines (e.g. a trailing newline at end of file).
                continue
            dict_[split_line[0]] = np.array(split_line[1:], dtype=np.float64)
    return dict_

In [None]:
GLOVE_PATH = "./glove.6B.300d.txt"

glove = load_glove_vectors(GLOVE_PATH)

In [None]:
matrix_len = len(glove.keys())
glove_dim = len(list(glove.values())[0])

# One row per ATIS vocabulary entry; entries without a GloVe vector keep an
# all-zero row.
weights_matrix = np.zeros((len(ATIS_vocab_dict), len(glove["the"])))
for row, token in enumerate(ATIS_vocab_dict):
  if token in glove:
    weights_matrix[row] = glove[token]

In [None]:
weights_matrix.shape

(943, 300)

In [None]:
def glove_emb_layer(weights_matrix, trainable=False):
    """Build a ``torch.nn.Embedding`` initialised from ``weights_matrix``.

    Args:
        weights_matrix: 2-D NumPy array with one embedding vector per row;
            it is converted to float32.
        trainable: When True the embedding weights receive gradients; when
            False (default) they are frozen.

    Returns:
        ``(emb_layer, num_embeddings, embedding_dim)``.
    """
    num_embeddings, embedding_dim = weights_matrix.shape
    # from_pretrained freezes the weights unless told otherwise, so the
    # `trainable` flag has to be passed through explicitly.
    emb_layer = torch.nn.Embedding.from_pretrained(
        torch.from_numpy(weights_matrix.astype(np.float32)),
        freeze=not trainable,
    )

    return emb_layer, num_embeddings, embedding_dim

## Training Process

In [None]:
def train(model, dataloader, criterion, optimizer, epoch):
    """Run one training pass of ``model`` over ``dataloader``.

    Each batch is a ``(text, attention, label)`` triple; only ``text`` and
    ``label`` are used. The loss is ``criterion(model(text), label.float())``
    and the gradient norm is clipped to 0.1 before every optimizer step.
    Every 100 batches (except the first) a running "train accuracy" is
    printed and reset; a prediction counts as a hit when
    ``prediction * label > 0``.

    Note: this definition reuses the name of the BERT ``train`` function
    defined earlier in the notebook and replaces it once this cell runs.
    """
    model.train()
    log_interval = 100
    n_batches = len(dataloader)
    hits, seen = 0, 0
    start_time = time.time()

    for idx, batch in tqdm(enumerate(dataloader), total=n_batches):
        text, attention, label = batch

        optimizer.zero_grad()
        predicted_label = model(text)
        loss = criterion(predicted_label, label.float())
        loss.backward()
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
        optimizer.step()

        hits += (predicted_label * label > 0).sum().item()
        seen += label.size(0)

        if idx > 0 and idx % log_interval == 0:
            elapsed = time.time() - start_time
            print('| epoch {:3d} | {:5d}/{:5d} batches '
                  '| train accuracy {:8.3f}'.format(epoch, idx, n_batches,
                                              hits/seen))
            # Restart the running counters and the timer for the next window.
            hits, seen = 0, 0
            start_time = time.time()


In [None]:
def evaluate(model, dataloader, criterion):
    """Return the fraction of examples for which ``prediction * label > 0``.

    Each batch is a ``(text, attention, label)`` triple; only ``text`` and
    ``label`` are used. The model is put in eval mode and run without
    gradient tracking.

    Args:
        model: Module mapping ``text`` to one prediction per example.
        dataloader: Iterable of batches.
        criterion: Accepted for interface compatibility; not used, because
            no loss value is reported by this function.

    Returns:
        Hits divided by the number of examples, or ``0.0`` if the
        dataloader yields no examples.
    """
    model.eval()
    total_acc, total_count = 0, 0

    with torch.no_grad():
        for text, attention, label in dataloader:
            predicted_label = model(text)
            total_acc += (predicted_label * label > 0).sum().item()
            total_count += label.size(0)

    return total_acc / total_count if total_count else 0.0



In [None]:
def train_loop(model, criterion, dataloader=None, epochs=5):
    """Train ``model`` for several epochs with AdamW and report the time per epoch.

    Args:
        model: Module whose parameters are optimised.
        criterion: Loss function forwarded to ``train``.
        dataloader: Batches to train on; defaults to the notebook-level
            ``ATIS_train_dataloader``.
        epochs: Number of epochs to run (default 5).
    """
    if dataloader is None:
        dataloader = ATIS_train_dataloader
    optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-8) # same optimizing function as BERT model

    for epoch in range(1, epochs + 1):
        epoch_start_time = time.time()

        train(model, dataloader, criterion, optimizer, epoch)
        print('-' * 59)
        print('| end of epoch {:3d} | time: {:5.2f}s | '.format(epoch,
                                               time.time() - epoch_start_time))
        print('-' * 59)

In [None]:
class BiLSTMModel(torch.nn.Module):
    """Single-layer bidirectional LSTM over GloVe embeddings with one scalar output per example.

    The embedding layer comes from ``glove_emb_layer(weights_matrix,
    trainable=True)``. The LSTM hidden size equals the embedding width, and
    the final hidden states of the two directions are averaged before the
    linear output layer.
    """
    def __init__(self):
        super(BiLSTMModel, self).__init__()
        self.glove_emb, num_embeddings, embedding_dim = glove_emb_layer(weights_matrix, trainable = True)
        # No `dropout` argument: torch.nn.LSTM applies dropout only between
        # stacked layers, so it has no effect with num_layers=1.
        self.LSTM = torch.nn.LSTM(embedding_dim, embedding_dim, num_layers=1, batch_first=True, bidirectional=True)
        # Sized from the embedding width instead of a hard-coded 300.
        self.output = torch.nn.Linear(embedding_dim, 1)

    def forward(self, text):
        """Return one prediction per row of ``text`` (a batch of token-id sequences)."""
        embedded = self.glove_emb(text)
        _, (h_n, _) = self.LSTM(embedded)
        # h_n stacks the final hidden state of each direction along dim 0.
        avg = torch.mean(h_n, 0)
        # Drop only the trailing size-1 axis so a batch of one keeps its batch axis.
        return self.output(avg).squeeze(-1)

In [None]:
# Mean-squared-error loss passed to train_loop below.
criterion = torch.nn.MSELoss()
# NOTE(review): vocab_size and emsize are not read by any code visible in this notebook.
vocab_size = len(ATIS_vocab_dict)
emsize = 128
bilstm_model = BiLSTMModel().to(device)

# Train the BiLSTM; train_loop iterates over ATIS_train_dataloader.
train_loop(bilstm_model, criterion)


In [None]:
evaluate(bilstm_model, ATIS_test_dataloader, criterion)

NameError: ignored

# Logistic Regression - ATIS

In [None]:
class LogisticRegression(torch.nn.Module):
    """Single linear layer producing 18 logits from averaged GloVe embeddings.

    Depends on notebook-level ``glove_emb_layer``, ``weights_matrix`` and
    ``MAX_LEN`` defined in other cells.
    """

    def __init__(self):
        super().__init__()
        embedding_layer, _, _ = glove_emb_layer(weights_matrix, trainable=True)
        self.glove_emb = embedding_layer
        self.linear = torch.nn.Linear(in_features=MAX_LEN, out_features=18)

    def forward(self, text):
        """Return the logits for ``text``.

        The embedded input is averaged over axis 2 and the result is fed to
        the linear layer, so its last dimension must equal ``MAX_LEN``.
        NOTE(review): if the embedding output is (batch, tokens, dim), axis 2
        averages within each token's vector rather than across tokens --
        confirm this is the intended feature.
        """
        embedded = self.glove_emb(text)
        averaged = embedded.mean(dim=2)
        return self.linear(averaged)

In [None]:
# Number of GloVe entries and the length of one GloVe vector.
matrix_len = len(glove.keys())
glove_dim = len(list(glove.values())[0])

# One row per ATIS vocabulary index; rows whose token has no GloVe vector stay all-zero.
weights_matrix = np.zeros((len(ATIS_vocab_dict), len(glove["the"])))
for word in range(len(ATIS_vocab_dict)):
  token = ATIS_vocab_dict[word]
  if token not in glove:
    continue
  weights_matrix[word] = glove[token]

In [None]:
# Logistic-regression setup: plain SGD on all model parameters with a cross-entropy objective.
learning_rate = 0.001
model = LogisticRegression()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
criterion = torch.nn.CrossEntropyLoss()
# Notebook-level batch size read by the DataLoader cell that follows.
BATCH_SIZE = 16

In [None]:
# Wrap the ATIS examples in DataLoaders: random order for training, sequential order for testing.
train_set = ATIS_examples_to_dataset(ATIS_train_examples)
ATIS_train_dataloader = DataLoader(train_set, sampler=RandomSampler(train_set), batch_size=BATCH_SIZE)
ATIS_test_dataset = ATIS_examples_to_dataset(ATIS_test_examples)
ATIS_test_dataloader = DataLoader(ATIS_test_dataset, sampler=SequentialSampler(ATIS_test_dataset), batch_size=BATCH_SIZE)

In [None]:
# Train the logistic-regression model; every 100 optimiser steps, report the
# latest training loss and the accuracy over the whole ATIS test loader.
# The counter is called `step_count` (not `iter`) so the builtin iter() is not
# shadowed for every cell executed afterwards in the same kernel.
step_count = 0
num_epochs=1
for epoch in range(num_epochs):
    for idx, (text, attention, label) in enumerate(ATIS_train_dataloader):
        optimizer.zero_grad()
        outputs = model(text)
        loss = criterion(outputs, label)
        loss.backward()
        optimizer.step()

        step_count += 1

        if step_count % 100 == 0:
            correct = 0
            total = 0
            # Evaluation needs no gradients; eval()/no_grad() avoid building
            # autograd graphs, and distinct names keep the training loop's
            # idx/text/attention/outputs intact.
            model.eval()
            with torch.no_grad():
                for eval_text, _eval_attention, eval_labels in ATIS_test_dataloader:
                    eval_outputs = model(eval_text)
                    _, predicted = torch.max(eval_outputs, 1)
                    total += eval_labels.size(0)
                    correct += (predicted == eval_labels).sum().item()
            model.train()

            accuracy = 100 * correct / total

            print('Iteration: {}. Loss: {}. Accuracy: {}'.format(step_count, loss.item(), accuracy))

Iteration: 100. Loss: 2.8807425498962402. Accuracy: 4.3673014640808105
Iteration: 200. Loss: 2.823482036590576. Accuracy: 4.3673014640808105
Iteration: 300. Loss: 2.771899700164795. Accuracy: 69.87682342529297


# BiLSTM Model - SNIPS

In [None]:
from torchtext.vocab import Vocab
from torchtext.data.utils import get_tokenizer
from collections import Counter

In [None]:
# Count token frequencies over the SNIPS training sentences and build the vocabulary from them.
tokenizer = get_tokenizer('basic_english')
counter = Counter()
for text in SNIPS_train_sentences:
    counter.update(tokenizer(text))
# NOTE(review): Vocab(counter, min_freq=...) is the counter-based torchtext constructor -- confirm the installed torchtext still provides it.
vocab = Vocab(counter, min_freq=1)

In [None]:
pad_len = 75
pad_token = '<pad>'


def vocab_id(x):
    """Return the vocabulary id of token ``x``."""
    return vocab[x]


def text_pipeline(x):
    """Tokenise ``x`` and return exactly ``pad_len`` vocabulary ids.

    ``pad_len`` copies of ``pad_token`` are appended to the tokens and the
    list is cut to ``pad_len`` entries, so short inputs are right-padded and
    long inputs are truncated.
    """
    padded_tokens = tokenizer(x) + [pad_token] * pad_len
    return [vocab[token] for token in padded_tokens[:pad_len]]

In [None]:
# Reduced SNIPS training set; partial_training_set is defined in an earlier cell (what 250 selects is not visible here).
mini_train_set = partial_training_set(250)

In [None]:
def collate_batch(batch):
    """Collate ``(text, label)`` pairs into ``(labels, texts)`` int64 tensors.

    Labels are looked up in the notebook-level mapping ``d``; each text is
    encoded with ``text_pipeline`` and the encodings are stacked one row per
    example. Note the return order: labels first, then texts.
    """
    encoded_labels, encoded_rows = [], []
    for raw_text, raw_label in batch:
        encoded_labels.append(d[raw_label])
        encoded_rows.append(torch.tensor(text_pipeline(raw_text), dtype=torch.int64))

    return (
        torch.tensor(encoded_labels, dtype=torch.int64),
        torch.stack(encoded_rows, dim=0),
    )

# Batches of 8 built with collate_batch, so each batch is (labels, texts); only the training loader shuffles.
# NOTE(review): the saved output of this cell is an AttributeError -- confirm its origin before relying on these loaders.
train_dataloader = DataLoader(list(mini_train_set), batch_size=8, shuffle=True, collate_fn=collate_batch)
validation_dataloader = DataLoader(list(SNIPS_valid_examples), batch_size=8, shuffle=False, collate_fn=collate_batch)
test_dataloader = DataLoader(list(SNIPS_test_examples), batch_size=8, shuffle=False, collate_fn=collate_batch)

AttributeError: ignored

In [None]:
def vocab_dataloader(examples, pad_id=1):
  """Encode ``(sentence, label)`` examples into a ``TensorDataset``.

  Despite its name this returns a dataset, not a ``DataLoader``; wrap the
  result in a ``DataLoader`` to iterate over batches.

  Args:
    examples: iterable of pairs; item 0 is the raw sentence passed to
      ``text_pipeline``, item 1 is a key of the notebook-level mapping ``d``.
    pad_id: token id treated as padding. Defaults to 1, the filler value this
      function appends -- presumably the id of '<pad>' in ``vocab``; TODO
      confirm against ``vocab['<pad>']``.

  Returns:
    ``TensorDataset(input_ids, attention_masks, labels)`` -- in that order.
  """
  id_rows = []
  mask_rows = []
  label_ids = []
  for example in examples:
    sentence = text_pipeline(example[0])
    # Top up to MAX_LEN in case the pipeline returned fewer ids.
    sentence.extend([pad_id] * (MAX_LEN - len(sentence)))
    # text_pipeline already pads its output, so a mask based on len(sentence)
    # would be all ones; derive it from the ids instead.
    mask_rows.append([int(token_id != pad_id) for token_id in sentence])
    id_rows.append(sentence)
    label_ids.append(d[example[1]])

  input_ids = torch.tensor(id_rows, dtype=torch.long)
  attention_masks = torch.tensor(mask_rows, dtype=torch.long)
  labels = torch.tensor(label_ids, dtype=torch.long)

  print(input_ids.size())
  print(attention_masks.size())
  print(labels.size())
  dataset = TensorDataset(input_ids, attention_masks, labels)
  return dataset

In [None]:
# MAX_LEN has the same value as pad_len, the length text_pipeline pads and truncates to.
MAX_LEN = 75
# Despite the *_dataloader names, these hold the TensorDataset objects returned by vocab_dataloader;
# iterating them directly yields one (input_ids, attention_mask, label) sample at a time, not batches.
SNIPS_train_dataloader = vocab_dataloader(partial_training_set(250))
SNIPS_test_dataloader = vocab_dataloader(SNIPS_test_examples)
SNIPS_valid_dataloader = vocab_dataloader(SNIPS_valid_examples)

torch.Size([1750, 75])
torch.Size([1750, 75])
torch.Size([1750])
torch.Size([700, 75])
torch.Size([700, 75])
torch.Size([700])
torch.Size([700, 75])
torch.Size([700, 75])
torch.Size([700])


In [None]:
# Number of GloVe entries and the length of one GloVe vector.
matrix_len = len(glove.keys())
glove_dim = len(list(glove.values())[0])

# One row per SNIPS vocabulary index; rows whose token has no GloVe vector stay all-zero.
weights_matrix = np.zeros((len(vocab), len(glove["the"])))
for word in range(len(vocab)):
  token = vocab.itos[word]
  if token not in glove:
    continue
  weights_matrix[word] = glove[token]

In [None]:
def train(model, dataloader, criterion, optimizer, epoch):
    """Run one optimisation pass over ``dataloader`` and log running accuracy.

    Each batch is unpacked as ``(label, text)``. A prediction counts as a hit
    when ``prediction * label`` is strictly positive. The running counters
    are printed and reset every 100 batches (never on batch 0).
    ``epoch`` is used only in the log line. Returns nothing.
    """
    model.train()
    log_interval = 100
    hits, seen = 0, 0
    start_time = time.time()

    for idx, (label, text) in tqdm(enumerate(dataloader), total=len(dataloader)):
        optimizer.zero_grad()
        predicted_label = model(text)
        criterion(predicted_label, label.float()).backward()
        # Clip the total gradient norm to 0.1 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
        optimizer.step()

        hits += ((predicted_label * label) > 0).sum().item()
        seen += label.size(0)

        if idx == 0 or idx % log_interval != 0:
            continue

        elapsed = time.time() - start_time  # measured but not reported
        print('| epoch {:3d} | {:5d}/{:5d} batches '
              '| train accuracy {:8.3f}'.format(epoch, idx, len(dataloader),
                                          hits / seen))
        hits, seen = 0, 0
        start_time = time.time()

def evaluate(model, dataloader, criterion):
    """Return the fraction of examples whose prediction agrees with its label.

    Batches are unpacked as ``(label, text)``. The model is put in eval mode
    and run without gradients; a hit is ``prediction * label > 0``. The
    criterion is evaluated on every batch but its value is not used.
    Raises ZeroDivisionError when the loader yields no examples.
    """
    model.eval()
    hits = 0
    seen = 0

    with torch.no_grad():
        for label, text in dataloader:
            predicted_label = model(text)
            criterion(predicted_label, label)  # result intentionally discarded
            hits += ((predicted_label * label) > 0).sum().item()
            seen += label.size(0)

    return hits / seen

def train_loop(model, criterion):
    """Train ``model`` for 15 epochs, validating after each one.

    Reads the notebook-level ``train_dataloader`` and ``validation_dataloader``
    and calls the notebook's ``train`` and ``evaluate``. The learning rate is
    multiplied by 0.1 whenever validation accuracy falls below the best value
    seen so far. Returns nothing; progress is printed.

    Args:
        model: module whose parameters are optimised with SGD.
        criterion: loss passed through to ``train`` and ``evaluate``.
    """
    # Hyperparameters
    EPOCHS = 15 # epoch
    LR = 5  # learning rate

    # The batch size is fixed by the DataLoaders built elsewhere, so no local
    # batch-size constant or dataset copies are kept here.
    optimizer = torch.optim.SGD(model.parameters(), lr=LR)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.1)
    total_accu = None

    for epoch in range(1, EPOCHS + 1):
        epoch_start_time = time.time()

        train(model, train_dataloader, criterion, optimizer, epoch)
        accu_val = evaluate(model, validation_dataloader, criterion)
        if total_accu is not None and total_accu > accu_val:
            # Validation got worse than the best so far: decay the learning rate.
            scheduler.step()
        else:
            total_accu = accu_val
        print('-' * 59)
        print('| end of epoch {:3d} | time: {:5.2f}s | '
              'valid accuracy {:8.3f} '.format(epoch,
                                               time.time() - epoch_start_time,
                                               accu_val))
        print('-' * 59)

In [None]:
class BiLSTMModel(torch.nn.Module):
    """Bidirectional LSTM over GloVe embeddings that emits one score per example.

    Depends on notebook-level names defined in other cells:
    ``glove_emb_layer``, ``weights_matrix`` and ``glove_dim``.
    """

    def __init__(self):
        super(BiLSTMModel, self).__init__()
        # glove_emb_layer returns a 3-tuple; only the layer itself is kept.
        self.glove_emb, num_embeddings, embedding_dim = glove_emb_layer(weights_matrix, trainable = True)
        self.LSTM = torch.nn.LSTM(glove_dim, glove_dim, num_layers=1, batch_first=True, bidirectional=True)
        # in_features must equal the LSTM hidden size, which is glove_dim.
        self.output = torch.nn.Linear(glove_dim, 1)

    def forward(self, text):
        """Score a batch of token-id sequences.

        Args:
            text: integer tensor of token ids accepted by ``self.glove_emb``
                (presumably (batch, seq_len) -- TODO confirm against the caller).

        Returns:
            Tensor with the trailing size-1 dimension removed; for batched
            input this is one score per example, including a batch of one.
        """
        embedded = self.glove_emb(text)
        _, (h_n, _) = self.LSTM(embedded)
        # Average the final hidden states of the two directions.
        pooled = torch.mean(h_n, 0)
        # squeeze(-1) drops only the output feature axis; a bare squeeze()
        # would also drop the batch axis when the batch holds one example.
        return self.output(pooled).squeeze(-1)


In [None]:
# Instantiate the SNIPS BiLSTM and train it against a mean-squared-error loss.
criterion = torch.nn.MSELoss()
# vocab_size and emsize are assigned here but not passed to BiLSTMModel(), which takes no arguments.
vocab_size = len(vocab)
emsize = 128
bilstm_model = BiLSTMModel()

train_loop(bilstm_model, criterion)


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch   1 |   100/  219 batches | train accuracy    0.839
| epoch   1 |   200/  219 batches | train accuracy    0.874
-----------------------------------------------------------
| end of epoch   1 | time: 59.56s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch   2 |   100/  219 batches | train accuracy    0.850
| epoch   2 |   200/  219 batches | train accuracy    0.859
-----------------------------------------------------------
| end of epoch   2 | time: 57.99s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch   3 |   100/  219 batches | train accuracy    0.861
| epoch   3 |   200/  219 batches | train accuracy    0.856
-----------------------------------------------------------
| end of epoch   3 | time: 57.92s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch   4 |   100/  219 batches | train accuracy    0.853
| epoch   4 |   200/  219 batches | train accuracy    0.864
-----------------------------------------------------------
| end of epoch   4 | time: 56.86s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch   5 |   100/  219 batches | train accuracy    0.848
| epoch   5 |   200/  219 batches | train accuracy    0.861
-----------------------------------------------------------
| end of epoch   5 | time: 57.87s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch   6 |   100/  219 batches | train accuracy    0.876
| epoch   6 |   200/  219 batches | train accuracy    0.848
-----------------------------------------------------------
| end of epoch   6 | time: 59.49s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch   7 |   100/  219 batches | train accuracy    0.860
| epoch   7 |   200/  219 batches | train accuracy    0.856
-----------------------------------------------------------
| end of epoch   7 | time: 56.92s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch   8 |   100/  219 batches | train accuracy    0.863
| epoch   8 |   200/  219 batches | train accuracy    0.859
-----------------------------------------------------------
| end of epoch   8 | time: 57.36s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch   9 |   100/  219 batches | train accuracy    0.859
| epoch   9 |   200/  219 batches | train accuracy    0.855
-----------------------------------------------------------
| end of epoch   9 | time: 57.10s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch  10 |   100/  219 batches | train accuracy    0.866
| epoch  10 |   200/  219 batches | train accuracy    0.846
-----------------------------------------------------------
| end of epoch  10 | time: 56.84s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch  11 |   100/  219 batches | train accuracy    0.845
| epoch  11 |   200/  219 batches | train accuracy    0.864
-----------------------------------------------------------
| end of epoch  11 | time: 56.90s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch  12 |   100/  219 batches | train accuracy    0.870
| epoch  12 |   200/  219 batches | train accuracy    0.850
-----------------------------------------------------------
| end of epoch  12 | time: 59.03s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch  13 |   100/  219 batches | train accuracy    0.861
| epoch  13 |   200/  219 batches | train accuracy    0.855
-----------------------------------------------------------
| end of epoch  13 | time: 75.18s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch  14 |   100/  219 batches | train accuracy    0.845
| epoch  14 |   200/  219 batches | train accuracy    0.865
-----------------------------------------------------------
| end of epoch  14 | time: 57.69s | valid accuracy    0.857 
-----------------------------------------------------------


  0%|          | 0/219 [00:00<?, ?it/s]

| epoch  15 |   100/  219 batches | train accuracy    0.855
| epoch  15 |   200/  219 batches | train accuracy    0.864
-----------------------------------------------------------
| end of epoch  15 | time: 59.26s | valid accuracy    0.857 
-----------------------------------------------------------


In [None]:
# Accuracy of the trained SNIPS BiLSTM on the held-out test loader (value returned by evaluate).
evaluate(bilstm_model, test_dataloader, criterion)

0.8771428571428571

In [None]:

def train(model, dataloader, criterion, optimizer, epoch):
    """Run one optimisation pass over ``dataloader`` and log running accuracy.

    Batches are unpacked as ``(text, attention, label)``, the order in which
    ``vocab_dataloader`` builds its ``TensorDataset``; the attention mask is
    not used. A prediction counts as a hit when ``prediction * label`` is
    strictly positive. Running counters are printed and reset every 100
    batches (never on batch 0).

    Args:
        model: module called as ``model(text)``.
        dataloader: sized iterable of ``(text, attention, label)`` batches.
        criterion: loss called as ``criterion(prediction, label.float())``.
        optimizer: optimiser stepped once per batch.
        epoch: epoch number, used only in the log line.
    """
    model.train()
    total_acc, total_count = 0, 0
    log_interval = 100

    # Unpack ids first and labels last: reading the tuple in the opposite
    # order would feed the labels to the model and use the ids as targets.
    for idx, (text, _attention, label) in tqdm(enumerate(dataloader), total=len(dataloader)):
        optimizer.zero_grad()
        predicted_label = model(text)
        loss = criterion(predicted_label, label.float())
        loss.backward()
        # Clip the total gradient norm to 0.1 before the parameter update.
        torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1)
        optimizer.step()
        total_acc += (predicted_label * label > 0).sum().item()
        total_count += label.size(0)
        if idx % log_interval == 0 and idx > 0:
            print('| epoch {:3d} | {:5d}/{:5d} batches '
                  '| train accuracy {:8.3f}'.format(epoch, idx, len(dataloader),
                                              total_acc/total_count))
            total_acc, total_count = 0, 0

def evaluate(model, dataloader, criterion):
    """Return the fraction of examples whose prediction agrees with its label.

    Batches are unpacked as ``(text, attention, label)``, the order in which
    ``vocab_dataloader`` builds its ``TensorDataset``; the attention mask is
    not used. The model runs in eval mode without gradients and a hit is
    ``prediction * label > 0``.

    Args:
        model: module called as ``model(text)``.
        dataloader: iterable of ``(text, attention, label)`` batches.
        criterion: accepted so the signature matches ``train``; no loss is
            computed because only accuracy is returned.

    Raises:
        ZeroDivisionError: if the loader yields no examples.
    """
    model.eval()
    total_acc, total_count = 0, 0

    with torch.no_grad():
        # Ids first, labels last -- the reverse order would score the labels.
        for text, _attention, label in dataloader:
            predicted_label = model(text)
            total_acc += (predicted_label * label > 0).sum().item()
            total_count += label.size(0)
    return total_acc/total_count

def train_loop(model, criterion):
    """Train ``model`` on the SNIPS tensors for 15 epochs, validating each epoch.

    Reads the notebook-level ``SNIPS_train_dataloader`` and
    ``SNIPS_valid_dataloader`` and calls the notebook's ``train`` and
    ``evaluate``. The learning rate is multiplied by 0.1 whenever validation
    accuracy falls below the best value seen so far. Returns nothing;
    progress is printed.

    Args:
        model: module whose parameters are optimised with SGD.
        criterion: loss passed through to ``train`` and ``evaluate``.
    """
    # Hyperparameters
    EPOCHS = 15 # epoch
    LR = 5  # learning rate
    BATCH_SIZE = 64

    def _as_loader(data, shuffle):
        # vocab_dataloader returns a TensorDataset, so the SNIPS_* globals are
        # datasets; iterating one directly yields single unbatched samples.
        # Anything that is already a DataLoader is used unchanged.
        if isinstance(data, DataLoader):
            return data
        return DataLoader(data, batch_size=BATCH_SIZE, shuffle=shuffle)

    train_loader = _as_loader(SNIPS_train_dataloader, shuffle=True)
    valid_loader = _as_loader(SNIPS_valid_dataloader, shuffle=False)

    optimizer = torch.optim.SGD(model.parameters(), lr=LR)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.1)
    total_accu = None

    for epoch in range(1, EPOCHS + 1):
        epoch_start_time = time.time()

        train(model, train_loader, criterion, optimizer, epoch)
        accu_val = evaluate(model, valid_loader, criterion)
        if total_accu is not None and total_accu > accu_val:
            # Validation got worse than the best so far: decay the learning rate.
            scheduler.step()
        else:
            total_accu = accu_val
        print('-' * 59)
        print('| end of epoch {:3d} | time: {:5.2f}s | '
              'valid accuracy {:8.3f} '.format(epoch,
                                               time.time() - epoch_start_time,
                                               accu_val))
        print('-' * 59)

In [None]:
# Fresh loss and model for the tensor-dataset SNIPS experiment.
criterion = torch.nn.MSELoss()
# vocab_size and emsize are assigned here but not passed to BiLSTMModel(), which takes no arguments.
vocab_size = len(vocab)
emsize = 128
# Uses whichever BiLSTMModel definition was executed most recently; the class is defined again in the following cell.
bilstm_model = BiLSTMModel()

In [None]:
class BiLSTMModel(torch.nn.Module):
    """Bidirectional LSTM over GloVe embeddings that emits one score per example.

    Depends on notebook-level names defined in other cells:
    ``glove_emb_layer``, ``weights_matrix`` and ``glove_dim``.
    """

    def __init__(self):
        super(BiLSTMModel, self).__init__()
        # glove_emb_layer returns a 3-tuple; only the layer itself is kept.
        self.glove_emb, num_embeddings, embedding_dim = glove_emb_layer(weights_matrix, trainable = True)
        self.LSTM = torch.nn.LSTM(glove_dim, glove_dim, num_layers=1, batch_first=True, bidirectional=True)
        # in_features must equal the LSTM hidden size, which is glove_dim.
        self.output = torch.nn.Linear(glove_dim, 1)

    def forward(self, text):
        """Score a batch of token-id sequences.

        Args:
            text: integer tensor of token ids accepted by ``self.glove_emb``
                (presumably (batch, seq_len) -- TODO confirm against the caller).

        Returns:
            Tensor with the trailing size-1 dimension removed; for batched
            input this is one score per example, including a batch of one.
        """
        embedded = self.glove_emb(text)
        _, (h_n, _) = self.LSTM(embedded)
        # Average the final hidden states of the two directions.
        pooled = torch.mean(h_n, 0)
        # squeeze(-1) drops only the output feature axis; a bare squeeze()
        # would also drop the batch axis when the batch holds one example.
        return self.output(pooled).squeeze(-1)


In [None]:
# Smoke test: push every item of SNIPS_train_dataloader through the model.
# The (text, attention, label) order matches the TensorDataset built by vocab_dataloader.
# NOTE(review): the saved output of this cell is a RuntimeError -- cause not visible here.
for idx, (text, attention, label) in tqdm(enumerate(SNIPS_train_dataloader), total=len(SNIPS_train_dataloader)):
  bilstm_model(text)

  0%|          | 0/1750 [00:00<?, ?it/s]

RuntimeError: ignored

In [None]:


# Run the SNIPS training loop. NOTE(review): the saved output of this cell is a RuntimeError -- cause not visible here.
train_loop(bilstm_model, criterion, )

  0%|          | 0/1750 [00:00<?, ?it/s]

RuntimeError: ignored