In [1]:
# Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

import numpy as np
from collections import Counter

In [2]:
# Load ATIS dataset
from datasets import load_dataset

ds = load_dataset("tuetschek/atis")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


atis_train.csv: 0.00B [00:00, ?B/s]

atis_test.csv: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/4978 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/893 [00:00<?, ? examples/s]

In [3]:
# Step 1: Extract text and intent labels
train_texts = list(ds["train"]["text"]) # Input Sentences
train_labels = list(ds["train"]["intent"]) # Correct intent for each sentence

test_texts = list(ds["test"]["text"])
test_labels = list(ds["test"]["intent"])

In [4]:
# Check 1 example after extracting text and intent labels
print(train_texts[0])
print(train_labels[0]) # result is a string but internally it is stored as a number

i want to fly from boston at 838 am and arrive in denver at 1110 in the morning
flight


In [5]:
# Step 2.1: Define special tokens
# Special tokens
PAD_TOKEN = "<PAD>"   # Used to pad shorter sentences
UNK_TOKEN = "<UNK>"   # Used for words not seen during training

In [6]:
# Step 2.2: Build vocabulary from training text
from collections import Counter

# Build vocabulary from training data only
word_counter = Counter() # count no. of times each word appears

for sentence in train_texts:
    words = sentence.lower().split() # Convert words to lowercase to reduce vocab size
    word_counter.update(words)

# Vocabulary is built from training data to avoid data leakage

In [7]:
# Step 2.3: Map each word to a unique integer
word2idx = {
    PAD_TOKEN: 0,   # Padding token
    UNK_TOKEN: 1    # Unknown word token
}

for word in word_counter:
    word2idx[word] = len(word2idx)

# Neural networks cannot process text
# Each word must be represented as a number
# Every word gets a unique integer ID

In [8]:
# Step 2.4: Convert text sentences into sequences of integers
def text_to_sequence(text, word2idx):
    return [
        word2idx.get(word, word2idx[UNK_TOKEN])
        for word in text.lower().split()
    ]

train_sequences = [text_to_sequence(t, word2idx) for t in train_texts]
test_sequences  = [text_to_sequence(t, word2idx) for t in test_texts]

# Each sentence becomes a list of numbers
# Unknown words are mapped to <UNK>
# This prepares the data for the LSTM

In [9]:
# Step 2.5: Pad or truncate sequences to fixed length
MAX_LEN = 30  # LSTM requires fixed-length input

def pad_sequence(seq, max_len):
    if len(seq) < max_len:
        return seq + [word2idx[PAD_TOKEN]] * (max_len - len(seq))
    else:
        return seq[:max_len]

train_padded = [pad_sequence(seq, MAX_LEN) for seq in train_sequences]
test_padded  = [pad_sequence(seq, MAX_LEN) for seq in test_sequences]

In [10]:
print(train_padded[0])
print(len(train_padded[0]))  # Should be 30

[2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 8, 15, 13, 16, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
30


In [11]:
# Step 3.1
# Encode intent labels into integers
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()

# Fit encoder on BOTH train + test labels
# This avoids "unseen label" errors
all_labels = train_labels + test_labels
label_encoder.fit(all_labels)

# Convert string labels to integers
train_labels_encoded = label_encoder.transform(train_labels)
test_labels_encoded  = label_encoder.transform(test_labels)

# Neural networks cannot process string labels
# Each intent is mapped to a unique integer
# LabelEncoder ensures consistent mapping across train and test sets

In [12]:
# Step 3.1 Example
print(train_labels[0], "→", train_labels_encoded[0])

flight → 14


In [13]:
# Step 3.2: Convert everything to PyTorch tensors
import torch

X_train = torch.tensor(train_padded, dtype=torch.long)
X_test  = torch.tensor(test_padded, dtype=torch.long)

y_train = torch.tensor(train_labels_encoded, dtype=torch.long)
y_test  = torch.tensor(test_labels_encoded, dtype=torch.long)


# dtype=torch.long is required for: Embedding layer and Classification loss functions
# Inputs = word indices
# Labels = class indices

In [14]:
# Step 3.3: Create a custom PyTorch dataset
from torch.utils.data import Dataset

class ATISDataset(Dataset):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Dataset stores inputs and labels together
# __getitem__ returns one (sentence, intent) pair
# Allows PyTorch to load data efficiently

In [15]:
# Step 3.4: Create train & test datasets
train_dataset = ATISDataset(X_train, y_train)
test_dataset  = ATISDataset(X_test, y_test)

In [16]:
# Step 3.4: Create DateLoader (for batching)
from torch.utils.data import DataLoader

BATCH_SIZE = 32

train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True
)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False
)
# DataLoader handles batching automatically
# shuffle=True improves generalisation during training
# Makes training faster and cleaner

In [17]:
# Step 4.1: Define the model class
import torch.nn as nn

class IntentLSTM(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super(IntentLSTM, self).__init__()

        # Convert word indices to dense vectors
        self.embedding = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dim)

        # LSTM layer: input=embedding, output=hidden_dim
        self.lstm = nn.LSTM(input_size=embedding_dim, hidden_size=hidden_dim, batch_first=True)

        # Fully connected layer for intent classification
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # x shape: (batch_size, seq_len)
        x = self.embedding(x)  # (batch_size, seq_len, embedding_dim)

        # LSTM returns output for every timestamp and the final hidden state and cell state, only need final hidden state
        _, (hidden, _) = self.lstm(x)  # hidden shape: (1, batch_size, hidden_dim)

        # Take the last hidden state for classification and remove extra dimension
        hidden = hidden.squeeze(0)  # (batch_size, hidden_dim)

        # Fully connected layer
        out = self.fc(hidden)  # (batch_size, output_dim)
        return out
# nn.Embedding: maps word indices to 64-dimensional dense vectors
# nn.LSTM: reads the sequence, understands order & context
# nn.Linear: outputs a vector of size = number of intent classes
# forward(): defines how data moves through the model

In [18]:
# Step 4.2: Define hyperparameters & instantiate the model
VOCAB_SIZE = len(word2idx)  # total words + 1 (ensures embeddings for all words)
EMBEDDING_DIM = 64
HIDDEN_DIM = 64 # number of LSTM hidden units
OUTPUT_DIM = len(label_encoder.classes_)  # number of unique intents (final prediction)

# Instantiate the model
model = IntentLSTM(VOCAB_SIZE, EMBEDDING_DIM, HIDDEN_DIM, OUTPUT_DIM)

In [19]:
# Step 4.3: Define loss & optimizer
import torch.optim as optim

# Cross-entropy loss for multi-class classification
criterion = nn.CrossEntropyLoss()

# Adam optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001) # adaptive optimizer, widely used for NLP

In [20]:
# Step 5 - Train the LSTM Model in PyTorch
# Step 5.1: Convert data to PyTorch tensors
import torch

# Convert padded sequences to torch tensors (LongTensor for indices)
X_train_tensor = torch.LongTensor(train_padded)
X_test_tensor  = torch.LongTensor(test_padded)

# Convert labels to torch tensors
y_train_tensor = torch.LongTensor(train_labels_encoded)
y_test_tensor  = torch.LongTensor(test_labels_encoded)

# LongTensor is required because embedding layers expect integer word indices
# Labels also need to be tensors for CrossEntropyLoss

In [21]:
# Step 5.2: Create DataLoader for batching
from torch.utils.data import TensorDataset, DataLoader

# Combine inputs and labels into a dataset
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset  = TensorDataset(X_test_tensor, y_test_tensor)

# Create DataLoader for mini-batch training
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader  = DataLoader(test_dataset, batch_size=32)

# TensorDataset groups inputs + labels
# DataLoader provides mini-batches for training
# shuffle=True ensures training data is randomized each epoch

In [22]:
# Step 5.3: Training loop
# Number of epochs
EPOCHS = 10

# Set the model to training mode
model.train()

for epoch in range(EPOCHS):
    total_loss = 0
    correct = 0
    total = 0

    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()            # reset gradients
        outputs = model(X_batch)         # forward pass
        loss = criterion(outputs, y_batch)  # compute loss
        loss.backward()                  # backward pass
        optimizer.step()                 # update weights

        total_loss += loss.item()        # accumulate loss, used for logging process

        # Compute accuracy
        _, predicted = torch.max(outputs, 1) # Pick the intent with the highest score
        correct += (predicted == y_batch).sum().item() # Count correct predictions
        total += y_batch.size(0) # Count total samples

    epoch_loss = total_loss / len(train_loader)
    epoch_acc  = correct / total
    print(f"Epoch {epoch+1}/{EPOCHS} - Loss: {epoch_loss:.4f} - Accuracy: {epoch_acc:.4f}")

Epoch 1/10 - Loss: 1.2997 - Accuracy: 0.7252
Epoch 2/10 - Loss: 1.1406 - Accuracy: 0.7364
Epoch 3/10 - Loss: 1.1381 - Accuracy: 0.7366
Epoch 4/10 - Loss: 1.1365 - Accuracy: 0.7368
Epoch 5/10 - Loss: 1.1348 - Accuracy: 0.7364
Epoch 6/10 - Loss: 1.1336 - Accuracy: 0.7366
Epoch 7/10 - Loss: 1.1314 - Accuracy: 0.7370
Epoch 8/10 - Loss: 0.9910 - Accuracy: 0.7435
Epoch 9/10 - Loss: 0.7165 - Accuracy: 0.7977
Epoch 10/10 - Loss: 0.5927 - Accuracy: 0.8479


In [23]:
# Step 6: Evaluate the model
# Step 6.1: Convert test data and labels to PyTorch tensors

# Convert padded sequences to tensors (because PyTorch works with tensors, not lists)
X_test_tensor = torch.tensor(test_padded, dtype=torch.long)   # Input sequences
y_test_tensor = torch.tensor(test_labels_encoded, dtype=torch.long)  # Encoded labels

In [24]:
# Step 6.2: Put the model in evaluation mode
model.eval()

IntentLSTM(
  (embedding): Embedding(891, 64)
  (lstm): LSTM(64, 64, batch_first=True)
  (fc): Linear(in_features=64, out_features=26, bias=True)
)

In [25]:
# Step 6.3: Make Predictions
with torch.no_grad():  # Disable gradient calculation for faster evaluation
    outputs = model(X_test_tensor)       # Forward pass
    predicted_labels = torch.argmax(outputs, dim=1)  # Pick class with highest probability

In [27]:
# Step 6.4: Compute metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Convert tensors to numpy arrays for sklearn
y_true = y_test_tensor.numpy()
y_pred = predicted_labels.numpy()

# Accuracy, precision, recall and f1-score
acc  = accuracy_score(y_true, y_pred)
prec = precision_score(y_true, y_pred, average='macro')
rec  = recall_score(y_true, y_pred, average='macro')
f1   = f1_score(y_true, y_pred, average='macro')

# Confusion matrix
cm = confusion_matrix(y_true, y_pred)

print(f"Accuracy:  {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall:    {rec:.4f}")
print(f"F1-score:  {f1:.4f}")
print("Confusion Matrix:")
print(cm)

Accuracy:  0.8208
Precision: 0.1444
Recall:    0.1872
F1-score:  0.1597
Confusion Matrix:
[[ 26   0   0   0   6   0   0   0   0   0   1   0   0   0   0   0   0   0
    0   0]
 [  2   0   3   0   0   0   0   0   0   0   0   0   0   0   0   0   0   4
    0   0]
 [  1   0  43   0   0   0   0   0   0   0   2   0   0   0   0   0   0   2
    0   0]
 [  0   0   1   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
    0   0]
 [ 21   0   2   0   8   0   0   0   0   0   6   0   0   0   0   0   0   1
    0   0]
 [  4   0   1   0   9   0   0   0   0   0   4   0   0   0   0   0   0   0
    0   0]
 [ 11   0   0   0   3   0   0   0   0   0   1   0   0   0   0   0   0   6
    0   0]
 [  1   0   0   0   0   0   0   0   0   0   5   0   0   0   0   0   0   0
    0   0]
 [  0   0   0   0   0   0   0   0   0   0   2   0   0   0   0   0   0   0
    0   0]
 [  2   0   0   0   0   0   0   0   0   0   6   0   0   0   0   0   0   2
    0   0]
 [  2   0   3   0   2   0   0   0   0   0 625   0   0   0   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [28]:
# Step 6.5: Misclassification analysis
misclassified_idx = np.where(y_true != y_pred)[0] # find misclassified samples where the correct labels and the model predicted labels are different (misclassified)

print("\nSample Misclassified Examples (max 10):")
for i in misclassified_idx[:10]:
    print(f"Text: {test_texts[i]}") # original user query
    print(f"True Label: {label_encoder.inverse_transform([y_true[i]])[0]}") # Convert label back to text (for readability purposes)
    print(f"Predicted Label: {label_encoder.inverse_transform([y_pred[i]])[0]}")
    print("------")


Sample Misclassified Examples (max 10):
Text: show flight and prices kansas city to chicago on next wednesday arriving in chicago by 7 pm
True Label: flight+airfare
Predicted Label: flight
------
Text: does tacoma airport offer transportation from the airport to the downtown area
True Label: ground_service
Predicted Label: airfare
------
Text: what day of the week do flights from nashville to tacoma fly on
True Label: day_name
Predicted Label: flight
------
Text: what days of the week do flights from san jose to nashville fly on
True Label: day_name
Predicted Label: flight
------
Text: what meals are served on american flight 811 from tampa to milwaukee
True Label: meal
Predicted Label: flight
------
Text: what meals are served on american flight 665 673 from milwaukee to seattle
True Label: meal
Predicted Label: flight
------
Text: i would like to know what airports are in los angeles
True Label: airport
Predicted Label: abbreviation
------
Text: does the airport at burbank have a fl