# LSTM Intent Classifier (No TorchText)
This notebook trains an LSTM model for intent classification without using torchtext.

In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from collections import Counter
import time
import re
import os
import joblib
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# Tokenizer function
def simple_tokenizer(text):
    """Lower-case *text* and split it into word tokens.

    A token is a maximal run of ``\\w`` characters (letters, digits,
    underscore); punctuation and whitespace act as separators and are
    dropped.

    Args:
        text: The raw string to tokenize.

    Returns:
        A list of lower-cased tokens in order of appearance; empty when the
        text contains no word characters.
    """
    lowered = text.lower()
    return [match.group(0) for match in re.finditer(r'\b\w+\b', lowered)]

# Wall-clock reference point; read again by the runtime log at the end.
start_time = time.time()

# Load the raw CSV, turn the string intents into integer class ids, and
# hold out 20% of the rows for evaluation (fixed seed for a repeatable split).
df = pd.read_csv("../../intent_dataset.csv")
label_encoder = LabelEncoder()
label_encoder.fit(df['intent'])
df['label'] = label_encoder.transform(df['intent'])
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)


In [None]:
# Build the token -> id vocabulary from the training texts only.
# Ids 0 and 1 are reserved for padding and unknown tokens; every distinct
# training token then receives the next free id in first-seen order.
all_tokens = [
    token
    for tokens in map(simple_tokenizer, X_train)
    for token in tokens
]
counter = Counter(all_tokens)
vocab = {"<pad>": 0, "<unk>": 1}
vocab.update(zip(counter, range(2, len(counter) + 2)))

def encode(text):
    """Convert *text* into a list of vocabulary ids.

    The text is tokenized with ``simple_tokenizer``; tokens missing from
    ``vocab`` are mapped to the id of ``"<unk>"``.

    Args:
        text: The raw string to encode.

    Returns:
        A list of integer ids, one per token (empty if there are no tokens).
    """
    unk_id = vocab["<unk>"]
    return [vocab.get(tok, unk_id) for tok in simple_tokenizer(text)]


In [None]:
# Create Dataset and DataLoader
class IntentDataset(Dataset):
    """In-memory dataset of encoded texts paired with integer labels.

    Every text is encoded once, at construction time, into a 1-D ``long``
    tensor of token ids; labels are stored together in one ``long`` tensor.

    Args:
        texts: Iterable of raw strings.
        labels: Sequence of integer class ids aligned with ``texts``.
    """

    def __init__(self, texts, labels):
        self.texts = [
            torch.tensor(token_ids, dtype=torch.long)
            for token_ids in map(encode, texts)
        ]
        self.labels = torch.tensor(labels, dtype=torch.long)

    def __len__(self):
        """Return the number of examples (one per label)."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(token_id_tensor, label_tensor)`` pair at *idx*."""
        sequence = self.texts[idx]
        target = self.labels[idx]
        return sequence, target

def collate_batch(batch):
    """Collate ``(sequence, label)`` pairs into padded batch tensors.

    Args:
        batch: A list of ``(token_id_tensor, label)`` pairs as produced by
            ``IntentDataset.__getitem__``.

    Returns:
        A tuple ``(padded, labels)`` where ``padded`` is batch-first with
        shorter sequences right-padded using the ``"<pad>"`` id, and
        ``labels`` is a tensor built from the per-example labels.
    """
    sequences, targets = zip(*batch)
    pad_id = vocab["<pad>"]
    padded = pad_sequence(sequences, batch_first=True, padding_value=pad_id)
    label_tensor = torch.tensor(targets)
    return padded, label_tensor

# Training split: reshuffled every epoch.
train_dataset = IntentDataset(X_train.tolist(), y_train.tolist())
train_loader = DataLoader(
    train_dataset,
    batch_size=32,
    shuffle=True,
    collate_fn=collate_batch,
)

# Held-out split: fixed order so predictions line up with the labels.
test_dataset = IntentDataset(X_test.tolist(), y_test.tolist())
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    collate_fn=collate_batch,
)


In [None]:
# Define LSTM model
class LSTMClassifier(nn.Module):
    """Embedding -> single-layer LSTM -> linear head for intent classification.

    Batches are expected to be batch-first tensors of token ids that are
    right-padded with ``pad_idx``. The LSTM is run on a packed sequence so
    the hidden state used for classification is the one after each
    example's last real token, not after its trailing padding; the logits
    for an example therefore do not depend on how much padding the batch
    happened to add.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each token embedding.
        hidden_dim: Size of the LSTM hidden state.
        output_dim: Number of classes (logits produced per example).
        pad_idx: Token id used for padding. Defaults to 0, the id assigned
            to ``"<pad>"`` in this file's vocabulary. It must not be used
            for real tokens.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim, pad_idx=0):
        super().__init__()
        self.pad_idx = pad_idx
        # padding_idx keeps the pad embedding at zero and out of the gradient.
        self.embedding = nn.Embedding(vocab_size, embed_dim, padding_idx=pad_idx)
        self.lstm = nn.LSTM(embed_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Return class logits of shape ``(batch, output_dim)``.

        Args:
            x: ``long`` tensor of token ids, shape ``(batch, seq_len)``,
                right-padded with ``pad_idx``.
        """
        # An LSTM cannot consume a zero-length sequence; treat such input
        # as a single pad token per example instead of raising.
        if x.size(1) == 0:
            x = x.new_full((x.size(0), 1), self.pad_idx)

        # True length = number of non-pad ids (padding is trailing only).
        # Clamp to 1 because packing rejects zero-length rows; lengths must
        # live on the CPU for pack_padded_sequence.
        lengths = (x != self.pad_idx).sum(dim=1).clamp(min=1).cpu()

        embedded = self.embedding(x)
        packed = nn.utils.rnn.pack_padded_sequence(
            embedded, lengths, batch_first=True, enforce_sorted=False
        )
        # hn is returned in the original batch order even though packing
        # sorts internally (enforce_sorted=False).
        _, (hn, _) = self.lstm(packed)
        return self.fc(hn[-1])

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# One output logit per class known to the label encoder.
model = LSTMClassifier(
    vocab_size=len(vocab),
    embed_dim=64,
    hidden_dim=64,
    output_dim=len(label_encoder.classes_),
)
model = model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()


In [None]:
# Training loop
def train(model, loader):
    """Run one optimisation pass over every batch in *loader*.

    Uses the module-level ``device``, ``optimizer`` and ``loss_fn``. The
    model is put into training mode and updated in place; nothing is
    returned.

    Args:
        model: The network to train.
        loader: Iterable yielding ``(token_ids, labels)`` batches.
    """
    model.train()
    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous step before the
        # backward pass accumulates new ones.
        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = loss_fn(logits, targets)
        batch_loss.backward()
        optimizer.step()

# Run 5 full passes (epochs) over the training loader. train() returns
# nothing, so no per-epoch loss or metric is reported here.
for epoch in range(5):
    train(model, train_loader)
print("Training complete.")


In [None]:
# Evaluation
def evaluate(model, loader):
    """Collect gold labels and predicted class ids over *loader*.

    The model is switched to eval mode and run without gradient tracking;
    inputs are moved to the module-level ``device``.

    Args:
        model: The network to evaluate.
        loader: Iterable yielding ``(token_ids, labels)`` batches.

    Returns:
        A tuple ``(labels, predictions)`` of two equally long lists of
        Python ints, in the order the loader produced them.
    """
    model.eval()
    gold, predicted = [], []
    with torch.no_grad():
        for batch_inputs, batch_labels in loader:
            logits = model(batch_inputs.to(device))
            # argmax over the class dimension gives the predicted class id.
            predicted += logits.argmax(dim=1).cpu().tolist()
            gold += batch_labels.tolist()
    return gold, predicted

y_true, y_pred = evaluate(model, test_loader)

# Report over every class the encoder knows (ids 0..n_classes-1), not only
# the ids that happen to occur in this split. Without an explicit label
# list scikit-learn infers the labels from y_true/y_pred, and a class that
# is absent from both no longer lines up with target_names/display_labels.
class_ids = list(range(len(label_encoder.classes_)))
print(
    classification_report(
        y_true,
        y_pred,
        labels=class_ids,
        target_names=label_encoder.classes_,
    )
)

# Rows are true classes, columns are predicted classes, both in class_ids order.
cm = confusion_matrix(y_true, y_pred, labels=class_ids)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_encoder.classes_)
disp.plot(xticks_rotation=45)
plt.title("Confusion Matrix")
plt.show()


In [None]:
# Save model and components
# Three artifacts are written: the model's state_dict (weights only, not the
# module object), the token -> id vocabulary, and the fitted label encoder.
os.makedirs("../checkpoints/lstm", exist_ok=True)  # no error if the directory already exists
# NOTE(review): the file is called "best_model.pt", but no model selection is
# visible in this notebook; this saves whatever weights the model holds now.
torch.save(model.state_dict(), "../checkpoints/lstm/best_model.pt")
joblib.dump(vocab, "../checkpoints/lstm/vocab.pkl")
joblib.dump(label_encoder, "../checkpoints/lstm/label_encoder.pkl")
print("Model, vocab, and label encoder saved.")


In [None]:
# Runtime log
# NOTE(review): start_time is taken just before the dataset is loaded, so the
# interval printed here covers everything executed between that point and
# this cell, not only the training loop.
end_time = time.time()
print(f"Training completed in {end_time - start_time:.2f} seconds")


In [None]:
# Predict on custom input
def predict_intent(text):
    """Predict the intent label for a single piece of text.

    Uses the module-level ``model``, ``vocab``, ``device`` and
    ``label_encoder``.

    Args:
        text: The raw utterance to classify.

    Returns:
        The predicted intent, decoded back to its original label via
        ``label_encoder.inverse_transform``.
    """
    model.eval()
    # Empty or punctuation-only text encodes to no ids, and the LSTM cannot
    # run on a zero-length sequence; stand in a single "<unk>" token so the
    # call still returns a prediction instead of raising.
    token_ids = encode(text) or [vocab["<unk>"]]
    # unsqueeze(0) adds the batch dimension: shape (1, seq_len).
    encoded = torch.tensor(token_ids, dtype=torch.long).unsqueeze(0).to(device)
    with torch.no_grad():
        output = model(encoded)
        pred = torch.argmax(output, dim=1).item()
    return label_encoder.inverse_transform([pred])[0]

# Example: classify one free-text utterance; the call returns the decoded
# intent label for it.
predict_intent("I would like to return my order please.")
