# Evaluation Summary: Intent Classification Models
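Compares the performance of all trained models (Logistic Regression, SVM, LSTM, and DistilBERT) on the same 20% test split, using a classification report and a confusion matrix for each.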

In [None]:
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay

# Rebuild the integer label encoding and the train/test split from the raw CSV.
# NOTE(review): the encoder is re-fit here rather than loaded from disk, and the
# split is re-created; this presumably matches what the models were trained on
# only if training used the same file, test_size and random_state -- confirm.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

df = pd.read_csv("../../intent_dataset.csv")

# Map each intent string to an integer class id and store it in a new column.
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(df['intent'])
df['label'] = encoded_labels

# 80/20 split of texts and labels with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['label'],
    test_size=0.2,
    random_state=42,
)


In [None]:
# Utility function to print a classification report and plot a confusion matrix
def display_metrics(name, y_true, y_pred):
    """Print per-class metrics and show a confusion matrix for one model.

    Args:
        name: Model name used in the printed header and the plot title.
        y_true: Ground-truth integer class ids.
        y_pred: Predicted integer class ids, aligned with ``y_true``.

    The class names come from the module-level ``label_encoder``.
    """
    class_names = label_encoder.classes_
    # Pin the label set to every encoded class. Without this, a class that is
    # absent from both y_true and y_pred (possible with a non-stratified split)
    # makes the number of observed labels differ from len(class_names), so the
    # report raises and the confusion matrix no longer matches its tick labels.
    label_ids = list(range(len(class_names)))

    print(f"\n--- {name} ---")
    print(
        classification_report(
            y_true,
            y_pred,
            labels=label_ids,
            target_names=class_names,
        )
    )

    cm = confusion_matrix(y_true, y_pred, labels=label_ids)
    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
    disp.plot(xticks_rotation=45)
    plt.title(f"Confusion Matrix - {name}")
    plt.show()


In [None]:
# Load and evaluate Logistic Regression
# Deserialize the saved classifier and its vectorizer from the logreg checkpoint dir.
logreg_model = joblib.load("../checkpoints/logreg/best_model.pkl")
logreg_vectorizer = joblib.load("../checkpoints/logreg/vectorizer.pkl")

# Turn the raw test texts into features, predict, then report.
X_test_logreg = logreg_vectorizer.transform(X_test)
y_pred_logreg = logreg_model.predict(X_test_logreg)
display_metrics(name="Logistic Regression", y_true=y_test, y_pred=y_pred_logreg)


In [None]:
# Load and evaluate SVM
# Deserialize the saved classifier and its vectorizer from the svm checkpoint dir.
svm_model = joblib.load("../checkpoints/svm/best_model.pkl")
svm_vectorizer = joblib.load("../checkpoints/svm/vectorizer.pkl")

# Turn the raw test texts into features, predict, then report.
X_test_svm = svm_vectorizer.transform(X_test)
y_pred_svm = svm_model.predict(X_test_svm)
display_metrics(name="SVM", y_true=y_test, y_pred=y_pred_svm)


In [None]:
# Load and evaluate LSTM
import torch
from torchtext.data.utils import get_tokenizer
from torch.nn.utils.rnn import pad_sequence
from torchtext.vocab import build_vocab_from_iterator

class LSTMClassifier(torch.nn.Module):
    """Text classifier: embedding layer -> LSTM -> linear output layer.

    The attribute names ``embedding``, ``lstm`` and ``fc`` are the state_dict
    key prefixes, so they must stay as they are for checkpoints to load.
    """

    def __init__(self, vocab_size, embed_dim, hidden_dim, output_dim):
        """Create the three layers.

        Args:
            vocab_size: Number of rows in the embedding table.
            embed_dim: Size of each token embedding (also the LSTM input size).
            hidden_dim: Size of the LSTM hidden state.
            output_dim: Number of output scores produced by the linear layer.
        """
        super().__init__()
        self.embedding = torch.nn.Embedding(
            num_embeddings=vocab_size,
            embedding_dim=embed_dim,
        )
        self.lstm = torch.nn.LSTM(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            batch_first=True,
        )
        self.fc = torch.nn.Linear(in_features=hidden_dim, out_features=output_dim)

    def forward(self, x):
        """Return class scores for a batch-first tensor of integer token ids.

        The scores are computed from the last layer's final hidden state.
        NOTE(review): sequences are not packed, so for padded batches the final
        hidden state is taken after any trailing pad positions.
        """
        embedded = self.embedding(x)
        _outputs, (hidden_state, _cell_state) = self.lstm(embedded)
        final_hidden = hidden_state[-1]
        return self.fc(final_hidden)

tokenizer = get_tokenizer("basic_english")


def yield_tokens(data_iter):
    """Lazily yield the tokenized form of each text in ``data_iter``."""
    yield from map(tokenizer, data_iter)


# Build the vocabulary from the training texts, with "<pad>" as a special token.
# NOTE(review): the vocabulary is rebuilt here instead of being loaded; the
# checkpoint's embedding rows presumably assume the same token -> index mapping
# as at training time -- confirm.
vocab = build_vocab_from_iterator(yield_tokens(X_train), specials=["<pad>"])

# Tokens missing from the vocabulary resolve to the "<pad>" index.
pad_index = vocab["<pad>"]
vocab.set_default_index(pad_index)

def lstm_pipeline(texts):
    """Convert an iterable of texts into one padded, batch-first id tensor.

    Each text is tokenized with the module-level ``tokenizer`` and mapped to
    ids with the module-level ``vocab``; shorter sequences are right-padded
    with the "<pad>" index.
    """
    pad_id = vocab["<pad>"]
    encoded = []
    for text in texts:
        token_ids = vocab(tokenizer(text))
        encoded.append(torch.tensor(token_ids, dtype=torch.long))
    return pad_sequence(encoded, batch_first=True, padding_value=pad_id)

# Recreate the network (embedding and hidden sizes are both 64) and restore
# the saved weights onto the CPU.
model_lstm = LSTMClassifier(
    vocab_size=len(vocab),
    embed_dim=64,
    hidden_dim=64,
    output_dim=len(label_encoder.classes_),
)
lstm_state_dict = torch.load(
    "../checkpoints/lstm/best_model.pt",
    map_location=torch.device("cpu"),
)
model_lstm.load_state_dict(lstm_state_dict)
model_lstm.eval()

# Encode the whole test set as a single padded batch and score it without
# tracking gradients.
X_test_lstm = lstm_pipeline(X_test)
with torch.no_grad():
    logits = model_lstm(X_test_lstm)

# The predicted class is the index of the highest score in each row.
y_pred_lstm = logits.argmax(dim=1).numpy()
display_metrics(name="LSTM", y_true=y_test, y_pred=y_pred_lstm)


In [None]:
# Load and evaluate DistilBERT
# Imported here so this cell does not depend on the LSTM cell having been run.
import torch
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification

tokenizer_bert = DistilBertTokenizerFast.from_pretrained("../checkpoints/bert")
model_bert = DistilBertForSequenceClassification.from_pretrained("../checkpoints/bert")
model_bert.eval()

# Tokenize the whole test set once so every example is padded to the same
# length, exactly as before.
X_test_bert = tokenizer_bert(list(X_test), return_tensors='pt', padding=True, truncation=True)

# Run the forward pass in fixed-size slices instead of one giant batch, so
# activation memory is bounded by the batch size rather than the test-set size.
BERT_EVAL_BATCH_SIZE = 32
num_bert_examples = X_test_bert["input_ids"].shape[0]
bert_logit_chunks = []
with torch.no_grad():
    for start in range(0, num_bert_examples, BERT_EVAL_BATCH_SIZE):
        stop = start + BERT_EVAL_BATCH_SIZE
        batch = {key: tensor[start:stop] for key, tensor in X_test_bert.items()}
        bert_logit_chunks.append(model_bert(**batch).logits)

# NOTE(review): assumes the model's output indices follow the same order as
# label_encoder.classes_ -- confirm against the training notebook.
logits_bert = torch.cat(bert_logit_chunks, dim=0)
y_pred_bert = torch.argmax(logits_bert, dim=1).numpy()
display_metrics("DistilBERT", y_test, y_pred_bert)
