In [43]:

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch.nn.utils.rnn import pad_sequence
from datasets import load_dataset
from collections import Counter
import numpy as np
from sklearn.metrics import classification_report, accuracy_score
import copy
import warnings
import os 

warnings.filterwarnings("ignore", category=UserWarning)


class Config:
    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    BATCH_SIZE = 32
    EMBEDDING_DIM = 100
    HIDDEN_DIM = 128
    NUM_EPOCHS = 10
    LEARNING_RATE = 0.001
    PAD_TOKEN = "<pad>"
    UNK_TOKEN = "<unk>"
    SAVE_DIR = "saved_models"
    os.makedirs(SAVE_DIR, exist_ok=True) 

print(f"Using device: {Config.DEVICE}")
print(f"Models will be saved: {Config.SAVE_DIR}")

Using device: cpu
Models will be saved: saved_models


In [44]:

class ScratchLSTMCell(nn.Module):
    
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.W = nn.Linear(input_size + hidden_size, 4 * hidden_size, bias=True)

    def forward(self, x, states):
        h_prev, c_prev = states
        combined = torch.cat([x, h_prev], dim=1)
        gates = self.W(combined)
        
    
        i, f, g, o = torch.chunk(gates, 4, dim=1)
        
        c_next = torch.sigmoid(f) * c_prev + torch.sigmoid(i) * torch.tanh(g)
        h_next = torch.sigmoid(o) * torch.tanh(c_next)
        return h_next, c_next

class ScratchLSTM(nn.Module):
    
    def __init__(self, input_size, hidden_size, bidirectional=False):
        super().__init__()
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.forward_cell = ScratchLSTMCell(input_size, hidden_size)
        if self.bidirectional:
            self.backward_cell = ScratchLSTMCell(input_size, hidden_size)

    def forward(self, x):
        batch_size, seq_len, _ = x.shape
        h0 = torch.zeros(batch_size, self.hidden_size).to(x.device)
        c0 = torch.zeros(batch_size, self.hidden_size).to(x.device)
        
        
        outputs_fwd = []
        h_t, c_t = (h0, c0)
        for t in range(seq_len):
            h_t, c_t = self.forward_cell(x[:, t, :], (h_t, c_t))
            outputs_fwd.append(h_t)
        final_h_fwd, final_c_fwd = h_t, c_t
        outputs_fwd = torch.stack(outputs_fwd, dim=1)

        if not self.bidirectional:
            return outputs_fwd, (final_h_fwd.unsqueeze(0), final_c_fwd.unsqueeze(0))

        
        outputs_bwd = []
        h_t, c_t = (h0, c0)
        for t in reversed(range(seq_len)):
            h_t, c_t = self.backward_cell(x[:, t, :], (h_t, c_t))
            outputs_bwd.insert(0, h_t)
        final_h_bwd, final_c_bwd = h_t, c_t
        outputs_bwd = torch.stack(outputs_bwd, dim=1)

        outputs = torch.cat([outputs_fwd, outputs_bwd], dim=2)
        final_h = torch.stack([final_h_fwd, final_h_bwd], dim=0)
        final_c = torch.stack([final_c_fwd, final_c_bwd], dim=0)
        return outputs, (final_h, final_c)

print("Scratch LSTM implementation is defined.")

Scratch LSTM implementation is defined.


In [45]:

def load_and_preprocess_data():
    """Load ATIS dataset with proper slot alignment"""
    dataset = load_dataset("tuetschek/atis")
    train_data, test_data = dataset['train'], dataset['test']
    
    
    word_counter = Counter()
    intent_labels = set()
    slot_labels = set(['O'])
    
    
    for split_data in [train_data, test_data]:
        for item in split_data:
            tokens = item['text'].split()
            word_counter.update([token.lower() for token in tokens])
            intent_labels.add(item['intent'])
            
            
            if item['slots']:
                slots = item['slots'].split()
                slot_labels.update(slots)
    
    
    word_vocab = {Config.PAD_TOKEN: 0, Config.UNK_TOKEN: 1}
    for i, word in enumerate(word_counter, start=2):
        word_vocab[word] = i
    
    intent_vocab = {intent: i for i, intent in enumerate(intent_labels)}
    slot_vocab = {slot: i for i, slot in enumerate(sorted(slot_labels))}
    
    
    train_dataset = ATISDataset(train_data, word_vocab, intent_vocab, slot_vocab)
    test_dataset = ATISDataset(test_data, word_vocab, intent_vocab, slot_vocab)
    
    train_loader = DataLoader(train_dataset, batch_size=Config.BATCH_SIZE, shuffle=True, 
                            collate_fn=collate_fn)
    test_loader = DataLoader(test_dataset, batch_size=Config.BATCH_SIZE, 
                           collate_fn=collate_fn)
    
    return train_loader, test_loader, {
        "word": word_vocab, "intent": intent_vocab, "slot": slot_vocab,
        "idx_to_intent": {i: intent for intent, i in intent_vocab.items()},
        "idx_to_slot": {i: slot for slot, i in slot_vocab.items()}
    }

class ATISDataset(Dataset):
    def __init__(self, data, word_vocab, intent_vocab, slot_vocab):
        self.data = data
        self.word_vocab = word_vocab
        self.intent_vocab = intent_vocab
        self.slot_vocab = slot_vocab
        
    def __len__(self): 
        return len(self.data)
    
    def __getitem__(self, idx):
        item = self.data[idx]
        tokens = item['text'].split()
        
        
        token_indices = [self.word_vocab.get(token.lower(), self.word_vocab[Config.UNK_TOKEN]) 
                        for token in tokens]
        intent_idx = self.intent_vocab[item['intent']]
        
        
        if item['slots']:
            slot_labels = [self.slot_vocab[slot] for slot in item['slots'].split()]
        else:
            slot_labels = [self.slot_vocab['O']] * len(tokens)
        
        return {
            "tokens": torch.LongTensor(token_indices),
            "intent": intent_idx,
            "slots": torch.LongTensor(slot_labels)
        }

def collate_fn(batch):
    tokens = [item['tokens'] for item in batch]
    intents = torch.LongTensor([item['intent'] for item in batch])
    slots = [item['slots'] for item in batch]
    
    padded_tokens = pad_sequence(tokens, batch_first=True, padding_value=0)
    padded_slots = pad_sequence(slots, batch_first=True, padding_value=0)
    
    return {"tokens": padded_tokens, "intents": intents, "slots": padded_slots}

In [46]:

class IntentClassifier(nn.Module):
    def __init__(self, vocab_size, intent_size, emb_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.recurrent_layer = ScratchLSTM(emb_dim, hidden_dim, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, intent_size)
        
    def forward(self, tokens):
        embedded = self.embedding(tokens)
        _, (hidden, _) = self.recurrent_layer(embedded)
        hidden = torch.cat((hidden[0], hidden[1]), dim=1)
        return self.fc(hidden)

class SlotFiller(nn.Module):
    def __init__(self, vocab_size, slot_size, emb_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.recurrent_layer = ScratchLSTM(emb_dim, hidden_dim, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, slot_size)
        
    def forward(self, tokens):
        embedded = self.embedding(tokens)
        recurrent_out, _ = self.recurrent_layer(embedded)
        return self.fc(recurrent_out)

class IntentClassifierWithSlots(nn.Module):
    def __init__(self, vocab_size, intent_size, slot_size, emb_dim, hidden_dim):
        super().__init__()
        self.word_embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.slot_embedding = nn.Embedding(slot_size, emb_dim // 2, padding_idx=0)
        self.recurrent_layer = ScratchLSTM(emb_dim + emb_dim // 2, hidden_dim, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, intent_size)
        
    def forward(self, tokens, predicted_slots):
        word_embedded = self.word_embedding(tokens)
        slot_embedded = self.slot_embedding(predicted_slots)
        combined_embedded = torch.cat((word_embedded, slot_embedded), dim=2)
        _, (hidden, _) = self.recurrent_layer(combined_embedded)
        hidden = torch.cat((hidden[0], hidden[1]), dim=1)
        return self.fc(hidden)

class SlotFillerWithIntent(nn.Module):
    def __init__(self, vocab_size, intent_size, slot_size, emb_dim, hidden_dim):
        super().__init__()
        self.word_embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.intent_embedding = nn.Embedding(intent_size, emb_dim // 4)
        self.recurrent_layer = ScratchLSTM(emb_dim + emb_dim // 4, hidden_dim, bidirectional=True)
        self.fc = nn.Linear(hidden_dim * 2, slot_size)
        
    def forward(self, tokens, predicted_intent):
        word_embedded = self.word_embedding(tokens)
        intent_embedded = self.intent_embedding(predicted_intent).unsqueeze(1)
        intent_embedded = intent_embedded.expand(-1, tokens.shape[1], -1)
        combined_embedded = torch.cat((word_embedded, intent_embedded), dim=2)
        recurrent_out, _ = self.recurrent_layer(combined_embedded)
        return self.fc(recurrent_out)

class JointModel(nn.Module):
    def __init__(self, vocab_size, intent_size, slot_size, emb_dim, hidden_dim):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, emb_dim, padding_idx=0)
        self.recurrent_layer = ScratchLSTM(emb_dim, hidden_dim, bidirectional=True)
        self.intent_fc = nn.Linear(hidden_dim * 2, intent_size)
        self.slot_fc = nn.Linear(hidden_dim * 2, slot_size)
        
    def forward(self, tokens):
        embedded = self.embedding(tokens)
        recurrent_out, (hidden, _) = self.recurrent_layer(embedded)
        final_hidden = torch.cat((hidden[0], hidden[1]), dim=1)
        return self.intent_fc(final_hidden), self.slot_fc(recurrent_out)

print("All five LSTM model architectures are defined.")


def load_intent_model(model_path, vocabs):
    
    checkpoint = torch.load(model_path, map_location=Config.DEVICE)
    model = IntentClassifier(
        checkpoint['vocab_size'],
        checkpoint['intent_size'],
        checkpoint['embedding_dim'],
        checkpoint['hidden_dim']
    ).to(Config.DEVICE)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    return model

def load_slot_model(model_path, vocabs):
    
    checkpoint = torch.load(model_path, map_location=Config.DEVICE)
    model = SlotFiller(
        checkpoint['vocab_size'],
        checkpoint['slot_size'],
        checkpoint['embedding_dim'],
        checkpoint['hidden_dim']
    ).to(Config.DEVICE)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    return model

def load_joint_model(model_path, vocabs):
    
    checkpoint = torch.load(model_path, map_location=Config.DEVICE)
    model = JointModel(
        checkpoint['vocab_size'],
        checkpoint['intent_size'],
        checkpoint['slot_size'],
        checkpoint['embedding_dim'],
        checkpoint['hidden_dim']
    ).to(Config.DEVICE)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    return model

def load_slot_to_intent_model(model_path, vocabs):
    
    checkpoint = torch.load(model_path, map_location=Config.DEVICE)
    model = IntentClassifierWithSlots(
        checkpoint['vocab_size'],
        checkpoint['intent_size'],
        checkpoint['slot_size'],
        checkpoint['embedding_dim'],
        checkpoint['hidden_dim']
    ).to(Config.DEVICE)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    return model

def load_intent_to_slot_model(model_path, vocabs):
    
    checkpoint = torch.load(model_path, map_location=Config.DEVICE)
    model = SlotFillerWithIntent(
        checkpoint['vocab_size'],
        checkpoint['intent_size'],
        checkpoint['slot_size'],
        checkpoint['embedding_dim'],
        checkpoint['hidden_dim']
    ).to(Config.DEVICE)
    model.load_state_dict(checkpoint['model_state_dict'])
    model.eval()
    return model

def demonstrate_model_loading(vocabs):
    """Show how to load saved models"""
    print("Loading saved models")
    
    try:
        intent_model = load_intent_model("saved_models/intent_model.pth", vocabs)
        slot_model = load_slot_model("saved_models/slot_model.pth", vocabs)
        joint_model = load_joint_model("saved_models/joint_model.pth", vocabs)
        slot_to_intent_model = load_slot_to_intent_model("saved_models/slot_to_intent_model.pth", vocabs)
        intent_to_slot_model = load_intent_to_slot_model("saved_models/intent_to_slot_model.pth", vocabs)
        
        print("All models loaded")
        return {
            'intent_model': intent_model,
            'slot_model': slot_model, 
            'joint_model': joint_model,
            'slot_to_intent_model': slot_to_intent_model,
            'intent_to_slot_model': intent_to_slot_model
        }
    except FileNotFoundError as e:
        print(f"Model files not found: {e}")
        print("run experiments first to train and save the models.")
        return None

print("Model loading functions are defined.")

All five LSTM model architectures are defined.
Model loading functions are defined.


In [47]:

def evaluate(model, loader, model_type, best_slot_model=None, best_intent_model=None):
    model.eval()
    results, all_preds, all_true = {}, [], []
    
    with torch.no_grad():
        if model_type == "intent":
            for batch in loader:
                outputs = model(batch['tokens'].to(Config.DEVICE))
                all_preds.extend(torch.argmax(outputs, dim=1).cpu().numpy())
                all_true.extend(batch['intents'].numpy())
            
            if all_true:  # Only compute if we have data
                report = classification_report(all_true, all_preds, output_dict=True, zero_division=0)
                results['intent_accuracy'] = accuracy_score(all_true, all_preds)
                results['intent_f1_macro'] = report['macro avg']['f1-score']
        
        elif model_type == "slot_to_intent":
            for batch in loader:
                tokens = batch['tokens'].to(Config.DEVICE)
                predicted_slots = torch.argmax(best_slot_model(tokens), dim=2)
                outputs = model(tokens, predicted_slots)
                all_preds.extend(torch.argmax(outputs, dim=1).cpu().numpy())
                all_true.extend(batch['intents'].numpy())
            
            if all_true:
                report = classification_report(all_true, all_preds, output_dict=True, zero_division=0)
                results['intent_accuracy'] = accuracy_score(all_true, all_preds)
                results['intent_f1_macro'] = report['macro avg']['f1-score']

        elif model_type == "slot" or model_type == "intent_to_slot":
            for batch in loader:
                tokens = batch['tokens'].to(Config.DEVICE)
                slots_true_np = batch['slots'].numpy()
                
                if model_type == "intent_to_slot":
                    predicted_intent = torch.argmax(best_intent_model(tokens), dim=1)
                    outputs = model(tokens, predicted_intent)
                else:
                    outputs = model(tokens)
                    
                slot_preds = torch.argmax(outputs, dim=2).cpu().numpy()
                
                for i in range(slots_true_np.shape[0]):
                    for j in range(slots_true_np.shape[1]):
                        if slots_true_np[i, j] != 0:  # Ignore padding
                            all_preds.append(slot_preds[i, j])
                            all_true.append(slots_true_np[i, j])
            
            if all_true:
                report = classification_report(all_true, all_preds, output_dict=True, zero_division=0)
                results['slot_accuracy'] = accuracy_score(all_true, all_preds)
                results['slot_f1_macro'] = report['macro avg']['f1-score']
        
        elif model_type == "joint":
            intent_preds, intent_true, slot_preds, slot_true = [], [], [], []
            
            for batch in loader:
                tokens = batch['tokens'].to(Config.DEVICE)
                intents_np, slots_np = batch['intents'].numpy(), batch['slots'].numpy()
                
                intent_logits, slot_logits = model(tokens)
                intent_preds.extend(torch.argmax(intent_logits, dim=1).cpu().numpy())
                intent_true.extend(intents_np)
                
                slot_preds_batch = torch.argmax(slot_logits, dim=2).cpu().numpy()
                for i in range(slots_np.shape[0]):
                    for j in range(slots_np.shape[1]):
                        if slots_np[i, j] != 0:  # Ignore padding
                            slot_preds.append(slot_preds_batch[i, j])
                            slot_true.append(slots_np[i, j])
            
            if intent_true:
                intent_report = classification_report(intent_true, intent_preds, output_dict=True, zero_division=0)
                results['intent_accuracy'] = accuracy_score(intent_true, intent_preds)
                results['intent_f1_macro'] = intent_report['macro avg']['f1-score']
            
            if slot_true:
                slot_report = classification_report(slot_true, slot_preds, output_dict=True, zero_division=0)
                results['slot_accuracy'] = accuracy_score(slot_true, slot_preds)
                results['slot_f1_macro'] = slot_report['macro avg']['f1-score']
    
    return results

def print_results_table(all_results):
    print("\n" + "="*85)
    print(" FINAL RESULTS SUMMARY (LSTM)")
    print("="*85)
    print(f"{'Experiment':<20} | {'Intent Accuracy':<15} | {'Intent F1':<15} | {'Slot Accuracy':<15} | {'Slot F1':<15}")
    print("-"*85)
    
    for name, res in all_results.items():
        intent_acc = f"{res.get('intent_accuracy', 0):.4f}" if 'intent_accuracy' in res else 'N/A'
        intent_f1 = f"{res.get('intent_f1_macro', 0):.4f}" if 'intent_f1_macro' in res else 'N/A'
        slot_acc = f"{res.get('slot_accuracy', 0):.4f}" if 'slot_accuracy' in res else 'N/A'
        slot_f1 = f"{res.get('slot_f1_macro', 0):.4f}" if 'slot_f1_macro' in res else 'N/A'
        
        print(f"{name:<20} | {intent_acc:<15} | {intent_f1:<15} | {slot_acc:<15} | {slot_f1:<15}")
    print("="*85)

print("Evaluation and results printing functions are defined.")


Evaluation and results printing functions are defined.


In [48]:

def run_all_experiments(train_loader, test_loader, vocabs):
    
    print(" Running experiments for : LSTM ")
    

    vocab_size = len(vocabs['word'])
    intent_size = len(vocabs['intent'])
    slot_size = len(vocabs['slot'])
    
    print(f"Model Parameters: vocab_size={vocab_size}, intent_size={intent_size}, slot_size={slot_size}")
    
    loss_fn_intent = nn.CrossEntropyLoss()
    loss_fn_slot = nn.CrossEntropyLoss(ignore_index=0)  # 0 is PAD index
    all_results = {}

    
    print(" EXPERIMENT 1: Independent Models")
    intent_model = IntentClassifier(vocab_size, intent_size, Config.EMBEDDING_DIM, Config.HIDDEN_DIM).to(Config.DEVICE)
    slot_model = SlotFiller(vocab_size, slot_size, Config.EMBEDDING_DIM, Config.HIDDEN_DIM).to(Config.DEVICE)
    
    
    print("  Training Intent Model...")
    optimizer = torch.optim.Adam(intent_model.parameters(), lr=Config.LEARNING_RATE)
    for epoch in range(Config.NUM_EPOCHS):
        intent_model.train()
        total_loss = 0
        for batch_idx, batch in enumerate(train_loader):
            optimizer.zero_grad()
            tokens = batch['tokens'].to(Config.DEVICE)
            intents = batch['intents'].to(Config.DEVICE)
            
            outputs = intent_model(tokens)
            loss = loss_fn_intent(outputs, intents)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            
        if (epoch + 1) % 2 == 0:
            print(f"    [Intent] Epoch {epoch+1}/{Config.NUM_EPOCHS}, Loss: {total_loss/len(train_loader):.4f}")
    
    
    intent_model_path = os.path.join(Config.SAVE_DIR, "intent_model.pth")
    torch.save({
        'model_state_dict': intent_model.state_dict(),
        'vocab_size': vocab_size,
        'intent_size': intent_size,
        'embedding_dim': Config.EMBEDDING_DIM,
        'hidden_dim': Config.HIDDEN_DIM
    }, intent_model_path)
    print(f" Saved intent model to: {intent_model_path}")
    
    
    print(" Training Slot Model")
    optimizer = torch.optim.Adam(slot_model.parameters(), lr=Config.LEARNING_RATE)
    for epoch in range(Config.NUM_EPOCHS):
        slot_model.train()
        total_loss = 0
        for batch_idx, batch in enumerate(train_loader):
            optimizer.zero_grad()
            tokens = batch['tokens'].to(Config.DEVICE)
            slots = batch['slots'].to(Config.DEVICE)
            
            
            if tokens.shape != slots.shape:
                continue
                
            outputs = slot_model(tokens)
            
            
            batch_size, seq_len = slots.shape
            outputs_flat = outputs.contiguous().view(batch_size * seq_len, -1)
            slots_flat = slots.contiguous().view(-1)
            
            loss = loss_fn_slot(outputs_flat, slots_flat)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            
        if (epoch + 1) % 2 == 0:
            print(f"    [Slot]   Epoch {epoch+1}/{Config.NUM_EPOCHS}, Loss: {total_loss/len(train_loader):.4f}")
    
    
    slot_model_path = os.path.join(Config.SAVE_DIR, "slot_model.pth")
    torch.save({
        'model_state_dict': slot_model.state_dict(),
        'vocab_size': vocab_size,
        'slot_size': slot_size,
        'embedding_dim': Config.EMBEDDING_DIM,
        'hidden_dim': Config.HIDDEN_DIM
    }, slot_model_path)
    print(f"Saved slot model to: {slot_model_path}")
    
    
    best_intent_model = copy.deepcopy(intent_model)
    best_slot_model = copy.deepcopy(slot_model)
    
    intent_results = evaluate(intent_model, test_loader, "intent")
    slot_results = evaluate(slot_model, test_loader, "slot")
    all_results['Independent'] = {**intent_results, **slot_results}

    
    print("\n EXPERIMENT 2: Slot -> Intent Pipeline")
    pipeline_model = IntentClassifierWithSlots(vocab_size, intent_size, slot_size, Config.EMBEDDING_DIM, Config.HIDDEN_DIM).to(Config.DEVICE)
    optimizer = torch.optim.Adam(pipeline_model.parameters(), lr=Config.LEARNING_RATE)
    
    for epoch in range(Config.NUM_EPOCHS):
        pipeline_model.train()
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            tokens = batch['tokens'].to(Config.DEVICE)
            intents = batch['intents'].to(Config.DEVICE)
            
            with torch.no_grad():
                predicted_slots = torch.argmax(best_slot_model(tokens), dim=2)
            
            outputs = pipeline_model(tokens, predicted_slots)
            loss = loss_fn_intent(outputs, intents)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            
        if (epoch + 1) % 2 == 0:
            print(f"    [S->I]   Epoch {epoch+1}/{Config.NUM_EPOCHS}, Loss: {total_loss/len(train_loader):.4f}")
    
    
    slot_to_intent_path = os.path.join(Config.SAVE_DIR, "slot_to_intent_model.pth")
    torch.save({
        'model_state_dict': pipeline_model.state_dict(),
        'vocab_size': vocab_size,
        'intent_size': intent_size,
        'slot_size': slot_size,
        'embedding_dim': Config.EMBEDDING_DIM,
        'hidden_dim': Config.HIDDEN_DIM
    }, slot_to_intent_path)
    print(f" Saved slot->intent model to: {slot_to_intent_path}")
    
    all_results['Slot -> Intent'] = evaluate(pipeline_model, test_loader, "slot_to_intent", best_slot_model=best_slot_model)

    
    print("\n EXPERIMENT 3: Intent -> Slot Pipeline")
    pipeline_model = SlotFillerWithIntent(vocab_size, intent_size, slot_size, Config.EMBEDDING_DIM, Config.HIDDEN_DIM).to(Config.DEVICE)
    optimizer = torch.optim.Adam(pipeline_model.parameters(), lr=Config.LEARNING_RATE)
    
    for epoch in range(Config.NUM_EPOCHS):
        pipeline_model.train()
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            tokens = batch['tokens'].to(Config.DEVICE)
            slots = batch['slots'].to(Config.DEVICE)
            
            
            if tokens.shape != slots.shape:
                continue
                
            with torch.no_grad():
                predicted_intent = torch.argmax(best_intent_model(tokens), dim=1)
            
            outputs = pipeline_model(tokens, predicted_intent)
            
            
            batch_size, seq_len = slots.shape
            outputs_flat = outputs.contiguous().view(batch_size * seq_len, -1)
            slots_flat = slots.contiguous().view(-1)
            
            loss = loss_fn_slot(outputs_flat, slots_flat)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            
        if (epoch + 1) % 2 == 0:
            print(f"    [I->S]   Epoch {epoch+1}/{Config.NUM_EPOCHS}, Loss: {total_loss/len(train_loader):.4f}")
    
    
    intent_to_slot_path = os.path.join(Config.SAVE_DIR, "intent_to_slot_model.pth")
    torch.save({
        'model_state_dict': pipeline_model.state_dict(),
        'vocab_size': vocab_size,
        'intent_size': intent_size,
        'slot_size': slot_size,
        'embedding_dim': Config.EMBEDDING_DIM,
        'hidden_dim': Config.HIDDEN_DIM
    }, intent_to_slot_path)
    print(f" Saved intent->slot model to: {intent_to_slot_path}")
    
    all_results['Intent -> Slot'] = evaluate(pipeline_model, test_loader, "intent_to_slot", best_intent_model=best_intent_model)

    
    print("\n EXPERIMENT 4: Joint Model (Multi-Task)")
    joint_model = JointModel(vocab_size, intent_size, slot_size, Config.EMBEDDING_DIM, Config.HIDDEN_DIM).to(Config.DEVICE)
    optimizer = torch.optim.Adam(joint_model.parameters(), lr=Config.LEARNING_RATE)
    
    for epoch in range(Config.NUM_EPOCHS):
        joint_model.train()
        total_loss = 0
        for batch in train_loader:
            optimizer.zero_grad()
            tokens = batch['tokens'].to(Config.DEVICE)
            intents = batch['intents'].to(Config.DEVICE)
            slots = batch['slots'].to(Config.DEVICE)
            
            
            if tokens.shape != slots.shape:
                continue
                
            intent_logits, slot_logits = joint_model(tokens)
            
            loss_intent = loss_fn_intent(intent_logits, intents)
            
            
            batch_size, seq_len = slots.shape
            slot_logits_flat = slot_logits.contiguous().view(batch_size * seq_len, -1)
            slots_flat = slots.contiguous().view(-1)
            loss_slot = loss_fn_slot(slot_logits_flat, slots_flat)
            
            loss = loss_intent + loss_slot
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            
        if (epoch + 1) % 2 == 0:
            print(f"    [Joint]  Epoch {epoch+1}/{Config.NUM_EPOCHS}, Loss: {total_loss/len(train_loader):.4f}")
    
    
    joint_model_path = os.path.join(Config.SAVE_DIR, "joint_model.pth")
    torch.save({
        'model_state_dict': joint_model.state_dict(),
        'vocab_size': vocab_size,
        'intent_size': intent_size,
        'slot_size': slot_size,
        'embedding_dim': Config.EMBEDDING_DIM,
        'hidden_dim': Config.HIDDEN_DIM
    }, joint_model_path)
    print(f" Saved joint model to: {joint_model_path}")
    
    all_results['Joint'] = evaluate(joint_model, test_loader, "joint")

    
    print_results_table(all_results)
    
    
    results_path = os.path.join(Config.SAVE_DIR, "experiment_results.json")
    with open(results_path, 'w') as f:
        import json
        
        serializable_results = {}
        for exp_name, metrics in all_results.items():
            serializable_results[exp_name] = {k: float(v) if hasattr(v, '__float__') else v 
                                            for k, v in metrics.items()}
        json.dump(serializable_results, f, indent=2)
    print(f"Saved experiment results to: {results_path}")
    
    return all_results

In [49]:

if __name__ == "__main__":
    print(" Running LSTM Experiments with Model Saving...")
    train_loader, test_loader, vocabs = load_and_preprocess_data()
    
    print(f"Dataset loaded: {len(train_loader)} train batches, {len(test_loader)} test batches")
    print(f"Vocab sizes: {len(vocabs['word'])} words, {len(vocabs['intent'])} intents, {len(vocabs['slot'])} slots")
    
    
    results = run_all_experiments(train_loader, test_loader, vocabs)
    
    
    loaded_models = demonstrate_model_loading(vocabs)
    
    if loaded_models:
        print("\nðŸ§ª Testing loaded models...")
        
        test_results = evaluate(loaded_models['intent_model'], test_loader, "intent")
        print(f"Loaded intent model accuracy: {test_results.get('intent_accuracy', 0):.4f}")
        
       
        test_results = evaluate(loaded_models['slot_model'], test_loader, "slot")
        print(f"Loaded slot model accuracy: {test_results.get('slot_accuracy', 0):.4f}")
        
        test_results = evaluate(loaded_models['joint_model'], test_loader, "joint")
        print(f"Loaded joint model - Intent: {test_results.get('intent_accuracy', 0):.4f}, Slot: {test_results.get('slot_accuracy', 0):.4f}")

 Running LSTM Experiments with Model Saving...
Dataset loaded: 156 train batches, 28 test batches
Vocab sizes: 943 words, 26 intents, 129 slots
 Running experiments for : LSTM 
Model Parameters: vocab_size=943, intent_size=26, slot_size=129
 EXPERIMENT 1: Independent Models
  Training Intent Model...
    [Intent] Epoch 2/10, Loss: 0.3142
    [Intent] Epoch 4/10, Loss: 0.1175
    [Intent] Epoch 6/10, Loss: 0.0397
    [Intent] Epoch 8/10, Loss: 0.0169
    [Intent] Epoch 10/10, Loss: 0.0054
 Saved intent model to: saved_models\intent_model.pth
 Training Slot Model
    [Slot]   Epoch 2/10, Loss: 0.2935
    [Slot]   Epoch 4/10, Loss: 0.0874
    [Slot]   Epoch 6/10, Loss: 0.0402
    [Slot]   Epoch 8/10, Loss: 0.0210
    [Slot]   Epoch 10/10, Loss: 0.0114
Saved slot model to: saved_models\slot_model.pth

 EXPERIMENT 2: Slot -> Intent Pipeline
    [S->I]   Epoch 2/10, Loss: 0.3096
    [S->I]   Epoch 4/10, Loss: 0.1047
    [S->I]   Epoch 6/10, Loss: 0.0489
    [S->I]   Epoch 8/10, Loss: 0.0189


  checkpoint = torch.load(model_path, map_location=Config.DEVICE)
  checkpoint = torch.load(model_path, map_location=Config.DEVICE)
  checkpoint = torch.load(model_path, map_location=Config.DEVICE)
  checkpoint = torch.load(model_path, map_location=Config.DEVICE)
  checkpoint = torch.load(model_path, map_location=Config.DEVICE)


Loaded intent model accuracy: 0.9485
Loaded slot model accuracy: 0.9753
Loaded joint model - Intent: 0.9462, Slot: 0.9749
