In [None]:
# ============================================
# Module 8: From Classical to Deep NLP
# LSTM Critical Thinking & Application Lab
# ============================================
# Author: Prof. Dr. Swati Chandna
# Course: M.Sc. Applied Data Science & AI
# --------------------------------------------
# Learning Goals:
# - Understand how LSTMs differ from vanilla RNNs
# - Train & interpret an LSTM on sequential data
# - Develop critical reasoning about model behavior
# ============================================


In [None]:

# Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
import matplotlib.pyplot as plt

# Fix the random seed for reproducibility. Run this cell before the cells below
# that create the model, draw the data with torch.rand, and build the shuffling
# DataLoader, so that a fresh top-to-bottom run repeats the same random draws.
torch.manual_seed(42)


In [None]:

# --------------------------------------------
# 1. Predict Before You Run
# --------------------------------------------
# Task:
# 1. Predict the input and output shapes for each layer of the LSTM.
# 2. Which part of the sequence will influence the final prediction?
# 3. Sketch or describe what you expect the loss curve to look like.

class DemoLSTM(nn.Module):
    """Single-layer LSTM followed by a linear head producing one score per sequence.

    Args:
        input_size: Number of features per time step (default 1).
        hidden_size: Width of the LSTM hidden state (default 128).

    The defaults reproduce the original fixed architecture, so ``DemoLSTM()``
    behaves as before; the arguments let hidden-size experiments be run
    without editing the class.
    """

    def __init__(self, input_size=1, hidden_size=128):
        super().__init__()
        # batch_first=True: the batch dimension comes first in the input.
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Score each sequence in ``x``; no sigmoid is applied to the result."""
        # The final (h, c) state tuple is not used, so it is discarded; only
        # the LSTM output at the last time step is passed to the linear head.
        out, _ = self.lstm(x)
        return self.fc(out[:, -1, :])

# Instantiate the network with its default configuration and print its module
# summary. `model` is the instance that the training cell (section 3) optimises.
model = DemoLSTM()
print(model)

# Q1: Write your predictions for shapes and sequence influence below.
# (Use markdown cells to explain your reasoning)


In [6]:

# --------------------------------------------
#  2. Data Preparation
# --------------------------------------------
# Generate a synthetic dataset with 200 sequences, each 100 steps long.
# The label depends on the first element (requires long-term memory).

n_samples, seq_len = 200, 100

# Inputs: uniform random values, one scalar feature per time step.
X = torch.rand(n_samples, seq_len, 1)
# Targets: 1.0 where the very first time step exceeds 0.5, otherwise 0.0.
y = torch.gt(X[:, 0, 0], 0.5).float()

# Pair inputs with targets and serve them in shuffled mini-batches of 16.
dataset = TensorDataset(X, y)
loader = DataLoader(dataset, shuffle=True, batch_size=16)


In [None]:

# --------------------------------------------
# 3. Model Training
# --------------------------------------------
# Train the DemoLSTM model and observe the loss trend.

# Binary classification on raw model outputs: BCEWithLogitsLoss applies the
# sigmoid internally, so the model's scores are passed in unnormalised.
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

n_epochs = 10

loss_history = []
acc_history = []
model.train()  # make the mode explicit so re-running this cell is unaffected by an earlier model.eval()
for epoch in range(n_epochs):
    total_loss = 0.0
    n_correct = 0
    n_seen = 0
    for batch_x, batch_y in loader:
        optimizer.zero_grad()
        preds = model(batch_x)          # raw scores (logits), one per sequence
        batch_y = batch_y.view(-1, 1)   # match the (batch, 1) shape of preds
        loss = criterion(preds, batch_y)
        loss.backward()
        optimizer.step()

        batch_size = batch_x.size(0)
        # `loss` is a per-batch mean. Weight it by the batch size so that a
        # smaller final batch does not count as much as a full one.
        total_loss += loss.item() * batch_size
        # A logit above 0 corresponds to a predicted probability above 0.5.
        n_correct += ((preds.detach() > 0).float() == batch_y).sum().item()
        n_seen += batch_size

    # Per-sample averages over the epoch. Accuracy is a running training
    # accuracy, measured on each batch before that batch's weight update.
    avg_loss = total_loss / n_seen
    accuracy = n_correct / n_seen
    loss_history.append(avg_loss)
    acc_history.append(accuracy)
    print(f"Epoch {epoch+1} | Loss: {avg_loss:.4f} | Acc: {accuracy:.2%}")

# Plot against 1-based epoch numbers so the x-axis matches the printed log.
epochs = range(1, len(loss_history) + 1)
plt.plot(epochs, loss_history, marker='o', color='purple')
plt.title("LSTM Training Loss Curve")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.grid(True)
plt.show()

#  Q2: If accuracy is low, what might be the causes?
#  Q3: Which hyperparameter would you adjust first and why?


In [None]:

# --------------------------------------------
# 4. Compare Architectures
# --------------------------------------------
# Implement RNN and FFNN for comparison.

class FFNN(nn.Module):
    """Feed-forward baseline: flattens each sequence and applies a two-layer MLP.

    Args:
        seq_len: Number of values per flattened input sequence (default 100).
            With one feature per time step this is the sequence length; it
            must match the data or the first linear layer raises a shape error.
        hidden_dim: Width of the hidden layer (default 32).

    The defaults reproduce the original fixed architecture, so ``FFNN()``
    behaves as before.
    """
    def __init__(self, seq_len=100, hidden_dim=32):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Flatten(),                    # collapse all non-batch dimensions
            nn.Linear(seq_len, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, 1)
        )
    def forward(self, x):
        """Return one unnormalised score per sequence in ``x``."""
        return self.fc(x)

class SimpleRNN(nn.Module):
    """Plain (non-gated) recurrent baseline with a linear head on the last time step.

    Args:
        input_size: Number of features per time step (default 1).
        hidden_size: Width of the recurrent hidden state (default 64).

    The defaults reproduce the original fixed architecture, so ``SimpleRNN()``
    behaves as before.
    """
    def __init__(self, input_size=1, hidden_size=64):
        super().__init__()
        # batch_first=True: the batch dimension comes first in the input.
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)
    def forward(self, x):
        """Return one unnormalised score per sequence in ``x``."""
        # Only the output at the final time step is used; the returned hidden
        # state is discarded.
        out, _ = self.rnn(x)
        return self.fc(out[:, -1, :])

#  Q4: Train both models (FFNN, RNN) on the same dataset and compare.
#  Q5: Which model performs best and why?


In [None]:

# --------------------------------------------
#  5. What-If Experiments
# --------------------------------------------
# Predict how each change might affect results.
# Fill in this table in your notes.

# | Modification | Your Prediction | Verified Outcome |
# |---------------|----------------|------------------|
# | LSTM hidden_size 128 → 256 | | |
# | dropout = 0.5 | | |
# | Double seq_len | | |
