In [2]:
# ============================================
# LSTM Real-World Applications & Challenges Lab
# ============================================
# This notebook focuses on practical, real-world LSTM problems beyond synthetic data.
# Each section presents a domain, dataset idea, and hands-on challenge.
# ============================================

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import matplotlib.pyplot as plt
import numpy as np


In [None]:

# --------------------------------------------
# 1. Stock Price Prediction (Time Series)
# --------------------------------------------
# Goal: Predict the next-day closing price based on the last 20 days of data.
# Dataset idea: Use Yahoo Finance or synthetic sine waves to simulate price trends.

# Generate synthetic sine wave stock data.
# Both RNGs are seeded so the notebook reproduces under Restart & Run All:
# the NumPy generator drives the price noise, and torch's global RNG is what
# the DataLoader shuffle draws from when no explicit generator is passed.
torch.manual_seed(42)
price_rng = np.random.default_rng(42)

x = np.linspace(0, 200, 2000)
prices = np.sin(x / 5) + price_rng.normal(scale=0.2, size=len(x))

seq_len = 20

# Build every (window, next value) pair in one vectorised step.
# sliding_window_view yields len(prices) - seq_len + 1 windows; the last one
# has no following value to predict, so it is dropped with [:-1].
# Converting a single contiguous ndarray avoids the slow list-of-ndarrays path
# in torch.tensor (which also emits a UserWarning).
windows = np.lib.stride_tricks.sliding_window_view(prices, seq_len)[:-1]
targets = prices[seq_len:]

# Trailing unsqueeze adds the feature axis: X is (samples, seq_len, 1), y is (samples, 1).
X = torch.tensor(np.ascontiguousarray(windows), dtype=torch.float32).unsqueeze(-1)
y = torch.tensor(targets, dtype=torch.float32).unsqueeze(-1)

dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=32, shuffle=True)

class StockLSTM(nn.Module):
    """Single-layer LSTM followed by a linear head, for sequence-to-one regression.

    The defaults reproduce the original univariate model (1 input feature,
    64 hidden units, 1 output), so ``StockLSTM()`` behaves as before. Passing
    a larger ``input_size`` lets the same class handle multivariate series.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the LSTM hidden state.
        output_size: Number of values predicted per sequence.
    """

    def __init__(self, input_size=1, hidden_size=64, output_size=1):
        super().__init__()
        # batch_first=True -> inputs are laid out as (batch, seq_len, input_size).
        self.lstm = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        """Map a batch of sequences to one prediction per sequence.

        Args:
            x: Tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch, output_size).
        """
        out, _ = self.lstm(x)
        # Only the hidden output at the final time step feeds the linear head.
        return self.fc(out[:, -1, :])

model = StockLSTM()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Ten passes over the data; `losses` keeps the mean mini-batch MSE of each epoch.
losses = []
for epoch in range(10):
    batch_losses = []
    for inputs, targets in loader:
        optimizer.zero_grad()
        loss = criterion(model(inputs), targets)
        loss.backward()
        optimizer.step()
        batch_losses.append(loss.item())
    epoch_loss = sum(batch_losses) / len(loader)
    losses.append(epoch_loss)
    print(f"Epoch {epoch+1}: Loss {epoch_loss:.4f}")

# Loss curve: one marker per epoch.
fig, ax = plt.subplots()
ax.plot(losses, marker='o')
ax.set_title('Stock Prediction Loss Curve')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
plt.show()

# Q1: How stable is the loss curve? Does the model capture periodicity?
# Q2: Plot predicted vs. true prices and comment on overfitting.


In [None]:

# --------------------------------------------
# 2. Sentiment Analysis on Movie Reviews
# --------------------------------------------
# Task: Train an LSTM to classify short text as positive or negative.
# Dataset: IMDb or synthetic text pairs.

# Tiny hand-labelled corpus of (review text, label) pairs.
# Label 1 marks the positive reviews, label 0 the negative ones.
POSITIVE, NEGATIVE = 1, 0

sample_texts = [
    ("I loved the movie, it was fantastic!", POSITIVE),
    ("Terrible film, I hated every minute.", NEGATIVE),
    ("The plot was okay but acting was bad.", NEGATIVE),
    ("Absolutely brilliant performance!", POSITIVE),
]

# Exercise:
# 1. Tokenize and convert words to integers.
# 2. Pad sequences.
# 3. Implement nn.Embedding + nn.LSTM + nn.Linear.
# 4. Train and test on these samples.
# 5. Predict sentiment for your own sentence.

#  Q3: How does vocabulary size impact model performance?
#  Q4: Test negations like "not good" or "wasn't bad".


In [None]:

# --------------------------------------------
# 3. Energy Consumption Forecasting
# --------------------------------------------
# Domain: Smart grid / IoT data.
# Task: Predict next-hour energy use from past 24 hours.

# NOTE: this cell rebinds seq_len, X, y, dataset, loader, criterion and optimizer,
# which the stock-price cell also assigns; after running it those names refer
# to the energy data and the energy model's optimizer.

# Seed both RNGs so the series, the weight initialisation and the shuffle
# order reproduce under Restart & Run All.
torch.manual_seed(42)
energy_rng = np.random.default_rng(42)

# Synthetic load curve: a sine wave sampled every 0.1 plus Gaussian noise.
# The noise length is taken from the time axis rather than hard-coded.
energy_t = np.arange(0, 300, 0.1)
energy_data = np.sin(energy_t) + energy_rng.normal(scale=0.1, size=len(energy_t))

seq_len = 24

# Vectorised windowing: each row holds seq_len consecutive readings and the
# target is the reading right after it. The final window has no successor,
# hence [:-1]. A single contiguous ndarray avoids torch.tensor's slow
# list-of-ndarrays path (and its UserWarning).
windows = np.lib.stride_tricks.sliding_window_view(energy_data, seq_len)[:-1]
targets = energy_data[seq_len:]

# Trailing unsqueeze adds the feature axis: X is (samples, seq_len, 1), y is (samples, 1).
X = torch.tensor(np.ascontiguousarray(windows), dtype=torch.float32).unsqueeze(-1)
y = torch.tensor(targets, dtype=torch.float32).unsqueeze(-1)

dataset = TensorDataset(X, y)
loader = DataLoader(dataset, batch_size=64, shuffle=True)

model_energy = StockLSTM()
criterion = nn.MSELoss()
optimizer = optim.Adam(model_energy.parameters(), lr=0.001)

for epoch in range(5):
    total_loss = 0.0
    for bx, by in loader:
        optimizer.zero_grad()
        preds = model_energy(bx)
        loss = criterion(preds, by)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Report the mean loss over all mini-batches of the epoch; the last
    # batch's loss alone is noisy and not representative of the epoch.
    print(f"Epoch {epoch+1}: Loss={total_loss / len(loader):.4f}")

# Q5: How can LSTMs be adapted for multivariate time series (e.g., weather + load)?
# Q6: How would you evaluate forecast accuracy beyond loss (e.g., RMSE, MAE)?


In [None]:

# --------------------------------------------
# 4. Water Quality Prediction (Environmental Application)
# --------------------------------------------
# Context: Predict next-day Chlorophyll-a concentration based on past 10 days of data.
# Data can include variables like temperature, turbidity, rainfall, and pH.

# Exercise idea:
# - Create a DataFrame with synthetic features (temp, pH, turbidity, chla)
# - Train an LSTM to predict future chla values.
# - Visualize predicted vs. true curves.

# ✏️ Q7: How can feature scaling affect model learning?
# ✏️ Q8: Discuss how missing data or sensor noise could impact LSTM accuracy.
