<a href="https://colab.research.google.com/github/ankuj/teaching/blob/main/nlp_lab_day_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>


<br>
RNN Practical — Intro to Recurrent Neural Networks<br>
Topics: Motivation, Basics, Architectures (One-to-Many, Many-to-One, etc.), Shared Parameters<br>
Instructions: Complete each task by filling in the "Your answer here" sections.<br>


In [2]:
import numpy as np

------------------------------<br>
Task 1: RNN Architectures <br>
------------------------------

In [None]:
def task1_architectures():
    """
    Match each scenario to its RNN architecture
    (One-to-One, One-to-Many, Many-to-One, Many-to-Many):
    a) Sentiment analysis of a sentence -> single label
    b) Music generation from a single start token -> output sequence
    c) Named entity recognition: tag each word in a sentence
    d) Machine translation: source sentence -> target sentence

    The answers are recorded as comments; the function returns nothing.
    """
    # Your answer here:
    # a) Many-to-One   - a whole sequence of tokens is reduced to one label
    # b) One-to-Many   - one start token is unrolled into an output sequence
    # c) Many-to-Many  - one tag is produced for every input word
    # d) Many-to-Many (encoder-decoder) - input and output are both sequences
    return None

------------------------------<br>
Task 2: Shared Parameters <br>
------------------------------

In [None]:
def task2_shared_parameters():
    """
    Explain shared parameters in an RNN.
    Compute parameter counts for an example:
      input size d=4, hidden size h=3, sequence length T=10

    The answer is recorded as comments; the function returns nothing.
    """
    # Your answer here:
    #
    # An RNN reuses the same three weight matrices (Wx, Wh, Wy) for every
    # token in the sequence instead of learning separate weights per time step.
    #
    # Shared parameter count:
    #   - Wx: d*h = 4*3 = 12
    #   - Wh: h*h = 3*3 = 9
    #   - bias b_h: h = 3
    #   - Wy: h*o, which depends on the output size o
    #         (plus an output bias of size o if the readout has one)
    #
    #   - total count = 12 + 9 + 3 + Wy = 24 + Wy
    #
    # The sequence length T=10 does not appear anywhere in the count: because
    # the weights are shared across time steps, a longer sequence adds no
    # parameters.

------------------------------<br>
Task 3: Manual Forward Pass <br>
------------------------------

In [1]:
def task3_manual_forward_pass():
    """
    Compute hidden states manually for a small RNN using np.tanh.
    Input sequence length T=3, input size=2, hidden size=2

    Each step applies
        h_t = tanh(x_t @ W_xh + h_{t-1} @ W_hh + b_h)
    starting from an all-zero hidden state.

    Returns:
        list of 3 NumPy arrays of shape (2,): the hidden state after each
        time step, in sequence order.
    """
    x_seq = [np.array([0.5, -1.0]),
             np.array([1.0, 0.0]),
             np.array([-0.5, 0.5])]
    h_prev = np.zeros(2)
    W_xh = np.array([[0.6, -0.2],
                     [0.1,  0.5]])
    W_hh = np.array([[0.3, 0.4],
                     [-0.2, 0.2]])
    b_h = np.array([0.0, 0.1])
    h_list = []

    # Your code here
    # Iterate over the inputs directly; the same W_xh, W_hh and b_h are reused
    # at every step (shared parameters).
    for x_t in x_seq:
        h_prev = np.tanh(x_t @ W_xh + h_prev @ W_hh + b_h)
        h_list.append(h_prev)

    return h_list


------------------------------<br>
Task 4: NumPy RNN Cell Implementation <br>
------------------------------

In [12]:
def _rnn_forward(sequence, W_xh, W_hh, b_h):
    """Run a tanh RNN over `sequence` from a zero state and return the final hidden state h_T."""
    h = np.zeros(W_hh.shape[0])
    for x_t in sequence:
        h = np.tanh(x_t @ W_xh + h @ W_hh + b_h)
    return h


def task4_numpy_rnn_cell():
    """
    Implement a simple Many-to-One RNN in NumPy.
    Use rnn_forward to compute h_T, then compute a readout: y = W_hy h_T + b_y
    Predict class = argmax(y)

    The weights are drawn from np.random.randn (W_xh, then W_hh, then W_hy)
    and are never trained, so the predictions depend on NumPy's global RNG
    state.

    Returns:
        (predictions, labels): a list with one predicted class index per toy
        sequence, and the array of reference labels (returned for comparison
        only; they are not used in the computation).
    """

    # Toy dataset
    toy_sequences = [
        [np.array([1.0,0.5]), np.array([0.2,0.1]), np.array([0.3,-0.1])],
        [np.array([-0.5,-0.4]), np.array([0.1,-0.2]), np.array([-0.3,-0.1])],
        [np.array([0.8,0.2]), np.array([0.5,0.4]), np.array([0.1,0.2])],
        [np.array([-0.6,-0.2]), np.array([-0.4,-0.3]), np.array([0.0,-0.1])]
    ]
    labels = np.array([1,0,1,0]) # no need to use the labels

    # answer:

    # Shapes:
    #   x_t   (2,)      one time step of input
    #   W_xh  (2, 2)    input_size x hidden_size
    #   W_hh  (2, 2)    hidden_size x hidden_size
    #   W_hy  (2, 2)    hidden_size x output_size
    #
    # final output: h_T (2,) @ W_hy (2, 2) + b_y (2,) -> (2,) class scores

    input_size = 2 # 2 dim
    hidden_size = 2 # has to be consistent
    output_size = 2 # For binary classification

    # Initialize weights and biases (randomly for demonstration)
    W_xh = np.random.randn(input_size, hidden_size)
    W_hh = np.random.randn(hidden_size, hidden_size)
    b_h = np.zeros(hidden_size)
    W_hy = np.random.randn(hidden_size, output_size)
    b_y = np.zeros(output_size)

    predictions = []
    for sequence in toy_sequences:
        # Many-to-One: only the last hidden state feeds the readout.
        h_T = _rnn_forward(sequence, W_xh, W_hh, b_h)

        # Compute y
        logits = h_T @ W_hy + b_y
        # Softmax with the max subtracted first: mathematically identical, but
        # np.exp can no longer overflow for large scores.
        exp_shifted = np.exp(logits - np.max(logits))
        y_pred = exp_shifted / np.sum(exp_shifted)

        print(f" y_pred: {y_pred}")
        predictions.append(np.argmax(y_pred))
    print("predictions , labels")
    return predictions, labels


# Run the Task 4 demo. The weights come from np.random.randn and no seed is set
# in the visible code, so the printed probabilities can differ between runs.
task4_numpy_rnn_cell()

 y_pred: [0.32929808 0.67070192]
 y_pred: [0.82727313 0.17272687]
 y_pred: [0.04731087 0.95268913]
 y_pred: [0.89906372 0.10093628]
predictions , labels


([np.int64(1), np.int64(0), np.int64(1), np.int64(0)], array([1, 0, 1, 0]))

In [None]:
"""
Goal:
- Introduction to tensors in PyTorch
- Build a simple RNN-based classifier

Dataset:
- We will classify short sequences of numbers as "increasing" or "decreasing"
  Example:
    [1, 2, 3, 4] → Label: 1 (increasing)
    [5, 3, 1, 0] → Label: 0 (decreasing)

----------------------------------------------------
"""

import torch
import torch.nn as nn
import torch.optim as optim

# ====================================================
# STEP 1: Create a Tiny Synthetic Dataset
# ====================================================

def generate_data(num_samples=100, seq_len=4):
    """Create a toy dataset of sorted random sequences.

    For every sample a coin flip (torch.rand(1) > 0.5) decides the direction:
    label 1 means the values are sorted ascending, label 0 means descending.

    Args:
        num_samples: number of sequences to generate.
        seq_len: number of values in each sequence.

    Returns:
        (X, y): X is a float tensor of shape (num_samples, seq_len, 1) and
        y is an integer tensor of shape (num_samples,) holding the labels.
    """
    sequences, targets = [], []
    for _ in range(num_samples):
        # Coin flip first, values second: the order of RNG calls per sample.
        increasing = torch.rand(1).item() > 0.5
        ordered, _positions = torch.sort(torch.rand(seq_len), descending=not increasing)
        # Trailing feature axis of size 1 -> (seq_len, input_size=1)
        sequences.append(ordered.unsqueeze(-1))
        targets.append(1 if increasing else 0)
    return torch.stack(sequences), torch.tensor(targets)

# Build the training set with the defaults (100 samples, sequence length 4).
X, y = generate_data()
# X shape → (batch_size=100, seq_len=4, input_size=1)
# y shape → (batch_size=100,), one integer label (0 or 1) per sequence

# ====================================================
# STEP 2: Define a Simple RNN Classifier
# ====================================================

class RNNClassifier(nn.Module):
    """Many-to-one RNN classifier: read a whole sequence, emit one set of class scores.

    Args:
        input_size: number of features per time step.
        hidden_size: size of the recurrent hidden state.
        num_classes: number of output classes (size of the logit vector).
    """

    def __init__(self, input_size=1, hidden_size=8, num_classes=2):
        super().__init__()
        # batch_first=True so inputs are laid out (batch, seq_len, input_size),
        # which is the layout torch.stack produces in generate_data.
        self.rnn = nn.RNN(input_size, hidden_size, batch_first=True)
        # Readout from the final hidden state to one score per class.
        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return raw class scores (logits) of shape (batch, num_classes).

        Args:
            x: float tensor of shape (batch, seq_len, input_size).
        """
        # nn.RNN returns (all outputs, final hidden state); only the final
        # state is needed. h_n has shape (num_layers, batch, hidden_size).
        _, h_n = self.rnn(x)
        # No softmax here: nn.CrossEntropyLoss expects unnormalized logits.
        return self.fc(h_n[-1])


# Instantiate the classifier with its default sizes and print its module repr.
model = RNNClassifier()
print(model)

# ====================================================
# STEP 3: Train the Model
# ====================================================

# CrossEntropyLoss takes raw (unnormalized) class scores and integer class
# labels, so the model's forward() should return logits, not probabilities.
criterion = nn.CrossEntropyLoss()
# optim.Adam raises ValueError on an empty parameter list, so RNNClassifier
# must register its layers in __init__ before this line runs.
optimizer = optim.Adam(model.parameters(), lr=0.01)
# NOTE(review): no training loop is visible in this step — TODO confirm
# whether it is meant to follow here.


# ====================================================
# STEP 4: Test the Model on New Data
# ====================================================

# Draw 10 fresh sequences (default seq_len) to evaluate the model on.
test_X, test_y = generate_data(num_samples=10)

# NOTE(review): only this header is printed in the visible code; the
# per-sample predictions presumably follow — TODO confirm.
print("\nPredictions vs Actual:")