In [1]:
import math
import random
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"


In [3]:
# ---------- Data generators ----------
def make_odd_sequences(num_seqs=2000, min_len=5, max_len=12):
    # Each sequence is a run of odd numbers with random start (odd) and small length
    # e.g., 1,3,5,7,9, ...  or 5,7,9,11,13, ...
    seqs = []
    for _ in range(num_seqs):
        L = random.randint(min_len, max_len)
        start = random.choice([1,3,5,7,9,11,13])
        seq = [start + 2*i for i in range(L)]
        seqs.append(seq)
    return seqs

In [4]:
def make_fib_sequences(num_seqs=2000, min_len=6, max_len=14):
    # Fibonacci-like with random small seeds (avoid exploding magnitudes)
    # e.g., a0=1, a1=1 -> 1,1,2,3,5,8,...
    # or random tiny seeds like (1,2) -> 1,2,3,5,8,...
    seqs = []
    for _ in range(num_seqs):
        L = random.randint(min_len, max_len)
        a0 = random.randint(0,2)
        a1 = random.randint(1,3)
        seq = [a0, a1]
        for _ in range(L-2):
            seq.append(seq[-1] + seq[-2])
        seqs.append(seq)
    return seqs

In [5]:
def to_batches(seqs, in_len=5):
    """
    Create many (input_seq -> next_value) training examples.
    Inputs shape: [batch, time, 1], Targets shape: [batch, time, 1]
    For a sequence s[0..L-1], we create sliding windows:
      x: s[0..in_len-1] -> y: s[1..in_len]
      x: s[1..in_len]   -> y: s[2..in_len+1]
      ...
    """
    X, Y = [], []
    for s in seqs:
        # simple normalization to stabilize training
        scale = max(abs(v) for v in s) if max(abs(v) for v in s) > 0 else 1.0
        ns = [v/scale for v in s]
        for start in range(0, len(ns) - in_len):
            x = ns[start:start+in_len]
            y = ns[start+1:start+in_len+1]
            X.append([[xi] for xi in x])  # time x 1
            Y.append([[yi] for yi in y])  # time x 1
    X = torch.tensor(X, dtype=torch.float32)
    Y = torch.tensor(Y, dtype=torch.float32)
    return X, Y  # [N, T, 1], [N, T, 1]


In [6]:
class VanillaRNN(nn.Module):
    def __init__(self, input_size=1, hidden_size=16, output_size=1):
        super().__init__()
        self.hidden_size = hidden_size
        # We'll implement using nn.RNN (tanh) to keep it "vanilla"
        self.rnn = nn.RNN(input_size, hidden_size, nonlinearity='tanh', batch_first=True)
        self.fc  = nn.Linear(hidden_size, output_size)

    def forward(self, x, h0=None):
        # x: [B,T,1]
        out, hT = self.rnn(x, h0)           # out: [B,T,H]
        y = self.fc(out)                    # [B,T,1]
        return y, hT


In [7]:
def train_model(X, Y, hidden_size=16, epochs=8, lr=1e-2, batch_size=128):
    ds = torch.utils.data.TensorDataset(X, Y)
    dl = torch.utils.data.DataLoader(ds, batch_size=batch_size, shuffle=True)

    model = VanillaRNN(1, hidden_size, 1).to(device)
    opt = optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()

    model.train()
    for ep in range(1, epochs+1):
        total = 0.0
        for xb, yb in dl:
            xb, yb = xb.to(device), yb.to(device)
            opt.zero_grad()
            yhat, _ = model(xb)
            loss = loss_fn(yhat, yb)
            loss.backward()
            opt.step()
            total += loss.item() * xb.size(0)
        print(f"Epoch {ep:02d} | Loss {total/len(ds):.6f}")
    return model

In [None]:
@torch.no_grad()
def generate_sequence(model, seed, steps=10):
    """
    seed: list of numbers (un-normalized). We auto-normalize internally
    so generation stays numerically stable; then de-normalize at the end.
    """
    model.eval()
    scale = max(1.0, max(abs(v) for v in seed))
    hist = [v/scale for v in seed]        # normalized working list
    in_len = len(hist)

    x = torch.tensor([[ [v] for v in hist ]], dtype=torch.float32).to(device)  # [1,T,1]
    _, h = model.rnn(x)  # prime hidden state with the seed

    last = hist[-1]
    out_vals = seed[:]   # store de-normalized values for return

    for _ in range(steps):
        inp = torch.tensor([[[last]]], dtype=torch.float32).to(device)  # [1,1,1]
        y, h = model(inp, h)               # one-step advance
        pred = y[0,0,0].item()             # normalized prediction
        denorm = pred * scale
        out_vals.append(denorm)
        last = pred

    return out_vals

In [None]:
# Training first sequence generator
odd_seqs = make_odd_sequences()
Xo, Yo = to_batches(odd_seqs, in_len=5)
odd_model = train_model(Xo, Yo, hidden_size=16, epochs=8, lr=1e-2)



Epoch 01 | Loss 0.067857
Epoch 02 | Loss 0.004303
Epoch 03 | Loss 0.001816
Epoch 04 | Loss 0.001191
Epoch 05 | Loss 0.000855
Epoch 06 | Loss 0.000676
Epoch 07 | Loss 0.000551
Epoch 08 | Loss 0.000471


In [10]:
print("\nOdd numbers generation from seed [1,3,5,7,9]:")
print(generate_sequence(odd_model, seed=[1,3,5,7,9], steps=8))



Odd numbers generation from seed [1,3,5,7,9]:
[1, 3, 5, 7, 9, 9.394219279289246, 9.634800553321838, 9.856371402740479, 10.010874152183533, 10.122614979743958, 10.216217637062073, 10.287319779396057, 10.342536807060242]


In [None]:
# Training the 2nd sequence generator
fib_seqs = make_fib_sequences()
Xf, Yf = to_batches(fib_seqs, in_len=6)
fib_model = train_model(Xf, Yf, hidden_size=32, epochs=12, lr=5e-3)



Epoch 01 | Loss 0.008856
Epoch 02 | Loss 0.000201
Epoch 03 | Loss 0.000158
Epoch 04 | Loss 0.000128
Epoch 05 | Loss 0.000110
Epoch 06 | Loss 0.000100
Epoch 07 | Loss 0.000093
Epoch 08 | Loss 0.000086
Epoch 09 | Loss 0.000081
Epoch 10 | Loss 0.000076
Epoch 11 | Loss 0.000072
Epoch 12 | Loss 0.000069


In [None]:
print("\nFibonacci-like generation from seed [1,1,2,3,5,8]:")
print(generate_sequence(fib_model, seed=[1,1,2,3,5,8], steps=8))


Fibonacci-like generation from seed [1,1,2,3,5,8]:
[5, 8, 13, 21, 34, 62.98739004135132, 84.00771617889404, 105.28031635284424, 116.44433212280273, 121.62806463241577, 123.39754962921143, 124.0987548828125, 124.3146562576294]


Q1: Carefully observe the two Vanilla RNN models. Please comment on its strength and weaknesses.
Q2: How are the two Vanilla RNN models different than a standard feedforward Neural Network? Can we do it with a feedforward NN?