# R-Tier

In [6]:
import sys
sys.path.append('..')

In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

In [8]:
from src import RTier

## Logic of R-Cell Arbitrary Output Endpoints

In [18]:
batch_size = 5
batch_pos = torch.full((1, batch_size), 0)
print(batch_pos)

tensor([[0, 0, 0, 0, 0]])


In [35]:
def one_hot_encoding(pos, dim):
    # Convert pos to LongTensor (index tensor)
    pos = pos.long()
    # Use F.one_hot to perform one-hot encoding
    return F.one_hot(pos, dim).float()

In [52]:
import torch
import torch.nn.functional as F

# Define dimensions
batch_size = 5
r_dim = 3
h_dim = 4

# Initialize h_next with some random values
h_next = torch.randn(batch_size, h_dim)

# Create a boolean mask is_summ
is_summ = torch.tensor([False, False, True, False, True])
batch_pos = torch.zeros(batch_size, dtype=torch.int)


# Initialize r with zeros
r = torch.zeros(batch_size, r_dim, device=h_next.device)

# Define weight and bias for the linear transformation
W_r = torch.randn(r_dim, h_dim)
b_r = torch.randn(r_dim)

# Compute r only for elements where is_summ is True
true_indices = is_summ.nonzero(as_tuple=True)[0]
print(r)
for i in true_indices:
    r[is_summ] = torch.tanh(F.linear(h_next[is_summ], W_r, b_r))
    print("A", r[is_summ])
    batch_pos[i] += 1
    one_hot_encoded = one_hot_encoding(batch_pos, r_dim)
    r[is_summ] += one_hot_encoded[is_summ]

    print("B", r[is_summ])
print(r)

print(is_summ.any())
# Print the results
print("h_next:")
print(h_next)
print("\nis_summ:")
print(is_summ)
print("\nr:")
print(r)
print("\nBatch Pos")
print(batch_pos)

print(one_hot_encoded)

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])
A tensor([[ 0.9976, -0.5279,  0.9949],
        [ 0.0698, -0.1847,  0.9999]])
B tensor([[ 0.9976,  0.4721,  0.9949],
        [ 1.0698, -0.1847,  0.9999]])
A tensor([[ 0.9976, -0.5279,  0.9949],
        [ 0.0698, -0.1847,  0.9999]])
B tensor([[0.9976, 0.4721, 0.9949],
        [0.0698, 0.8153, 0.9999]])
tensor([[0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.9976, 0.4721, 0.9949],
        [0.0000, 0.0000, 0.0000],
        [0.0698, 0.8153, 0.9999]])
tensor(True)
h_next:
tensor([[-0.7761, -1.1215,  0.7610, -0.0483],
        [ 1.4732, -0.1821,  0.1879,  0.0521],
        [ 0.4008, -0.3552,  2.3219,  1.2010],
        [-0.6711,  0.1885,  0.0903,  0.5825],
        [ 1.1357,  0.6900,  0.6549,  2.2198]])

is_summ:
tensor([False, False,  True, False,  True])

r:
tensor([[0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000],
        [0.9976, 0.4721, 0.9949],
    

## TEST 1 (Constant Embedding) <font color='green'>PASS</font>

In [39]:
# Hyperparameters
sequence_length = 20
batch_size = 5

behavior_dim = 2
hidden_dim = 20
run_dim = 32

num_samples = 100
learning_rate = 0.01
num_epochs = 100


# Corresponding to is_summ_flags create the y_data if is_summ_flag zero y_data should be zero otherwise 2
y_data = 3 * torch.ones((num_samples, sequence_length, run_dim), dtype=torch.float32)

# Initialize a boolean tensor with all False values
is_summ_flags = torch.zeros((num_samples, sequence_length), dtype=torch.bool)

# Iterate over each sample to set True values after the gap
for i in range(num_samples):
    idx = 0
    
    while idx < sequence_length:
        gap = 2 + torch.randint(0, 6, (1,)).item()  # random number between 0 and 5
        idx += gap
        if idx < sequence_length:
            is_summ_flags[i][idx] = True

# Sample data creation
b_data = torch.ones(num_samples, sequence_length, behavior_dim) + 9
# Create y_data
y_data = torch.zeros((num_samples, sequence_length, run_dim))

for i in range(num_samples):
    for j in range(sequence_length):
        if is_summ_flags[i][j]:
            y_data[i][j] = torch.ones(run_dim) * 2

In [40]:
b_data[0]

tensor([[10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.],
        [10., 10.]])

In [41]:
is_summ_flags[0]

tensor([False, False, False,  True, False, False, False, False, False,  True,
        False, False, False, False, False,  True, False, False, False,  True])

In [42]:
y_data[0]

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.,
         2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2., 2.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 

In [43]:
dataset = TensorDataset(b_data, is_summ_flags, y_data)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Model instantiation
model = RTier(behavior_dim, hidden_dim, run_dim)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Training loop
num_epochs = 100
for epoch in range(num_epochs):
    epoch_loss = 0
    for batch in dataloader:
        b_data, is_summ_flags, y_data = batch
        optimizer.zero_grad()
        output, hidden = model((b_data, is_summ_flags, y_data))
        loss = criterion(output, y_data)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    avg_loss = epoch_loss / len(dataloader)
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}')

Epoch [1/100], Loss: 0.2148
Epoch [2/100], Loss: 0.2010
Epoch [3/100], Loss: 0.2010
Epoch [4/100], Loss: 0.2010
Epoch [5/100], Loss: 0.2010
Epoch [6/100], Loss: 0.2010
Epoch [7/100], Loss: 0.2010
Epoch [8/100], Loss: 0.2010
Epoch [9/100], Loss: 0.2010
Epoch [10/100], Loss: 0.2010
Epoch [11/100], Loss: 0.2010
Epoch [12/100], Loss: 0.2010
Epoch [13/100], Loss: 0.2010
Epoch [14/100], Loss: 0.2010
Epoch [15/100], Loss: 0.2010
Epoch [16/100], Loss: 0.2010
Epoch [17/100], Loss: 0.2010
Epoch [18/100], Loss: 0.2010
Epoch [19/100], Loss: 0.2010
Epoch [20/100], Loss: 0.2010
Epoch [21/100], Loss: 0.2010
Epoch [22/100], Loss: 0.2010
Epoch [23/100], Loss: 0.2010
Epoch [24/100], Loss: 0.2010
Epoch [25/100], Loss: 0.2010
Epoch [26/100], Loss: 0.2010
Epoch [27/100], Loss: 0.2010
Epoch [28/100], Loss: 0.2010
Epoch [29/100], Loss: 0.2010
Epoch [30/100], Loss: 0.2010
Epoch [31/100], Loss: 0.2010
Epoch [32/100], Loss: 0.2010
Epoch [33/100], Loss: 0.2010
Epoch [34/100], Loss: 0.2010
Epoch [35/100], Loss: 0

##### One-Hot Positional Encoding

In [2]:
import torch
import torch.nn.functional as F


In [8]:
batch_size = 2
seq_len = 10
embedding_dim = 8

In [9]:
embeddings = torch.randn(batch_size, seq_len, embedding_dim)

In [10]:
embeddings

tensor([[[-0.0240,  1.6658,  0.1412, -1.5008, -0.5063, -0.7110,  1.4140,
          -0.0741],
         [-0.0411, -1.0450,  0.0989, -0.4587, -0.4409,  0.6957, -1.1044,
           0.7425],
         [-1.1492, -0.0069,  0.6731, -0.2991, -0.0801, -0.7374,  0.2929,
           1.8192],
         [ 0.7640,  1.0874, -0.2307, -0.8909, -0.1001,  0.7470,  0.0487,
          -0.7188],
         [-1.4021,  0.5715, -0.4163,  1.0333,  1.9638, -1.0180, -0.2020,
          -0.2069],
         [-0.0415,  0.9216, -0.9418, -0.4767,  0.7513,  1.6361,  1.3862,
           0.8791],
         [ 0.1723, -1.3469,  0.5914, -0.3936,  1.5920,  0.3846, -0.3332,
          -1.4006],
         [-1.9345,  0.0978, -0.0318, -1.6688, -0.6154, -1.1919, -0.5048,
           0.5712],
         [-0.5017, -0.6525,  1.5569, -0.5583, -0.0602,  1.6316, -0.2350,
          -0.3422],
         [ 0.9003, -0.2596, -0.1510,  0.3825,  0.9566, -1.1434,  1.3284,
          -0.7951]],

        [[ 1.4585, -1.6691, -1.8006, -0.6093, -0.5736, -0.2863, -1.3

In [11]:
position_ids = torch.arange(seq_len).unsqueeze(0).repeat(batch_size, 1)
one_hot_positional_encodings = F.one_hot(position_ids, num_classes=seq_len).float()

In [12]:
one_hot_positional_encodings

tensor([[[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]],

        [[1., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 1., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 1., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 0.,

In [13]:
combined_embeddings = embeddings + one_hot_positional_encodings

RuntimeError: The size of tensor a (8) must match the size of tensor b (10) at non-singleton dimension 2

In [17]:
import torch
import torch.nn.functional as F

# Parameters
batch_size = 2
seq_len = 5
embedding_dim = 8

# Step 1: Create random embeddings
embeddings = torch.randn(batch_size, seq_len, embedding_dim)

# Step 2: Create one-hot positional encodings
position_ids = torch.arange(seq_len).unsqueeze(0).repeat(batch_size, 1)
one_hot_positional_encodings = F.one_hot(position_ids, num_classes=seq_len).float()

# Step 3: Interpolate the one-hot encodings to match the embedding dimension
# Add a dummy dimension to use interpolate and then remove it
one_hot_positional_encodings = one_hot_positional_encodings.unsqueeze(1)  # Shape: (batch_size, 1, seq_len, seq_len)
one_hot_positional_encodings = F.interpolate(one_hot_positional_encodings, size=(seq_len, embedding_dim), mode='bilinear', align_corners=False)
one_hot_positional_encodings = one_hot_positional_encodings.squeeze(1)  # Shape: (batch_size, seq_len, embedding_dim)

# Step 4: Combine embeddings with positional encodings
combined_embeddings = embeddings + one_hot_positional_encodings

# Verify shapes
print(f"Embeddings shape: {embeddings.shape}")
print(f"One-hot positional encodings shape: {one_hot_positional_encodings.shape}")
print(f"Combined embeddings shape: {combined_embeddings.shape}")


Embeddings shape: torch.Size([2, 5, 8])
One-hot positional encodings shape: torch.Size([2, 5, 8])
Combined embeddings shape: torch.Size([2, 5, 8])


In [18]:
embeddings

tensor([[[ 2.0599,  0.4645,  0.5465, -1.0917,  0.0642,  0.6623,  0.6286,
          -0.8372],
         [ 0.0327,  0.6828,  1.2170,  0.2863,  0.1587,  0.5921,  0.0660,
          -0.7419],
         [ 0.8163, -1.0227,  0.2353, -0.8698, -0.6066,  0.8241,  1.3457,
          -1.2792],
         [ 1.6519, -0.8593, -1.0126,  0.0833,  1.0629, -0.7051, -1.0721,
          -0.5183],
         [ 0.3135,  1.4274, -0.2910,  0.0282, -1.4946, -1.2530, -1.1248,
          -1.0686]],

        [[-0.6167, -0.6189, -3.1582, -0.3099, -0.3236,  0.4801,  0.0295,
           0.0938],
         [-1.3505,  0.6629,  0.5287,  0.9935,  0.3090,  0.1417,  1.1674,
          -0.5419],
         [ 0.0799,  1.6759, -0.6971, -1.8678,  1.3571,  0.2774, -0.1568,
          -0.7590],
         [-1.5802, -0.8637, -0.1351,  0.7873,  1.7351, -0.1225, -1.1544,
           0.1233],
         [ 0.0084,  1.8210,  1.0898, -0.4848,  0.2294, -0.6013, -0.8195,
          -0.3167]]])

In [19]:
one_hot_positional_encodings

tensor([[[1.0000, 0.5625, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.4375, 0.9375, 0.3125, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0625, 0.6875, 0.6875, 0.0625, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.3125, 0.9375, 0.4375, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.5625, 1.0000]],

        [[1.0000, 0.5625, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.4375, 0.9375, 0.3125, 0.0000, 0.0000, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0625, 0.6875, 0.6875, 0.0625, 0.0000, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.3125, 0.9375, 0.4375, 0.0000],
         [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.5625, 1.0000]]])

In [31]:
inputs_with_pos.shape

torch.Size([2, 10, 1024])

In [10]:
import torch
import torch.nn.functional as F

def one_hot_encoding(pos, dim):
    return F.one_hot(pos, dim).float()

# Test the function with dummy input

pos = 4
batch = 5
pos = torch.full((1, batch), pos)
print(pos)
dim = 9
one_hot_encoded = one_hot_encoding(pos, dim)

print(one_hot_encoded)

tensor([[4, 4, 4, 4, 4]])
tensor([[[0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0.],
         [0., 0., 0., 0., 1., 0., 0., 0., 0.]]])


In [9]:
print(tensor)

tensor([[7, 7, 7, 7],
        [7, 7, 7, 7],
        [7, 7, 7, 7]])
