In [6]:
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch import nn
from sklearn.model_selection import train_test_split
import warnings 
from tqdm import tqdm
import torch.optim as optim

# Silence all Python warnings so library chatter does not flood the cell outputs.
# NOTE: this also hides genuinely useful warnings (e.g. shape-broadcast warnings from torch).
warnings.filterwarnings('ignore')

# matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8*" and
# later releases removed the old names, so prefer the new name when it exists and
# fall back to the legacy one on older installations.
_SEABORN_STYLE = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_SEABORN_STYLE)

In [29]:
# Fix both RNGs so the generated data set is repeatable.
np.random.seed(8999)
torch.manual_seed(8999)

# X collects the input sequences, Y the matching regression targets.
X, Y = [], []

# Bounds handed to np.random.randint for the sequence length
# (the upper bound is exclusive, so lengths range over LOW_LIM .. UPR_LIM - 1).
LOW_LIM = 2
UPR_LIM = 9

# Build 5000 samples.
for _ in range(5000):
  # Draw the length of this sequence.
  seq_len = np.random.randint(LOW_LIM, UPR_LIM)
  # First feature: uniform random values in [0, 1).
  values = np.random.random(seq_len).astype(np.double)
  # Second feature: marker column, all zeros for now.
  flags = np.zeros(seq_len)

  # Draw two positions and redraw until they are distinct, then mark them.
  marked = np.random.randint(seq_len, size=2)
  while marked[0] == marked[1]:
    marked = np.random.randint(seq_len, size=2)
  flags[marked] = 1

  # Input: (seq_len, 2) tensor of (value, marker) rows.
  # Target: one-element tensor holding the sum of the two marked values.
  features = np.column_stack((values, flags))
  target = np.array([values[marked[0]] + values[marked[1]]])
  X.append(torch.from_numpy(features).double())
  Y.append(torch.from_numpy(target))

print(X[0], Y[0])

tensor([[0.4494, 1.0000],
        [0.2620, 1.0000]], dtype=torch.float64) tensor([0.7114], dtype=torch.float64)


In [12]:
def data_generator(number_samples: int, max_len_input_sequence: int) -> tuple[np.ndarray, np.ndarray]:
    """Generate random variable-length sequences with two marked positions each.

    Every sample is a float array of shape ``(seq_len, 2)`` where ``seq_len`` is
    drawn uniformly from ``2 .. max_len_input_sequence`` (inclusive). Column 0
    holds uniform random values in ``[0, 1)``; column 1 is 0 everywhere except at
    two distinct, randomly chosen rows where it is 1. The target of a sample is
    the sum of the column-0 values at those two marked rows.

    Randomness comes from NumPy's global RNG, so call ``np.random.seed`` first
    for repeatable output.

    Args:
        number_samples: How many samples to generate. Values <= 0 yield empty arrays.
        max_len_input_sequence: Largest allowed sequence length; must be >= 2
            because two distinct positions are marked in every sequence.

    Returns:
        ``(sequences, sums)`` where ``sequences`` is always a 1-D object array of
        length ``number_samples`` whose elements are the per-sample
        ``(seq_len, 2)`` arrays, and ``sums`` is a 1-D float64 array of targets.

    Raises:
        ValueError: If ``max_len_input_sequence`` is smaller than 2.
    """
    if max_len_input_sequence < 2:
        raise ValueError(
            "max_len_input_sequence must be at least 2 so that two distinct "
            f"positions can be marked, got {max_len_input_sequence}"
        )

    samples = []
    targets = []

    for _ in range(number_samples):
        # Random length in [2, max_len_input_sequence]
        seq_len = np.random.randint(2, max_len_input_sequence + 1)

        # Column 0: random values; column 1: marker flags (zero for now)
        sequence = np.zeros((seq_len, 2))
        sequence[:, 0] = np.random.rand(seq_len)

        # Mark two distinct positions
        markers = np.random.choice(seq_len, 2, replace=False)
        sequence[markers, 1] = 1

        samples.append(sequence)
        targets.append(np.sum(sequence[markers, 0]))

    # Fill a pre-allocated 1-D object array element by element. Calling
    # np.array(samples, dtype=object) would instead produce a 3-D object array
    # whenever all sequences happen to share the same length, which makes the
    # return shape depend on the random draw.
    sequences = np.empty(len(samples), dtype=object)
    for idx, sequence in enumerate(samples):
        sequences[idx] = sequence
    sums = np.array(targets, dtype=np.float64)

    return sequences, sums

In [25]:
# Build 5000 samples whose sequences are at most 20 steps long.
sequences, sums = data_generator(number_samples=5000, max_len_input_sequence=20)

# Sanity check: show the first sequence together with its target sum.
print(sequences[0])
print("Sum:", sums[0])

[[0.61347943 0.        ]
 [0.54902338 1.        ]
 [0.23564526 1.        ]
 [0.77829807 0.        ]]
Sum: 0.7846686428125885


In [8]:
# Hold out 33% of the samples for testing and train on the remainder;
# the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.33,
    random_state=42,
)

In [9]:
# Use the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [10]:
# Elman-style recurrent cell, written as three linear maps:
#   h_t = tanh(U x_t + W h_{t-1})
#   y_t = V h_t
class ElmonRNN(nn.Module):
  """Single-step recurrent cell operating in float64.

  The module processes ONE time step per call; the caller loops over the
  sequence and threads the hidden state through successive calls.
  """

  def __init__(self, INP_S, HID_S, OUT_S):
    """Create the layers.

    INP_S: number of input features per time step.
    HID_S: size of the hidden state.
    OUT_S: number of output features.
    """
    super().__init__()

    # input -> hidden (no bias), hidden -> hidden, hidden -> output
    self.U = nn.Linear(in_features=INP_S, out_features=HID_S, bias=False)
    self.W = nn.Linear(in_features=HID_S, out_features=HID_S)
    self.V = nn.Linear(in_features=HID_S, out_features=OUT_S)

    # Cast every parameter to float64.
    self.double()

  def forward(self, INP, HID_State):
    """Advance the cell by one step.

    Returns a pair ``(output, new_hidden_state)``.
    """
    pre_activation = self.U(INP) + self.W(HID_State)
    next_hidden = torch.tanh(pre_activation)
    return self.V(next_hidden), next_hidden
        

In [11]:
# Re-seed torch so the weight initialisation below is repeatable.
torch.manual_seed(8999)

# Hyper-parameters
MAX_EPOCH = 25                      # number of passes over the training set
INP_S, HID_S, OUT_S = 2, 10, 1      # input / hidden / output sizes of the cell
L = 0.005                           # learning rate handed to the optimizer

# Build the recurrent cell and place it on the selected device.
Model1 = ElmonRNN(INP_S, HID_S, OUT_S).to(device)

print(Model1)

ElmonRNN(
  (U): Linear(in_features=2, out_features=10, bias=False)
  (W): Linear(in_features=10, out_features=10, bias=True)
  (V): Linear(in_features=10, out_features=1, bias=True)
)


In [None]:
# Mean training loss per epoch, keyed by epoch index (kept for plotting later).
TRAIN_LOSS_HIST = {}

# Mean-squared-error objective.
MSE_LOSS = nn.MSELoss()

# Adam optimizer over all model parameters.
OPTIMIZER = torch.optim.Adam(Model1.parameters(), lr = L)

# Move every training pair to the target device once, instead of repeating the
# host-to-device copy for every sample on every epoch.
_TRAIN_PAIRS = [(x_cpu.to(device), y_cpu.to(device)) for x_cpu, y_cpu in zip(X_train, y_train)]

# Run for MAX_EPOCH epochs.
for epoch in tqdm(range(MAX_EPOCH)):
  # Per-sample losses of the current epoch.
  EPOCH_LOSS_HIST = list()
  # One optimizer step per sequence (plain SGD-style loop, batch size 1).
  for X_, Y_ in _TRAIN_PAIRS:
    # Reset gradients accumulated by the previous sample.
    Model1.zero_grad()
    # Fresh all-zero hidden state for every sequence, created directly on the device.
    HID = torch.zeros(1, HID_S, dtype=torch.float64, device=device)
    # Start from None so an empty sequence fails loudly instead of silently
    # reusing the previous sample's output.
    OUT = None
    # Feed the sequence one time step at a time, threading the hidden state through.
    for STEP_INPUT in X_:
      OUT, HID = Model1(STEP_INPUT, HID)
    # Only the output after the last step is scored. The model output carries an
    # extra leading dimension from the (1, HID_S) hidden state, so reshape it to
    # the target's shape explicitly rather than relying on implicit broadcasting
    # (which triggers a torch warning that the global warnings filter hides).
    loss = MSE_LOSS(OUT.view_as(Y_), Y_)
    # Back-propagate through all time steps of this sequence.
    loss.backward()
    # Clip the gradient norm to guard against exploding gradients.
    nn.utils.clip_grad_norm_(Model1.parameters(), 3)
    # Update the model parameters.
    OPTIMIZER.step()
    # Record the current loss as a plain Python float.
    EPOCH_LOSS_HIST.append(loss.item())
  # Store the average loss over the whole training set for this epoch.
  TRAIN_LOSS_HIST[epoch] = torch.tensor(EPOCH_LOSS_HIST).mean()
