In [201]:
import json
import random

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [202]:
import torch
import torch.nn as nn

def elman_init(input_size, hidden_size, output_size):
    """Create the trainable parameters of an Elman RNN with a linear read-out.

    Weight matrices are sampled from a standard normal and scaled by 0.01;
    every bias starts at zero. Non-square weights are stored as
    (out_features, in_features), so the forward pass transposes them.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the recurrent state.
        output_size: Number of values produced by the read-out layer.

    Returns:
        dict mapping 'w_ih', 'w_hh', 'b_ih', 'b_hh', 'w_out', 'b_out'
        to ``nn.Parameter`` objects.
    """
    def small_normal(rows, cols):
        # Small random weights keep tanh away from saturation at the start.
        return nn.Parameter(torch.randn(rows, cols) * 0.01)

    def zero_bias(size):
        return nn.Parameter(torch.zeros(size))

    params = {}
    # Recurrent cell
    params['w_ih'] = small_normal(hidden_size, input_size)
    params['w_hh'] = small_normal(hidden_size, hidden_size)
    params['b_ih'] = zero_bias(hidden_size)
    params['b_hh'] = zero_bias(hidden_size)
    # Linear read-out applied to the last hidden state
    params['w_out'] = small_normal(output_size, hidden_size)
    params['b_out'] = zero_bias(output_size)
    return params

def elman_forward(x, params):
    """Run an Elman RNN over a batch of sequences and read out the final state.

    Args:
        x: Tensor of shape (batch_size, seq_len, input_dim). In this notebook
            the inputs are one-hot encoded symbols with input_dim = 9.
        params: Parameter dict as produced by ``elman_init``.

    Returns:
        Tensor of shape (batch_size, output_size) computed from the hidden
        state after the last time step.
    """
    n_batch, n_steps, _ = x.shape
    w_ih, w_hh = params['w_ih'], params['w_hh']
    b_ih, b_hh = params['b_ih'], params['b_hh']

    # The recurrent state starts at zero on the same device as the input.
    hidden = torch.zeros(n_batch, w_hh.shape[0], device=x.device)

    for step in range(n_steps):
        from_input = x[:, step] @ w_ih.T
        # w_hh is square and is applied without a transpose.
        from_hidden = hidden @ w_hh
        hidden = torch.tanh(from_input + from_hidden + b_ih + b_hh)

    prediction = hidden @ params['w_out'].T + params['b_out']
    return prediction.view(n_batch, -1)


In [203]:
def lstm_init(input_size, hidden_size, output_size):
    """Create the trainable parameters of an LSTM cell with a linear read-out.

    Weights are sampled from a standard normal and scaled by 0.01 and are
    stored as (in_features, out_features). Biases start at zero, except the
    forget-gate bias, which starts at one.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden and cell states.
        output_size: Number of values produced by the read-out layer.

    Returns:
        dict mapping parameter names to ``nn.Parameter`` objects, with
        input/forget/cell-candidate/output gate entries followed by
        'w_out' and 'b_out'.
    """
    def small_normal(rows, cols):
        return nn.Parameter(torch.randn(rows, cols) * 0.01)

    # (input-weight key, hidden-weight key, bias key, bias initialiser)
    gate_layout = (
        ('w_ii', 'w_hi', 'b_i', torch.zeros),  # input gate
        ('w_if', 'w_hf', 'b_f', torch.ones),   # forget gate
        ('w_ig', 'w_hg', 'b_g', torch.zeros),  # cell candidate
        ('w_io', 'w_ho', 'b_o', torch.zeros),  # output gate
    )

    params = {}
    for input_key, hidden_key, bias_key, bias_fill in gate_layout:
        params[input_key] = small_normal(input_size, hidden_size)
        params[hidden_key] = small_normal(hidden_size, hidden_size)
        params[bias_key] = nn.Parameter(bias_fill(hidden_size))

    # Linear read-out applied to the last hidden state
    params['w_out'] = small_normal(hidden_size, output_size)
    params['b_out'] = nn.Parameter(torch.zeros(output_size))
    return params

def lstm_forward(x, params):
    """Run an LSTM over a batch of sequences and read out the final hidden state.

    Args:
        x: Tensor of shape (batch_size, seq_len, input_dim).
        params: Parameter dict as produced by ``lstm_init``.

    Returns:
        Tensor of shape (batch_size, output_size) computed from the hidden
        state after the last time step.
    """
    n_batch, n_steps, _ = x.shape
    n_hidden = params['w_hi'].shape[1]

    def gate(activation, input_key, hidden_key, bias_key, x_t, h_prev):
        # Shared affine form of every gate: act(x W_x + h W_h + b).
        return activation(x_t @ params[input_key] + h_prev @ params[hidden_key] + params[bias_key])

    # Hidden and cell states both start at zero on the input's device.
    hidden = torch.zeros(n_batch, n_hidden, device=x.device)
    cell = torch.zeros(n_batch, n_hidden, device=x.device)

    for step in range(n_steps):
        x_t = x[:, step]

        input_gate = gate(torch.sigmoid, 'w_ii', 'w_hi', 'b_i', x_t, hidden)
        forget_gate = gate(torch.sigmoid, 'w_if', 'w_hf', 'b_f', x_t, hidden)
        candidate = gate(torch.tanh, 'w_ig', 'w_hg', 'b_g', x_t, hidden)
        output_gate = gate(torch.sigmoid, 'w_io', 'w_ho', 'b_o', x_t, hidden)

        # Keep part of the old memory, add part of the new candidate.
        cell = forget_gate * cell + input_gate * candidate
        hidden = output_gate * torch.tanh(cell)

    prediction = hidden @ params['w_out'] + params['b_out']
    return prediction.view(n_batch, -1)


In [204]:
def gru_init(input_size, hidden_size, output_size):
    """Create the trainable parameters of a GRU cell with a linear read-out.

    Weights are sampled from a standard normal and scaled by 0.01 and are
    stored as (in_features, out_features). All biases start at zero.

    Args:
        input_size: Number of features per time step.
        hidden_size: Width of the hidden state.
        output_size: Number of values produced by the read-out layer.

    Returns:
        dict mapping parameter names to ``nn.Parameter`` objects, with
        update-gate, reset-gate and candidate-state entries followed by
        'w_out' and 'b_out'.
    """
    def small_normal(rows, cols):
        return nn.Parameter(torch.randn(rows, cols) * 0.01)

    def zero_bias(size):
        return nn.Parameter(torch.zeros(size))

    # (input-weight key, hidden-weight key, bias key)
    gate_layout = (
        ('w_iz', 'w_hz', 'b_z'),  # update gate
        ('w_ir', 'w_hr', 'b_r'),  # reset gate
        ('w_in', 'w_hn', 'b_n'),  # candidate hidden state
    )

    params = {}
    for input_key, hidden_key, bias_key in gate_layout:
        params[input_key] = small_normal(input_size, hidden_size)
        params[hidden_key] = small_normal(hidden_size, hidden_size)
        params[bias_key] = zero_bias(hidden_size)

    # Output layer
    params['w_out'] = small_normal(hidden_size, output_size)
    params['b_out'] = zero_bias(output_size)
    return params

def gru_forward(x, params):
    """Run a GRU over a batch of sequences and read out the final hidden state.

    The reset gate is applied to the hidden state before the recurrent matrix
    multiply, and the update gate ``z`` weights the new candidate
    (``h = (1 - z) * h + z * n``).

    Args:
        x: Tensor of shape (batch_size, seq_len, input_dim).
        params: Parameter dict as produced by ``gru_init``.

    Returns:
        Tensor of shape (batch_size, output_size) computed from the hidden
        state after the last time step.
    """
    n_batch, n_steps, _ = x.shape
    n_hidden = params['w_hz'].shape[1]

    def sigmoid_gate(input_key, hidden_key, bias_key, x_t, h_prev):
        return torch.sigmoid(x_t @ params[input_key] + h_prev @ params[hidden_key] + params[bias_key])

    # The hidden state starts at zero on the input's device.
    hidden = torch.zeros(n_batch, n_hidden, device=x.device)

    for step in range(n_steps):
        x_t = x[:, step]

        update = sigmoid_gate('w_iz', 'w_hz', 'b_z', x_t, hidden)
        reset = sigmoid_gate('w_ir', 'w_hr', 'b_r', x_t, hidden)

        # Candidate state sees only the part of the history the reset gate lets through.
        candidate = torch.tanh(x_t @ params['w_in'] + (reset * hidden) @ params['w_hn'] + params['b_n'])

        # Blend the previous state with the candidate.
        hidden = (1 - update) * hidden + update * candidate

    prediction = hidden @ params['w_out'] + params['b_out']
    return prediction.view(n_batch, -1)




In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

def train_model(forward_func, params, train_x, train_y, test_x, test_y, batch_size, epochs, lr):
    """Train a functional model with Adam on an MSE objective.

    Args:
        forward_func: Callable ``forward_func(batch_x, params)`` returning the
            model predictions for a batch.
        params: dict of ``nn.Parameter`` objects; its values are optimised in place.
        train_x: Training inputs, indexed along dimension 0.
        train_y: Training targets with one entry (or row) per training sample.
        test_x: Held-out inputs, evaluated in a single forward pass per epoch.
        test_y: Held-out targets.
        batch_size: Mini-batch size (the last batch of an epoch may be smaller).
        epochs: Number of passes over the training data.
        lr: Adam learning rate.

    Returns:
        Tuple ``(train_losses, val_losses, outputs)``: the per-epoch mean
        training loss, the per-epoch held-out loss, and the predictions for
        the last training batch that was processed (``None`` if no batch ran).
    """
    train_losses = []
    val_losses = []

    optimizer = optim.Adam(params.values(), lr=lr)
    mse_loss = torch.nn.MSELoss()

    num_samples = train_x.shape[0]
    # Cast the evaluation inputs once instead of on every epoch.
    test_x = test_x.float()
    # Stays None when epochs == 0 or there is no training data.
    outputs = None

    for epoch in range(epochs):
        weighted_loss_sum = 0.0
        permutation = torch.randperm(num_samples)

        for i in range(0, num_samples, batch_size):
            indices = permutation[i:i + batch_size]
            batch_x, batch_y = train_x[indices], train_y[indices]
            batch_x = batch_x.float()

            # Clear gradients before the backward pass so nothing stale is applied.
            optimizer.zero_grad()

            outputs = forward_func(batch_x, params)
            # The forward functions return (batch, output_size) while targets may be
            # (batch,). Without aligning shapes MSELoss broadcasts to (batch, batch)
            # and silently computes the wrong quantity.
            targets = batch_y.reshape(outputs.shape).to(outputs.dtype)
            loss = mse_loss(outputs, targets)

            loss.backward()
            optimizer.step()

            # Weight by batch size so a short final batch is averaged correctly.
            weighted_loss_sum += loss.item() * batch_x.shape[0]

        # Per-sample mean over the epoch; also safe when batch_size > num_samples.
        epoch_train_loss = weighted_loss_sum / num_samples if num_samples else float('nan')
        train_losses.append(epoch_train_loss)

        with torch.no_grad():
            test_outputs = forward_func(test_x, params)
            test_targets = test_y.reshape(test_outputs.shape).to(test_outputs.dtype)
            test_loss = mse_loss(test_outputs, test_targets).item()

        val_losses.append(test_loss)

        if (epoch + 1) % 5 == 0:
            print(f'Epoch {epoch+1}/{epochs}, Train Loss: {epoch_train_loss:.4f}, Test Loss: {test_loss:.4f}')

    return train_losses, val_losses, outputs


In [None]:
import matplotlib.pyplot as plt

def plot_losses(train_losses1, train_losses2, train_losses3,base_losses,name):
    """Plot three per-epoch loss curves together with a constant baseline.

    Args:
        train_losses1: Per-epoch losses drawn with the Elman label.
        train_losses2: Per-epoch losses drawn with the GRU label.
        train_losses3: Per-epoch losses drawn with the LSTM label.
        base_losses: Scalar baseline loss (Python number or 0-dim tensor),
            drawn as a flat line across all epochs.
        name: Text appended to the figure title.

    All three curves are expected to have the same length; the x-axis is
    derived from ``train_losses1``.
    """
    # Use the function's own argument for the x-axis; the previous code read an
    # undefined global named `train_losses`.
    epochs = range(1, len(train_losses1) + 1)
    # Convert once so a 0-dim tensor baseline plots like a plain number.
    baseline = float(base_losses)

    fig, ax = plt.subplots(figsize=(8, 5))
    ax.plot(epochs, train_losses1, label='Train Loss Elman', linestyle='-')
    ax.plot(epochs, train_losses2, label='Train Loss GRU', linestyle='-')
    ax.plot(epochs, train_losses3, label='Train Loss LSTM', linestyle='-')
    ax.plot(epochs, [baseline] * len(epochs), label='Baseline Loss', linestyle='-')

    ax.set_xlabel("Epochs")
    ax.set_ylabel("Loss")
    ax.set_title(f"Training & Test Loss Over Epochs ,{name}")
    ax.legend()
    ax.grid(True)
    plt.show()

In [None]:


def encode_braces(sequence, max_len):
    """One-hot encode a bracket sequence into a fixed-size float32 matrix.

    The eight bracket characters ``( ) { } [ ] < >`` map to columns 0-7; any
    other symbol maps to column 8. Sequences longer than ``max_len`` are
    truncated, and rows past the end of a shorter sequence stay all-zero.

    Args:
        sequence: Sequence of symbols to encode.
        max_len: Number of rows in the result.

    Returns:
        ``np.ndarray`` of shape (max_len, 9) and dtype float32.
    """
    symbol_to_column = dict(zip('(){}[]<>', range(8)))
    unknown_column = len(symbol_to_column)

    one_hot = np.zeros((max_len, unknown_column + 1), dtype=np.float32)
    for row, symbol in enumerate(sequence[:max_len]):
        one_hot[row, symbol_to_column.get(symbol, unknown_column)] = 1.0

    return one_hot


def load_dataset(filename):
    """Read a UTF-8 encoded JSON file and return its parsed content.

    Requires the standard-library ``json`` module to be imported by the
    notebook's import cell.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON value. The rest of this notebook treats it as a list
        of records with 'Sequence' and 'Count' keys.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    with open(filename, 'r', encoding='utf-8') as f:
        return json.load(f)


# Load both splits and shuffle the records in place.
train_data = load_dataset("train.json")
test_data = load_dataset("test.json")

random.shuffle(train_data)
random.shuffle(test_data)

# Every sequence is padded to the longest one found in either split.
max_len = max(max(len(item['Sequence']) for item in train_data),
              max(len(item['Sequence']) for item in test_data))

# Stack the per-sample float32 matrices into one array first: building a tensor
# directly from a Python list of ndarrays is the slow path in PyTorch.
train_x = torch.from_numpy(np.stack([encode_braces(item['Sequence'], max_len) for item in train_data]))
train_y = torch.tensor([item['Count'] for item in train_data], dtype=torch.float32)

test_x = torch.from_numpy(np.stack([encode_braces(item['Sequence'], max_len) for item in test_data]))
test_y = torch.tensor([item['Count'] for item in test_data], dtype=torch.float32)
print(train_x.shape)
print(test_x.shape)
print(train_y.shape)
print(test_y.shape)

# Baseline: MSE of always predicting 1 on the training targets. `mse_loss` only
# exists inside train_model, so build the criterion here; `.item()` yields a
# plain float that can be plotted directly.
ones_tensor = torch.ones_like(train_y)
base_loss = nn.MSELoss()(ones_tensor, train_y).item()
print(test_y[:5])

In [None]:
# Elman RNN experiment: 9 one-hot input features, 8 hidden units, 1 output.
input_size = 9
hidden_size = 8
output_size = 1
params = elman_init(input_size, hidden_size, output_size)

# 20 epochs of Adam (lr = 1e-3) with mini-batches of 32 samples.
train_losses1, test_losses1, outputs1 = train_model(
    elman_forward, params,
    train_x, train_y,
    test_x, test_y,
    batch_size=32, epochs=20, lr=0.001,
)
# Peek at the predictions for the last training batch.
print(outputs1[:5])

In [None]:
# GRU experiment: 9 one-hot input features, 8 hidden units, 1 output.
input_size = 9
hidden_size = 8
output_size = 1
params = gru_init(input_size, hidden_size, output_size)

# 20 epochs of Adam (lr = 1e-3) with mini-batches of 32 samples.
train_losses2, test_losses2, outputs2 = train_model(
    gru_forward, params,
    train_x, train_y,
    test_x, test_y,
    batch_size=32, epochs=20, lr=0.001,
)
# Peek at the predictions for the last training batch.
print(outputs2[:5])

In [None]:
# LSTM experiment: 9 one-hot input features, 8 hidden units, 1 output.
input_size = 9
hidden_size = 8
output_size = 1
params = lstm_init(input_size, hidden_size, output_size)

# 20 epochs of Adam (lr = 1e-3) with mini-batches of 32 samples.
train_losses3, test_losses3, outputs3 = train_model(
    lstm_forward, params,
    train_x, train_y,
    test_x, test_y,
    batch_size=32, epochs=20, lr=0.001,
)
# Peek at the predictions for the last training batch.
print(outputs3[:5])

In [None]:
# Per-epoch training loss of the Elman, GRU and LSTM runs against the baseline.
plot_losses(
    train_losses1, train_losses2, train_losses3,
    base_loss, "Training Errors",
)
# Same comparison on the held-out losses.
# NOTE(review): base_loss is computed from the training targets; confirm that it
# is the intended reference line for the test plot as well.
plot_losses(
    test_losses1, test_losses2, test_losses3,
    base_loss, "Testing Errors",
)