In [1]:
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm
import torch
print("Using torch", torch.__version__)
import seaborn as sns
import torchvision.datasets as datasets
from torchvision import transforms
sns.set()
torch.manual_seed(42) # Setting the seed
import torch.nn.functional as F
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, Dataset

Using torch 2.1.2+cu118


In [2]:
from ac_dll_grammar_vae import print_contact_info
from ac_dll_grammar_vae.data import CFGEquationDataset
from ac_dll_grammar_vae.data.alphabet import alphabet
from ac_dll_grammar_vae.data.transforms import MathTokenEmbedding, RuleTokenEmbedding, ToTensor, Compose, PadSequencesToSameLengthV2, OneHotEncode
from train import *
from eval import *
from visualize import *

Device cuda
Device cuda
Device cuda


In [3]:
# Pick the compute device once; every later cell moves models/tensors to `device`.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device", device)

# Some GPU kernels are non-deterministic for speed. Force cuDNN into its
# deterministic mode and disable auto-tuning for reproducibility.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# The GPU random number generators are seeded separately from the CPU one.
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

Device cuda


## Character VAE

### Character Embedding:
+ Using Math Token Embedding Class

{'+': 1,
 '-': 2,
 '3': 3,
 'sqrt': 4,
 'exp': 5,
 '(': 6,
 'sin': 7,
 '/': 8,
 'x': 9,
 ')': 10,
 '*': 11,
 'log': 12,
 'cos': 13,
 '2': 14,
 '1': 15,
 ' ': 0}

In [4]:
# Build the character-level token embedding from the project alphabet and
# display its token -> index table.
# NOTE(review): the table printed below differs from the one listed in the
# markdown above, so the index assigned to each token is apparently not stable
# between runs -- confirm how `alphabet` is ordered.
emb = MathTokenEmbedding(alphabet=alphabet)
emb.token_to_idx

{'+': 1,
 '(': 2,
 '*': 3,
 'x': 4,
 '1': 5,
 '3': 6,
 'sin': 7,
 'sqrt': 8,
 ')': 9,
 'cos': 10,
 '-': 11,
 'log': 12,
 '/': 13,
 '2': 14,
 'exp': 15,
 ' ': 0}

In [5]:
# Example data: take one equation and show it before and after embedding.
data = CFGEquationDataset()

# Index the dataset exactly once. The original indexed `data[42]` twice (once
# for the print, once for the encoding); if items are produced lazily the two
# reads are not guaranteed to return the same equation.
example_equation = data[42]
print(f'Example: Data {example_equation}')

# Example encoding: map each token of the equation to its integer index.
encoded_data = emb.embed(example_equation)
print(f'Encoded Example: Data {encoded_data}')

Example: Data ['cos', '(', '(', '1', ')', ')', '+', '2']
Encoded Example: Data [10, 2, 2, 5, 9, 9, 1, 14]


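+ Example: Data ['cos', '(', '(', '1', ')', ')', '+', '2']
+ Encoded Example: Data [13, 6, 6, 15, 10, 10, 1, 14]
+ Note: these indices follow the token-to-index table listed under "Character Embedding" above. The index assigned to each token can differ between runs (compare the cell outputs), so the values printed by the cell may not match this example.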

### Creating the Training Dataset using CFG 
 + Class CFGEquationDataset is used to generate the equations

In [6]:
# Batching configuration for the character-level training data.
batch_size = 100
MAX_SEQ_LEN = 21  # every equation is padded to this many tokens

# 100k equations sampled from the CFG, each mapped to a uint8 tensor of token indices.
training = CFGEquationDataset(
        n_samples=100000,
        transform=Compose([
            MathTokenEmbedding(alphabet),
            ToTensor(dtype=torch.uint8)
        ]))

# Build the padding collate function once and reuse it. The original created
# `collate_fn`, never used it, and constructed a second identical instance
# inline with the literal 21 instead of MAX_SEQ_LEN.
collate_fn = PadSequencesToSameLengthV2(padding_value=0, max_length=MAX_SEQ_LEN)
training_loader = DataLoader(dataset=training,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate_fn)

#### Convert the Embedded equations into one hot encoded vectors

In [7]:
# One-hot encode every padded batch and stack the results into a single
# float tensor with one row per equation.
onehotencoder = OneHotEncode(alphabet)

one_hot_training_batches = []
for batch in training_loader:
    try:
        one_hot_training_batches.append(onehotencoder(batch))
    except Exception as e:
        # Best-effort: keep going, but report why the batch was dropped
        # (the original printed the batch and discarded the exception).
        print(f"Skipping batch that failed one-hot encoding: {e!r}")
        print(batch)
        continue

# Concatenate along the batch dimension. Unlike the original
# np.array(...) + view(...) round trip, this does not require every batch to
# have the same number of rows (e.g. a short final batch).
one_hot_encoded_training_tensor = torch.cat(one_hot_training_batches, dim=0).float()
one_hot_encoded_training_tensor.shape

torch.Size([100000, 21, 16])

### Creating final dataloader for model which is one-hot-encoded:
 + Setting Maximum equation length to 21

In [8]:
# Constants shared by the one-hot encoded pipeline.
MAX_SEQ_LEN = 21
batch_size = 100
# One more slot than the alphabet has symbols -- presumably for the padding
# token ' ' at index 0; TODO confirm.
alphabet_length = len(alphabet) + 1

# Loader over the stacked one-hot tensor; order is kept fixed (no shuffling).
one_hot_encoded_training_loader = DataLoader(
    one_hot_encoded_training_tensor,
    batch_size=batch_size,
    shuffle=False,
)

### Creating the Test Dataset and Dataloader:

In [None]:
# Batching configuration for the character-level test data.
batch_size = 100
MAX_SEQ_LEN = 21  # every equation is padded to this many tokens

# 1k held-out equations sampled from the CFG, embedded as uint8 index tensors.
test_dataset = CFGEquationDataset(
        n_samples=1000,
        transform=Compose([
            MathTokenEmbedding(alphabet),
            ToTensor(dtype=torch.uint8)
        ]))

# Build the padding collate function once and reuse it. The original created
# `collate_fn`, never used it, and constructed a second identical instance
# inline with the literal 21 instead of MAX_SEQ_LEN.
collate_fn = PadSequencesToSameLengthV2(padding_value=0, max_length=MAX_SEQ_LEN)
test_loader = DataLoader(dataset=test_dataset,
                              batch_size=batch_size,
                              shuffle=True,
                              collate_fn=collate_fn)

In [None]:
# One-hot encode every padded test batch and stack the results into a single
# float tensor with one row per equation.
# NOTE: `test_loader` must be the padded index loader from the previous cell;
# the hyper-parameter sweep cells further down rebind `test_loader` and
# `batch_size`, so re-run the previous cell first if a sweep has already run.
one_hot_testing_batches = []
for batch in test_loader:
    try:
        one_hot_testing_batches.append(onehotencoder(batch))
    except Exception as e:
        # Best-effort: keep going, but report why the batch was dropped
        # (the original printed the batch and discarded the exception).
        print(f"Skipping batch that failed one-hot encoding: {e!r}")
        print(batch)
        continue

# Concatenate along the batch dimension. Unlike the original
# np.array(...) + view(...) round trip, this does not require every batch to
# have the same number of rows (e.g. a short final batch).
one_hot_encoded_testing_tensor = torch.cat(one_hot_testing_batches, dim=0).float()
print(one_hot_encoded_testing_tensor.shape)

# Loader over the stacked one-hot tensor; order is kept fixed (no shuffling).
one_hot_encoded_testing_loader = DataLoader(dataset=one_hot_encoded_testing_tensor,
                              batch_size=batch_size,
                              shuffle=False)

torch.Size([1000, 21, 16])


### Model Initialization for Character AE

In [13]:
from models import EqnAE
from train import train_AEmodel

In [14]:
# Close any MLflow run left open by a previously interrupted sweep so that the
# next `start_run()` does not fail with "run already active".
# Import here explicitly: this cell runs before the section's `import mlflow`,
# so on a fresh kernel the name is only available if a star import leaks it.
import mlflow

mlflow.end_run()

In [15]:
import mlflow
from hyperparam_optim import HyperparameterOptimization
from tracking import MLFlowTracker
import itertools
from visualize import visualize_latent_space_Eqn

# Grid search over the Character AE hyper-parameters. Each combination is
# trained, saved, evaluated on the test set and tracked as one MLflow run.
hyperParamOptimization = HyperparameterOptimization('./hyperparameters/params.yaml')
params = hyperParamOptimization.get_params()

#mlflow.create_experiment('Character AE Equation V1')
mlflow.set_experiment('Character AE Equation V1')

# Loop-invariant model dimensions.
alphabet_length = len(alphabet) + 1
MAX_SEQ_LEN = 21

# Name -> class lookup tables. With the original if/elif chains an unknown
# name silently left `loss_module` / `optimizer` undefined, or bound to the
# object from the previous combination.
loss_classes = {
    'BCE': nn.BCELoss,
    'MSE': nn.MSELoss,
}
optimizer_classes = {
    'Adam': torch.optim.Adam,
    'SGD': torch.optim.SGD,
    'Adadelta': torch.optim.Adadelta,
    'Adagrad': torch.optim.Adagrad,
    'RMSprop': torch.optim.RMSprop,
}

total_samples = one_hot_encoded_training_tensor.shape[0]

hyperparameter_combinations = itertools.product(params.M_LATENTDIMENSION, params.T_NUMEPOCHS, params.T_LR, params.T_BATCHSIZE, params.T_LOSS, params.T_OPTIMIZER, params.T_TRAINVALIDRATIO)
for combination in hyperparameter_combinations:
    latent_dim, num_epochs, lr, batch_size, loss_function, optimizer_name, train_valid_ratio = combination

    # The context manager ends the run even when an iteration raises, so a
    # failed combination no longer leaves a dangling active run behind.
    with mlflow.start_run():
        if loss_function not in loss_classes:
            raise ValueError(f"Unsupported loss function: {loss_function!r}")
        if optimizer_name not in optimizer_classes:
            raise ValueError(f"Unsupported optimizer: {optimizer_name!r}")

        # Model, loss and optimizer for this combination.
        model = EqnAE(alphabet_length,MAX_SEQ_LEN,latent_rep_size=latent_dim)
        model.to(device)
        loss_module = loss_classes[loss_function](reduction="sum")
        optimizer = optimizer_classes[optimizer_name](model.parameters(), lr=lr)

        # Positional train/validation split: the last `valid_size` rows are
        # held out for validation.
        valid_size = int(total_samples * train_valid_ratio)
        train_size = total_samples - valid_size

        train_tensor = one_hot_encoded_training_tensor[:train_size]
        valid_tensor = one_hot_encoded_training_tensor[train_size:]

        train_loader = DataLoader(dataset=train_tensor,
                                  batch_size=batch_size,
                                  shuffle=False)

        val_loader = DataLoader(dataset=valid_tensor,
                                  batch_size=batch_size,
                                  shuffle=False)

        # Log parameters to MLflow
        mlflow.log_params({
            "latent_dimension": latent_dim,
            "num_epochs": num_epochs,
            "learning_rate": lr,
            "batch_size": batch_size,
            "loss_function": loss_function,
            "optimizer": optimizer_name,
            "train_valid_ratio": train_valid_ratio
        })

        train_EqnAE(model, train_loader, val_loader, loss_module, optimizer, num_epochs=num_epochs)

        # Persist both the pickled module and its state dict on disk.
        torch.save(model,f'./saved/models/EQN_AE_{latent_dim}_{batch_size}_{loss_function}_{lr}_{num_epochs}_{optimizer_name}_{train_valid_ratio}_Full.pth')
        torch.save(model.state_dict(),f'./saved/models/EQN_AE_{latent_dim}_{batch_size}_{loss_function}_{lr}_{num_epochs}_{optimizer_name}_{train_valid_ratio}_dict.pth')

        # Save the model
        mlflow.pytorch.log_model(model, "model")

        # NOTE: this rebinds the notebook-level `test_loader` (and the loop
        # rebinds `batch_size`); re-run the test-data cells before re-encoding.
        test_tensor = one_hot_encoded_testing_tensor
        test_loader = DataLoader(dataset=test_tensor,
                                  batch_size=batch_size,
                                  shuffle=False)
        model.to(device)
        visualize_latent_space_Eqn(model, test_loader,False)
        mlflow.log_artifact("./plots/LatentSpace_plot.png", artifact_path="plots")

        # Decode every test equation and its reconstruction back to tokens.
        # NOTE: `emb` must still be the MathTokenEmbedding created in the
        # Character Embedding cell; the Grammar VAE section rebinds `emb`.
        one_hot_decoded = []
        one_hot_decoded_recon = []
        model.eval()
        with torch.no_grad():  # inference only, no autograd graph needed
            for sample in test_loader:
                sample = sample.float().to(device)
                recon = model(sample)
                for idx, ele in enumerate(recon):
                    # The arg-max over the alphabet axis is the predicted token
                    # index at each position (the original built a one-hot
                    # matrix and took its arg-max again, which is the same).
                    embd = torch.argmax(ele, dim=1)
                    one_hot_decoded.append(emb.decode(torch.argmax(sample[idx], dim=1)))
                    one_hot_decoded_recon.append(emb.decode(embd))

        # Write the comparison file once, after all batches are decoded. The
        # original rewrote and re-logged the file on every batch.
        output_filename = f"./output/output_{latent_dim}_{batch_size}_{loss_function}_{lr}_{num_epochs}_{optimizer_name}_{train_valid_ratio}.txt"
        with open(output_filename, "w") as file:
            for actual_tokens, recon_tokens in zip(one_hot_decoded[:1000], one_hot_decoded_recon[:1000]):
                actual_equation = ''.join(actual_tokens)
                decoded_equation = ''.join(recon_tokens)
                file.write(f'Actual Equation: {actual_equation}\n')
                file.write(f'AE Decoded Equation: {decoded_equation}\n\n')

        # Log the file as an artifact in MLflow
        mlflow.log_artifact(output_filename, artifact_path="outputs")

====> Epoch: 0 Average Training loss: 70.77181650
====> Epoch: 0 Average Validation loss: 69.16035659
====> Epoch: 1 Average Training loss: 68.53184343
====> Epoch: 1 Average Validation loss: 68.13218423
====> Epoch: 2 Average Training loss: 67.67414947
====> Epoch: 2 Average Validation loss: 67.55131982
====> Epoch: 3 Average Training loss: 67.20789350
====> Epoch: 3 Average Validation loss: 67.20708716
====> Epoch: 4 Average Training loss: 66.87045394
====> Epoch: 4 Average Validation loss: 66.81307749
====> Epoch: 5 Average Training loss: 66.61108429
====> Epoch: 5 Average Validation loss: 66.67391694
====> Epoch: 6 Average Training loss: 66.37322133
====> Epoch: 6 Average Validation loss: 66.44243198
====> Epoch: 7 Average Training loss: 66.19692827
====> Epoch: 7 Average Validation loss: 66.26117349
====> Epoch: 8 Average Training loss: 66.03364886
====> Epoch: 8 Average Validation loss: 66.13133955
====> Epoch: 9 Average Training loss: 65.91795974
====> Epoch: 9 Average Validatio



====> Epoch: 14 Average Validation loss: 65.70504473




(1000, 10)
====> Epoch: 0 Average Training loss: 72.29415484
====> Epoch: 0 Average Validation loss: 71.68044658
====> Epoch: 1 Average Training loss: 70.61529671
====> Epoch: 1 Average Validation loss: 70.28687285
====> Epoch: 2 Average Training loss: 69.62725624
====> Epoch: 2 Average Validation loss: 69.36927588
====> Epoch: 3 Average Training loss: 69.00852592
====> Epoch: 3 Average Validation loss: 69.00159707
====> Epoch: 4 Average Training loss: 68.05498070
====> Epoch: 4 Average Validation loss: 68.29976904
====> Epoch: 5 Average Training loss: 67.47255576
====> Epoch: 5 Average Validation loss: 67.45084902
====> Epoch: 6 Average Training loss: 67.21568488
====> Epoch: 6 Average Validation loss: 67.26053916
====> Epoch: 7 Average Training loss: 67.00963681
====> Epoch: 7 Average Validation loss: 67.03398184
====> Epoch: 8 Average Training loss: 66.84408646
====> Epoch: 8 Average Validation loss: 66.89670449
====> Epoch: 9 Average Training loss: 66.67709169
====> Epoch: 9 Averag



====> Epoch: 14 Average Validation loss: 66.33570889




(1000, 10)
====> Epoch: 0 Average Training loss: 70.43864831
====> Epoch: 0 Average Validation loss: 68.71134014
====> Epoch: 1 Average Training loss: 68.03017476
====> Epoch: 1 Average Validation loss: 67.62979146
====> Epoch: 2 Average Training loss: 67.18655681
====> Epoch: 2 Average Validation loss: 67.11268047
====> Epoch: 3 Average Training loss: 66.69171313
====> Epoch: 3 Average Validation loss: 67.24404849
====> Epoch: 4 Average Training loss: 66.41308485
====> Epoch: 4 Average Validation loss: 66.46257402
====> Epoch: 5 Average Training loss: 66.12420164
====> Epoch: 5 Average Validation loss: 66.27077388
====> Epoch: 6 Average Training loss: 65.89856599
====> Epoch: 6 Average Validation loss: 66.01782075
====> Epoch: 7 Average Training loss: 65.79825651
====> Epoch: 7 Average Validation loss: 66.00281406
====> Epoch: 8 Average Training loss: 65.54786136
====> Epoch: 8 Average Validation loss: 65.72607871
====> Epoch: 9 Average Training loss: 65.40284877
====> Epoch: 9 Averag



====> Epoch: 14 Average Validation loss: 65.11451030




(1000, 25)
====> Epoch: 0 Average Training loss: 72.37756420
====> Epoch: 0 Average Validation loss: 71.47946104
====> Epoch: 1 Average Training loss: 70.82036498
====> Epoch: 1 Average Validation loss: 70.49105273
====> Epoch: 2 Average Training loss: 70.05188915
====> Epoch: 2 Average Validation loss: 69.96459355
====> Epoch: 3 Average Training loss: 69.63948443
====> Epoch: 3 Average Validation loss: 69.57502627
====> Epoch: 4 Average Training loss: 69.25108210
====> Epoch: 4 Average Validation loss: 69.26501191
====> Epoch: 5 Average Training loss: 68.91218867
====> Epoch: 5 Average Validation loss: 69.04093613
====> Epoch: 6 Average Training loss: 68.63095239
====> Epoch: 6 Average Validation loss: 68.70655322
====> Epoch: 7 Average Training loss: 67.87415828
====> Epoch: 7 Average Validation loss: 67.44739648
====> Epoch: 8 Average Training loss: 67.13601527
====> Epoch: 8 Average Validation loss: 67.17347832
====> Epoch: 9 Average Training loss: 66.89234175
====> Epoch: 9 Averag



====> Epoch: 14 Average Validation loss: 66.39101328




(1000, 25)
====> Epoch: 0 Average Training loss: 70.45414444
====> Epoch: 0 Average Validation loss: 68.64343037
====> Epoch: 1 Average Training loss: 67.90581362
====> Epoch: 1 Average Validation loss: 67.53840703
====> Epoch: 2 Average Training loss: 67.09022186
====> Epoch: 2 Average Validation loss: 67.10429668
====> Epoch: 3 Average Training loss: 66.64288443
====> Epoch: 3 Average Validation loss: 66.74046079
====> Epoch: 4 Average Training loss: 66.32134214
====> Epoch: 4 Average Validation loss: 66.53101572
====> Epoch: 5 Average Training loss: 66.06320699
====> Epoch: 5 Average Validation loss: 66.35591216
====> Epoch: 6 Average Training loss: 65.85816267
====> Epoch: 6 Average Validation loss: 66.00303564
====> Epoch: 7 Average Training loss: 65.67679004
====> Epoch: 7 Average Validation loss: 65.92229624
====> Epoch: 8 Average Training loss: 65.46291150
====> Epoch: 8 Average Validation loss: 65.70338423
====> Epoch: 9 Average Training loss: 65.32175934
====> Epoch: 9 Averag



====> Epoch: 14 Average Validation loss: 65.09967095




(1000, 50)
====> Epoch: 0 Average Training loss: 71.26241223
====> Epoch: 0 Average Validation loss: 69.70701406
====> Epoch: 1 Average Training loss: 68.67698563
====> Epoch: 1 Average Validation loss: 68.16436973
====> Epoch: 2 Average Training loss: 67.69446395
====> Epoch: 2 Average Validation loss: 67.52249775
====> Epoch: 3 Average Training loss: 67.12695942
====> Epoch: 3 Average Validation loss: 67.08659102
====> Epoch: 4 Average Training loss: 66.72908318
====> Epoch: 4 Average Validation loss: 66.82385811
====> Epoch: 5 Average Training loss: 66.45163842
====> Epoch: 5 Average Validation loss: 66.53947559
====> Epoch: 6 Average Training loss: 66.19764998
====> Epoch: 6 Average Validation loss: 66.45412295
====> Epoch: 7 Average Training loss: 65.99848103
====> Epoch: 7 Average Validation loss: 66.21177842
====> Epoch: 8 Average Training loss: 65.79878806
====> Epoch: 8 Average Validation loss: 66.05497803
====> Epoch: 9 Average Training loss: 65.65715591
====> Epoch: 9 Averag



====> Epoch: 14 Average Validation loss: 65.44846514




(1000, 50)


### Understanding and visualizing some outputs:
+ Decode the one-hot vectors back into token indices, then use idxtotoken (the index-to-token mapping) to convert them back into equations:

### Character VAE Model

In [11]:
from models import EqnVAE
from train import train_EqnVAE

### Model Initialization and Hyperparameter Optimization for Character VAE

In [14]:
# Close any MLflow run left open by a previously interrupted sweep so that the
# next `start_run()` does not fail with "run already active".
# Import here explicitly so this cell does not depend on `mlflow` having been
# brought into the namespace by another cell or a star import.
import mlflow

mlflow.end_run()

In [15]:
import mlflow
from hyperparam_optim import HyperparameterOptimization
from tracking import MLFlowTracker
import itertools
from visualize import visualize_latent_space_Eqn
import gc
import time

# Grid search over the Character VAE hyper-parameters. Each combination is
# trained, saved, evaluated on the test set and tracked as one MLflow run.
hyperParamOptimization = HyperparameterOptimization('./hyperparameters/params.yaml')
params = hyperParamOptimization.get_params()

#mlflow.create_experiment('Character VAE Equation V2')
mlflow.set_experiment('Character VAE Equation V2')

# Loop-invariant model dimensions.
alphabet_length = len(alphabet) + 1
MAX_SEQ_LEN = 21

# Name -> class lookup table. With the original if/elif chain an unknown name
# left `optimizer` as None and only failed later, inside training.
optimizer_classes = {
    'Adam': torch.optim.Adam,
    'SGD': torch.optim.SGD,
    'Adadelta': torch.optim.Adadelta,
    'Adagrad': torch.optim.Adagrad,
    'RMSprop': torch.optim.RMSprop,
}

total_samples = one_hot_encoded_training_tensor.shape[0]

hyperparameter_combinations = itertools.product(params.M_LATENTDIMENSION, params.T_NUMEPOCHS, params.T_LR, params.T_BATCHSIZE, params.T_LOSS, params.T_OPTIMIZER, params.T_TRAINVALIDRATIO)
for i,combination in enumerate(hyperparameter_combinations):
    # To resume an interrupted sweep, skip the finished combinations here,
    # e.g. `if i <= 1: continue`.
    latent_dim, num_epochs, lr, batch_size, loss_function, optimizer_name, train_valid_ratio = combination

    # Drop every reference to the previous combination's objects before
    # allocating new ones. The original reset `model` first, while `vaeLoss`
    # (a bound method) and `optimizer` still kept the old parameters alive,
    # and its `val_loade` typo never reset `val_loader`.
    model = vaeLoss = optimizer = None
    train_loader = val_loader = None

    # The context manager ends the run even when an iteration raises, so a
    # failed combination no longer leaves a dangling active run behind.
    with mlflow.start_run():
        if optimizer_name not in optimizer_classes:
            raise ValueError(f"Unsupported optimizer: {optimizer_name!r}")

        # Model, loss and optimizer for this combination. The reconstruction
        # loss is selected inside the model via `recon_loss`.
        model = EqnVAE(alphabet_length,MAX_SEQ_LEN,latent_rep_size=latent_dim,recon_loss=loss_function)
        model.to(device)
        vaeLoss = model.vae_loss
        optimizer = optimizer_classes[optimizer_name](model.parameters(), lr=lr)

        # Positional train/validation split: the last `valid_size` rows are
        # held out for validation. Copies are moved to `device` up front.
        valid_size = int(total_samples * train_valid_ratio)
        train_size = total_samples - valid_size

        train_tensor = one_hot_encoded_training_tensor[:train_size].clone().to(device)
        valid_tensor = one_hot_encoded_training_tensor[train_size:].clone().to(device)
        train_loader = DataLoader(dataset=train_tensor,
                                  batch_size=batch_size,
                                  shuffle=False)

        val_loader = DataLoader(dataset=valid_tensor,
                                  batch_size=batch_size,
                                  shuffle=False)

        # Log parameters to MLflow
        mlflow.log_params({
            "latent_dimension": latent_dim,
            "num_epochs": num_epochs,
            "learning_rate": lr,
            "batch_size": batch_size,
            "loss_function": loss_function,
            "optimizer": optimizer_name,
            "train_valid_ratio": train_valid_ratio
        })

        train_EqnVAE(model, train_loader, val_loader, vaeLoss, optimizer, num_epochs=num_epochs)

        # Persist both the pickled module and its state dict on disk.
        torch.save(model,f'./saved/models/EQN_VAE_{latent_dim}_{batch_size}_{loss_function}_{lr}_{num_epochs}_{optimizer_name}_{train_valid_ratio}_Full.pth')
        torch.save(model.state_dict(),f'./saved/models/EQN_VAE_{latent_dim}_{batch_size}_{loss_function}_{lr}_{num_epochs}_{optimizer_name}_{train_valid_ratio}_dict.pth')

        # Save the model
        mlflow.pytorch.log_model(model, "model")

        # NOTE: this rebinds the notebook-level `test_loader` (and the loop
        # rebinds `batch_size`); re-run the test-data cells before re-encoding.
        test_tensor = one_hot_encoded_testing_tensor.clone().to(device)
        test_loader = DataLoader(dataset=test_tensor,
                                  batch_size=batch_size,
                                  shuffle=False)
        model.to(device)
        visualize_latent_space_Eqn(model, test_loader,True)
        mlflow.log_artifact("./plots/LatentSpace_plot.png", artifact_path="plots")

        # Decode every test equation and its reconstruction back to tokens.
        # NOTE: `emb` must still be the MathTokenEmbedding created in the
        # Character Embedding cell; the Grammar VAE section rebinds `emb`.
        one_hot_decoded = []
        one_hot_decoded_recon = []
        model.eval()
        with torch.no_grad():  # inference only, no autograd graph needed
            for sample in test_loader:
                sample = sample.float().to(device)
                # The model returns three values; only the first (the
                # reconstruction) is used here.
                recon, _, _ = model(sample)
                for idx, ele in enumerate(recon):
                    # The arg-max over the alphabet axis is the predicted token
                    # index at each position (the original built a one-hot
                    # matrix and took its arg-max again, which is the same).
                    embd = torch.argmax(ele, dim=1)
                    one_hot_decoded.append(emb.decode(torch.argmax(sample[idx], dim=1)))
                    one_hot_decoded_recon.append(emb.decode(embd))

        # Write the comparison file once, after all batches are decoded. The
        # original rewrote and re-logged the file on every batch.
        output_filename = f"./output/output_{latent_dim}_{batch_size}_{loss_function}_{lr}_{num_epochs}_{optimizer_name}_{train_valid_ratio}.txt"
        with open(output_filename, "w") as file:
            for actual_tokens, recon_tokens in zip(one_hot_decoded[:1000], one_hot_decoded_recon[:1000]):
                actual_equation = ''.join(actual_tokens)
                decoded_equation = ''.join(recon_tokens)
                file.write(f'Actual Equation: {actual_equation}\n')
                file.write(f'VAE Decoded Equation: {decoded_equation}\n\n')

        # Log the file as an artifact in MLflow
        mlflow.log_artifact(output_filename, artifact_path="outputs")

        # Clear memory
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()  # Clear cached memory
            torch.cuda.manual_seed_all(42)  # Re-seed

    # The MLflow run has ended here; pause briefly before the next combination.
    time.sleep(5)  # Sleep for 5 seconds


====> Epoch: 0 Average Training loss: 1511.38888993
====> Epoch: 0 Average Validation loss: 1493.05335156
====> Epoch: 1 Average Training loss: 1484.67769792
====> Epoch: 1 Average Validation loss: 1479.08861719
====> Epoch: 2 Average Training loss: 1464.33134740
====> Epoch: 2 Average Validation loss: 1447.77856719
====> Epoch: 3 Average Training loss: 1440.97789358
====> Epoch: 3 Average Validation loss: 1437.83442969
====> Epoch: 4 Average Training loss: 1432.01184878
====> Epoch: 4 Average Validation loss: 1430.78165469
====> Epoch: 5 Average Training loss: 1425.38336858
====> Epoch: 5 Average Validation loss: 1424.74794688
====> Epoch: 6 Average Training loss: 1420.55318559
====> Epoch: 6 Average Validation loss: 1422.54631406
====> Epoch: 7 Average Training loss: 1416.18032101
====> Epoch: 7 Average Validation loss: 1418.36767656
====> Epoch: 8 Average Training loss: 1412.38814896
====> Epoch: 8 Average Validation loss: 1413.80091563
====> Epoch: 9 Average Training loss: 1409.475



====> Epoch: 14 Average Validation loss: 1395.91384844




(1000, 10)
====> Epoch: 0 Average Training loss: 1527.13772118
====> Epoch: 0 Average Validation loss: 1503.55567500
====> Epoch: 1 Average Training loss: 1492.60958368
====> Epoch: 1 Average Validation loss: 1484.62061875
====> Epoch: 2 Average Training loss: 1480.64010243
====> Epoch: 2 Average Validation loss: 1475.80614375
====> Epoch: 3 Average Training loss: 1470.38492708
====> Epoch: 3 Average Validation loss: 1463.65347500
====> Epoch: 4 Average Training loss: 1453.28304375
====> Epoch: 4 Average Validation loss: 1448.62960000
====> Epoch: 5 Average Training loss: 1437.58108507
====> Epoch: 5 Average Validation loss: 1435.78717500
====> Epoch: 6 Average Training loss: 1430.59317257
====> Epoch: 6 Average Validation loss: 1438.55752500
====> Epoch: 7 Average Training loss: 1425.50766181
====> Epoch: 7 Average Validation loss: 1427.79640625
====> Epoch: 8 Average Training loss: 1421.25277708
====> Epoch: 8 Average Validation loss: 1422.97734375
====> Epoch: 9 Average Training los



====> Epoch: 14 Average Validation loss: 1404.08303750




(1000, 10)
====> Epoch: 0 Average Training loss: 1491.41427604
====> Epoch: 0 Average Validation loss: 1452.54155000
====> Epoch: 1 Average Training loss: 1441.37228715
====> Epoch: 1 Average Validation loss: 1436.66017500
====> Epoch: 2 Average Training loss: 1427.61686042
====> Epoch: 2 Average Validation loss: 1427.96544531
====> Epoch: 3 Average Training loss: 1420.80996163
====> Epoch: 3 Average Validation loss: 1420.84968594
====> Epoch: 4 Average Training loss: 1413.54146701
====> Epoch: 4 Average Validation loss: 1418.45469375
====> Epoch: 5 Average Training loss: 1411.55951146
====> Epoch: 5 Average Validation loss: 1411.22021094
====> Epoch: 6 Average Training loss: 1406.19152448
====> Epoch: 6 Average Validation loss: 1406.13718281
====> Epoch: 7 Average Training loss: 1400.40074583
====> Epoch: 7 Average Validation loss: 1401.24834844
====> Epoch: 8 Average Training loss: 1394.76494167
====> Epoch: 8 Average Validation loss: 1396.49165313
====> Epoch: 9 Average Training los



====> Epoch: 14 Average Validation loss: 1382.16915937




(1000, 25)
====> Epoch: 0 Average Training loss: 1518.86048125
====> Epoch: 0 Average Validation loss: 1485.71015625
====> Epoch: 1 Average Training loss: 1464.44097396
====> Epoch: 1 Average Validation loss: 1459.98221562
====> Epoch: 2 Average Training loss: 1441.16833646
====> Epoch: 2 Average Validation loss: 1439.43776875
====> Epoch: 3 Average Training loss: 1430.66247361
====> Epoch: 3 Average Validation loss: 1432.60267813
====> Epoch: 4 Average Training loss: 1424.19275764
====> Epoch: 4 Average Validation loss: 1426.05798437
====> Epoch: 5 Average Training loss: 1418.86207222
====> Epoch: 5 Average Validation loss: 1420.15964687
====> Epoch: 6 Average Training loss: 1413.58630625
====> Epoch: 6 Average Validation loss: 1416.11780000
====> Epoch: 7 Average Training loss: 1408.56439583
====> Epoch: 7 Average Validation loss: 1409.74767187
====> Epoch: 8 Average Training loss: 1403.68623160
====> Epoch: 8 Average Validation loss: 1405.28164687
====> Epoch: 9 Average Training los



====> Epoch: 14 Average Validation loss: 1389.72922812




(1000, 25)
====> Epoch: 0 Average Training loss: 1498.08776528
====> Epoch: 0 Average Validation loss: 1467.26437812
====> Epoch: 1 Average Training loss: 1451.16631076
====> Epoch: 1 Average Validation loss: 1444.28618437
====> Epoch: 2 Average Training loss: 1436.02894566
====> Epoch: 2 Average Validation loss: 1432.46808594
====> Epoch: 3 Average Training loss: 1427.01081597
====> Epoch: 3 Average Validation loss: 1427.63687656
====> Epoch: 4 Average Training loss: 1419.88561962
====> Epoch: 4 Average Validation loss: 1422.10073125
====> Epoch: 5 Average Training loss: 1413.94719288
====> Epoch: 5 Average Validation loss: 1415.26866563
====> Epoch: 6 Average Training loss: 1409.57350417
====> Epoch: 6 Average Validation loss: 1410.66543594
====> Epoch: 7 Average Training loss: 1404.24584045
====> Epoch: 7 Average Validation loss: 1405.11311094
====> Epoch: 8 Average Training loss: 1398.61931910
====> Epoch: 8 Average Validation loss: 1399.91033125
====> Epoch: 9 Average Training los



====> Epoch: 14 Average Validation loss: 1385.23392500




(1000, 50)
====> Epoch: 0 Average Training loss: 1510.03329653
====> Epoch: 0 Average Validation loss: 1481.05089375
====> Epoch: 1 Average Training loss: 1459.86874896
====> Epoch: 1 Average Validation loss: 1458.56119375
====> Epoch: 2 Average Training loss: 1448.12656632
====> Epoch: 2 Average Validation loss: 1451.78588750
====> Epoch: 3 Average Training loss: 1441.82169479
====> Epoch: 3 Average Validation loss: 1444.50431875
====> Epoch: 4 Average Training loss: 1435.00737604
====> Epoch: 4 Average Validation loss: 1437.10038437
====> Epoch: 5 Average Training loss: 1429.89226562
====> Epoch: 5 Average Validation loss: 1431.08982812
====> Epoch: 6 Average Training loss: 1425.16328299
====> Epoch: 6 Average Validation loss: 1427.43260313
====> Epoch: 7 Average Training loss: 1420.23893854
====> Epoch: 7 Average Validation loss: 1421.92926250
====> Epoch: 8 Average Training loss: 1416.33798472
====> Epoch: 8 Average Validation loss: 1419.99503750
====> Epoch: 9 Average Training los



====> Epoch: 14 Average Validation loss: 1400.67619375




(1000, 50)


## Grammar VAE

### Embed Equations to Rule

In [4]:
#Rules Embedding
# Fetch the grammar from a fresh dataset and build the production-rule
# embedding (one-hot output), then display its rule -> index table.
# NOTE: this rebinds `data` and `emb`, which the Character AE/VAE cells above
# also use (`emb.decode` there expects the MathTokenEmbedding). Re-run the
# Character Embedding cell before re-running those sections.
data = CFGEquationDataset()
cfg = data.get_grammar()
emb = RuleTokenEmbedding(cfg=cfg,one_hot_encode=True)
emb.rule_to_idx

{"S -> S '+' T": 0,
 "S -> S '*' T": 1,
 "S -> S '/' T": 2,
 "S -> S '-' T": 3,
 'S -> T': 4,
 "T -> '(' S ')'": 5,
 "T -> 'sin' '(' S ')'": 6,
 "T -> 'exp' '(' S ')'": 7,
 "T -> 'cos' '(' S ')'": 8,
 "T -> 'sqrt' '(' S ')'": 9,
 "T -> 'log' '(' S ')'": 10,
 "T -> 'x'": 11,
 "T -> '1'": 12,
 "T -> '2'": 13,
 "T -> '3'": 14,
 "Nothing -> 'None'": 15}

### Initializing Grammar Masks and Mask Indices for Use in the Model

In [5]:
import nltk

# Derive the masking structures from the grammar.
productions = cfg.productions()
D = len(productions)

# Left-hand-side symbol of every production, and the distinct symbols in order
# of first appearance.
all_lhs = [prod.lhs().symbol() for prod in productions]
lhs_list = list(dict.fromkeys(all_lhs))

# For each production: positions (in lhs_list) of the non-terminals on its
# right-hand side, ignoring the 'None' placeholder symbol.
rhs_map = [
    [
        lhs_list.index(sym.symbol())
        for sym in prod.rhs()
        if isinstance(sym, nltk.Nonterminal) and sym.symbol() != 'None'
    ]
    for prod in productions
]

print(rhs_map)

# masks[r, c] is 1.0 when production c has lhs_list[r] as its left-hand side.
masks = np.array(
    [[lhs == prod_lhs for prod_lhs in all_lhs] for lhs in lhs_list],
    dtype=float,
).reshape(len(lhs_list), D)

print(masks)

# For every production, the row of `masks` holding its 1 (its LHS index).
ind_of_ind = np.array([np.flatnonzero(masks[:, col] == 1)[0] for col in range(masks.shape[1])])
print(ind_of_ind)

# Torch versions of both arrays.
masks_tensor = torch.from_numpy(masks).float()
ind_of_ind_tensor = torch.from_numpy(ind_of_ind).long()

[[0, 1], [0, 1], [0, 1], [0, 1], [1], [0], [0], [0], [0], [0], [0], [], [], [], [], []]
[[1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]]
[0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2]


#### Example Encoding of an equation

In [6]:
# Tokenised example equation: sqrt(log(x*cos(2)))*x
eqn = ['sqrt','(','log','(','x','*','cos','(','2',')',')',')','*','x']
# Embed it with the rule embedding (`emb` is the RuleTokenEmbedding here);
# the output below shows a list of one-hot arrays over the 16 rules.
encoded_gram_eqn = emb.embed(eqn)
encoded_gram_eqn


[array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0.]),
 array([0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0.]),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]),
 array([0., 0., 0., 0., 0., 0., 0., 0.

### Grammar VAE Initialization

In [7]:
# Number of production rules in the grammar; passed to EqnGVAE in the sweep below.
num_rules = len(cfg.productions())
num_rules

16

In [8]:
from models import EqnGVAE
from train import train_EqnGVAE

Device cuda


### Data Preparation for GVAE

In [9]:
# Default loader batch size (the sweep cell rebinds `batch_size` per run) and
# the second positional argument handed to EqnGVAE.
batch_size = 100
MAX_PROD_LEN = 16


def _make_rule_transform():
    """Return a new pipeline: one-hot rule embedding followed by ToTensor(uint8)."""
    return Compose([
        RuleTokenEmbedding(cfg, max_num_rules=16, one_hot_encode=True),
        ToTensor(dtype=torch.uint8),
    ])


# Pool of 100,000 samples that the training cell splits into train/validation.
training_val_gvae_dataset = CFGEquationDataset(
    n_samples=100000,
    transform=_make_rule_transform(),
)

# 1,000 samples used by the training cell for the latent-space plot and the
# reconstruction report.
test_gvae_dataset = CFGEquationDataset(
    n_samples=1000,
    transform=_make_rule_transform(),
)


#### Training of GVAE

In [10]:
# End whatever MLflow run is currently active, presumably so that a run left
# open by an interrupted sweep does not block the next `mlflow.start_run()`.
# NOTE(review): `import mlflow` only appears in the following cell, so this cell
# relies on the name already being in scope (possibly via an earlier
# star-import) -- confirm it survives Restart Kernel -> Run All.
mlflow.end_run()

In [11]:
import mlflow
from hyperparam_optim import HyperparameterOptimization
from tracking import MLFlowTracker
import itertools
from visualize import visualize_latent_space_Eqn
import gc
import time
from torch.utils.data import random_split

# Hyperparameter sweep for the grammar VAE.
# Every combination from params.yaml gets its own MLflow run: the model is
# trained, saved, visualised on the test set, and a text report comparing each
# test equation with its reconstruction is logged as an artifact.
hyperParamOptimization = HyperparameterOptimization('./hyperparameters/params.yaml')
params = hyperParamOptimization.get_params()

mlflow.set_experiment('Grammar VAE Equation V1')

# Optimizer constructors keyed by the names accepted in params.T_OPTIMIZER.
optimizer_classes = {
    'Adam': torch.optim.Adam,
    'SGD': torch.optim.SGD,
    'Adadelta': torch.optim.Adadelta,
    'Adagrad': torch.optim.Adagrad,
    'RMSprop': torch.optim.RMSprop,
}

hyperparameter_combinations = itertools.product(
    params.M_LATENTDIMENSION,
    params.T_NUMEPOCHS,
    params.T_LR,
    params.T_BATCHSIZE,
    params.T_LOSS,
    params.T_OPTIMIZER,
    params.T_TRAINVALIDRATIO,
)
for i, combination in enumerate(hyperparameter_combinations):
    # To resume a partially completed sweep, skip finished indices here,
    # e.g. `if i <= 1: continue`.
    latent_dim, num_epochs, lr, batch_size, loss_function, optimizer_name, train_valid_ratio = combination

    # Fail before a run is opened instead of training with `optimizer = None`.
    if optimizer_name not in optimizer_classes:
        raise ValueError(
            f"Unsupported optimizer {optimizer_name!r}; expected one of {sorted(optimizer_classes)}"
        )

    # Shared suffix of every file name written for this combination.
    run_tag = f'{latent_dim}_{batch_size}_{loss_function}_{lr}_{num_epochs}_{optimizer_name}_{train_valid_ratio}'

    # The context manager ends the run even when training raises, so a failed
    # combination does not leave an active run behind.
    with mlflow.start_run():
        # Drop references to the previous run's objects before allocating new ones.
        model = None
        vaeLoss = None
        optimizer = None

        # NOTE(review): `latent_dim` and `loss_function` are logged and used in
        # file names, but neither is passed to the model or the loss below --
        # confirm whether EqnGVAE should receive the latent dimension.
        model = EqnGVAE(num_rules, MAX_PROD_LEN, masks_tensor=masks_tensor, ind_of_masks=ind_of_ind_tensor)
        model.to(device)
        vaeLoss = model.vae_loss
        optimizer = optimizer_classes[optimizer_name](model.parameters(), lr=lr)

        # Split the pool; `train_valid_ratio` is the fraction held out for validation.
        total_samples = len(training_val_gvae_dataset)
        valid_size = int(total_samples * train_valid_ratio)
        train_size = total_samples - valid_size

        train_dataset, valid_dataset = random_split(training_val_gvae_dataset, [train_size, valid_size])
        train_loader = DataLoader(dataset=train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)
        val_loader = DataLoader(dataset=valid_dataset,
                                batch_size=batch_size,
                                shuffle=True)

        # Log parameters to MLflow
        mlflow.log_params({
            "latent_dimension": latent_dim,
            "num_epochs": num_epochs,
            "learning_rate": lr,
            "batch_size": batch_size,
            "loss_function": loss_function,
            "optimizer": optimizer_name,
            "train_valid_ratio": train_valid_ratio
        })

        train_EqnGVAE(model, train_loader, val_loader, vaeLoss, optimizer, num_epochs=num_epochs)

        # Persist both the pickled module and its state dict, then log the model to MLflow.
        torch.save(model, f'./saved/models/EQN_GVAE_{run_tag}_Full.pth')
        torch.save(model.state_dict(), f'./saved/models/EQN_GVAE_{run_tag}_dict.pth')
        mlflow.pytorch.log_model(model, "model")

        test_dataset = test_gvae_dataset
        test_loader = DataLoader(dataset=test_dataset,
                                 batch_size=batch_size,
                                 shuffle=False)
        model.to(device)
        # NOTE(review): the plot path logged below is presumably written by
        # visualize_latent_space_EqnGVAE -- confirm.
        visualize_latent_space_EqnGVAE(model, test_loader, max_num_rules=16, cfg=cfg, vae=True)
        mlflow.log_artifact("./plots/LatentSpace_plot.png", artifact_path="plots")

        # Decode every test equation together with its own reconstruction.
        one_hot_decoded = []
        one_hot_decoded_recon = []
        model.eval()
        with torch.no_grad():
            for sample in test_loader:
                sample = sample.float().to(device)
                recon, _, _ = model(sample)
                # Pair element-wise: the original indexed `recon` with the sweep
                # index `i` and decoded the whole batch instead of one sample.
                for original, reconstructed in zip(sample, recon):
                    one_hot_decoded.append(emb.decode(original.cpu().numpy()))
                    one_hot_decoded_recon.append(emb.decode(reconstructed.detach().cpu().numpy()))

        # Write the report once, after all batches have been decoded.
        output_filename = f"./output/output_{run_tag}.txt"
        with open(output_filename, "w") as file:
            for actual_tokens, decoded_tokens in zip(one_hot_decoded, one_hot_decoded_recon):
                actual_equation = ''.join(actual_tokens)
                decoded_equation = ''.join(decoded_tokens)
                file.write(f'Actual Equation: {actual_equation}\n')
                file.write(f'GVAE Decoded Equation: {decoded_equation}\n\n')

        # Log the file as an artifact in MLflow
        mlflow.log_artifact(output_filename, artifact_path="outputs")

    # Clear memory
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # Clear cached memory
        torch.cuda.manual_seed_all(42)  # Re-seed

    # Pause for 5 seconds before the next run.
    time.sleep(5)

  return torch.tensor(x, dtype=self.dtype)
