In [43]:
from typing import Tuple

In [441]:
import torch
from torch_geometric.data import InMemoryDataset, Data, Batch
from torch.utils.data import DataLoader, Subset
from sklearn.calibration import LabelEncoder
from generate_dataset import generate_dataset
import json 

Constants

In [442]:
GENERATED_DATASET_SIZE = 300
BATCH_SIZE = 32
TRAIN_RATIO = 0.6
VAL_RATIO = 0.2

In [443]:

def dict_to_geometric_representation(in_graph_dict: dict, encoder) -> Data:
    node_list = []
    edge_mappings = []
    def traverse_graph(graph = in_graph_dict):
        nonlocal node_list
        nonlocal edge_mappings
        curr_node_index = len(node_list)
        encoded_data = encoder({"type": graph["type"], "subtype": graph["subtype"], "value": graph["value"]})
        # print("Encoded data: ", encoded_data)
        node_list.append(encoded_data)
        if "children" in graph.keys():
            for child in graph["children"]:
                edge_mappings.append((curr_node_index, traverse_graph(child)) ) #I'm retarded.
        return curr_node_index
    traverse_graph()
    nodes = torch.tensor(node_list,dtype=torch.float32)
    edges = torch.tensor([[x[0] for x in edge_mappings], [x[1] for x in edge_mappings]], dtype=torch.long) # Probably slow and mentally degenerated
    geom_data = Data(x=nodes, edge_index=edges)
    return geom_data

In [467]:
TYPES = ["FUNC", "OPERATION", "POW", "LITERAL", "VARIABLE", "CONSTANT_LITERAL"]
OPERATIONS = ["ADD", "MUL", "FUNC", "POW"]
FUNCTIONS = ["SIN", "COS", "TAN", "EXP", "LOG", "SINH", "COSH"]
VARIABLE_ALPHABET = [chr(x) for x in range(ord('a'), ord('z')+1) if chr(x) not in ["e", "i"]]
CLASSIC_CONSTANTS = ["PI", "I", "g","e", "zoo"]


In [468]:

from math import nan


def make_node_attribute_encoder(label_encoder:LabelEncoder, rep = 4):
    def node_attr_encoder(attr):
        type_encoding = label_encoder.transform([attr["type"]]) if attr["type"] else [-1]
        subtype_encoding = label_encoder.transform([attr["subtype"]]) if attr["subtype"] else [-1]
        value_encoding_vec = None
        if attr["value"]:
            if isinstance(attr["value"], str):
                value_encoding = label_encoder.transform([attr["value"]])
                value_encoding_vec = [(0 if i % 2 == 0 else value_encoding[0]) for i in range(rep)]
            else: 
                # big num correction: 
                val = float(attr["value"])
                if val > 100000:
                    val = -41
                elif val == nan:
                    val = -51
                value_encoding_vec = [(0 if i % 2 == 1 else val) for i in range(rep)]
                
        else:
            value_encoding = [-1]
            value_encoding_vec = value_encoding*rep 
        
        type_encoding_vec = [type_encoding[0]] * rep
        subtype_encoding_vec = [subtype_encoding[0]] * rep       
        
        return type_encoding_vec + subtype_encoding_vec + value_encoding_vec
        # if isinstance(attr, str) and attr in OPERATIONS+FUNCTIONS+CLASSIC_CONSTANTS+VARIABLE_ALPHABET:
        #     res = label_encoder.transform([attr])
        #     return [res[0]]*(rep + 1)
        # else:
        #     return [0] + [float(attr)]*rep
        
            
    return node_attr_encoder

In [469]:
def create_dataset_class(expression):
    # Will it be the same for both datasets ? 
    le = LabelEncoder()
    le.fit(OPERATIONS+FUNCTIONS+CLASSIC_CONSTANTS+VARIABLE_ALPHABET+TYPES)
    class MathExpressionDataset(InMemoryDataset):
        def __init__(self, root, transform=None, pre_transform=None, pre_filter=None):
            super().__init__(root, transform, pre_transform, pre_filter, force_reload=True)
            self.load(self.processed_paths[0])
            
        @property
        def raw_file_names(self):
            return ['math_datagen.json']

        @property
        def processed_file_names(self):
            return ['data.pt']
        

        def process(self):
            # Read data into huge `Data` list.
            data_list = []
            for file in self.raw_file_names:
                with open(file) as file_handle:
                    object_data = json.load(file_handle)
                    for comparison in object_data:
                        # print("Comparison before : ",comparison)
                        # LEGACY : Remove first case once fixed bug in datagen
                        if isinstance(comparison,list):
                            expr = comparison[0 if expression == "expr_l" else 1]
                            score = comparison[2]
                            geometric_expr = dict_to_geometric_representation(expr, make_node_attribute_encoder(le))
                            geometric_expr.y = score #torch.tensor([score],dtype=torch.float32)
                            data_list.append(geometric_expr)
                        else:
                            expr = comparison[expression]
                            score = comparison["score"]
                            geometric_expr = dict_to_geometric_representation(expr, make_node_attribute_encoder(le))
                            geometric_expr.y = score #torch.tensor([score],dtype=torch.float32)
                            data_list.append(geometric_expr)
                        
            if self.pre_filter is not None:
                data_list = [data for data in data_list if self.pre_filter(data)]

            if self.pre_transform is not None:
                data_list = [self.pre_transform(data) for data in data_list]
            self.save(data_list, self.processed_paths[0])
    return MathExpressionDataset
    

In [470]:
class ExpressionPairDataset(torch.utils.data.Dataset):
    def __init__(self, root, transform=None, pre_transform=None, pre_filter=None):
        super().__init__()
        self.dataset_l = create_dataset_class("expr_l")(root+"_l",transform=None, pre_transform=None, pre_filter=None)
        self.dataset_r = create_dataset_class("expr_r")(root+"_r",transform=None, pre_transform=None, pre_filter=None)
        
    @property 
    def num_features(self):
        return self.dataset_l.num_features
    
    def __len__(self):
        return len(self.dataset_l)
        
    
    
    def __getitem__(self, idx):
        return self.dataset_l[idx], self.dataset_r[idx]

In [471]:
#generate_dataset(GENERATED_DATASET_SIZE,"math_datagen.json") #TODO: Switch to orjson, loading this file will take ages as I generate more data
dataset = ExpressionPairDataset(root="/dataset")

Processing...
Done!
Processing...
Done!


In [472]:
print(len(dataset))

8000


In [473]:
from torch import nn
from torch.nn import Linear, ReLU, LeakyReLU
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool
import numpy as np

In [474]:
class FormulaNet(nn.Module):
    def __init__(self, hidden_channels: int, embedding_space: int):
        super(FormulaNet, self).__init__()
        self.dense_1 = Linear(dataset.num_features, dataset.num_features) 
        self.relu_1 = LeakyReLU()
        self.gconv_1 = GCNConv(dataset.num_features, hidden_channels)
        self.gconv_2 = GCNConv(hidden_channels, hidden_channels)
        self.dense_3 = Linear(hidden_channels, embedding_space)
    
    def forward(self, x, edge_index, batch):
        
        if torch.isnan(x).any():
            with np.printoptions(threshold=99999):
                print("Some values of the input are nan: ", x)
        
        x = torch.nan_to_num(x, -51)
        
        # print("X = ", str(x))
        x = self.dense_1(x)
        # print("d1 X = ", str(x))
        x = self.relu_1(x)
        # print("rd1 X = ", str(x))
        x = self.gconv_1(x, edge_index)
        # print("gcn1 X = ", str(x))
        x = self.relu_1(x)
        # print("rgcn1 X = ", str(x))
        x = self.gconv_2(x, edge_index)
        # print("gcn2 X = ", str(x))
        x = self.relu_1(x)
        # print("rgcn2 X = ", str(x))
        x = global_mean_pool(x, batch)
        x = F.dropout(x, p=0.3,training=self.training)
        x = self.dense_3(x)
        x = self.relu_1(x)
        return x
    

In [475]:
class SiameseFormulaNet(nn.Module):
    def __init__(self, hidden_channels, embedding_space):
        super(SiameseFormulaNet, self).__init__()
        self.formulanet = FormulaNet(hidden_channels, embedding_space)
        # self.fc = nn.Sequential(
        #     Linear(embedding_space*2, embedding_space),
        #     ReLU(inplace=True),
        #     Linear(embedding_space, 1)
        # )
        # self.sigmoid = nn.Sigmoid() # TODO: Only used it for testing purposes, everything is subject to change Okay
    

    def forward(self, expr_l, expr_r):
        # print(expr_l)
        # print("X = ", expr_l.x)
        # print("X shape = ", expr_l.x.shape)
        # print("Batch = ",expr_l.batch)
        # print("Batch shape = ",expr_l.batch.shape)
        # print("Y =", expr_l.y)
        # print("Edge index = ",expr_l.edge_index)
        # print("Edge index shape = ",expr_l.edge_index.shape)
        
        embed_l = self.formulanet(expr_l.x,expr_l.edge_index, expr_l.batch)
        # print("Embed Left : ", embed_l)
        # print("Embed Left Shape: ", embed_l.shape)
        embed_l = embed_l.view(embed_l.size()[0], -1)
        embed_r = self.formulanet(expr_r.x,expr_r.edge_index, expr_r.batch)
        # print("Embed Right : ", embed_r)
        # print("Embed Right Shape: ", embed_r.shape)
        embed_r = embed_r.view(embed_r.size()[0], -1)
        
        # output = torch.cat((embed_l, embed_r), 1)
        
        # output = self.fc(output)
        # output = self.sigmoid(output)
        # return output
        return embed_l, embed_r
        
        
        

In [476]:
from torch.utils.data import random_split

In [477]:
total_len = len(dataset.dataset_l)
train_len = int(TRAIN_RATIO * total_len)
val_len = int(VAL_RATIO * total_len)
test_len = total_len - train_len - val_len
train_dataset, val_dataset, test_dataset = random_split(dataset, [train_len, val_len, test_len])

In [478]:

def collate(data_list):
    batchA = Batch.from_data_list([data[0] for data in data_list])
    batchB = Batch.from_data_list([data[1] for data in data_list])
    return batchA, batchB
# NOTE: Type ignore only for collate_fn_t ... make sure it doesn't get in the way of correct typing for the dataset
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, collate_fn=collate) # type: ignore
validation_laoder = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle= False, collate_fn = collate)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate) # type: ignore


In [479]:
device = torch.device("cpu") #torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [480]:
model = SiameseFormulaNet(32,64).to(device)

In [481]:
optimizer = torch.optim.Adam(model.parameters(), lr=0.000001)

In [482]:
from tqdm import tqdm

In [483]:
def contrastive_loss(embed_l, embed_r, labels, margin=1.0, threshold=0.5):
    # print("Embed Left : ", embed_l)
    # print("Embed Left shape: ", embed_l.shape)
    # print("Embed Right : ", embed_r)
    # print("Embed Right shape: ", embed_r.shape)
    euclidean_distance = F.pairwise_distance(embed_l, embed_r)
    # print("Euclidean distance : ", euclidean_distance)
    #labels = (labels > threshold).float()  # Convert labels to 0 or 1
    # print("Labels : ", labels)
    # print("Labels Shape: ", labels.shape)
    loss_contrastive = torch.mean((1 - labels) * torch.pow(euclidean_distance, 2) +
                                  labels * torch.pow(torch.clamp(margin - euclidean_distance, min=0.0), 2))
    # print("Constrastive Loss : ", loss_contrastive)
    return loss_contrastive

In [484]:
def train(epoch):
    model.train()
    epoch_loss = 0
    for batch_l, batch_r in tqdm(train_loader, desc=f'Epoch {epoch}'):
        # print(batch_l)
        # print("x : ",batch_l.x)
        # print("y : ", batch_l.y)
        # print("edge_index : ", batch_l.edge_index)
        
        # break
        batch_l, batch_r = batch_l.to(device), batch_r.to(device)
        optimizer.zero_grad()
        embed_l, embed_r = model(batch_l, batch_r)
        loss = contrastive_loss(embed_l, embed_r, batch_l.y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    print(f'Epoch {epoch}, Loss: {epoch_loss / len(train_loader)}')

In [485]:
def validate():
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for batch_l, batch_r in validation_laoder:
            batch_l, batch_r = batch_l.to(device), batch_r.to(device)
            embed_l, embed_r = model(batch_l, batch_r)
            euclidean_distance = F.pairwise_distance(embed_l, embed_r)
            pred = (euclidean_distance < 0.5).float()  # Adjust the threshold as needed
            correct += (pred == batch_l.y).sum().item()
            test_loss += contrastive_loss(embed_l, embed_r, batch_l.y).item()
    test_loss /= len(test_loader)
    acc = correct / len(test_dataset)
    print(f'Test Loss: {test_loss}, Accuracy: {acc}')

In [486]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [487]:
num_epochs = 10
for epoch in range(num_epochs):
    train(epoch)
    validate()

Epoch 0:   0%|          | 0/150 [00:00<?, ?it/s]

Epoch 0:  11%|█▏        | 17/150 [00:00<00:03, 39.00it/s]

Some values of the input are nan:  tensor([[ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 27.0000,  0.0000, 27.0000],
        ...,
        [16.0000, 16.0000, 16.0000,  ..., 28.0000,  0.0000, 28.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  0.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -3.5000,  0.0000]])
Some values of the input are nan:  tensor([[ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 27.0000,  0.0000, 27.0000],
        ...,
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 33.0000,  0.0000, 33.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -3.5000,  0.0000]])
Some values of the input are nan:  tensor([[ 5.,  5.,  5.,  ..

Epoch 0:  55%|█████▌    | 83/150 [00:02<00:01, 41.21it/s]

Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  1.2528,  0.0000],
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  1.5708,  0.0000],
        [ 1.0000,  1.0000,  1.0000,  ...,  6.0000,  0.0000,  6.0000]])


Epoch 0:  82%|████████▏ | 123/150 [00:03<00:00, 40.57it/s]

Some values of the input are nan:  tensor([[ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [16.0000, 16.0000, 16.0000,  ..., 40.0000,  0.0000, 40.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -9.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  6.0000,  0.0000]])


Epoch 0: 100%|██████████| 150/150 [00:03<00:00, 40.18it/s]

Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 36.0000,  0.0000, 36.0000],
        [16.0000, 16.0000, 16.0000,  ..., 40.0000,  0.0000, 40.0000],
        ...,
        [16.0000, 16.0000, 16.0000,  ..., 20.0000,  0.0000, 20.0000],
        [16.0000, 16.0000, 16.0000,  ..., 36.0000,  0.0000, 36.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  1.5000,  0.0000]])





Epoch 0, Loss: nan
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Test Loss: nan, Accuracy: 0.498125


Epoch 1:  21%|██        | 31/150 [00:00<00:01, 71.80it/s]

Some values of the input are nan:  tensor([[ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [12., 12., 12.,  ..., -1., -1., -1.],
        ...,
        [16., 16., 16.,  ..., 28.,  0., 28.],
        [ 7.,  7.,  7.,  ...,  0., -7.,  0.],
        [ 7.,  7.,  7.,  ...,  0.,  8.,  0.]])
Some values of the input are nan:  tensor([[ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -6.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -7.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  5.0000,  0.0000]])


Epoch 1:  37%|███▋      | 56/150 [00:00<00:01, 73.20it/s]

Some values of the input are nan:  tensor([[12., 12., 12.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        [ 7.,  7.,  7.,  ...,  0.,  5.,  0.],
        ...,
        [10., 10., 10.,  ..., -1., -1., -1.],
        [16., 16., 16.,  ..., 22.,  0., 22.],
        [16., 16., 16.,  ..., 39.,  0., 39.]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.8902,  0.0000],
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 41.0000,  0.0000, 41.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -8.0000,  0.0000]])


Epoch 1: 100%|██████████| 150/150 [00:02<00:00, 71.43it/s]

Some values of the input are nan:  tensor([[ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        ...,
        [16., 16., 16.,  ..., 34.,  0., 34.],
        [ 7.,  7.,  7.,  ...,  0., -9.,  0.],
        [ 7.,  7.,  7.,  ...,  0., -6.,  0.]])
Some values of the input are nan:  tensor([[ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        ...,
        [16., 16., 16.,  ..., 34.,  0., 34.],
        [ 7.,  7.,  7.,  ...,  0., -9.,  0.],
        [ 7.,  7.,  7.,  ...,  0., -6.,  0.]])





Epoch 1, Loss: nan
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Test Loss: nan, Accuracy: 0.498125


Epoch 2:  53%|█████▎    | 80/150 [00:01<00:00, 71.90it/s]

Some values of the input are nan:  tensor([[ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 23.0000,  0.0000, 23.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -8.5000,  0.0000]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  6.5000,  0.0000]])


Epoch 2:  63%|██████▎   | 95/150 [00:01<00:00, 67.63it/s]

Some values of the input are nan:  tensor([[ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  0.8415,  0.0000],
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -3.0000,  0.0000],
        ...,
        [16.0000, 16.0000, 16.0000,  ..., 27.0000,  0.0000, 27.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -8.0000,  0.0000]])


Epoch 2:  89%|████████▉ | 134/150 [00:01<00:00, 67.32it/s]

Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, 24.7815,  0.0000],
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 17.0000,  0.0000, 17.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  4.5000,  0.0000]])
Some values of the input are nan:  tensor([[ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [ 7.0000,  7.0000,  7.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  4.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  7.0000,  0.0000]])
Some values of the input are nan:  tensor([[ 5.0000,  5.0000, 

Epoch 2: 100%|██████████| 150/150 [00:02<00:00, 69.28it/s]


Epoch 2, Loss: nan
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Test Loss: nan, Accuracy: 0.498125


Epoch 3:   5%|▍         | 7/150 [00:00<00:02, 64.74it/s]

Some values of the input are nan:  tensor([[12., 12., 12.,  ..., -1., -1., -1.],
        [12., 12., 12.,  ..., -1., -1., -1.],
        [12., 12., 12.,  ..., -1., -1., -1.],
        ...,
        [ 7.,  7.,  7.,  ...,  0., -1.,  0.],
        [16., 16., 16.,  ..., 37.,  0., 37.],
        [ 7.,  7.,  7.,  ...,  0.,  8.,  0.]])
Some values of the input are nan:  tensor([[10., 10., 10.,  ..., -1., -1., -1.],
        [ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [12., 12., 12.,  ..., -1., -1., -1.],
        ...,
        [ 7.,  7.,  7.,  ...,  0., -1.,  0.],
        [16., 16., 16.,  ..., 37.,  0., 37.],
        [ 7.,  7.,  7.,  ...,  0.,  8.,  0.]])


Epoch 3:  24%|██▍       | 36/150 [00:00<00:02, 54.79it/s]

Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 27.0000,  0.0000, 27.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 24.0000,  0.0000, 24.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.5000,  0.0000]])


Epoch 3:  44%|████▍     | 66/150 [00:01<00:01, 67.31it/s]

Some values of the input are nan:  tensor([[ 1.0000e+01,  1.0000e+01,  1.0000e+01,  ..., -1.0000e+00,
         -1.0000e+00, -1.0000e+00],
        [ 7.0000e+00,  7.0000e+00,  7.0000e+00,  ...,  0.0000e+00,
         -9.7656e+01,  0.0000e+00],
        [ 1.2000e+01,  1.2000e+01,  1.2000e+01,  ..., -1.0000e+00,
         -1.0000e+00, -1.0000e+00],
        ...,
        [ 7.0000e+00,  7.0000e+00,  7.0000e+00,  ...,  0.0000e+00,
          1.5000e+00,  0.0000e+00],
        [ 7.0000e+00,  7.0000e+00,  7.0000e+00,  ...,  0.0000e+00,
          4.0000e+00,  0.0000e+00],
        [ 7.0000e+00,  7.0000e+00,  7.0000e+00,  ...,  0.0000e+00,
          9.5367e-07,  0.0000e+00]])


Epoch 3:  64%|██████▍   | 96/150 [00:01<00:00, 69.08it/s]

Some values of the input are nan:  tensor([[ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        [ 5.,  5.,  5.,  ..., -1., -1., -1.],
        ...,
        [ 7.,  7.,  7.,  ...,  0., -4.,  0.],
        [16., 16., 16.,  ..., 41.,  0., 41.],
        [ 7.,  7.,  7.,  ...,  0.,  2.,  0.]])


Epoch 3:  95%|█████████▌| 143/150 [00:02<00:00, 74.68it/s]

Some values of the input are nan:  tensor([[ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.0000,  0.0000],
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  4.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -7.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,     nan,  0.0000]])


Epoch 3: 100%|██████████| 150/150 [00:02<00:00, 65.47it/s]


Epoch 3, Loss: nan
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Test Loss: nan, Accuracy: 0.498125


Epoch 4:  43%|████▎     | 65/150 [00:00<00:01, 77.67it/s]

Some values of the input are nan:  tensor([[10., 10., 10.,  ..., -1., -1., -1.],
        [ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [12., 12., 12.,  ..., -1., -1., -1.],
        ...,
        [12., 12., 12.,  ..., -1., -1., -1.],
        [16., 16., 16.,  ..., 32.,  0., 32.],
        [ 7.,  7.,  7.,  ...,  0., -8.,  0.]])


Epoch 4:  91%|█████████▏| 137/150 [00:01<00:00, 73.94it/s]

Some values of the input are nan:  tensor([[ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -7.5000,  0.0000],
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 17.0000,  0.0000, 17.0000]])
Some values of the input are nan:  tensor([[10., 10., 10.,  ..., -1., -1., -1.],
        [16., 16., 16.,  ..., 39.,  0., 39.],
        [ 7.,  7.,  7.,  ...,  0.,  5.,  0.],
        ...,
        [12., 12., 12.,  ..., -1., -1., -1.],
        [ 7.,  7.,  7.,  ...,  0., -9.,  0.],
        [ 7.,  7.,  7.,  ...,  0., -6.,  0.]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0

Epoch 4: 100%|██████████| 150/150 [00:02<00:00, 74.77it/s]


Epoch 4, Loss: nan
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Test Loss: nan, Accuracy: 0.498125


Epoch 5:   5%|▌         | 8/150 [00:00<00:01, 78.20it/s]

Some values of the input are nan:  tensor([[ 7.,  7.,  7.,  ...,  0., 13.,  0.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        ...,
        [12., 12., 12.,  ..., -1., -1., -1.],
        [16., 16., 16.,  ..., 33.,  0., 33.],
        [ 7.,  7.,  7.,  ...,  0.,  9.,  0.]])


Epoch 5:  32%|███▏      | 48/150 [00:00<00:01, 75.53it/s]

Some values of the input are nan:  tensor([[ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 18.0000,  0.0000, 18.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -9.5000,  0.0000]])


Epoch 5:  85%|████████▌ | 128/150 [00:01<00:00, 72.18it/s]

Some values of the input are nan:  tensor([[10., 10., 10.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        [12., 12., 12.,  ..., -1., -1., -1.],
        ...,
        [16., 16., 16.,  ..., 23.,  0., 23.],
        [16., 16., 16.,  ..., 20.,  0., 20.],
        [ 7.,  7.,  7.,  ...,  0., -3.,  0.]])
Some values of the input are nan:  tensor([[ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        ...,
        [16., 16., 16.,  ..., 23.,  0., 23.],
        [16., 16., 16.,  ..., 20.,  0., 20.],
        [ 7.,  7.,  7.,  ...,  0., -3.,  0.]])


Epoch 5: 100%|██████████| 150/150 [00:02<00:00, 74.40it/s]

Some values of the input are nan:  tensor([[12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 20.0000,  0.0000, 20.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.5000,  0.0000],
        ...,
        [16.0000, 16.0000, 16.0000,  ..., 35.0000,  0.0000, 35.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,     nan,  0.0000]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 23.0000,  0.0000, 23.0000],
        [16.0000, 16.0000, 16.0000,  ..., 39.0000,  0.0000, 39.0000],
        ...,
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -2.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -6.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000]])
Epoch 5, Loss: nan





Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Test Loss: nan, Accuracy: 0.498125


Epoch 6:  11%|█         | 16/150 [00:00<00:01, 75.45it/s]

Some values of the input are nan:  tensor([[12., 12., 12.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        [16., 16., 16.,  ..., 29.,  0., 29.],
        ...,
        [10., 10., 10.,  ..., -1., -1., -1.],
        [16., 16., 16.,  ..., 39.,  0., 39.],
        [ 7.,  7.,  7.,  ...,  0., -2.,  0.]])
Some values of the input are nan:  tensor([[12., 12., 12.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        [16., 16., 16.,  ..., 29.,  0., 29.],
        ...,
        [10., 10., 10.,  ..., -1., -1., -1.],
        [ 7.,  7.,  7.,  ...,  0.,  2.,  0.],
        [16., 16., 16.,  ..., 39.,  0., 39.]])


Epoch 6:  59%|█████▉    | 89/150 [00:01<00:00, 75.58it/s]

Some values of the input are nan:  tensor([[10., 10., 10.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        [ 5.,  5.,  5.,  ..., -1., -1., -1.],
        ...,
        [16., 16., 16.,  ..., 32.,  0., 32.],
        [ 7.,  7.,  7.,  ...,  0.,  4.,  0.],
        [ 7.,  7.,  7.,  ...,  0.,  3.,  0.]])


Epoch 6:  75%|███████▌  | 113/150 [00:01<00:00, 72.80it/s]

Some values of the input are nan:  tensor([[12., 12., 12.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        [ 5.,  5.,  5.,  ..., -1., -1., -1.],
        ...,
        [10., 10., 10.,  ..., -1., -1., -1.],
        [ 7.,  7.,  7.,  ...,  0.,  1.,  0.],
        [16., 16., 16.,  ..., 17.,  0., 17.]])
Some values of the input are nan:  tensor([[ 7.0000e+00,  7.0000e+00,  7.0000e+00,  ...,  0.0000e+00,
                 nan,  0.0000e+00],
        [ 5.0000e+00,  5.0000e+00,  5.0000e+00,  ..., -1.0000e+00,
         -1.0000e+00, -1.0000e+00],
        [ 1.6000e+01,  1.6000e+01,  1.6000e+01,  ...,  3.8000e+01,
          0.0000e+00,  3.8000e+01],
        ...,
        [ 7.0000e+00,  7.0000e+00,  7.0000e+00,  ...,  0.0000e+00,
         -8.0000e+00,  0.0000e+00],
        [ 7.0000e+00,  7.0000e+00,  7.0000e+00,  ...,  0.0000e+00,
          3.0000e+00,  0.0000e+00],
        [ 7.0000e+00,  7.0000e+00,  7.0000e+00,  ...,  0.0000e+00,
          1.6538e-08,  0.0000e+00]])
Some val

Epoch 6: 100%|██████████| 150/150 [00:02<00:00, 73.98it/s]


Epoch 6, Loss: nan
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Test Loss: nan, Accuracy: 0.498125


Epoch 7:  15%|█▌        | 23/150 [00:00<00:01, 71.89it/s]

Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  0.9765,  0.0000],
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [16.0000, 16.0000, 16.0000,  ..., 40.0000,  0.0000, 40.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -7.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000]])


Epoch 7:  37%|███▋      | 55/150 [00:00<00:01, 72.56it/s]

Some values of the input are nan:  tensor([[ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 33.0000,  0.0000, 33.0000],
        ...,
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.0000,  0.0000]])


Epoch 7:  77%|███████▋  | 115/150 [00:01<00:00, 67.26it/s]

Some values of the input are nan:  tensor([[ 12.,  12.,  12.,  ...,  -1.,  -1.,  -1.],
        [ 10.,  10.,  10.,  ...,  -1.,  -1.,  -1.],
        [  5.,   5.,   5.,  ...,  -1.,  -1.,  -1.],
        ...,
        [ 12.,  12.,  12.,  ...,  -1.,  -1.,  -1.],
        [  7.,   7.,   7.,  ...,   0.,   6.,   0.],
        [  7.,   7.,   7.,  ...,   0., -10.,   0.]])
Some values of the input are nan:  tensor([[12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 29.0000,  0.0000, 29.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -6.5000,  0.0000],
        ...,
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  4.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000]])
Some values of the input are nan:  tensor([[12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 29.0000,  0.0000, 29.00

Epoch 7:  91%|█████████ | 136/150 [00:02<00:00, 64.53it/s]

Some values of the input are nan:  tensor([[ 10.,  10.,  10.,  ...,  -1.,  -1.,  -1.],
        [  7.,   7.,   7.,  ...,   0., -10.,   0.],
        [ 16.,  16.,  16.,  ...,  30.,   0.,  30.],
        ...,
        [ 12.,  12.,  12.,  ...,  -1.,  -1.,  -1.],
        [ 16.,  16.,  16.,  ...,  19.,   0.,  19.],
        [  7.,   7.,   7.,  ...,   0.,  -2.,   0.]])


Epoch 7: 100%|██████████| 150/150 [00:02<00:00, 67.16it/s]


Epoch 7, Loss: nan
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Test Loss: nan, Accuracy: 0.498125


Epoch 8:   5%|▍         | 7/150 [00:00<00:02, 60.75it/s]

Some values of the input are nan:  tensor([[12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 38.0000,  0.0000, 38.0000],
        ...,
        [16.0000, 16.0000, 16.0000,  ..., 32.0000,  0.0000, 32.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  0.8064,  0.0000],
        [ 1.0000,  1.0000,  1.0000,  ..., 42.0000,  0.0000, 42.0000]])


Epoch 8:  37%|███▋      | 55/150 [00:00<00:01, 61.16it/s]

Some values of the input are nan:  tensor([[12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [16.0000, 16.0000, 16.0000,  ..., 23.0000,  0.0000, 23.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -8.5000,  0.0000]])
Some values of the input are nan:  tensor([[ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        [12.0000, 12.0000, 12.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [16.0000, 16.0000, 16.0000,  ..., 33.0000,  0.0000, 33.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -8.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -3.5000,  0.0000]])


Epoch 8:  79%|███████▉  | 119/150 [00:01<00:00, 65.73it/s]

Some values of the input are nan:  tensor([[ 10.,  10.,  10.,  ...,  -1.,  -1.,  -1.],
        [  5.,   5.,   5.,  ...,  -1.,  -1.,  -1.],
        [ 10.,  10.,  10.,  ...,  -1.,  -1.,  -1.],
        ...,
        [ 12.,  12.,  12.,  ...,  -1.,  -1.,  -1.],
        [ 16.,  16.,  16.,  ...,  38.,   0.,  38.],
        [  7.,   7.,   7.,  ...,   0., -10.,   0.]])
Some values of the input are nan:  tensor([[ 10.0000,  10.0000,  10.0000,  ...,  -1.0000,  -1.0000,  -1.0000],
        [  7.0000,   7.0000,   7.0000,  ...,   0.0000,  -6.5000,   0.0000],
        [ 16.0000,  16.0000,  16.0000,  ...,  34.0000,   0.0000,  34.0000],
        ...,
        [ 12.0000,  12.0000,  12.0000,  ...,  -1.0000,  -1.0000,  -1.0000],
        [ 16.0000,  16.0000,  16.0000,  ...,  38.0000,   0.0000,  38.0000],
        [  7.0000,   7.0000,   7.0000,  ...,   0.0000, -10.0000,   0.0000]])


Epoch 8: 100%|██████████| 150/150 [00:02<00:00, 65.01it/s]


Some values of the input are nan:  tensor([[ 10.0000,  10.0000,  10.0000,  ...,  -1.0000,  -1.0000,  -1.0000],
        [  7.0000,   7.0000,   7.0000,  ...,   0.0000, -46.7654,   0.0000],
        [  1.0000,   1.0000,   1.0000,  ...,   6.0000,   0.0000,   6.0000],
        ...,
        [ 12.0000,  12.0000,  12.0000,  ...,  -1.0000,  -1.0000,  -1.0000],
        [  7.0000,   7.0000,   7.0000,  ...,   0.0000,   5.5000,   0.0000],
        [  7.0000,   7.0000,   7.0000,  ...,   0.0000,  -9.5000,   0.0000]])
Epoch 8, Loss: nan
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Some va

Epoch 9:  21%|██▏       | 32/150 [00:00<00:01, 75.64it/s]

Some values of the input are nan:  tensor([[ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  0.0101,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  7.7228,  0.0000],
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        ...,
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -9.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -8.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])


Epoch 9:  63%|██████▎   | 95/150 [00:01<00:00, 71.94it/s]

Some values of the input are nan:  tensor([[ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [16., 16., 16.,  ..., 22.,  0., 22.],
        ...,
        [10., 10., 10.,  ..., -1., -1., -1.],
        [ 7.,  7.,  7.,  ...,  0., -3.,  0.],
        [16., 16., 16.,  ..., 32.,  0., 32.]])


Epoch 9:  79%|███████▉  | 119/150 [00:01<00:00, 74.06it/s]

Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -2.5000,  0.0000],
        ...,
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  4.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -8.5000,  0.0000]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 5.0000,  5.0000,  5.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -2.5000,  0.0000],
        ...,
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  4.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -8.5000,  0.0000]])


Epoch 9:  90%|█████████ | 135/150 [00:01<00:00, 71.37it/s]

Some values of the input are nan:  tensor([[ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [16., 16., 16.,  ..., 18.,  0., 18.],
        [ 5.,  5.,  5.,  ..., -1., -1., -1.],
        ...,
        [10., 10., 10.,  ..., -1., -1., -1.],
        [16., 16., 16.,  ..., 27.,  0., 27.],
        [ 7.,  7.,  7.,  ...,  0., -5.,  0.]])


Epoch 9: 100%|██████████| 150/150 [00:02<00:00, 71.53it/s]


Some values of the input are nan:  tensor([[ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [10., 10., 10.,  ..., -1., -1., -1.],
        [12., 12., 12.,  ..., -1., -1., -1.],
        ...,
        [ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [ 5.,  5.,  5.,  ..., -1., -1., -1.],
        [16., 16., 16.,  ..., 24.,  0., 24.]])
Epoch 9, Loss: nan
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  2.5000,  0.0000],
        ...,
        [10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  9.0000,  0.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000,  3.0000,  0.0000]])
Some values of the input are nan:  tensor([[10.0000, 10.0000, 10.0000,  ..., -1.0000, -1.0000, -1.0000],
        [ 7.0000,  7.0000,  7.0000,  ...,  0.0000, -5.5000,  0.0000],
        [ 7.

## Evaluation

In [61]:
le = LabelEncoder()
le.fit(OPERATIONS+FUNCTIONS+CLASSIC_CONSTANTS+VARIABLE_ALPHABET+TYPES)
eval_node_attr_encoder = make_node_attribute_encoder(le)

In [67]:
expr_left = {
      "type": "POW",
      "value": None,
      "subtype": None,
      "children": [
        {
          "type": "POW",
          "value": None,
          "subtype": None,
          "children": [
            {
              "type": "LITERAL",
              "value": -3.5,
              "subtype": None,
              "children": []
            },
            {
              "type": "LITERAL",
              "value": 3.0,
              "subtype": None,
              "children": []
            }
          ]
        },
        {
          "type": "LITERAL",
          "value": 4.5,
          "subtype": None,
          "children": []
        }
      ]
    }
expr_right = {
      "type": "POW",
      "value": None,
      "subtype": None,
      "children": [
        {
          "type": "POW",
          "value": None,
          "subtype": None,
          "children": [
            {
              "type": "LITERAL",
              "value": -5.5,
              "subtype": None,
              "children": []
            },
            {
              "type": "LITERAL",
              "value": -10.0,
              "subtype": None,
              "children": []
            }
          ]
        },
        {
          "type": "LITERAL",
          "value": 4.0,
          "subtype": None,
          "children": []
        }
      ]
    }

expr_right_constant_error = {
      "type": "POW",
      "value": None,
      "subtype": None,
      "children": [
        {
          "type": "POW",
          "value": None,
          "subtype": None,
          "children": [
            {
              "type": "LITERAL",
              "value": -5.5,
              "subtype": None,
              "children": []
            },
            {
              "type": "LITERAL",
              "value": -10.0,
              "subtype": None,
              "children": []
            }
          ]
        },
        {
          "type": "LITERAL",
          "value": 4.0,
          "subtype": None,
          "children": []
        }
      ]
    }

In [68]:
expr_l_obj = dict_to_geometric_representation(expr_left, eval_node_attr_encoder)
expr_r_obj = dict_to_geometric_representation(expr_right, eval_node_attr_encoder)
expr_r_c_obj = dict_to_geometric_representation(expr_right_constant_error, eval_node_attr_encoder)

In [69]:
y_1 = model(expr_l_obj, expr_r_obj)
y_2 = model(expr_l_obj, expr_l_obj)
y_3 = model(expr_r_obj, expr_r_obj)
y_4 = model(expr_r_obj, expr_r_c_obj)

In [70]:
print(F.cosine_similarity(y_1[0], y_1[1]))
print(F.cosine_similarity(y_2[0], y_2[1]))
print(F.cosine_similarity(y_3[0], y_3[1]))
print(F.cosine_similarity(y_4[0], y_4[1]))


tensor([0.4399], grad_fn=<SumBackward1>)
tensor([1.], grad_fn=<SumBackward1>)
tensor([1.0000], grad_fn=<SumBackward1>)
tensor([1.0000], grad_fn=<SumBackward1>)


In [66]:
# from torchviz import make_dot

# Misc

In [224]:
# make_dot(y_f.mean(), params=dict(formulanet.named_parameters()))