### Starting:

In [2]:
import json
import numpy as np
import re

In [2]:
# Unary function names. The order matters: token ids are assigned by position
# when the vocabulary is built from `func_list + basic_diadic`.
func_list = [
    # trigonometric
    "sin", "cos", "sec", "csc", "tan", "cot",
    # logarithm, exponential, square root
    "log", "exp", "sqrt",
    # hyperbolic
    "sinh", "cosh", "sech", "csch", "tanh", "coth",
    # inverse trigonometric and inverse hyperbolic
    "atan", "asin", "acos", "asinh", "acosh", "atanh", "acoth", "asech", "acsch",
]

# Binary (dyadic) operators; "~" appears between the two sides of an equation
# in the dataset keys (e.g. '90 + 41x ~ 77').
basic_diadic = ["+", "-", "/", "*", "^", "~"]

In [3]:
# Vocabulary: maps every tree token to an integer id.
# Id 1 is shared by all numeric literals.
token_dict = {'[NUM]': 1}

# 'a'..'z' -> 2..27
token_dict.update(zip("abcdefghijklmnopqrstuvwxyz", range(2, 28)))

# 'A'..'Z' -> 29..54. The start is derived from the vocabulary size at this
# point (27) plus 2, so id 28 is left unused.
token_dict.update(
    zip("ABCDEFGHIJKLMNOPQRSTUVWXYZ", range(len(token_dict) + 2, len(token_dict) + 28))
)

# Functions followed by binary operators -> 55..84. `enumerate` is created
# (and its start value fixed) before `update` starts inserting entries.
token_dict.update(
    (op, op_id)
    for op_id, op in enumerate(func_list + basic_diadic, start=len(token_dict) + 2)
)

In [4]:
# Expression: (cot(x) - tan(x)) / (1 + cos(4x))
input_tree = ['/', '+', 'cot', 'x', '*', '-1', 'tan', 'x', '+', '1', 'cos', '*', '4', 'x']  

In [5]:
def make_float(s, return_num=False):
    """Map a tree token to its vocabulary id, or parse it as a number.

    Args:
        s: Token taken from an expression tree, e.g. ``"4"``, ``"-1"``,
            ``"x"`` or ``"cos"``.
        return_num: Selects what is returned (see Returns).

    Returns:
        With ``return_num=False``: ``1`` (the id of ``'[NUM]'``) when ``s``
        parses as a float, otherwise ``token_dict[s]``.
        With ``return_num=True``: ``float(s)`` when ``s`` parses, otherwise
        ``s`` itself, unchanged.

    Raises:
        KeyError: ``return_num`` is false and ``s`` is neither numeric nor a
            key of ``token_dict``.
    """
    try:
        number = float(s)
    except (TypeError, ValueError):
        # Only a failed conversion means "not a number". A bare `except:`
        # would also swallow KeyboardInterrupt / SystemExit.
        return s if return_num else token_dict[s]
    return number if return_num else 1
        
def classify_operation(token):
    """Return the coarse category (1-7) of a token id.

    Categories by id range:
        1 -> number (id == 1)
        2 -> symbolic variable (1 < id < 55)
        3 -> trigonometric operator (55 <= id < 61)
        4 -> log, exp, sqrt (61 <= id < 64)
        5 -> hyperbolic operator (64 <= id < 70)
        6 -> inverse operator (70 <= id < 79)
        7 -> binary operator (79 <= id < 85)

    Any other id yields ``None``.
    """
    if token == 1:
        return 1
    if not 1 < token < 85:
        return None
    # Exclusive upper bounds of categories 2..7, in ascending order; the
    # first bound the token falls under decides its category.
    for category, upper_bound in enumerate((55, 61, 64, 70, 79, 85), start=2):
        if token < upper_bound:
            return category
    
def isoperator(token):
    """Return 0 when ``token`` is below 55 (not an operator), otherwise 1.

    In ``token_dict`` the ids from 55 upwards belong to functions and binary
    operators.
    """
    return 0 if token < 55 else 1



In [6]:
def node_feature(node, return_num=False):
    """Build the feature list ``[token, is_operator, category]`` for one tree node.

    Args:
        node: Token from an expression tree (number string, variable name,
            function name or operator).
        return_num: When true, a numeric node carries its parsed float value
            in the first slot instead of the shared number id ``1``.

    Returns:
        A three-element list: the token (id, or float value for numbers when
        ``return_num`` is true), the ``isoperator`` flag and the
        ``classify_operation`` category.
    """
    # Always classify on the vocabulary id. Feeding the parsed value into the
    # id-range checks labels e.g. 4.0 as a variable and -1.0 as None, and the
    # raw string returned for symbols raises TypeError on comparison.
    token_id = make_float(node)

    token = token_id
    if return_num:
        parsed = make_float(node, True)
        # make_float hands back a float only when the node really is numeric.
        if isinstance(parsed, float):
            token = parsed

    return [token, isoperator(token_id), classify_operation(token_id)]

In [7]:
node_feature("~")

[84, 1, 7]

In [45]:
token_dict

{'[NUM]': 1,
 'a': 2,
 'b': 3,
 'c': 4,
 'd': 5,
 'e': 6,
 'f': 7,
 'g': 8,
 'h': 9,
 'i': 10,
 'j': 11,
 'k': 12,
 'l': 13,
 'm': 14,
 'n': 15,
 'o': 16,
 'p': 17,
 'q': 18,
 'r': 19,
 's': 20,
 't': 21,
 'u': 22,
 'v': 23,
 'w': 24,
 'x': 25,
 'y': 26,
 'z': 27,
 'A': 29,
 'B': 30,
 'C': 31,
 'D': 32,
 'E': 33,
 'F': 34,
 'G': 35,
 'H': 36,
 'I': 37,
 'J': 38,
 'K': 39,
 'L': 40,
 'M': 41,
 'N': 42,
 'O': 43,
 'P': 44,
 'Q': 45,
 'R': 46,
 'S': 47,
 'T': 48,
 'U': 49,
 'V': 50,
 'W': 51,
 'X': 52,
 'Y': 53,
 'Z': 54,
 'sin': 55,
 'cos': 56,
 'sec': 57,
 'csc': 58,
 'tan': 59,
 'cot': 60,
 'log': 61,
 'exp': 62,
 'sqrt': 63,
 'sinh': 64,
 'cosh': 65,
 'sech': 66,
 'csch': 67,
 'tanh': 68,
 'coth': 69,
 'atan': 70,
 'asin': 71,
 'acos': 72,
 'asinh': 73,
 'acosh': 74,
 'atanh': 75,
 'acoth': 76,
 'asech': 77,
 'acsch': 78,
 '+': 79,
 '-': 80,
 '/': 81,
 '*': 82,
 '^': 83,
 '~': 84}

In [47]:
input_tree

['/', '+', 'cot', 'x', '*', '-1', 'tan', 'x', '+', '1', 'cos', '*', '4', 'x']

In [8]:
# The token id is only one of the node features; more features can be added later.
x_test = [node_feature(a) for a in input_tree]
x_test

[[81, 1, 7],
 [79, 1, 7],
 [60, 1, 3],
 [25, 0, 2],
 [82, 1, 7],
 [1, 0, 1],
 [59, 1, 3],
 [25, 0, 2],
 [79, 1, 7],
 [1, 0, 1],
 [56, 1, 3],
 [82, 1, 7],
 [1, 0, 1],
 [25, 0, 2]]

### Creating the Graph Dataset:

In [1]:
from torch_geometric.data import InMemoryDataset, Dataset, Data
import torch_geometric
import torch
from torch import nn
import os

print(torch_geometric.__version__)

2.6.1


In [9]:
# NOTE(review): absolute, machine-specific path; adjust it when running on another machine.
# The file is opened in a `with` block so the handle is closed after parsing.
with open("D:/Study/Intelligence And Learning/Reinforcement Learning/Project 1/deployment/src/Equation model/data/raw/linear_numeric1.json") as raw_file:
    data = json.load(raw_file)

In [10]:
eq = list(data.values())[0]

In [11]:
list(data.keys())[0]

'90 + 41x ~ 77'

In [12]:
# Every entry of `eq` has three items; presumably [parent, child, token] (TODO confirm).
# Entries whose first item is -1 contribute no edge.
edge_index = torch.tensor([[src, dst] for src, dst, _ in eq if src != -1])
# One feature row per entry, built from the entry's third item.
nodes = torch.tensor([node_feature(entry[2]) for entry in eq], dtype=torch.float32)
edge_index

tensor([[0, 1],
        [1, 2],
        [1, 3],
        [3, 4],
        [3, 5],
        [0, 6]])

In [13]:
d = Data(x=nodes, edge_index=edge_index)
d.is_directed()

True

In [3]:
class MyOwnDataset(Dataset):
    """Dataset of equation graphs built from ``linear_numeric1.json``.

    The raw JSON maps an equation string to a list of three-item entries.
    ``process`` converts every equation into a ``Data`` object and stores the
    whole list in ``processed_data.pt`` inside ``processed_dir``.
    """

    def __init__(self, root, transform=None, pre_transform=None, pre_filter=None):
        super().__init__(root, transform, pre_transform, pre_filter)
        # Raw mapping {equation string: entries}; `len` is derived from it.
        self.data = self._read_raw()

    @property
    def raw_file_names(self):
        return "linear_numeric1.json"

    @property
    def processed_file_names(self):
        return 'processed_data.pt'

    def download(self):
        # Nothing to download: the raw file is expected to be present already.
        pass

    def _read_raw(self):
        """Parse the raw JSON file, closing the file handle afterwards."""
        with open(self.raw_paths[0]) as raw_file:
            return json.load(raw_file)

    def process(self):
        """Convert every raw equation into a ``Data`` object and save the list."""
        self.data = self._read_raw()

        # NOTE: edge_index is stored with one [a, b] pair per row, i.e. shape
        # (num_edges, 2). Code in this notebook transposes it with `.T`
        # before passing it to the model, so the layout is kept as is.
        eqs = [
            Data(
                edge_index=torch.tensor([[a, b] for [a, b, c] in value if a != -1]),
                x=torch.tensor([node_feature(entry[2]) for entry in value], dtype=torch.float32),
                eq=key,
            )
            for key, value in self.data.items()
        ]

        torch.save(eqs, os.path.join(self.processed_dir, "processed_data.pt"))

    def len(self):
        """Number of equations in the raw JSON mapping."""
        return len(self.data)

    def get(self, idx):
        """Return the ``idx``-th processed graph.

        NOTE(review): the whole processed file is re-read on every call.
        """
        data = torch.load(os.path.join(self.processed_dir, 'processed_data.pt'), weights_only=False)
        return data[idx]

In [4]:
yup = MyOwnDataset("data/")

In [16]:
data = torch.load(os.path.join(yup.processed_dir, f'processed_data.pt'), weights_only=False)

In [20]:
data[10].x[0]

tensor([84.,  1.,  7.])

In [112]:
data[10].eq

'-88 - 16x ~ -83'

In [5]:
from torch_geometric.loader import DataLoader

In [6]:
data_loader = DataLoader(yup, batch_size=1, shuffle=True)


# When trees are passed as a batch, the model only returns one action per pass, so the same batch (the same trees) has to be passed again and again until the process terminates for a tree.
# The process may terminate for one tree but not for another; how should that be handled in batch processing?

### GNN Model:

In [17]:
import torch.nn.functional as F 
from torch.nn import Linear, BatchNorm1d, ModuleList
from torch_geometric.nn import TransformerConv, TopKPooling 
from torch_geometric.nn import global_mean_pool as gap, global_max_pool as gmp
torch.manual_seed(42)

<torch._C.Generator at 0x21903734a10>

In [45]:
torch_geometric.__version__

'2.6.1'

In [18]:
class GNN(nn.Module):
    """Graph-level network: stacked TransformerConv blocks followed by an MLP head.

    Every block is TransformerConv -> Linear -> ReLU -> BatchNorm1d. The
    attention heads of a TransformerConv are concatenated, so its output has
    ``embedding_size * n_heads`` channels; the Linear layer maps that back to
    ``embedding_size``.

    TopK pooling is deliberately not used: the graphs in this dataset are
    already small, so dropping nodes would lose information.

    Args:
        feature_size: Number of input features per node.
        model_params: Dict with the keys ``model_embedding_size``,
            ``model_attention_heads``, ``model_layers``,
            ``model_dropout_rate``, ``model_top_k_ratio``,
            ``model_top_k_every_n``, ``model_dense_neurons`` and
            ``model_edge_dim``.
        num_output: Width of the final linear layer.
    """

    def __init__(self, feature_size, model_params, num_output):
        super().__init__()
        hidden = model_params["model_embedding_size"]
        heads = model_params["model_attention_heads"]
        self.n_layers = model_params["model_layers"]
        attn_dropout = model_params["model_dropout_rate"]
        # Read for parity with the parameter dict; unused while pooling is disabled.
        top_k_ratio = model_params["model_top_k_ratio"]
        self.top_k_every_n = model_params["model_top_k_every_n"]
        dense = model_params["model_dense_neurons"]
        edge_dim = model_params["model_edge_dim"]

        def attention_conv(in_channels):
            # Multi-head attention convolution producing hidden * heads channels.
            return TransformerConv(in_channels, hidden, heads=heads,
                                   dropout=attn_dropout, edge_dim=edge_dim, beta=True)

        # Containers are registered first, then the input block, then the
        # stacked blocks, then the head (same creation order as before, so
        # seeded initialisation and state_dict ordering do not change).
        self.conv_layers = ModuleList([])
        self.transf_layers = ModuleList([])
        self.bn_layers = ModuleList([])

        # Input block: feature_size -> hidden
        self.conv1 = attention_conv(feature_size)
        self.transf1 = Linear(hidden * heads, hidden)
        self.bn1 = BatchNorm1d(hidden)

        # Stacked blocks: hidden -> hidden
        for _ in range(self.n_layers):
            self.conv_layers.append(attention_conv(hidden))
            self.transf_layers.append(Linear(hidden * heads, hidden))
            self.bn_layers.append(BatchNorm1d(hidden))

        # MLP head; its input is max-pool and mean-pool concatenated (2 * hidden).
        half_dense = int(dense / 2)
        self.linear1 = Linear(hidden * 2, dense)
        self.linear2 = Linear(dense, half_dense)
        self.linear3 = Linear(half_dense, num_output)

    def forward(self, x, edge_attr, edge_index, batch_index):
        """Run the network.

        Note the argument order: ``edge_attr`` comes before ``edge_index``.

        Args:
            x: Node feature matrix.
            edge_attr: Edge features, or ``None``.
            edge_index: Edge index passed straight to TransformerConv.
            batch_index: Graph assignment of every node, used by the global pools.

        Returns:
            Output of the final linear layer (``num_output`` columns).
        """
        # Input block
        x = self.conv1(x, edge_index, edge_attr)
        x = self.bn1(torch.relu(self.transf1(x)))

        # Running sum of the per-block graph summaries (starts from 0, exactly
        # like summing a list of them).
        pooled_sum = 0
        for depth in range(self.n_layers):
            x = self.conv_layers[depth](x, edge_index, edge_attr)
            x = self.bn_layers[depth](torch.relu(self.transf_layers[depth](x)))

            # Graph summary after this block: global max pool and global mean pool.
            summary = torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1)
            pooled_sum = pooled_sum + summary

        # Output block
        x = torch.relu(self.linear1(pooled_sum))
        x = F.dropout(x, p=0.8, training=self.training)
        x = torch.relu(self.linear2(x))
        x = F.dropout(x, p=0.8, training=self.training)
        return self.linear3(x)

In [19]:
model = GNN(3,{
    "model_embedding_size":16,
    "model_attention_heads":2,
    "model_layers":2,
    "model_dropout_rate":0.3,
    "model_top_k_ratio":2,
    "model_top_k_every_n":2,
    "model_dense_neurons":16,
    "model_edge_dim":None,
} ,2)

In [23]:
model

GNN(
  (conv_layers): ModuleList(
    (0-1): 2 x TransformerConv(16, 16, heads=2)
  )
  (transf_layers): ModuleList(
    (0-1): 2 x Linear(in_features=32, out_features=16, bias=True)
  )
  (bn_layers): ModuleList(
    (0-1): 2 x BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  )
  (conv1): TransformerConv(3, 16, heads=2)
  (transf1): Linear(in_features=32, out_features=16, bias=True)
  (bn1): BatchNorm1d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (linear1): Linear(in_features=32, out_features=16, bias=True)
  (linear2): Linear(in_features=16, out_features=8, bias=True)
  (linear3): Linear(in_features=8, out_features=2, bias=True)
)

In [50]:
# Load the first graph once instead of re-reading the processed file for every attribute.
sample = yup.get(0)
# Single graph: every node belongs to graph 0. The length follows the number
# of rows in the node feature matrix rather than a hard-coded 7.
batch = torch.zeros(sample.x.size(0), dtype=torch.long)
model(sample.x, sample.edge_attr, sample.edge_index.T, batch)

tensor([[-0.5888, -1.0927]], grad_fn=<AddmmBackward0>)

In [19]:
model_params = {
    "model_embedding_size":16,
    "model_attention_heads":2,
    "model_layers":2,
    "model_dropout_rate":0.3,
    "model_top_k_ratio":2,
    "model_top_k_every_n":2,
    "model_dense_neurons":16,
    "model_edge_dim":None,
}
conv1 = TransformerConv(3, model_params["model_embedding_size"], heads=model_params["model_attention_heads"], dropout=model_params["model_dropout_rate"], edge_dim=model_params["model_edge_dim"], beta=True)

In [72]:
criterion = nn.BCEWithLogitsLoss()  # best for multi-label binary classification
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [None]:
actions = []
for d in data_loader:
    actions.append(model(d.x, d.edge_attr, d.edge_index.T,d.batch))

In [None]:
# Loss for just 1 input & 1 output; in the actual setup the loss should come from a reinforcement-learning program.
xmx = iter(data_loader)
# Use a separate name for the batch so the dataset object `yup` is not
# overwritten; other cells still call `yup.get(0)` on the dataset.
sample_batch = next(xmx)
outputs = model(sample_batch.x, sample_batch.edge_attr, sample_batch.edge_index.T, sample_batch.batch)
sample_batch.eq

In [77]:
actual = torch.tensor([[1,0]],dtype=torch.float32)
outputs, actual

(tensor([[1.0510, 1.0634]], grad_fn=<AddmmBackward0>), tensor([[1., 0.]]))

In [78]:
# backward() accumulates into .grad, so clear gradients left over from any
# earlier step before computing new ones.
optimizer.zero_grad()
loss = criterion(outputs, actual)
loss.backward()
optimizer.step()

In [79]:
loss

tensor(0.8299, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)

In [None]:
# Making of model
embedding_size = model_params["model_embedding_size"]
n_heads = model_params["model_attention_heads"]
n_layers = model_params["model_layers"]
dropout_rate = model_params["model_dropout_rate"]
top_k_ratio = model_params["model_top_k_ratio"]
top_k_every_n = model_params["model_top_k_every_n"]
dense_neurons = model_params["model_dense_neurons"]
edge_dim = model_params["model_edge_dim"]

conv_layers = ModuleList([])
transf_layers = ModuleList([])
# pooling_layers = ModuleList([])   # I don't think that this dataset requires pooling layers, as my graph is not that much big, it's size is already very small, so this will lead to losing information
bn_layers = ModuleList([])

# Transformer layers (Convolving by making multi head attention, which has to be concatenated together, so size conv1 = (feature_size, n_heads*embedding_size))
conv1 = TransformerConv(3, embedding_size, heads=n_heads, dropout=dropout_rate, edge_dim=edge_dim, beta=True)

transf1 = Linear(embedding_size*n_heads, embedding_size)
bn1 = BatchNorm1d(embedding_size)

# Other layers
for i in range(n_layers):
    conv_layers.append(TransformerConv(embedding_size, 
                                            embedding_size, 
                                            heads=n_heads, 
                                            dropout=dropout_rate,
                                            edge_dim=edge_dim,
                                            beta=True))

    transf_layers.append(Linear(embedding_size*n_heads, embedding_size))
    bn_layers.append(BatchNorm1d(embedding_size))  # do batch normalization
    # if i % top_k_every_n == 0:
    #     # Dropout some node after some layers to reduce the size of graphs
    #     pooling_layers.append(TopKPooling(embedding_size, ratio=top_k_ratio))

    # Linear layers
linear1 = Linear(embedding_size*2, dense_neurons)
linear2 = Linear(dense_neurons, int(dense_neurons/2))  
linear3 = Linear(int(dense_neurons/2), 2)  

In [None]:
x,edge_attr,edge_index,batch_index=  yup.get(0).x, yup.get(0).edge_attr, yup.get(0).edge_index.T, 0

In [37]:
# Initial transformation
x = conv1(x, edge_index, edge_attr)
x = torch.relu(transf1(x))
x = bn1(x)

In [None]:
# Holds the intermediate graph representations
global_representation = []

for i in range(n_layers):
    x = conv_layers[i](x, edge_index, edge_attr)
    x = torch.relu(transf_layers[i](x))
    x = bn_layers[i](x)

    # Always aggregate last layer
    # if i == n_layers:
        # Add current representation (global max and mean pooling)
    global_representation.append(torch.cat([gmp(x, batch_index), gap(x, batch_index)], dim=1))

In [29]:
x = sum(global_representation)
# Output block
x = torch.relu(linear1(x))
x = F.dropout(x, p=0.8, training=training)
x = torch.relu(linear2(x))
x = F.dropout(x, p=0.8, training=training)
x = linear3(x)
x

0

In [None]:
model(yup.get(0).x, yup.get(0).edge_attr, yup.get(0).edge_index.T, 0)

## <center> Building the reinforcement environment: </center>

In [7]:
from juliacall import Main as jl
jl.seval("using SymbolicUtils")



Detected IPython. Loading juliacall extension. See https://juliapy.github.io/PythonCall.jl/stable/compat/#IPython


In [None]:
# jl.seval("""
#     include("../integral.jl")
#     include("../visualizer.jl")
#     include("../Symbolics_func.jl")
#     include("../Our_rules.jl")

#     @variables x y z t
# """)

[ Info: Precompiling Symbolics [0c5d862f-8b57-4792-8d23-62f2024744c7] (cache misses: wrong dep version loaded (6), incompatible header (4))
[ Info: Precompiling SciMLBasePythonCallExt [2797fd30-2078-5027-980c-4c2c8a19c528] 


4-element Vector{Num}:
 x
 y
 z
 t



In [33]:
jl.seval('include("linear_model.jl")')

linear_transport (generic function with 6 methods)



In [8]:
# Loss for just 1 input & 1 output; in the actual setup the loss should come from a reinforcement-learning program.
xmx = iter(data_loader)
yup = next(xmx)
yup.eq

['-61 - 125x ~ 103']

In [9]:
# Stand-in for the action the model takes: a nested list of plain ints.
actual = torch.tensor([[1, 0]], dtype=torch.float32).int().tolist()

In [10]:
yup.eq[0],actual[0][0],actual[0][1]

('-61 - 125x ~ 103', 1, 0)

In [None]:
D:/Study/Intelligence And Learning/Reinforcement Learning/Project 1/SymbolicModelsUtils.jl

In [None]:
print("yu")

In [None]:
jl.seval(f"""
         @variables x
         typeof({yup.eq[0]})""")

JuliaError: UndefVarError: `x` not defined in `Main`
Stacktrace:
 [1] top-level scope
   @ none:1
 [2] eval
   @ .\boot.jl:430 [inlined]
 [3] eval
   @ .\Base.jl:130 [inlined]
 [4] pyjlmodule_seval(self::Module, expr::Py)
   @ PythonCall.JlWrap C:\Users\mohit\.julia\packages\PythonCall\Nr75f\src\JlWrap\module.jl:13
 [5] _pyjl_callmethod(f::Any, self_::Ptr{PythonCall.C.PyObject}, args_::Ptr{PythonCall.C.PyObject}, nargs::Int64)
   @ PythonCall.JlWrap C:\Users\mohit\.julia\packages\PythonCall\Nr75f\src\JlWrap\base.jl:67
 [6] _pyjl_callmethod(o::Ptr{PythonCall.C.PyObject}, args::Ptr{PythonCall.C.PyObject})
   @ PythonCall.JlWrap.Cjl C:\Users\mohit\.julia\packages\PythonCall\Nr75f\src\JlWrap\C.jl:63

In [34]:
# This returns the modified expression tree, which has to be fed back to the model so that it can again give the action and node.
jl.seval(f"linear_transport({yup.eq[0]}, {actual[0][0]}, {actual[0][1]})")

JuliaError: MethodError: no method matching *(::Int64, ::Nothing)
The function `*` exists, but no method is defined for this combination of argument types.

Closest candidates are:
  *(::Any, ::Any, !Matched::Any, !Matched::Any...)
   @ Base operators.jl:596
  *(!Matched::Differential, ::Any)
   @ Symbolics C:\Users\mohit\.julia\packages\Symbolics\kQzvO\src\diff.jl:56
  *(::Any, !Matched::Differential)
   @ Symbolics C:\Users\mohit\.julia\packages\Symbolics\kQzvO\src\diff.jl:55
  ...

Stacktrace:
 [1] linear_transport(expr_tree::Vector{Tuple{typeof(+), AbstractVector{Any}}}, side::Int64, node::Int64)
   @ Main d:\Study\Intelligence And Learning\Reinforcement Learning\Project 1\deployment\src\Equation model\linear_model.jl:26
 [2] linear_transport(expr::Equation, side::Int64, node::Int64)
   @ Main d:\Study\Intelligence And Learning\Reinforcement Learning\Project 1\deployment\src\Equation model\linear_model.jl:48
 [3] top-level scope
   @ none:1
 [4] eval
   @ .\boot.jl:430 [inlined]
 [5] eval
   @ .\Base.jl:130 [inlined]
 [6] pyjlmodule_seval(self::Module, expr::Py)
   @ PythonCall.JlWrap C:\Users\mohit\.julia\packages\PythonCall\Nr75f\src\JlWrap\module.jl:13
 [7] _pyjl_callmethod(f::Any, self_::Ptr{PythonCall.C.PyObject}, args_::Ptr{PythonCall.C.PyObject}, nargs::Int64)
   @ PythonCall.JlWrap C:\Users\mohit\.julia\packages\PythonCall\Nr75f\src\JlWrap\base.jl:67
 [8] _pyjl_callmethod(o::Ptr{PythonCall.C.PyObject}, args::Ptr{PythonCall.C.PyObject})
   @ PythonCall.JlWrap.Cjl C:\Users\mohit\.julia\packages\PythonCall\Nr75f\src\JlWrap\C.jl:63

In [26]:
actual[0][1]

0