# test model

In [1]:
# from POMO import OPEnv
from POMO import OPModel
import OPProblemDef as OP 
import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# Draw one random batch of OP instances. The two arguments are assumed to be
# (batch_size, problem_size) — TODO confirm against OPProblemDef.
depot_xy, node_xy, prize = OP.get_random_problems(2, 3)

In [3]:
# Hyper-parameters passed as keyword arguments to every OPModel class below.
env_params = {
    'problem_size': 3,
    'pomo_size': 3,
    'embedding_dim': 6,
    'encoder_layer_num': 1,
    'head_num': 2,
    'qkv_dim': 3,
    'ff_hidden_dim': 512,
}

# Read the shared sizes back from env_params so the notebook has a single
# source of truth instead of duplicated literals that can drift apart.
problem_size = env_params['problem_size']
head_num = env_params['head_num']
embedding_dim = env_params['embedding_dim']
pomo_size = env_params['pomo_size']
batch_size = 2  # not a model hyper-parameter, so it is not part of env_params

# One node index per (batch, pomo) slot. Indices run over 1..problem_size:
# the upper bound is set by how many nodes exist, not by how many POMO
# roll-outs there are (the two only coincide here because both are 3).
node_index_to_pick = torch.randint(1, problem_size + 1, (batch_size, pomo_size))
node_index_to_pick

tensor([[1, 2, 3],
        [1, 3, 2]])

In [4]:
# Build the full model plus separately constructed encoder, decoder and
# encoder-layer objects from the same hyper-parameters.
model = OPModel.OPModel(**env_params)
encoder = OPModel.OP_Encoder(**env_params)
decoder = OPModel.OP_Decoder(**env_params)
layer = OPModel.EncoderLayer(**env_params)

# Append each node's prize as a third feature next to its coordinates.
node_xy_prize = torch.cat([node_xy, prize.unsqueeze(2)], dim=2)
node_xy_prize

tensor([[[0.3388, 0.1809, 6.0000],
         [0.9768, 0.9139, 5.0000],
         [0.6260, 0.7024, 1.0000]],

        [[0.0746, 0.0228, 3.0000],
         [0.6301, 0.3931, 8.0000],
         [0.6072, 0.4737, 7.0000]]])

## encoder & encoder layer test

In [5]:
# Build the encoder's input embedding by hand: embed the depot and the
# node features separately with the encoder's own embedding modules, then
# stack them along dim 1 with the depot first.
embedded_depot = encoder.embedding_depot(depot_xy)
embedded_node = encoder.embedding_node(node_xy_prize)
out_first = torch.cat((embedded_depot, embedded_node), dim=1)
out_first


tensor([[[ 0.7555, -0.0470,  0.4505, -0.4278, -0.2636, -0.2305],
         [-2.3164, -2.1174, -2.7709, -2.2419, -0.2017,  2.3189],
         [-2.2049, -1.4295, -2.2368, -1.5166, -0.4840,  1.2320],
         [-0.5517, -0.1825, -0.0737,  0.3024, -0.6495, -0.5718]],

        [[ 1.0892, -0.4807,  0.3135, -0.4625, -0.4893,  0.0723],
         [-1.0763, -1.1824, -1.1495, -0.8783, -0.3263,  0.9665],
         [-3.1878, -2.6839, -3.8434, -3.1061, -0.1486,  3.1247],
         [-2.8128, -2.3284, -3.3367, -2.6341, -0.2409,  2.5987]]],
       grad_fn=<CatBackward>)

In [6]:
# Run the encoder by calling the module (nn.Module.__call__) rather than
# .forward() directly, so any registered hooks are honoured. Then show
# `encoder.output`, which should match the hand-built `out_first`.
encoder(depot_xy, node_xy_prize)
encoder.output

tensor([[[ 0.7555, -0.0470,  0.4505, -0.4278, -0.2636, -0.2305],
         [-2.3164, -2.1174, -2.7709, -2.2419, -0.2017,  2.3189],
         [-2.2049, -1.4295, -2.2368, -1.5166, -0.4840,  1.2320],
         [-0.5517, -0.1825, -0.0737,  0.3024, -0.6495, -0.5718]],

        [[ 1.0892, -0.4807,  0.3135, -0.4625, -0.4893,  0.0723],
         [-1.0763, -1.1824, -1.1495, -0.8783, -0.3263,  0.9665],
         [-3.1878, -2.6839, -3.8434, -3.1061, -0.1486,  3.1247],
         [-2.8128, -2.3284, -3.3367, -2.6341, -0.2409,  2.5987]]],
       grad_fn=<CloneBackward>)

In [7]:
# Project the embedding with the stand-alone layer's Wq / Wk / Wv and split
# each projection into heads; evaluation order is q, then k, then v.
q, k, v = (
    OPModel.reshape_by_heads(projection(out_first), head_num=head_num)
    for projection in (layer.Wq, layer.Wk, layer.Wv)
)
q

tensor([[[[-0.1899, -0.1847, -0.3827],
          [ 1.3745,  0.3794,  1.0963],
          [ 1.1744,  0.5815,  1.0508],
          [ 0.1374,  0.4218,  0.2831]],

         [[ 0.1644, -0.1034,  0.0605],
          [-1.7196,  1.7423, -0.2025],
          [-1.2781,  1.0751, -0.4079],
          [ 0.1488, -0.2609, -0.2219]]],


        [[[-0.2222, -0.2559, -0.4364],
          [ 0.5971,  0.2595,  0.5206],
          [ 1.8984,  0.4943,  1.5002],
          [ 1.6581,  0.4879,  1.3324]],

         [[ 0.2075,  0.1699,  0.1980],
          [-0.6499,  0.7409, -0.0632],
          [-2.4161,  2.3564, -0.3299],
          [-2.0630,  1.9948, -0.3254]]]], grad_fn=<TransposeBackward0>)

In [8]:
# Call the layer as a module (same call style as the loop over
# `encoder.layers`) instead of invoking .forward() directly, then show the
# `qclone` attribute for comparison with the hand-computed `q`.
layer(encoder.output)
layer.qclone

tensor([[[[-0.1899, -0.1847, -0.3827],
          [ 1.3745,  0.3794,  1.0963],
          [ 1.1744,  0.5815,  1.0508],
          [ 0.1374,  0.4218,  0.2831]],

         [[ 0.1644, -0.1034,  0.0605],
          [-1.7196,  1.7423, -0.2025],
          [-1.2781,  1.0751, -0.4079],
          [ 0.1488, -0.2609, -0.2219]]],


        [[[-0.2222, -0.2559, -0.4364],
          [ 0.5971,  0.2595,  0.5206],
          [ 1.8984,  0.4943,  1.5002],
          [ 1.6581,  0.4879,  1.3324]],

         [[ 0.2075,  0.1699,  0.1980],
          [-0.6499,  0.7409, -0.0632],
          [-2.4161,  2.3564, -0.3299],
          [-2.0630,  1.9948, -0.3254]]]], grad_fn=<CloneBackward>)

In [9]:
# Attention over the hand-built q, k, v. The result shown below has one row of
# 6 values per node — presumably the heads concatenated; TODO confirm in OPModel.
multi_out_concat = OPModel.multi_head_attention(q,k,v)
multi_out_concat

tensor([[[-0.1275, -0.7520, -0.2116,  0.8838,  0.5310, -1.1457],
         [-0.1127, -0.9562, -0.2574,  0.7736,  0.4787, -1.0108],
         [-0.1295, -0.7263, -0.2051,  0.8262,  0.5044, -1.0765],
         [-0.1501, -0.4398, -0.1419,  0.8765,  0.5182, -1.1334]],

        [[-0.1568, -1.4425, -0.5099,  1.2337,  0.9711, -1.7452],
         [-0.1653, -1.3810, -0.5040,  1.2100,  0.9536, -1.7145],
         [-0.1041, -1.8610, -0.5553,  0.9655,  0.7698, -1.3992],
         [-0.1176, -1.7500, -0.5428,  1.0209,  0.8112, -1.4707]]],
       grad_fn=<UnsafeViewBackward>)

In [10]:
# Pass the attention output through the layer's `multi_head_combine` module.
multi_head_out = layer.multi_head_combine(multi_out_concat)
multi_head_out

tensor([[[ 0.2006,  0.2907, -0.6694,  0.4338,  0.1894, -0.3933],
         [ 0.0908,  0.3160, -0.5903,  0.5149,  0.2114, -0.4005],
         [ 0.1987,  0.2750, -0.6486,  0.4231,  0.1816, -0.3739],
         [ 0.3267,  0.2220, -0.7102,  0.3086,  0.1430, -0.3355]],

        [[ 0.0784,  0.5062, -0.8357,  0.7446,  0.3003, -0.5935],
         [ 0.0979,  0.4897, -0.8329,  0.7208,  0.2890, -0.5741],
         [-0.1796,  0.5604, -0.6282,  0.9036,  0.3583, -0.6110],
         [-0.1159,  0.5440, -0.6748,  0.8613,  0.3423, -0.6024]]],
       grad_fn=<AddBackward0>)

In [11]:
# First add-and-normalisation module, given the layer input (`out_first`) and
# the attention branch (`multi_head_out`).
out1 = layer.add_n_normalization_1(out_first, multi_head_out)
out1

tensor([[[ 1.3964,  1.0797,  1.1920,  0.6096,  0.7141, -0.8170],
         [-1.0297, -1.3418, -1.1587, -1.2875,  1.1302,  1.4039],
         [-0.8625, -0.5758, -0.8028, -0.5940, -0.4166,  0.4776],
         [ 0.4958,  0.8379,  0.7694,  1.2719, -1.4277, -1.0646]],

        [[ 1.4995,  1.3637,  1.3995,  1.1881, -1.4044, -1.3276],
         [ 0.3051,  0.5260,  0.4809,  0.7684, -0.3909, -0.5772],
         [-1.0244, -1.1426, -1.0850, -1.1833,  1.2593,  1.1649],
         [-0.7803, -0.7471, -0.7955, -0.7732,  0.5360,  0.7399]]],
       grad_fn=<TransposeBackward0>)

In [12]:
# Feed-forward module of the layer, applied to `out1`.
out2 = layer.feed_forward(out1)
out2

tensor([[[-0.3064, -0.0495, -0.3178, -0.5758, -0.2116, -0.1240],
         [-0.0327,  0.4479, -0.0673,  0.3292,  0.0288, -0.1712],
         [-0.1008,  0.1719, -0.1633,  0.2304, -0.0434, -0.2140],
         [-0.3560,  0.0581, -0.3685, -0.4500, -0.3415, -0.2802]],

        [[-0.4701, -0.0512, -0.5074, -0.8080, -0.4498, -0.2647],
         [-0.2437,  0.0329, -0.2432, -0.2951, -0.2312, -0.1803],
         [-0.0258,  0.4075, -0.0621,  0.2584,  0.0222, -0.2232],
         [-0.0671,  0.2645, -0.0881,  0.1935, -0.0229, -0.2170]]],
       grad_fn=<AddBackward0>)

In [13]:
# Second add-and-normalisation module on `out1` and the feed-forward result
# `out2`. `out3` is the value to compare against the full layer pass.
out3 = layer.add_n_normalization_2(out1, out2)
out3

tensor([[[ 1.4628,  1.0573,  1.2432,  0.2325,  0.5848, -0.7320],
         [-0.9798, -1.2727, -1.1230, -1.3016,  1.1806,  1.4078],
         [-0.8674, -0.6794, -0.8302, -0.3820, -0.2886,  0.4538],
         [ 0.3844,  0.8948,  0.7100,  1.4511, -1.4768, -1.1296]],

        [[ 1.4924,  1.3982,  1.3485,  0.9003, -1.4212, -1.3586],
         [ 0.3190,  0.4813,  0.5588,  1.0552, -0.3812, -0.5313],
         [-1.0286, -1.0933, -1.1126, -1.2640,  1.2255,  1.1524],
         [-0.7828, -0.7862, -0.7946, -0.6915,  0.5769,  0.7375]]],
       grad_fn=<TransposeBackward0>)

In [14]:
# Full pass through the stand-alone layer, invoked by calling the module rather
# than .forward(). The result should reproduce the step-by-step `out3`.
layer(encoder.output)

tensor([[[ 1.4628,  1.0573,  1.2432,  0.2325,  0.5848, -0.7320],
         [-0.9798, -1.2727, -1.1230, -1.3016,  1.1806,  1.4078],
         [-0.8674, -0.6794, -0.8302, -0.3820, -0.2886,  0.4538],
         [ 0.3844,  0.8948,  0.7100,  1.4511, -1.4768, -1.1296]],

        [[ 1.4924,  1.3982,  1.3485,  0.9003, -1.4212, -1.3586],
         [ 0.3190,  0.4813,  0.5588,  1.0552, -0.3812, -0.5313],
         [-1.0286, -1.0933, -1.1126, -1.2640,  1.2255,  1.1524],
         [-0.7828, -0.7862, -0.7946, -0.6915,  0.5769,  0.7375]]],
       grad_fn=<TransposeBackward0>)

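# Why does the output of the cell above differ from the next two cells? Likely because `layer` (created in In [4]) is a separately constructed `EncoderLayer` with its own randomly initialised weights, whereas the next two cells run the layers owned by `encoder`.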

In [15]:
# Replay the encoder's layer stack by hand, starting from `encoder.output`.
# Each layer is fed the previous layer's result so the loop stays correct when
# encoder_layer_num > 1. The loop variable is deliberately not called `layer`,
# so the stand-alone `layer` object from the earlier cells is not overwritten.
output = encoder.output
for encoder_layer in encoder.layers:
    output = encoder_layer(output)

output

tensor([[[ 1.3521,  1.0348,  1.1514,  0.4580, -0.2703, -0.9326],
         [-1.0772, -1.3681, -1.1673, -1.3748,  1.4683,  1.3704],
         [-0.8352, -0.5412, -0.8019, -0.4017,  0.1268,  0.5414],
         [ 0.5602,  0.8745,  0.8178,  1.3186, -1.3248, -0.9793]],

        [[ 1.5184,  1.4240,  1.4832,  0.8800, -1.5461, -1.3420],
         [ 0.2641,  0.4301,  0.3432,  1.0632, -0.1901, -0.5619],
         [-1.0258, -1.1117, -0.9988, -1.2936,  1.0498,  1.1403],
         [-0.7567, -0.7424, -0.8276, -0.6496,  0.6864,  0.7636]]],
       grad_fn=<TransposeBackward0>)

In [16]:
# Whole encoder pass, invoked by calling the module (not .forward()) so module
# hooks are honoured; the displayed value is the encoder's returned tensor.
encoder(depot_xy, node_xy_prize)

tensor([[[ 1.3521,  1.0348,  1.1514,  0.4580, -0.2703, -0.9326],
         [-1.0772, -1.3681, -1.1673, -1.3748,  1.4683,  1.3704],
         [-0.8352, -0.5412, -0.8019, -0.4017,  0.1268,  0.5414],
         [ 0.5602,  0.8745,  0.8178,  1.3186, -1.3248, -0.9793]],

        [[ 1.5184,  1.4240,  1.4832,  0.8800, -1.5461, -1.3420],
         [ 0.2641,  0.4301,  0.3432,  1.0632, -0.1901, -0.5619],
         [-1.0258, -1.1117, -0.9988, -1.2936,  1.0498,  1.1403],
         [-0.7567, -0.7424, -0.8276, -0.6496,  0.6864,  0.7636]]],
       grad_fn=<TransposeBackward0>)

## Decoder set kv test

In [17]:
# decoder.__init__(**model_params)
# model.decoder.set_kv(model.encoded_nodes)

In [18]:
# Stand-alone depot embedding: a newly constructed Linear(2 -> embedding_dim),
# so its weights are independent of `encoder.embedding_depot`.
embedding_depot = nn.Linear(2, embedding_dim)
# NOTE: the name `embedded_depott` (double "t") is read by a later cell.
embedded_depott = embedding_depot(depot_xy)
embedded_depott

tensor([[[ 0.0744, -0.6503,  0.0306, -0.1368,  0.2379,  0.9080]],

        [[ 0.1665, -0.9202,  0.0944, -0.2593,  0.4848,  1.2750]]],
       grad_fn=<AddBackward0>)

In [19]:
# Show the layer's configuration (input/output feature counts and bias flag).
embedding_depot

Linear(in_features=2, out_features=6, bias=True)

In [20]:
# Stand-alone node embedding: a newly constructed Linear(3 -> embedding_dim)
# applied to the concatenated (node_xy, prize) features.
embedding_node = nn.Linear(3, embedding_dim)
# NOTE: the name `eembedded_node` (double "e") is read by a later cell.
eembedded_node = embedding_node(node_xy_prize)
eembedded_node

tensor([[[ 1.9004, -0.8712,  2.2221,  2.8133, -1.0249,  1.8653],
         [ 1.1746, -0.8182,  2.3426,  2.2107, -0.7013,  1.6274],
         [ 0.2805, -0.3303,  0.8675,  0.6956, -0.4095,  0.4266]],

        [[ 1.2299, -0.5047,  1.1158,  1.6769, -0.8067,  0.9642],
         [ 2.2761, -1.1309,  3.0290,  3.5392, -1.1286,  2.4807],
         [ 1.9782, -0.9996,  2.7332,  3.1270, -1.0403,  2.1692]]],
       grad_fn=<AddBackward0>)

In [21]:
# Stack the depot embedding in front of the node embeddings along dim 1, so
# row 0 of every batch entry is the depot.
encoded_nodes = torch.cat((embedded_depott, eembedded_node), dim=1)
encoded_nodes
# shape: (batch, problem+1, embedding)

tensor([[[ 0.0744, -0.6503,  0.0306, -0.1368,  0.2379,  0.9080],
         [ 1.9004, -0.8712,  2.2221,  2.8133, -1.0249,  1.8653],
         [ 1.1746, -0.8182,  2.3426,  2.2107, -0.7013,  1.6274],
         [ 0.2805, -0.3303,  0.8675,  0.6956, -0.4095,  0.4266]],

        [[ 0.1665, -0.9202,  0.0944, -0.2593,  0.4848,  1.2750],
         [ 1.2299, -0.5047,  1.1158,  1.6769, -0.8067,  0.9642],
         [ 2.2761, -1.1309,  3.0290,  3.5392, -1.1286,  2.4807],
         [ 1.9782, -0.9996,  2.7332,  3.1270, -1.0403,  2.1692]]],
       grad_fn=<CatBackward>)

In [22]:
# Confirm the stacked shape: (batch, problem + 1, embedding).
encoded_nodes.shape

torch.Size([2, 4, 6])

In [23]:
# Re-derive the working sizes from the tensors themselves: the first two axes
# of the index tensor and the last axis of the encoded nodes.
batch_size, pomo_size = node_index_to_pick.shape[:2]
embedding_dim = encoded_nodes.shape[2]
node_index_to_pick

tensor([[1, 2, 3],
        [1, 3, 2]])

In [24]:
# Repeat every picked node index across the embedding axis so the result can be
# used as a gather index over dim 1 of `encoded_nodes`.
gathering_index = node_index_to_pick.unsqueeze(2).expand(batch_size, pomo_size, embedding_dim)
gathering_index

tensor([[[1, 1, 1, 1, 1, 1],
         [2, 2, 2, 2, 2, 2],
         [3, 3, 3, 3, 3, 3]],

        [[1, 1, 1, 1, 1, 1],
         [3, 3, 3, 3, 3, 3],
         [2, 2, 2, 2, 2, 2]]])

In [25]:
# For each (batch, pomo) slot, pull out the embedding row that
# `gathering_index` names along dim 1.
picked_nodes = torch.gather(encoded_nodes, 1, gathering_index)
picked_nodes

tensor([[[ 1.9004, -0.8712,  2.2221,  2.8133, -1.0249,  1.8653],
         [ 1.1746, -0.8182,  2.3426,  2.2107, -0.7013,  1.6274],
         [ 0.2805, -0.3303,  0.8675,  0.6956, -0.4095,  0.4266]],

        [[ 1.2299, -0.5047,  1.1158,  1.6769, -0.8067,  0.9642],
         [ 1.9782, -0.9996,  2.7332,  3.1270, -1.0403,  2.1692],
         [ 2.2761, -1.1309,  3.0290,  3.5392, -1.1286,  2.4807]]],
       grad_fn=<GatherBackward>)

In [26]:
# Every batch row receives the same index sequence 1..pomo_size.
selected = torch.arange(1, pomo_size + 1).unsqueeze(0).expand(batch_size, pomo_size)
selected

tensor([[1, 2, 3],
        [1, 2, 3]])

In [27]:
import torch

# Scratch demo: split the last axis of a (2, 3, 8) tensor into heads.
qkv = torch.randn(2, 3, 8)
batch_s, n = qkv.shape[:2]
head_num = 2

# Passing -1 lets PyTorch infer the per-head width (8 / head_num = 4).
q_reshaped = qkv.reshape(batch_s, n, head_num, -1)

print("Original tensor shape:", qkv.shape)
print("Reshaped tensor shape:", q_reshaped.shape)


Original tensor shape: torch.Size([2, 3, 8])
Reshaped tensor shape: torch.Size([2, 3, 2, 4])


In [28]:
# Original (2, 3, 8) tensor, shown for comparison with the reshaped version.
qkv

tensor([[[ 1.6614,  0.1773, -2.3643,  0.3933, -0.9040, -1.9822,  0.8090,
          -0.8590],
         [ 0.0983,  1.6852,  1.2493, -0.8066,  0.0875, -0.9733,  0.7543,
          -0.0772],
         [-0.9903, -0.0448,  0.7174, -0.8195, -0.1929, -0.9320, -2.2059,
           0.5364]],

        [[-0.2947,  0.0473,  0.4203,  1.0884, -0.8996, -1.3518, -0.0557,
          -0.3800],
         [ 0.7307, -2.4862, -0.2487,  0.3316,  1.2053, -0.3663,  1.7566,
           0.6264],
         [-2.3257,  0.3481,  0.6272, -0.8287,  1.7677,  0.2287,  0.1635,
          -0.4741]]])

In [29]:
# Same values as `qkv`: each length-8 row is split into 2 consecutive chunks of 4.
q_reshaped

tensor([[[[ 1.6614,  0.1773, -2.3643,  0.3933],
          [-0.9040, -1.9822,  0.8090, -0.8590]],

         [[ 0.0983,  1.6852,  1.2493, -0.8066],
          [ 0.0875, -0.9733,  0.7543, -0.0772]],

         [[-0.9903, -0.0448,  0.7174, -0.8195],
          [-0.1929, -0.9320, -2.2059,  0.5364]]],


        [[[-0.2947,  0.0473,  0.4203,  1.0884],
          [-0.8996, -1.3518, -0.0557, -0.3800]],

         [[ 0.7307, -2.4862, -0.2487,  0.3316],
          [ 1.2053, -0.3663,  1.7566,  0.6264]],

         [[-2.3257,  0.3481,  0.6272, -0.8287],
          [ 1.7677,  0.2287,  0.1635, -0.4741]]]])

In [30]:
import torch
import torch.nn as nn

# Random sample tensor of shape (2, 3, 2, 5), used below to compare softmax
# taken over dim 2 with softmax taken over dim 3.
score_scaled = torch.randn(2, 3, 2, 5)  

In [31]:

# dim=2 normalises across the axis of length 2: in the printout, the two
# values stacked in the same column of each pair sum to 1.
softmax = nn.Softmax(dim=2)
weights = softmax(score_scaled)

# dim=3 normalises across the last axis of length 5: each printed row sums to 1.
softmax3 = nn.Softmax(dim=3)
weights3 = softmax3(score_scaled)

weights

tensor([[[[0.4972, 0.3680, 0.7126, 0.8183, 0.1567],
          [0.5028, 0.6320, 0.2874, 0.1817, 0.8433]],

         [[0.3735, 0.1331, 0.8548, 0.4177, 0.6178],
          [0.6265, 0.8669, 0.1452, 0.5823, 0.3822]],

         [[0.4067, 0.6479, 0.7120, 0.8668, 0.3618],
          [0.5933, 0.3521, 0.2880, 0.1332, 0.6382]]],


        [[[0.0961, 0.7148, 0.1431, 0.6722, 0.1692],
          [0.9039, 0.2852, 0.8569, 0.3278, 0.8308]],

         [[0.3202, 0.2017, 0.5763, 0.8304, 0.6945],
          [0.6798, 0.7983, 0.4237, 0.1696, 0.3055]],

         [[0.8382, 0.8874, 0.0959, 0.3761, 0.4487],
          [0.1618, 0.1126, 0.9041, 0.6239, 0.5513]]]])

In [32]:
# Softmax over dim=3: every row of 5 values sums to 1.
weights3    

tensor([[[[0.1562, 0.1346, 0.4731, 0.2068, 0.0293],
          [0.2015, 0.2951, 0.2434, 0.0586, 0.2015]],

         [[0.2863, 0.0715, 0.3823, 0.1425, 0.1173],
          [0.3745, 0.3634, 0.0506, 0.1549, 0.0566]],

         [[0.2430, 0.0501, 0.4155, 0.2405, 0.0509],
          [0.5239, 0.0402, 0.2484, 0.0546, 0.1328]]],


        [[[0.0945, 0.3506, 0.0707, 0.4263, 0.0579],
          [0.4573, 0.0719, 0.2177, 0.1069, 0.1461]],

         [[0.1185, 0.0328, 0.2193, 0.4847, 0.1447],
          [0.3567, 0.1842, 0.2285, 0.1403, 0.0903]],

         [[0.3200, 0.4306, 0.0251, 0.0988, 0.1255],
          [0.0920, 0.0814, 0.3526, 0.2443, 0.2298]]]])

In [33]:
# Raw scores from which both softmax variants above were computed.
score_scaled

tensor([[[[-0.1956, -0.3440,  0.9126,  0.0849, -1.8680],
          [-0.1846,  0.1969,  0.0045, -1.4200, -0.1847]],

         [[ 0.2441, -1.1426,  0.5334, -0.4535, -0.6479],
          [ 0.7615,  0.7314, -1.2391, -0.1212, -1.1281]],

         [[ 0.8197, -0.7600,  1.3562,  0.8093, -0.7425],
          [ 1.1974, -1.3699,  0.4510, -1.0633, -0.1747]]],


        [[[-0.7472,  0.5636, -1.0379,  0.7592, -1.2381],
          [ 1.4944, -0.3554,  0.7523,  0.0410,  0.3534]],

         [[-0.1279, -1.4122,  0.4874,  1.2804,  0.0718],
          [ 0.6249, -0.0363,  0.1797, -0.3083, -0.7493]],

         [[ 1.1031,  1.4000, -1.4427, -0.0717,  0.1672],
          [-0.5417, -0.6647,  0.8013,  0.4343,  0.3732]]]])

In [34]:
import torch
import torch.nn as nn

# Create a sample tensor
tensor = torch.randn(2, 3, 4)  # Tensor of shape (2, 3, 4)

# Sum along dimension 0: the leading axis of size 2 is reduced away, leaving
# a (3, 4) result.
sum_result = torch.sum(tensor, dim=0)
assert sum_result.shape == (3, 4), "dim=0 reduction should leave shape (3, 4)"

print(sum_result)


tensor([[-0.1524, -1.7431, -1.6052, -0.8216],
        [ 0.3264, -3.0486, -0.3137, -2.6651],
        [ 4.3786,  0.7942,  0.7758, -1.8524]])


In [35]:
import torch
import torch.nn as nn
# Scratch demo of InstanceNorm1d with 5 channels and learnable affine
# parameters: each (sample, channel) row of length 4 is normalised on its own.
# NOTE: `input` shadows the Python builtin, and `output` reuses a name that an
# earlier cell also assigns; both names are kept because other cells read them.
m = nn.InstanceNorm1d(num_features=5, affine=True)
input = torch.randn((2, 5, 4))
output = m(input)
input

tensor([[[ 0.0783, -1.0028,  0.8775,  1.3500],
         [-0.6401, -0.2642,  2.3366,  1.2663],
         [ 1.5993, -0.4154,  1.3792,  0.7551],
         [ 2.4278, -1.0157, -1.4257,  0.4554],
         [-3.1709, -0.4085, -0.8739,  0.2029]],

        [[ 1.4407, -0.3782,  0.9619, -1.2570],
         [ 0.6438,  0.7857, -0.0149, -2.8753],
         [-0.7831,  0.5508,  0.1061,  0.9683],
         [ 0.7241,  0.2215,  1.4741,  0.8375],
         [ 1.6220, -0.3577,  0.2856, -0.8859]]])

In [36]:
# Normalised result: each length-4 row now has approximately zero mean.
output

tensor([[[-0.2775, -1.4901,  0.6188,  1.1488],
         [-1.0992, -0.7850,  1.3895,  0.4947],
         [ 0.9835, -1.5907,  0.7023, -0.0951],
         [ 1.5349, -0.7459, -1.0175,  0.2285],
         [-1.6526,  0.5127,  0.1479,  0.9920]],

        [[ 1.1675, -0.5330,  0.7199, -1.3545],
         [ 0.6816,  0.7774,  0.2366, -1.6956],
         [-1.5294,  0.5238, -0.1608,  1.1664],
         [-0.2022, -1.3293,  1.4796,  0.0519],
         [ 1.5532, -0.5586,  0.1276, -1.1222]]], grad_fn=<ViewBackward>)