From 787e415dd154725d37e14fe1216646a1ed9de791 Mon Sep 17 00:00:00 2001 From: Liu Yiqun Date: Thu, 25 Sep 2025 20:07:54 +0800 Subject: [PATCH] Add several paddle nlp samples. --- .../PaddleNLP/bart-base/graph_hash.txt | 1 + .../chinese-xlnet-base/graph_hash.txt | 1 + .../chinese-xlnet-large/graph_hash.txt | 1 + .../chinese-xlnet-mid/graph_hash.txt | 1 + .../PaddleNLP/electra-base/graph_hash.txt | 1 + .../PaddleNLP/electra-base/graph_net.json | 6 + .../PaddleNLP/electra-base/input_meta.py | 34 + .../PaddleNLP/electra-base/model.py | 2201 ++++++ .../PaddleNLP/electra-base/weight_meta.py | 1752 +++++ .../PaddleNLP/electra-large/graph_hash.txt | 1 + .../PaddleNLP/electra-large/graph_net.json | 6 + .../PaddleNLP/electra-large/input_meta.py | 34 + .../PaddleNLP/electra-large/model.py | 4277 +++++++++++ .../PaddleNLP/electra-large/weight_meta.py | 3456 +++++++++ .../PaddleNLP/electra-small/graph_hash.txt | 1 + .../PaddleNLP/electra-small/graph_net.json | 6 + .../PaddleNLP/electra-small/input_meta.py | 34 + .../PaddleNLP/electra-small/model.py | 2211 ++++++ .../PaddleNLP/electra-small/weight_meta.py | 1770 +++++ .../PaddleNLP/ernie-ctm/graph_hash.txt | 1 + .../PaddleNLP/ernie-ctm/graph_net.json | 6 + .../PaddleNLP/ernie-ctm/input_meta.py | 16 + paddle_samples/PaddleNLP/ernie-ctm/model.py | 2309 ++++++ .../PaddleNLP/ernie-ctm/weight_meta.py | 1824 +++++ .../PaddleNLP/ernie-gram-zh/graph_hash.txt | 1 + .../PaddleNLP/ernie-gram-zh/graph_net.json | 6 + .../PaddleNLP/ernie-gram-zh/input_meta.py | 12 + .../PaddleNLP/ernie-gram-zh/model.py | 2224 ++++++ .../PaddleNLP/ernie-gram-zh/weight_meta.py | 1770 +++++ .../ernie-health-chinese/graph_hash.txt | 1 + .../ernie-health-chinese/graph_net.json | 6 + .../ernie-health-chinese/input_meta.py | 12 + .../PaddleNLP/ernie-health-chinese/model.py | 2201 ++++++ .../ernie-health-chinese/weight_meta.py | 1752 +++++ .../graph_hash.txt | 1 + .../graph_net.json | 6 + .../input_meta.py | 12 + .../roformer_v2_chinese_char_base/model.py | 3280 +++++++++ .../weight_meta.py | 1076 +++ .../graph_hash.txt | 1 + .../graph_net.json | 6 + .../input_meta.py | 12 + .../roformer_v2_chinese_char_large/model.py | 6436 +++++++++++++++++ .../weight_meta.py | 2132 ++++++ .../graph_hash.txt | 1 + .../graph_net.json | 6 + .../input_meta.py | 12 + .../roformer_v2_chinese_char_small/model.py | 1702 +++++ .../weight_meta.py | 548 ++ .../PaddleNLP/xlnet-base-cased/graph_hash.txt | 1 + .../xlnet-large-cased/graph_hash.txt | 1 + 51 files changed, 43168 insertions(+) create mode 100644 paddle_samples/PaddleNLP/bart-base/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-base/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-large/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/chinese-xlnet-mid/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/electra-base/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/electra-base/graph_net.json create mode 100644 paddle_samples/PaddleNLP/electra-base/input_meta.py create mode 100644 paddle_samples/PaddleNLP/electra-base/model.py create mode 100644 paddle_samples/PaddleNLP/electra-base/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/electra-large/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/electra-large/graph_net.json create mode 100644 paddle_samples/PaddleNLP/electra-large/input_meta.py create mode 100644 paddle_samples/PaddleNLP/electra-large/model.py create mode 100644 paddle_samples/PaddleNLP/electra-large/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/electra-small/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/electra-small/graph_net.json create mode 100644 paddle_samples/PaddleNLP/electra-small/input_meta.py create mode 100644 paddle_samples/PaddleNLP/electra-small/model.py create mode 100644 paddle_samples/PaddleNLP/electra-small/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/ernie-ctm/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/ernie-ctm/graph_net.json create mode 100644 paddle_samples/PaddleNLP/ernie-ctm/input_meta.py create mode 100644 paddle_samples/PaddleNLP/ernie-ctm/model.py create mode 100644 paddle_samples/PaddleNLP/ernie-ctm/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/ernie-gram-zh/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/ernie-gram-zh/graph_net.json create mode 100644 paddle_samples/PaddleNLP/ernie-gram-zh/input_meta.py create mode 100644 paddle_samples/PaddleNLP/ernie-gram-zh/model.py create mode 100644 paddle_samples/PaddleNLP/ernie-gram-zh/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/ernie-health-chinese/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/ernie-health-chinese/graph_net.json create mode 100644 paddle_samples/PaddleNLP/ernie-health-chinese/input_meta.py create mode 100644 paddle_samples/PaddleNLP/ernie-health-chinese/model.py create mode 100644 paddle_samples/PaddleNLP/ernie-health-chinese/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/graph_net.json create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/input_meta.py create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/model.py create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/graph_net.json create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/input_meta.py create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/model.py create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/graph_net.json create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/input_meta.py create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/model.py create mode 100644 paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/weight_meta.py create mode 100644 paddle_samples/PaddleNLP/xlnet-base-cased/graph_hash.txt create mode 100644 paddle_samples/PaddleNLP/xlnet-large-cased/graph_hash.txt diff --git a/paddle_samples/PaddleNLP/bart-base/graph_hash.txt b/paddle_samples/PaddleNLP/bart-base/graph_hash.txt new file mode 100644 index 000000000..746cf4045 --- /dev/null +++ b/paddle_samples/PaddleNLP/bart-base/graph_hash.txt @@ -0,0 +1 @@ +752fd67d12054cfd32677bec5eae6b293b7af30f784daf6da7b29049d300361c \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-base/graph_hash.txt b/paddle_samples/PaddleNLP/chinese-xlnet-base/graph_hash.txt new file mode 100644 index 000000000..1280d338c --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-base/graph_hash.txt @@ -0,0 +1 @@ +be19706eab62425b7014788a90447aa1413bb2990334a60dad10a424ce300a9a \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-large/graph_hash.txt b/paddle_samples/PaddleNLP/chinese-xlnet-large/graph_hash.txt new file mode 100644 index 000000000..468285d7f --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-large/graph_hash.txt @@ -0,0 +1 @@ +845a39a2df9046b5673fad1785a3a216037950eb451d58aa167b8b989334fd55 \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/chinese-xlnet-mid/graph_hash.txt b/paddle_samples/PaddleNLP/chinese-xlnet-mid/graph_hash.txt new file mode 100644 index 000000000..01237658a --- /dev/null +++ b/paddle_samples/PaddleNLP/chinese-xlnet-mid/graph_hash.txt @@ -0,0 +1 @@ +e29ad28c026c0c6760a73db8bfa502afcc3f239b12ca8c2dd3ba3b258544b744 \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/electra-base/graph_hash.txt b/paddle_samples/PaddleNLP/electra-base/graph_hash.txt new file mode 100644 index 000000000..e0c9c50b7 --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-base/graph_hash.txt @@ -0,0 +1 @@ +03434c5692eadd4f05bb5feaf8ea4eb6778381fda71507383df7c9a01e702062 \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/electra-base/graph_net.json b/paddle_samples/PaddleNLP/electra-base/graph_net.json new file mode 100644 index 000000000..73824ea27 --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-base/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "electra-base", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/electra-base/input_meta.py b/paddle_samples/PaddleNLP/electra-base/input_meta.py new file mode 100644 index 000000000..fd1f7db6f --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-base/input_meta.py @@ -0,0 +1,34 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 21] + dtype = "int64" + data = [ + 101, + 7592, + 1010, + 2026, + 2171, + 2003, + 3960, + 1012, + 1045, + 2572, + 4083, + 2055, + 2312, + 2653, + 4275, + 1998, + 2037, + 4294, + 2015, + 1012, + 102, + ] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 21] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/electra-base/model.py b/paddle_samples/PaddleNLP/electra-base/model.py new file mode 100644 index 000000000..0782e04c2 --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-base/model.py @@ -0,0 +1,2201 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + data_0, + data_1, + ): + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("0"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.equal: (1x21xb) <- (1x21xi64, xi64) + equal_0 = paddle._C_ops.equal(data_0, full_0) + del full_0 + + # pd_op.cast: (1x21xf32) <- (1x21xb) + cast_0 = paddle._C_ops.cast(equal_0, paddle.float32) + del equal_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x21xf32) <- (1x21xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_1, float("0"), True) + del cast_0, full_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_0 = [1, 2] + + # pd_op.unsqueeze: (1x1x1x21xf32) <- (1x21xf32, 2xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del full_int_array_0, scale_0 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full_like: (1x21xi64) <- (1x21xi64, 1xf32) + full_like_0 = paddle._C_ops.full_like( + data_0, full_2, paddle.int64, paddle.framework._current_expected_place() + ) + del full_2 + + # pd_op.full: (1xi32) <- () + full_3 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.cumsum: (1x21xi64) <- (1x21xi64, 1xi32) + cumsum_0 = paddle._C_ops.cumsum(full_like_0, full_3, False, False, False) + del full_3 + + # pd_op.subtract: (1x21xi64) <- (1x21xi64, 1x21xi64) + subtract_0 = paddle._C_ops.subtract(cumsum_0, full_like_0) + del cumsum_0, full_like_0 + + # pd_op.embedding: (1x21x768xf32) <- (1x21xi64, 30522x768xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_196, -1, False) + del data_0, parameter_196 + + # pd_op.embedding: (1x21x768xf32) <- (1x21xi64, 512x768xf32) + embedding_1 = paddle._C_ops.embedding(subtract_0, parameter_195, -1, False) + del parameter_195, subtract_0 + + # pd_op.embedding: (1x21x768xf32) <- (1x21xi64, 2x768xf32) + embedding_2 = paddle._C_ops.embedding(data_1, parameter_194, -1, False) + del data_1, parameter_194 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_1, layer_norm_2, layer_norm_3 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_193, parameter_192, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_192, parameter_193 + + # pd_op.full: (1xf32) <- () + full_4 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_1, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del layer_norm_1 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_191, False, False) + del parameter_191 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_190) + del matmul_0, parameter_190 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_1 = [0, 0, 12, 64] + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_2, full_int_array_1) + del add_2 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(dropout_0, parameter_189, False, False) + del parameter_189 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_188) + del matmul_1, parameter_188 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(dropout_0, parameter_187, False, False) + del parameter_187 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_186) + del matmul_2, parameter_186 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_3, full_int_array_1) + del add_3 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_4, full_int_array_1) + del add_4 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.full: (1xf32) <- () + full_5 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_1 = paddle._C_ops.scale(transpose_0, full_5, float("0"), True) + del transpose_0 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_3 = paddle._C_ops.matmul(scale_1, transpose_1, False, True) + del scale_1, transpose_1 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_5 = paddle._C_ops.add(matmul_3, unsqueeze_0) + del matmul_3 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_0 = paddle._C_ops.softmax(add_5, -1) + del add_5 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_4 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_4, [0, 2, 1, 3]) + del matmul_4 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_2 = [0, 0, 768] + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_2) + del transpose_3 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_5 = paddle._C_ops.matmul(reshape_3, parameter_185, False, False) + del parameter_185, reshape_3 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_6 = paddle._C_ops.add(matmul_5, parameter_184) + del matmul_5, parameter_184 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_6, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_6 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_7 = paddle._C_ops.add(dropout_0, dropout_4) + del dropout_0, dropout_4 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_4, layer_norm_5, layer_norm_6 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_7, parameter_179, parameter_178, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_7, parameter_178, parameter_179 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_6 = paddle._C_ops.matmul(layer_norm_4, parameter_183, False, False) + del parameter_183 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_8 = paddle._C_ops.add(matmul_6, parameter_182) + del matmul_6, parameter_182 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_0 = paddle._C_ops.gelu(add_8, False) + del add_8 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_7 = paddle._C_ops.matmul(gelu_0, parameter_181, False, False) + del gelu_0, parameter_181 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_180) + del matmul_7, parameter_180 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_9, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_9 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_10 = paddle._C_ops.add(layer_norm_4, dropout_6) + del dropout_6, layer_norm_4 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_7, layer_norm_8, layer_norm_9 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_10, parameter_177, parameter_176, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_10, parameter_176, parameter_177 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_7, parameter_175, False, False) + del parameter_175 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_11 = paddle._C_ops.add(matmul_8, parameter_174) + del matmul_8, parameter_174 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_11, full_int_array_1) + del add_11 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_7, parameter_173, False, False) + del parameter_173 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_172) + del matmul_9, parameter_172 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_7, parameter_171, False, False) + del parameter_171 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_13 = paddle._C_ops.add(matmul_10, parameter_170) + del matmul_10, parameter_170 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_12, full_int_array_1) + del add_12 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_13, full_int_array_1) + del add_13 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_2 = paddle._C_ops.scale(transpose_4, full_5, float("0"), True) + del transpose_4 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_11 = paddle._C_ops.matmul(scale_2, transpose_5, False, True) + del scale_2, transpose_5 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_14 = paddle._C_ops.add(matmul_11, unsqueeze_0) + del matmul_11 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_1 = paddle._C_ops.softmax(add_14, -1) + del add_14 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_12 = paddle._C_ops.matmul(dropout_8, transpose_6, False, False) + del dropout_8, transpose_6 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_12, [0, 2, 1, 3]) + del matmul_12 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_2) + del transpose_7 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_13 = paddle._C_ops.matmul(reshape_7, parameter_169, False, False) + del parameter_169, reshape_7 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_15 = paddle._C_ops.add(matmul_13, parameter_168) + del matmul_13, parameter_168 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_15, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_15 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_16 = paddle._C_ops.add(layer_norm_7, dropout_10) + del dropout_10, layer_norm_7 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_10, layer_norm_11, layer_norm_12 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_16, parameter_163, parameter_162, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_16, parameter_162, parameter_163 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_10, parameter_167, False, False) + del parameter_167 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_17 = paddle._C_ops.add(matmul_14, parameter_166) + del matmul_14, parameter_166 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_1 = paddle._C_ops.gelu(add_17, False) + del add_17 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_15 = paddle._C_ops.matmul(gelu_1, parameter_165, False, False) + del gelu_1, parameter_165 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_18 = paddle._C_ops.add(matmul_15, parameter_164) + del matmul_15, parameter_164 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_18, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_18 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_19 = paddle._C_ops.add(layer_norm_10, dropout_12) + del dropout_12, layer_norm_10 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_13, layer_norm_14, layer_norm_15 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_19, parameter_161, parameter_160, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_19, parameter_160, parameter_161 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_16 = paddle._C_ops.matmul(layer_norm_13, parameter_159, False, False) + del parameter_159 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_20 = paddle._C_ops.add(matmul_16, parameter_158) + del matmul_16, parameter_158 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_20, full_int_array_1) + del add_20 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_13, parameter_157, False, False) + del parameter_157 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_156) + del matmul_17, parameter_156 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_13, parameter_155, False, False) + del parameter_155 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_154) + del matmul_18, parameter_154 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_21, full_int_array_1) + del add_21 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_22, full_int_array_1) + del add_22 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_3 = paddle._C_ops.scale(transpose_8, full_5, float("0"), True) + del transpose_8 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_19 = paddle._C_ops.matmul(scale_3, transpose_9, False, True) + del scale_3, transpose_9 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_23 = paddle._C_ops.add(matmul_19, unsqueeze_0) + del matmul_19 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_2 = paddle._C_ops.softmax(add_23, -1) + del add_23 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_20 = paddle._C_ops.matmul(dropout_14, transpose_10, False, False) + del dropout_14, transpose_10 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_20, [0, 2, 1, 3]) + del matmul_20 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_2) + del transpose_11 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_21 = paddle._C_ops.matmul(reshape_11, parameter_153, False, False) + del parameter_153, reshape_11 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_24 = paddle._C_ops.add(matmul_21, parameter_152) + del matmul_21, parameter_152 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_24, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_24 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_25 = paddle._C_ops.add(layer_norm_13, dropout_16) + del dropout_16, layer_norm_13 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_16, layer_norm_17, layer_norm_18 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_147, parameter_146, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_25, parameter_146, parameter_147 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_22 = paddle._C_ops.matmul(layer_norm_16, parameter_151, False, False) + del parameter_151 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_26 = paddle._C_ops.add(matmul_22, parameter_150) + del matmul_22, parameter_150 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_2 = paddle._C_ops.gelu(add_26, False) + del add_26 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_23 = paddle._C_ops.matmul(gelu_2, parameter_149, False, False) + del gelu_2, parameter_149 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_27 = paddle._C_ops.add(matmul_23, parameter_148) + del matmul_23, parameter_148 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_27, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_27 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_28 = paddle._C_ops.add(layer_norm_16, dropout_18) + del dropout_18, layer_norm_16 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_19, layer_norm_20, layer_norm_21 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_28, parameter_145, parameter_144, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_28, parameter_144, parameter_145 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_24 = paddle._C_ops.matmul(layer_norm_19, parameter_143, False, False) + del parameter_143 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_29 = paddle._C_ops.add(matmul_24, parameter_142) + del matmul_24, parameter_142 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_29, full_int_array_1) + del add_29 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_19, parameter_141, False, False) + del parameter_141 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_30 = paddle._C_ops.add(matmul_25, parameter_140) + del matmul_25, parameter_140 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_19, parameter_139, False, False) + del parameter_139 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_31 = paddle._C_ops.add(matmul_26, parameter_138) + del matmul_26, parameter_138 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_30, full_int_array_1) + del add_30 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_31, full_int_array_1) + del add_31 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_4 = paddle._C_ops.scale(transpose_12, full_5, float("0"), True) + del transpose_12 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_27 = paddle._C_ops.matmul(scale_4, transpose_13, False, True) + del scale_4, transpose_13 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_32 = paddle._C_ops.add(matmul_27, unsqueeze_0) + del matmul_27 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_3 = paddle._C_ops.softmax(add_32, -1) + del add_32 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_28 = paddle._C_ops.matmul(dropout_20, transpose_14, False, False) + del dropout_20, transpose_14 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_2) + del transpose_15 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_29 = paddle._C_ops.matmul(reshape_15, parameter_137, False, False) + del parameter_137, reshape_15 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_33 = paddle._C_ops.add(matmul_29, parameter_136) + del matmul_29, parameter_136 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_33, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_33 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_34 = paddle._C_ops.add(layer_norm_19, dropout_22) + del dropout_22, layer_norm_19 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_22, layer_norm_23, layer_norm_24 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_131, parameter_130, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_34, parameter_130, parameter_131 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_22, parameter_135, False, False) + del parameter_135 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_35 = paddle._C_ops.add(matmul_30, parameter_134) + del matmul_30, parameter_134 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_3 = paddle._C_ops.gelu(add_35, False) + del add_35 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_31 = paddle._C_ops.matmul(gelu_3, parameter_133, False, False) + del gelu_3, parameter_133 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_36 = paddle._C_ops.add(matmul_31, parameter_132) + del matmul_31, parameter_132 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_36, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_36 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_37 = paddle._C_ops.add(layer_norm_22, dropout_24) + del dropout_24, layer_norm_22 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_25, layer_norm_26, layer_norm_27 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_37, parameter_129, parameter_128, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_37, parameter_128, parameter_129 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_32 = paddle._C_ops.matmul(layer_norm_25, parameter_127, False, False) + del parameter_127 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_38 = paddle._C_ops.add(matmul_32, parameter_126) + del matmul_32, parameter_126 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_38, full_int_array_1) + del add_38 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_25, parameter_125, False, False) + del parameter_125 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_39 = paddle._C_ops.add(matmul_33, parameter_124) + del matmul_33, parameter_124 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_25, parameter_123, False, False) + del parameter_123 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_40 = paddle._C_ops.add(matmul_34, parameter_122) + del matmul_34, parameter_122 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_39, full_int_array_1) + del add_39 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_40, full_int_array_1) + del add_40 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_5 = paddle._C_ops.scale(transpose_16, full_5, float("0"), True) + del transpose_16 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_35 = paddle._C_ops.matmul(scale_5, transpose_17, False, True) + del scale_5, transpose_17 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_41 = paddle._C_ops.add(matmul_35, unsqueeze_0) + del matmul_35 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_4 = paddle._C_ops.softmax(add_41, -1) + del add_41 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_36 = paddle._C_ops.matmul(dropout_26, transpose_18, False, False) + del dropout_26, transpose_18 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_36, [0, 2, 1, 3]) + del matmul_36 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_2) + del transpose_19 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_37 = paddle._C_ops.matmul(reshape_19, parameter_121, False, False) + del parameter_121, reshape_19 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_42 = paddle._C_ops.add(matmul_37, parameter_120) + del matmul_37, parameter_120 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_42, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_42 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_43 = paddle._C_ops.add(layer_norm_25, dropout_28) + del dropout_28, layer_norm_25 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_28, layer_norm_29, layer_norm_30 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_43, parameter_115, parameter_114, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_43, parameter_114, parameter_115 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_38 = paddle._C_ops.matmul(layer_norm_28, parameter_119, False, False) + del parameter_119 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_44 = paddle._C_ops.add(matmul_38, parameter_118) + del matmul_38, parameter_118 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_4 = paddle._C_ops.gelu(add_44, False) + del add_44 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_39 = paddle._C_ops.matmul(gelu_4, parameter_117, False, False) + del gelu_4, parameter_117 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_45 = paddle._C_ops.add(matmul_39, parameter_116) + del matmul_39, parameter_116 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_45, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_45 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_46 = paddle._C_ops.add(layer_norm_28, dropout_30) + del dropout_30, layer_norm_28 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_31, layer_norm_32, layer_norm_33 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_46, parameter_113, parameter_112, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_46, parameter_112, parameter_113 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_31, parameter_111, False, False) + del parameter_111 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_47 = paddle._C_ops.add(matmul_40, parameter_110) + del matmul_40, parameter_110 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_47, full_int_array_1) + del add_47 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_31, parameter_109, False, False) + del parameter_109 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_48 = paddle._C_ops.add(matmul_41, parameter_108) + del matmul_41, parameter_108 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_31, parameter_107, False, False) + del parameter_107 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_49 = paddle._C_ops.add(matmul_42, parameter_106) + del matmul_42, parameter_106 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_48, full_int_array_1) + del add_48 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_49, full_int_array_1) + del add_49 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_6 = paddle._C_ops.scale(transpose_20, full_5, float("0"), True) + del transpose_20 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_43 = paddle._C_ops.matmul(scale_6, transpose_21, False, True) + del scale_6, transpose_21 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_50 = paddle._C_ops.add(matmul_43, unsqueeze_0) + del matmul_43 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_5 = paddle._C_ops.softmax(add_50, -1) + del add_50 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_44 = paddle._C_ops.matmul(dropout_32, transpose_22, False, False) + del dropout_32, transpose_22 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_44, [0, 2, 1, 3]) + del matmul_44 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_2) + del transpose_23 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_45 = paddle._C_ops.matmul(reshape_23, parameter_105, False, False) + del parameter_105, reshape_23 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_51 = paddle._C_ops.add(matmul_45, parameter_104) + del matmul_45, parameter_104 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_51, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_51 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_52 = paddle._C_ops.add(layer_norm_31, dropout_34) + del dropout_34, layer_norm_31 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_34, layer_norm_35, layer_norm_36 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_52, parameter_99, parameter_98, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_52, parameter_98, parameter_99 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_46 = paddle._C_ops.matmul(layer_norm_34, parameter_103, False, False) + del parameter_103 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_53 = paddle._C_ops.add(matmul_46, parameter_102) + del matmul_46, parameter_102 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_5 = paddle._C_ops.gelu(add_53, False) + del add_53 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_47 = paddle._C_ops.matmul(gelu_5, parameter_101, False, False) + del gelu_5, parameter_101 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_54 = paddle._C_ops.add(matmul_47, parameter_100) + del matmul_47, parameter_100 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_54, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_54 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_55 = paddle._C_ops.add(layer_norm_34, dropout_36) + del dropout_36, layer_norm_34 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_37, layer_norm_38, layer_norm_39 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_55, parameter_97, parameter_96, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_55, parameter_96, parameter_97 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_37, parameter_95, False, False) + del parameter_95 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_56 = paddle._C_ops.add(matmul_48, parameter_94) + del matmul_48, parameter_94 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_56, full_int_array_1) + del add_56 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_37, parameter_93, False, False) + del parameter_93 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_57 = paddle._C_ops.add(matmul_49, parameter_92) + del matmul_49, parameter_92 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_37, parameter_91, False, False) + del parameter_91 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_58 = paddle._C_ops.add(matmul_50, parameter_90) + del matmul_50, parameter_90 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_57, full_int_array_1) + del add_57 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_58, full_int_array_1) + del add_58 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_7 = paddle._C_ops.scale(transpose_24, full_5, float("0"), True) + del transpose_24 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_51 = paddle._C_ops.matmul(scale_7, transpose_25, False, True) + del scale_7, transpose_25 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_59 = paddle._C_ops.add(matmul_51, unsqueeze_0) + del matmul_51 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_6 = paddle._C_ops.softmax(add_59, -1) + del add_59 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_52 = paddle._C_ops.matmul(dropout_38, transpose_26, False, False) + del dropout_38, transpose_26 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_52, [0, 2, 1, 3]) + del matmul_52 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_2) + del transpose_27 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_53 = paddle._C_ops.matmul(reshape_27, parameter_89, False, False) + del parameter_89, reshape_27 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_60 = paddle._C_ops.add(matmul_53, parameter_88) + del matmul_53, parameter_88 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_60, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_60 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_61 = paddle._C_ops.add(layer_norm_37, dropout_40) + del dropout_40, layer_norm_37 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_40, layer_norm_41, layer_norm_42 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_61, parameter_83, parameter_82, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_61, parameter_82, parameter_83 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_40, parameter_87, False, False) + del parameter_87 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_62 = paddle._C_ops.add(matmul_54, parameter_86) + del matmul_54, parameter_86 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_6 = paddle._C_ops.gelu(add_62, False) + del add_62 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_55 = paddle._C_ops.matmul(gelu_6, parameter_85, False, False) + del gelu_6, parameter_85 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_63 = paddle._C_ops.add(matmul_55, parameter_84) + del matmul_55, parameter_84 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_63, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_63 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_64 = paddle._C_ops.add(layer_norm_40, dropout_42) + del dropout_42, layer_norm_40 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_43, layer_norm_44, layer_norm_45 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_64, parameter_81, parameter_80, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_64, parameter_80, parameter_81 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_56 = paddle._C_ops.matmul(layer_norm_43, parameter_79, False, False) + del parameter_79 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_65 = paddle._C_ops.add(matmul_56, parameter_78) + del matmul_56, parameter_78 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(add_65, full_int_array_1) + del add_65 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(layer_norm_43, parameter_77, False, False) + del parameter_77 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_66 = paddle._C_ops.add(matmul_57, parameter_76) + del matmul_57, parameter_76 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_43, parameter_75, False, False) + del parameter_75 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_67 = paddle._C_ops.add(matmul_58, parameter_74) + del matmul_58, parameter_74 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(add_66, full_int_array_1) + del add_66 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_67, full_int_array_1) + del add_67 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_8 = paddle._C_ops.scale(transpose_28, full_5, float("0"), True) + del transpose_28 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_59 = paddle._C_ops.matmul(scale_8, transpose_29, False, True) + del scale_8, transpose_29 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_68 = paddle._C_ops.add(matmul_59, unsqueeze_0) + del matmul_59 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_7 = paddle._C_ops.softmax(add_68, -1) + del add_68 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_60 = paddle._C_ops.matmul(dropout_44, transpose_30, False, False) + del dropout_44, transpose_30 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_60, [0, 2, 1, 3]) + del matmul_60 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_2) + del transpose_31 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_61 = paddle._C_ops.matmul(reshape_31, parameter_73, False, False) + del parameter_73, reshape_31 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_69 = paddle._C_ops.add(matmul_61, parameter_72) + del matmul_61, parameter_72 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_69, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_69 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_70 = paddle._C_ops.add(layer_norm_43, dropout_46) + del dropout_46, layer_norm_43 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_46, layer_norm_47, layer_norm_48 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_70, parameter_67, parameter_66, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_70, parameter_66, parameter_67 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_62 = paddle._C_ops.matmul(layer_norm_46, parameter_71, False, False) + del parameter_71 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_71 = paddle._C_ops.add(matmul_62, parameter_70) + del matmul_62, parameter_70 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_7 = paddle._C_ops.gelu(add_71, False) + del add_71 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_63 = paddle._C_ops.matmul(gelu_7, parameter_69, False, False) + del gelu_7, parameter_69 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_72 = paddle._C_ops.add(matmul_63, parameter_68) + del matmul_63, parameter_68 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_72, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_72 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_73 = paddle._C_ops.add(layer_norm_46, dropout_48) + del dropout_48, layer_norm_46 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_49, layer_norm_50, layer_norm_51 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_65, parameter_64, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_73, parameter_64, parameter_65 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_64 = paddle._C_ops.matmul(layer_norm_49, parameter_63, False, False) + del parameter_63 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_74 = paddle._C_ops.add(matmul_64, parameter_62) + del matmul_64, parameter_62 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(add_74, full_int_array_1) + del add_74 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_49, parameter_61, False, False) + del parameter_61 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_65, parameter_60) + del matmul_65, parameter_60 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_49, parameter_59, False, False) + del parameter_59 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_76 = paddle._C_ops.add(matmul_66, parameter_58) + del matmul_66, parameter_58 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(add_75, full_int_array_1) + del add_75 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_76, full_int_array_1) + del add_76 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_9 = paddle._C_ops.scale(transpose_32, full_5, float("0"), True) + del transpose_32 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_67 = paddle._C_ops.matmul(scale_9, transpose_33, False, True) + del scale_9, transpose_33 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_77 = paddle._C_ops.add(matmul_67, unsqueeze_0) + del matmul_67 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_8 = paddle._C_ops.softmax(add_77, -1) + del add_77 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_68 = paddle._C_ops.matmul(dropout_50, transpose_34, False, False) + del dropout_50, transpose_34 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_68, [0, 2, 1, 3]) + del matmul_68 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_2) + del transpose_35 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_69 = paddle._C_ops.matmul(reshape_35, parameter_57, False, False) + del parameter_57, reshape_35 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_78 = paddle._C_ops.add(matmul_69, parameter_56) + del matmul_69, parameter_56 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_78, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_78 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_79 = paddle._C_ops.add(layer_norm_49, dropout_52) + del dropout_52, layer_norm_49 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_52, layer_norm_53, layer_norm_54 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_79, parameter_51, parameter_50, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_79, parameter_50, parameter_51 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_52, parameter_55, False, False) + del parameter_55 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_80 = paddle._C_ops.add(matmul_70, parameter_54) + del matmul_70, parameter_54 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_8 = paddle._C_ops.gelu(add_80, False) + del add_80 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_71 = paddle._C_ops.matmul(gelu_8, parameter_53, False, False) + del gelu_8, parameter_53 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_81 = paddle._C_ops.add(matmul_71, parameter_52) + del matmul_71, parameter_52 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_81, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_81 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_82 = paddle._C_ops.add(layer_norm_52, dropout_54) + del dropout_54, layer_norm_52 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_55, layer_norm_56, layer_norm_57 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_82, parameter_49, parameter_48, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_82, parameter_48, parameter_49 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_72 = paddle._C_ops.matmul(layer_norm_55, parameter_47, False, False) + del parameter_47 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_83 = paddle._C_ops.add(matmul_72, parameter_46) + del matmul_72, parameter_46 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(add_83, full_int_array_1) + del add_83 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_55, parameter_45, False, False) + del parameter_45 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_84 = paddle._C_ops.add(matmul_73, parameter_44) + del matmul_73, parameter_44 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_55, parameter_43, False, False) + del parameter_43 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_85 = paddle._C_ops.add(matmul_74, parameter_42) + del matmul_74, parameter_42 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_84, full_int_array_1) + del add_84 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(add_85, full_int_array_1) + del add_85 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_10 = paddle._C_ops.scale(transpose_36, full_5, float("0"), True) + del transpose_36 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_75 = paddle._C_ops.matmul(scale_10, transpose_37, False, True) + del scale_10, transpose_37 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_86 = paddle._C_ops.add(matmul_75, unsqueeze_0) + del matmul_75 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_9 = paddle._C_ops.softmax(add_86, -1) + del add_86 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_76 = paddle._C_ops.matmul(dropout_56, transpose_38, False, False) + del dropout_56, transpose_38 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_76, [0, 2, 1, 3]) + del matmul_76 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_2) + del transpose_39 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_77 = paddle._C_ops.matmul(reshape_39, parameter_41, False, False) + del parameter_41, reshape_39 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_87 = paddle._C_ops.add(matmul_77, parameter_40) + del matmul_77, parameter_40 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_87, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_87 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_88 = paddle._C_ops.add(layer_norm_55, dropout_58) + del dropout_58, layer_norm_55 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_58, layer_norm_59, layer_norm_60 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_88, parameter_35, parameter_34, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_88, parameter_34, parameter_35 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_78 = paddle._C_ops.matmul(layer_norm_58, parameter_39, False, False) + del parameter_39 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_89 = paddle._C_ops.add(matmul_78, parameter_38) + del matmul_78, parameter_38 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_9 = paddle._C_ops.gelu(add_89, False) + del add_89 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_79 = paddle._C_ops.matmul(gelu_9, parameter_37, False, False) + del gelu_9, parameter_37 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_90 = paddle._C_ops.add(matmul_79, parameter_36) + del matmul_79, parameter_36 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_90, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_90 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_91 = paddle._C_ops.add(layer_norm_58, dropout_60) + del dropout_60, layer_norm_58 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_61, layer_norm_62, layer_norm_63 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_91, parameter_33, parameter_32, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_91, parameter_32, parameter_33 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_61, parameter_31, False, False) + del parameter_31 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_92 = paddle._C_ops.add(matmul_80, parameter_30) + del matmul_80, parameter_30 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(add_92, full_int_array_1) + del add_92 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_81 = paddle._C_ops.matmul(layer_norm_61, parameter_29, False, False) + del parameter_29 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_93 = paddle._C_ops.add(matmul_81, parameter_28) + del matmul_81, parameter_28 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_61, parameter_27, False, False) + del parameter_27 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_82, parameter_26) + del matmul_82, parameter_26 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_93, full_int_array_1) + del add_93 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(add_94, full_int_array_1) + del add_94 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_11 = paddle._C_ops.scale(transpose_40, full_5, float("0"), True) + del transpose_40 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_83 = paddle._C_ops.matmul(scale_11, transpose_41, False, True) + del scale_11, transpose_41 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_95 = paddle._C_ops.add(matmul_83, unsqueeze_0) + del matmul_83 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_10 = paddle._C_ops.softmax(add_95, -1) + del add_95 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_84 = paddle._C_ops.matmul(dropout_62, transpose_42, False, False) + del dropout_62, transpose_42 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_84, [0, 2, 1, 3]) + del matmul_84 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_2) + del transpose_43 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_85 = paddle._C_ops.matmul(reshape_43, parameter_25, False, False) + del parameter_25, reshape_43 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_96 = paddle._C_ops.add(matmul_85, parameter_24) + del matmul_85, parameter_24 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_96, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_96 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_97 = paddle._C_ops.add(layer_norm_61, dropout_64) + del dropout_64, layer_norm_61 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_64, layer_norm_65, layer_norm_66 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_97, parameter_19, parameter_18, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_97, parameter_18, parameter_19 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_86 = paddle._C_ops.matmul(layer_norm_64, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_98 = paddle._C_ops.add(matmul_86, parameter_22) + del matmul_86, parameter_22 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_98, False) + del add_98 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_87 = paddle._C_ops.matmul(gelu_10, parameter_21, False, False) + del gelu_10, parameter_21 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_99 = paddle._C_ops.add(matmul_87, parameter_20) + del matmul_87, parameter_20 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_99, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_99 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_100 = paddle._C_ops.add(layer_norm_64, dropout_66) + del dropout_66, layer_norm_64 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_67, layer_norm_68, layer_norm_69 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_100, parameter_17, parameter_16, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_100, parameter_16, parameter_17 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_88 = paddle._C_ops.matmul(layer_norm_67, parameter_15, False, False) + del parameter_15 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_101 = paddle._C_ops.add(matmul_88, parameter_14) + del matmul_88, parameter_14 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_101, full_int_array_1) + del add_101 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(layer_norm_67, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_102 = paddle._C_ops.add(matmul_89, parameter_12) + del matmul_89, parameter_12 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_67, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_90, parameter_10) + del matmul_90, parameter_10 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(add_102, full_int_array_1) + del add_102 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x21x12x64xf32) <- (1x21x768xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(add_103, full_int_array_1) + del add_103, full_int_array_1 + + # pd_op.transpose: (1x12x21x64xf32) <- (1x21x12x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.scale: (1x12x21x64xf32) <- (1x12x21x64xf32, 1xf32) + scale_12 = paddle._C_ops.scale(transpose_44, full_5, float("0"), True) + del full_5, transpose_44 + + # pd_op.matmul: (1x12x21x21xf32) <- (1x12x21x64xf32, 1x12x21x64xf32) + matmul_91 = paddle._C_ops.matmul(scale_12, transpose_45, False, True) + del scale_12, transpose_45 + + # pd_op.add: (1x12x21x21xf32) <- (1x12x21x21xf32, 1x1x1x21xf32) + add_104 = paddle._C_ops.add(matmul_91, unsqueeze_0) + del matmul_91, unsqueeze_0 + + # pd_op.softmax: (1x12x21x21xf32) <- (1x12x21x21xf32) + softmax_11 = paddle._C_ops.softmax(add_104, -1) + del add_104 + + # pd_op.dropout: (1x12x21x21xf32, 1x12x21x21xui8) <- (1x12x21x21xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # pd_op.matmul: (1x12x21x64xf32) <- (1x12x21x21xf32, 1x12x21x64xf32) + matmul_92 = paddle._C_ops.matmul(dropout_68, transpose_46, False, False) + del dropout_68, transpose_46 + + # pd_op.transpose: (1x21x12x64xf32) <- (1x12x21x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_92, [0, 2, 1, 3]) + del matmul_92 + + # pd_op.reshape: (1x21x768xf32) <- (1x21x12x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_2) + del full_int_array_2, transpose_47 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x768xf32, 768x768xf32) + matmul_93 = paddle._C_ops.matmul(reshape_47, parameter_9, False, False) + del parameter_9, reshape_47 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_105 = paddle._C_ops.add(matmul_93, parameter_8) + del matmul_93, parameter_8 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_105, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_105 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_106 = paddle._C_ops.add(layer_norm_67, dropout_70) + del dropout_70, layer_norm_67 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_70, layer_norm_71, layer_norm_72 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_106, parameter_3, parameter_2, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_106, parameter_2, parameter_3 + + # pd_op.matmul: (1x21x3072xf32) <- (1x21x768xf32, 768x3072xf32) + matmul_94 = paddle._C_ops.matmul(layer_norm_70, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (1x21x3072xf32) <- (1x21x3072xf32, 3072xf32) + add_107 = paddle._C_ops.add(matmul_94, parameter_6) + del matmul_94, parameter_6 + + # pd_op.gelu: (1x21x3072xf32) <- (1x21x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_107, False) + del add_107 + + # pd_op.matmul: (1x21x768xf32) <- (1x21x3072xf32, 3072x768xf32) + matmul_95 = paddle._C_ops.matmul(gelu_11, parameter_5, False, False) + del gelu_11, parameter_5 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 768xf32) + add_108 = paddle._C_ops.add(matmul_95, parameter_4) + del matmul_95, parameter_4 + + # pd_op.dropout: (1x21x768xf32, 1x21x768xui8) <- (1x21x768xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_108, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_108, full_4 + + # pd_op.add: (1x21x768xf32) <- (1x21x768xf32, 1x21x768xf32) + add_109 = paddle._C_ops.add(layer_norm_70, dropout_72) + del dropout_72, layer_norm_70 + + # pd_op.layer_norm: (1x21x768xf32, 1x21xf32, 1x21xf32) <- (1x21x768xf32, 768xf32, 768xf32) + layer_norm_0, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_109, parameter_1, parameter_0, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_109, parameter_0, parameter_1 + + return layer_norm_0 diff --git a/paddle_samples/PaddleNLP/electra-base/weight_meta.py b/paddle_samples/PaddleNLP/electra-base/weight_meta.py new file mode 100644 index 000000000..311a30b05 --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-base/weight_meta.py @@ -0,0 +1,1752 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.112911") + max_val = float("0.105344") + mean = float("-1.78666e-05") + std = float("0.0199831") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0970534") + max_val = float("0.0983472") + mean = float("-7.57025e-06") + std = float("0.019999") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0885477") + max_val = float("0.0996747") + mean = float("4.10738e-06") + std = float("0.019993") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.104738") + max_val = float("0.100103") + mean = float("-1.57427e-06") + std = float("0.019985") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0976071") + max_val = float("0.0899185") + mean = float("-2.57316e-05") + std = float("0.0199829") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.102359") + max_val = float("0.0988091") + mean = float("-3.37607e-05") + std = float("0.0200038") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.104361") + max_val = float("0.0975729") + mean = float("2.27353e-05") + std = float("0.0200044") + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.105976") + max_val = float("0.0963787") + mean = float("4.49493e-06") + std = float("0.0200072") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0970664") + max_val = float("0.0867254") + mean = float("3.01599e-05") + std = float("0.0200109") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0919803") + max_val = float("0.0979644") + mean = float("1.19891e-05") + std = float("0.0200087") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0974545") + max_val = float("0.0973422") + mean = float("-3.80493e-05") + std = float("0.0200223") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0924923") + max_val = float("0.0896068") + mean = float("-5.75358e-06") + std = float("0.0199917") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.106303") + max_val = float("0.0961744") + mean = float("7.69563e-06") + std = float("0.0200148") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0993679") + max_val = float("0.105999") + mean = float("2.82338e-06") + std = float("0.0200116") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0915466") + max_val = float("0.0905387") + mean = float("-3.00381e-05") + std = float("0.0199906") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0947608") + max_val = float("0.0908757") + mean = float("2.66403e-05") + std = float("0.019982") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0908663") + max_val = float("0.092095") + mean = float("9.12968e-06") + std = float("0.0199945") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.09452") + max_val = float("0.0954796") + mean = float("7.25202e-05") + std = float("0.0199683") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.104917") + max_val = float("0.098149") + mean = float("1.17544e-05") + std = float("0.0200076") + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.099535") + max_val = float("0.0968779") + mean = float("8.39104e-06") + std = float("0.0200083") + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0890943") + max_val = float("0.0882421") + mean = float("-1.84185e-05") + std = float("0.0200301") + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0909657") + max_val = float("0.101797") + mean = float("3.30663e-05") + std = float("0.0199599") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0922083") + max_val = float("0.0910458") + mean = float("-3.93247e-05") + std = float("0.0200152") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0955951") + max_val = float("0.094516") + mean = float("4.12674e-05") + std = float("0.0199938") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0970147") + max_val = float("0.0997038") + mean = float("-2.73814e-06") + std = float("0.0200045") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0965627") + max_val = float("0.102391") + mean = float("7.99892e-06") + std = float("0.0199973") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0945233") + max_val = float("0.0916558") + mean = float("2.57153e-05") + std = float("0.0199956") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0916911") + max_val = float("0.103966") + mean = float("-7.42874e-06") + std = float("0.0199991") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0950194") + max_val = float("0.0903942") + mean = float("1.73772e-06") + std = float("0.0200138") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.098085") + max_val = float("0.0965276") + mean = float("4.1221e-06") + std = float("0.0199732") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0997857") + max_val = float("0.0960941") + mean = float("-1.9982e-05") + std = float("0.0200077") + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0957993") + max_val = float("0.103336") + mean = float("-3.83955e-06") + std = float("0.0200146") + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0937591") + max_val = float("0.0962128") + mean = float("3.13594e-05") + std = float("0.0200031") + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0902273") + max_val = float("0.102267") + mean = float("-2.61945e-06") + std = float("0.020003") + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100566") + max_val = float("0.0946033") + mean = float("1.66822e-05") + std = float("0.0199829") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0944841") + max_val = float("0.0922926") + mean = float("1.49627e-05") + std = float("0.0200268") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.100352") + max_val = float("0.104806") + mean = float("-1.48755e-05") + std = float("0.0199962") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.104199") + max_val = float("0.0993377") + mean = float("1.65112e-05") + std = float("0.0199893") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100868") + max_val = float("0.093312") + mean = float("-3.52612e-05") + std = float("0.0199884") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0910631") + max_val = float("0.0962955") + mean = float("3.69561e-05") + std = float("0.0199836") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0923313") + max_val = float("0.0983016") + mean = float("1.82966e-05") + std = float("0.0200224") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0977213") + max_val = float("0.0963792") + mean = float("-2.34436e-05") + std = float("0.0199944") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.104299") + max_val = float("0.100014") + mean = float("9.10385e-06") + std = float("0.0200074") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0986788") + max_val = float("0.0999534") + mean = float("-2.08982e-05") + std = float("0.0200126") + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0993112") + max_val = float("0.0928147") + mean = float("9.87719e-06") + std = float("0.0200109") + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0914301") + max_val = float("0.100819") + mean = float("2.56854e-05") + std = float("0.020019") + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100195") + max_val = float("0.096326") + mean = float("-2.15789e-05") + std = float("0.0200166") + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0900616") + max_val = float("0.0903554") + mean = float("-2.23346e-05") + std = float("0.020015") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0972663") + max_val = float("0.100066") + mean = float("-1.70247e-05") + std = float("0.0200106") + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0946462") + max_val = float("0.0997865") + mean = float("1.24361e-05") + std = float("0.0200027") + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0946197") + max_val = float("0.09637") + mean = float("3.62792e-05") + std = float("0.0199739") + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0946805") + max_val = float("0.0935694") + mean = float("-1.39272e-05") + std = float("0.0200221") + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100536") + max_val = float("0.0976385") + mean = float("5.22242e-05") + std = float("0.0200336") + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.094087") + max_val = float("0.0933464") + mean = float("-1.50155e-05") + std = float("0.0199803") + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0937669") + max_val = float("0.10367") + mean = float("1.59461e-05") + std = float("0.020015") + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0972653") + max_val = float("0.104285") + mean = float("-4.33062e-06") + std = float("0.0199925") + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0892165") + max_val = float("0.101168") + mean = float("-1.94917e-05") + std = float("0.0200067") + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0940633") + max_val = float("0.099139") + mean = float("3.13527e-05") + std = float("0.0200274") + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0858162") + max_val = float("0.101152") + mean = float("-2.68832e-05") + std = float("0.0200188") + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0994817") + max_val = float("0.0933422") + mean = float("-4.01129e-05") + std = float("0.0199821") + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.102356") + max_val = float("0.0996039") + mean = float("1.52325e-05") + std = float("0.0199989") + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0979594") + max_val = float("0.107895") + mean = float("1.81089e-05") + std = float("0.0200017") + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0925432") + max_val = float("0.0922786") + mean = float("9.74066e-07") + std = float("0.0200203") + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0942276") + max_val = float("0.101956") + mean = float("3.90008e-05") + std = float("0.0200106") + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0977912") + max_val = float("0.0912753") + mean = float("-3.01177e-06") + std = float("0.0200107") + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0921556") + max_val = float("0.0963372") + mean = float("4.3242e-05") + std = float("0.0199868") + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.100796") + max_val = float("0.0964686") + mean = float("9.70896e-06") + std = float("0.0200039") + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.10284") + max_val = float("0.0961186") + mean = float("-1.93596e-05") + std = float("0.0200024") + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0879186") + max_val = float("0.0967267") + mean = float("9.92058e-06") + std = float("0.0200037") + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0918201") + max_val = float("0.0888838") + mean = float("2.55881e-05") + std = float("0.0200143") + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.102558") + max_val = float("0.102179") + mean = float("1.0835e-05") + std = float("0.0200018") + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0961731") + max_val = float("0.0941608") + mean = float("1.02162e-05") + std = float("0.02002") + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [2, 768] + dtype = "float32" + min_val = float("-0.0703824") + max_val = float("0.0797147") + mean = float("0.000712158") + std = float("0.0198968") + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [512, 768] + dtype = "float32" + min_val = float("-0.105861") + max_val = float("0.0923115") + mean = float("-1.11405e-05") + std = float("0.0199904") + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [30522, 768] + dtype = "float32" + min_val = float("-0.11153") + max_val = float("0.122706") + mean = float("-3.2749e-06") + std = float("0.02") + data = None diff --git a/paddle_samples/PaddleNLP/electra-large/graph_hash.txt b/paddle_samples/PaddleNLP/electra-large/graph_hash.txt new file mode 100644 index 000000000..4e0a92616 --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-large/graph_hash.txt @@ -0,0 +1 @@ +75a03ee235704dba470323aa7c62449e838a94e6c753916b564b3b176035b31f \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/electra-large/graph_net.json b/paddle_samples/PaddleNLP/electra-large/graph_net.json new file mode 100644 index 000000000..f809746db --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-large/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "electra-large", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/electra-large/input_meta.py b/paddle_samples/PaddleNLP/electra-large/input_meta.py new file mode 100644 index 000000000..fd1f7db6f --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-large/input_meta.py @@ -0,0 +1,34 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 21] + dtype = "int64" + data = [ + 101, + 7592, + 1010, + 2026, + 2171, + 2003, + 3960, + 1012, + 1045, + 2572, + 4083, + 2055, + 2312, + 2653, + 4275, + 1998, + 2037, + 4294, + 2015, + 1012, + 102, + ] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 21] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/electra-large/model.py b/paddle_samples/PaddleNLP/electra-large/model.py new file mode 100644 index 000000000..db9e8a58a --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-large/model.py @@ -0,0 +1,4277 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + parameter_205, + parameter_206, + parameter_207, + parameter_208, + parameter_209, + parameter_210, + parameter_211, + parameter_212, + parameter_213, + parameter_214, + parameter_215, + parameter_216, + parameter_217, + parameter_218, + parameter_219, + parameter_220, + parameter_221, + parameter_222, + parameter_223, + parameter_224, + parameter_225, + parameter_226, + parameter_227, + parameter_228, + parameter_229, + parameter_230, + parameter_231, + parameter_232, + parameter_233, + parameter_234, + parameter_235, + parameter_236, + parameter_237, + parameter_238, + parameter_239, + parameter_240, + parameter_241, + parameter_242, + parameter_243, + parameter_244, + parameter_245, + parameter_246, + parameter_247, + parameter_248, + parameter_249, + parameter_250, + parameter_251, + parameter_252, + parameter_253, + parameter_254, + parameter_255, + parameter_256, + parameter_257, + parameter_258, + parameter_259, + parameter_260, + parameter_261, + parameter_262, + parameter_263, + parameter_264, + parameter_265, + parameter_266, + parameter_267, + parameter_268, + parameter_269, + parameter_270, + parameter_271, + parameter_272, + parameter_273, + parameter_274, + parameter_275, + parameter_276, + parameter_277, + parameter_278, + parameter_279, + parameter_280, + parameter_281, + parameter_282, + parameter_283, + parameter_284, + parameter_285, + parameter_286, + parameter_287, + parameter_288, + parameter_289, + parameter_290, + parameter_291, + parameter_292, + parameter_293, + parameter_294, + parameter_295, + parameter_296, + parameter_297, + parameter_298, + parameter_299, + parameter_300, + parameter_301, + parameter_302, + parameter_303, + parameter_304, + parameter_305, + parameter_306, + parameter_307, + parameter_308, + parameter_309, + parameter_310, + parameter_311, + parameter_312, + parameter_313, + parameter_314, + parameter_315, + parameter_316, + parameter_317, + parameter_318, + parameter_319, + parameter_320, + parameter_321, + parameter_322, + parameter_323, + parameter_324, + parameter_325, + parameter_326, + parameter_327, + parameter_328, + parameter_329, + parameter_330, + parameter_331, + parameter_332, + parameter_333, + parameter_334, + parameter_335, + parameter_336, + parameter_337, + parameter_338, + parameter_339, + parameter_340, + parameter_341, + parameter_342, + parameter_343, + parameter_344, + parameter_345, + parameter_346, + parameter_347, + parameter_348, + parameter_349, + parameter_350, + parameter_351, + parameter_352, + parameter_353, + parameter_354, + parameter_355, + parameter_356, + parameter_357, + parameter_358, + parameter_359, + parameter_360, + parameter_361, + parameter_362, + parameter_363, + parameter_364, + parameter_365, + parameter_366, + parameter_367, + parameter_368, + parameter_369, + parameter_370, + parameter_371, + parameter_372, + parameter_373, + parameter_374, + parameter_375, + parameter_376, + parameter_377, + parameter_378, + parameter_379, + parameter_380, + parameter_381, + parameter_382, + parameter_383, + parameter_384, + parameter_385, + parameter_386, + parameter_387, + parameter_388, + data_0, + data_1, + ): + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("0"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.equal: (1x21xb) <- (1x21xi64, xi64) + equal_0 = paddle._C_ops.equal(data_0, full_0) + del full_0 + + # pd_op.cast: (1x21xf32) <- (1x21xb) + cast_0 = paddle._C_ops.cast(equal_0, paddle.float32) + del equal_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x21xf32) <- (1x21xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_1, float("0"), True) + del cast_0, full_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_0 = [1, 2] + + # pd_op.unsqueeze: (1x1x1x21xf32) <- (1x21xf32, 2xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del full_int_array_0, scale_0 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full_like: (1x21xi64) <- (1x21xi64, 1xf32) + full_like_0 = paddle._C_ops.full_like( + data_0, full_2, paddle.int64, paddle.framework._current_expected_place() + ) + del full_2 + + # pd_op.full: (1xi32) <- () + full_3 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.cumsum: (1x21xi64) <- (1x21xi64, 1xi32) + cumsum_0 = paddle._C_ops.cumsum(full_like_0, full_3, False, False, False) + del full_3 + + # pd_op.subtract: (1x21xi64) <- (1x21xi64, 1x21xi64) + subtract_0 = paddle._C_ops.subtract(cumsum_0, full_like_0) + del cumsum_0, full_like_0 + + # pd_op.embedding: (1x21x1024xf32) <- (1x21xi64, 30522x1024xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_388, -1, False) + del data_0, parameter_388 + + # pd_op.embedding: (1x21x1024xf32) <- (1x21xi64, 512x1024xf32) + embedding_1 = paddle._C_ops.embedding(subtract_0, parameter_387, -1, False) + del parameter_387, subtract_0 + + # pd_op.embedding: (1x21x1024xf32) <- (1x21xi64, 2x1024xf32) + embedding_2 = paddle._C_ops.embedding(data_1, parameter_386, -1, False) + del data_1, parameter_386 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_1, layer_norm_2, layer_norm_3 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_385, parameter_384, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_384, parameter_385 + + # pd_op.full: (1xf32) <- () + full_4 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_1, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del layer_norm_1 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_383, False, False) + del parameter_383 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_382) + del matmul_0, parameter_382 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_1 = [0, 0, 16, 64] + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_2, full_int_array_1) + del add_2 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_1 = paddle._C_ops.matmul(dropout_0, parameter_381, False, False) + del parameter_381 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_380) + del matmul_1, parameter_380 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_2 = paddle._C_ops.matmul(dropout_0, parameter_379, False, False) + del parameter_379 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_378) + del matmul_2, parameter_378 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_3, full_int_array_1) + del add_3 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_4, full_int_array_1) + del add_4 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.full: (1xf32) <- () + full_5 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_1 = paddle._C_ops.scale(transpose_0, full_5, float("0"), True) + del transpose_0 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_3 = paddle._C_ops.matmul(scale_1, transpose_1, False, True) + del scale_1, transpose_1 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_5 = paddle._C_ops.add(matmul_3, unsqueeze_0) + del matmul_3 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_0 = paddle._C_ops.softmax(add_5, -1) + del add_5 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_4 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_4, [0, 2, 1, 3]) + del matmul_4 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_2 = [0, 0, 1024] + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_2) + del transpose_3 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_5 = paddle._C_ops.matmul(reshape_3, parameter_377, False, False) + del parameter_377, reshape_3 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_6 = paddle._C_ops.add(matmul_5, parameter_376) + del matmul_5, parameter_376 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_6, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_6 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_7 = paddle._C_ops.add(dropout_0, dropout_4) + del dropout_0, dropout_4 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_4, layer_norm_5, layer_norm_6 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_7, parameter_371, parameter_370, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_7, parameter_370, parameter_371 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_6 = paddle._C_ops.matmul(layer_norm_4, parameter_375, False, False) + del parameter_375 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_8 = paddle._C_ops.add(matmul_6, parameter_374) + del matmul_6, parameter_374 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_0 = paddle._C_ops.gelu(add_8, False) + del add_8 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_7 = paddle._C_ops.matmul(gelu_0, parameter_373, False, False) + del gelu_0, parameter_373 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_372) + del matmul_7, parameter_372 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_9, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_9 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_10 = paddle._C_ops.add(layer_norm_4, dropout_6) + del dropout_6, layer_norm_4 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_7, layer_norm_8, layer_norm_9 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_10, parameter_369, parameter_368, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_10, parameter_368, parameter_369 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_7, parameter_367, False, False) + del parameter_367 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_11 = paddle._C_ops.add(matmul_8, parameter_366) + del matmul_8, parameter_366 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_11, full_int_array_1) + del add_11 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_7, parameter_365, False, False) + del parameter_365 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_364) + del matmul_9, parameter_364 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_7, parameter_363, False, False) + del parameter_363 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_13 = paddle._C_ops.add(matmul_10, parameter_362) + del matmul_10, parameter_362 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_12, full_int_array_1) + del add_12 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_13, full_int_array_1) + del add_13 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_2 = paddle._C_ops.scale(transpose_4, full_5, float("0"), True) + del transpose_4 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_11 = paddle._C_ops.matmul(scale_2, transpose_5, False, True) + del scale_2, transpose_5 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_14 = paddle._C_ops.add(matmul_11, unsqueeze_0) + del matmul_11 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_1 = paddle._C_ops.softmax(add_14, -1) + del add_14 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_12 = paddle._C_ops.matmul(dropout_8, transpose_6, False, False) + del dropout_8, transpose_6 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_12, [0, 2, 1, 3]) + del matmul_12 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_2) + del transpose_7 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_13 = paddle._C_ops.matmul(reshape_7, parameter_361, False, False) + del parameter_361, reshape_7 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_15 = paddle._C_ops.add(matmul_13, parameter_360) + del matmul_13, parameter_360 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_15, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_15 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_16 = paddle._C_ops.add(layer_norm_7, dropout_10) + del dropout_10, layer_norm_7 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_10, layer_norm_11, layer_norm_12 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_16, parameter_355, parameter_354, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_16, parameter_354, parameter_355 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_10, parameter_359, False, False) + del parameter_359 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_17 = paddle._C_ops.add(matmul_14, parameter_358) + del matmul_14, parameter_358 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_1 = paddle._C_ops.gelu(add_17, False) + del add_17 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_15 = paddle._C_ops.matmul(gelu_1, parameter_357, False, False) + del gelu_1, parameter_357 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_18 = paddle._C_ops.add(matmul_15, parameter_356) + del matmul_15, parameter_356 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_18, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_18 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_19 = paddle._C_ops.add(layer_norm_10, dropout_12) + del dropout_12, layer_norm_10 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_13, layer_norm_14, layer_norm_15 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_19, parameter_353, parameter_352, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_19, parameter_352, parameter_353 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_16 = paddle._C_ops.matmul(layer_norm_13, parameter_351, False, False) + del parameter_351 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_20 = paddle._C_ops.add(matmul_16, parameter_350) + del matmul_16, parameter_350 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_20, full_int_array_1) + del add_20 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_13, parameter_349, False, False) + del parameter_349 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_348) + del matmul_17, parameter_348 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_13, parameter_347, False, False) + del parameter_347 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_346) + del matmul_18, parameter_346 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_21, full_int_array_1) + del add_21 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_22, full_int_array_1) + del add_22 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_3 = paddle._C_ops.scale(transpose_8, full_5, float("0"), True) + del transpose_8 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_19 = paddle._C_ops.matmul(scale_3, transpose_9, False, True) + del scale_3, transpose_9 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_23 = paddle._C_ops.add(matmul_19, unsqueeze_0) + del matmul_19 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_2 = paddle._C_ops.softmax(add_23, -1) + del add_23 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_20 = paddle._C_ops.matmul(dropout_14, transpose_10, False, False) + del dropout_14, transpose_10 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_20, [0, 2, 1, 3]) + del matmul_20 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_2) + del transpose_11 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_21 = paddle._C_ops.matmul(reshape_11, parameter_345, False, False) + del parameter_345, reshape_11 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_24 = paddle._C_ops.add(matmul_21, parameter_344) + del matmul_21, parameter_344 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_24, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_24 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_25 = paddle._C_ops.add(layer_norm_13, dropout_16) + del dropout_16, layer_norm_13 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_16, layer_norm_17, layer_norm_18 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_339, parameter_338, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_25, parameter_338, parameter_339 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_22 = paddle._C_ops.matmul(layer_norm_16, parameter_343, False, False) + del parameter_343 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_26 = paddle._C_ops.add(matmul_22, parameter_342) + del matmul_22, parameter_342 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_2 = paddle._C_ops.gelu(add_26, False) + del add_26 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_23 = paddle._C_ops.matmul(gelu_2, parameter_341, False, False) + del gelu_2, parameter_341 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_27 = paddle._C_ops.add(matmul_23, parameter_340) + del matmul_23, parameter_340 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_27, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_27 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_28 = paddle._C_ops.add(layer_norm_16, dropout_18) + del dropout_18, layer_norm_16 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_19, layer_norm_20, layer_norm_21 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_28, parameter_337, parameter_336, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_28, parameter_336, parameter_337 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_24 = paddle._C_ops.matmul(layer_norm_19, parameter_335, False, False) + del parameter_335 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_29 = paddle._C_ops.add(matmul_24, parameter_334) + del matmul_24, parameter_334 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_29, full_int_array_1) + del add_29 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_19, parameter_333, False, False) + del parameter_333 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_30 = paddle._C_ops.add(matmul_25, parameter_332) + del matmul_25, parameter_332 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_19, parameter_331, False, False) + del parameter_331 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_31 = paddle._C_ops.add(matmul_26, parameter_330) + del matmul_26, parameter_330 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_30, full_int_array_1) + del add_30 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_31, full_int_array_1) + del add_31 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_4 = paddle._C_ops.scale(transpose_12, full_5, float("0"), True) + del transpose_12 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_27 = paddle._C_ops.matmul(scale_4, transpose_13, False, True) + del scale_4, transpose_13 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_32 = paddle._C_ops.add(matmul_27, unsqueeze_0) + del matmul_27 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_3 = paddle._C_ops.softmax(add_32, -1) + del add_32 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_28 = paddle._C_ops.matmul(dropout_20, transpose_14, False, False) + del dropout_20, transpose_14 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_2) + del transpose_15 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_29 = paddle._C_ops.matmul(reshape_15, parameter_329, False, False) + del parameter_329, reshape_15 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_33 = paddle._C_ops.add(matmul_29, parameter_328) + del matmul_29, parameter_328 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_33, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_33 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_34 = paddle._C_ops.add(layer_norm_19, dropout_22) + del dropout_22, layer_norm_19 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_22, layer_norm_23, layer_norm_24 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_323, parameter_322, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_34, parameter_322, parameter_323 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_22, parameter_327, False, False) + del parameter_327 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_35 = paddle._C_ops.add(matmul_30, parameter_326) + del matmul_30, parameter_326 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_3 = paddle._C_ops.gelu(add_35, False) + del add_35 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_31 = paddle._C_ops.matmul(gelu_3, parameter_325, False, False) + del gelu_3, parameter_325 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_36 = paddle._C_ops.add(matmul_31, parameter_324) + del matmul_31, parameter_324 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_36, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_36 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_37 = paddle._C_ops.add(layer_norm_22, dropout_24) + del dropout_24, layer_norm_22 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_25, layer_norm_26, layer_norm_27 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_37, parameter_321, parameter_320, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_37, parameter_320, parameter_321 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_32 = paddle._C_ops.matmul(layer_norm_25, parameter_319, False, False) + del parameter_319 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_38 = paddle._C_ops.add(matmul_32, parameter_318) + del matmul_32, parameter_318 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_38, full_int_array_1) + del add_38 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_25, parameter_317, False, False) + del parameter_317 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_39 = paddle._C_ops.add(matmul_33, parameter_316) + del matmul_33, parameter_316 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_25, parameter_315, False, False) + del parameter_315 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_40 = paddle._C_ops.add(matmul_34, parameter_314) + del matmul_34, parameter_314 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_39, full_int_array_1) + del add_39 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_40, full_int_array_1) + del add_40 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_5 = paddle._C_ops.scale(transpose_16, full_5, float("0"), True) + del transpose_16 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_35 = paddle._C_ops.matmul(scale_5, transpose_17, False, True) + del scale_5, transpose_17 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_41 = paddle._C_ops.add(matmul_35, unsqueeze_0) + del matmul_35 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_4 = paddle._C_ops.softmax(add_41, -1) + del add_41 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_36 = paddle._C_ops.matmul(dropout_26, transpose_18, False, False) + del dropout_26, transpose_18 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_36, [0, 2, 1, 3]) + del matmul_36 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_2) + del transpose_19 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_37 = paddle._C_ops.matmul(reshape_19, parameter_313, False, False) + del parameter_313, reshape_19 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_42 = paddle._C_ops.add(matmul_37, parameter_312) + del matmul_37, parameter_312 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_42, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_42 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_43 = paddle._C_ops.add(layer_norm_25, dropout_28) + del dropout_28, layer_norm_25 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_28, layer_norm_29, layer_norm_30 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_43, parameter_307, parameter_306, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_43, parameter_306, parameter_307 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_38 = paddle._C_ops.matmul(layer_norm_28, parameter_311, False, False) + del parameter_311 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_44 = paddle._C_ops.add(matmul_38, parameter_310) + del matmul_38, parameter_310 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_4 = paddle._C_ops.gelu(add_44, False) + del add_44 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_39 = paddle._C_ops.matmul(gelu_4, parameter_309, False, False) + del gelu_4, parameter_309 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_45 = paddle._C_ops.add(matmul_39, parameter_308) + del matmul_39, parameter_308 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_45, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_45 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_46 = paddle._C_ops.add(layer_norm_28, dropout_30) + del dropout_30, layer_norm_28 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_31, layer_norm_32, layer_norm_33 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_46, parameter_305, parameter_304, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_46, parameter_304, parameter_305 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_31, parameter_303, False, False) + del parameter_303 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_47 = paddle._C_ops.add(matmul_40, parameter_302) + del matmul_40, parameter_302 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_47, full_int_array_1) + del add_47 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_31, parameter_301, False, False) + del parameter_301 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_48 = paddle._C_ops.add(matmul_41, parameter_300) + del matmul_41, parameter_300 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_31, parameter_299, False, False) + del parameter_299 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_49 = paddle._C_ops.add(matmul_42, parameter_298) + del matmul_42, parameter_298 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_48, full_int_array_1) + del add_48 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_49, full_int_array_1) + del add_49 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_6 = paddle._C_ops.scale(transpose_20, full_5, float("0"), True) + del transpose_20 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_43 = paddle._C_ops.matmul(scale_6, transpose_21, False, True) + del scale_6, transpose_21 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_50 = paddle._C_ops.add(matmul_43, unsqueeze_0) + del matmul_43 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_5 = paddle._C_ops.softmax(add_50, -1) + del add_50 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_44 = paddle._C_ops.matmul(dropout_32, transpose_22, False, False) + del dropout_32, transpose_22 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_44, [0, 2, 1, 3]) + del matmul_44 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_2) + del transpose_23 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_45 = paddle._C_ops.matmul(reshape_23, parameter_297, False, False) + del parameter_297, reshape_23 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_51 = paddle._C_ops.add(matmul_45, parameter_296) + del matmul_45, parameter_296 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_51, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_51 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_52 = paddle._C_ops.add(layer_norm_31, dropout_34) + del dropout_34, layer_norm_31 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_34, layer_norm_35, layer_norm_36 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_52, parameter_291, parameter_290, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_52, parameter_290, parameter_291 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_46 = paddle._C_ops.matmul(layer_norm_34, parameter_295, False, False) + del parameter_295 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_53 = paddle._C_ops.add(matmul_46, parameter_294) + del matmul_46, parameter_294 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_5 = paddle._C_ops.gelu(add_53, False) + del add_53 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_47 = paddle._C_ops.matmul(gelu_5, parameter_293, False, False) + del gelu_5, parameter_293 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_54 = paddle._C_ops.add(matmul_47, parameter_292) + del matmul_47, parameter_292 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_54, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_54 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_55 = paddle._C_ops.add(layer_norm_34, dropout_36) + del dropout_36, layer_norm_34 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_37, layer_norm_38, layer_norm_39 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_55, parameter_289, parameter_288, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_55, parameter_288, parameter_289 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_37, parameter_287, False, False) + del parameter_287 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_56 = paddle._C_ops.add(matmul_48, parameter_286) + del matmul_48, parameter_286 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_56, full_int_array_1) + del add_56 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_37, parameter_285, False, False) + del parameter_285 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_57 = paddle._C_ops.add(matmul_49, parameter_284) + del matmul_49, parameter_284 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_37, parameter_283, False, False) + del parameter_283 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_58 = paddle._C_ops.add(matmul_50, parameter_282) + del matmul_50, parameter_282 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_57, full_int_array_1) + del add_57 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_58, full_int_array_1) + del add_58 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_7 = paddle._C_ops.scale(transpose_24, full_5, float("0"), True) + del transpose_24 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_51 = paddle._C_ops.matmul(scale_7, transpose_25, False, True) + del scale_7, transpose_25 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_59 = paddle._C_ops.add(matmul_51, unsqueeze_0) + del matmul_51 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_6 = paddle._C_ops.softmax(add_59, -1) + del add_59 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_52 = paddle._C_ops.matmul(dropout_38, transpose_26, False, False) + del dropout_38, transpose_26 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_52, [0, 2, 1, 3]) + del matmul_52 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_2) + del transpose_27 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_53 = paddle._C_ops.matmul(reshape_27, parameter_281, False, False) + del parameter_281, reshape_27 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_60 = paddle._C_ops.add(matmul_53, parameter_280) + del matmul_53, parameter_280 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_60, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_60 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_61 = paddle._C_ops.add(layer_norm_37, dropout_40) + del dropout_40, layer_norm_37 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_40, layer_norm_41, layer_norm_42 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_61, parameter_275, parameter_274, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_61, parameter_274, parameter_275 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_40, parameter_279, False, False) + del parameter_279 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_62 = paddle._C_ops.add(matmul_54, parameter_278) + del matmul_54, parameter_278 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_6 = paddle._C_ops.gelu(add_62, False) + del add_62 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_55 = paddle._C_ops.matmul(gelu_6, parameter_277, False, False) + del gelu_6, parameter_277 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_63 = paddle._C_ops.add(matmul_55, parameter_276) + del matmul_55, parameter_276 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_63, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_63 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_64 = paddle._C_ops.add(layer_norm_40, dropout_42) + del dropout_42, layer_norm_40 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_43, layer_norm_44, layer_norm_45 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_64, parameter_273, parameter_272, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_64, parameter_272, parameter_273 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_56 = paddle._C_ops.matmul(layer_norm_43, parameter_271, False, False) + del parameter_271 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_65 = paddle._C_ops.add(matmul_56, parameter_270) + del matmul_56, parameter_270 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(add_65, full_int_array_1) + del add_65 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_57 = paddle._C_ops.matmul(layer_norm_43, parameter_269, False, False) + del parameter_269 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_66 = paddle._C_ops.add(matmul_57, parameter_268) + del matmul_57, parameter_268 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_43, parameter_267, False, False) + del parameter_267 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_67 = paddle._C_ops.add(matmul_58, parameter_266) + del matmul_58, parameter_266 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(add_66, full_int_array_1) + del add_66 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_67, full_int_array_1) + del add_67 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_8 = paddle._C_ops.scale(transpose_28, full_5, float("0"), True) + del transpose_28 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_59 = paddle._C_ops.matmul(scale_8, transpose_29, False, True) + del scale_8, transpose_29 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_68 = paddle._C_ops.add(matmul_59, unsqueeze_0) + del matmul_59 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_7 = paddle._C_ops.softmax(add_68, -1) + del add_68 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_60 = paddle._C_ops.matmul(dropout_44, transpose_30, False, False) + del dropout_44, transpose_30 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_60, [0, 2, 1, 3]) + del matmul_60 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_2) + del transpose_31 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_61 = paddle._C_ops.matmul(reshape_31, parameter_265, False, False) + del parameter_265, reshape_31 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_69 = paddle._C_ops.add(matmul_61, parameter_264) + del matmul_61, parameter_264 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_69, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_69 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_70 = paddle._C_ops.add(layer_norm_43, dropout_46) + del dropout_46, layer_norm_43 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_46, layer_norm_47, layer_norm_48 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_70, parameter_259, parameter_258, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_70, parameter_258, parameter_259 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_62 = paddle._C_ops.matmul(layer_norm_46, parameter_263, False, False) + del parameter_263 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_71 = paddle._C_ops.add(matmul_62, parameter_262) + del matmul_62, parameter_262 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_7 = paddle._C_ops.gelu(add_71, False) + del add_71 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_63 = paddle._C_ops.matmul(gelu_7, parameter_261, False, False) + del gelu_7, parameter_261 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_72 = paddle._C_ops.add(matmul_63, parameter_260) + del matmul_63, parameter_260 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_72, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_72 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_73 = paddle._C_ops.add(layer_norm_46, dropout_48) + del dropout_48, layer_norm_46 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_49, layer_norm_50, layer_norm_51 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_257, parameter_256, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_73, parameter_256, parameter_257 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_64 = paddle._C_ops.matmul(layer_norm_49, parameter_255, False, False) + del parameter_255 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_74 = paddle._C_ops.add(matmul_64, parameter_254) + del matmul_64, parameter_254 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(add_74, full_int_array_1) + del add_74 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_49, parameter_253, False, False) + del parameter_253 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_75 = paddle._C_ops.add(matmul_65, parameter_252) + del matmul_65, parameter_252 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_49, parameter_251, False, False) + del parameter_251 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_76 = paddle._C_ops.add(matmul_66, parameter_250) + del matmul_66, parameter_250 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(add_75, full_int_array_1) + del add_75 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_76, full_int_array_1) + del add_76 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_9 = paddle._C_ops.scale(transpose_32, full_5, float("0"), True) + del transpose_32 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_67 = paddle._C_ops.matmul(scale_9, transpose_33, False, True) + del scale_9, transpose_33 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_77 = paddle._C_ops.add(matmul_67, unsqueeze_0) + del matmul_67 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_8 = paddle._C_ops.softmax(add_77, -1) + del add_77 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_68 = paddle._C_ops.matmul(dropout_50, transpose_34, False, False) + del dropout_50, transpose_34 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_68, [0, 2, 1, 3]) + del matmul_68 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_2) + del transpose_35 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_69 = paddle._C_ops.matmul(reshape_35, parameter_249, False, False) + del parameter_249, reshape_35 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_78 = paddle._C_ops.add(matmul_69, parameter_248) + del matmul_69, parameter_248 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_78, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_78 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_79 = paddle._C_ops.add(layer_norm_49, dropout_52) + del dropout_52, layer_norm_49 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_52, layer_norm_53, layer_norm_54 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_79, parameter_243, parameter_242, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_79, parameter_242, parameter_243 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_52, parameter_247, False, False) + del parameter_247 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_80 = paddle._C_ops.add(matmul_70, parameter_246) + del matmul_70, parameter_246 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_8 = paddle._C_ops.gelu(add_80, False) + del add_80 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_71 = paddle._C_ops.matmul(gelu_8, parameter_245, False, False) + del gelu_8, parameter_245 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_81 = paddle._C_ops.add(matmul_71, parameter_244) + del matmul_71, parameter_244 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_81, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_81 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_82 = paddle._C_ops.add(layer_norm_52, dropout_54) + del dropout_54, layer_norm_52 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_55, layer_norm_56, layer_norm_57 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_82, parameter_241, parameter_240, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_82, parameter_240, parameter_241 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_72 = paddle._C_ops.matmul(layer_norm_55, parameter_239, False, False) + del parameter_239 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_83 = paddle._C_ops.add(matmul_72, parameter_238) + del matmul_72, parameter_238 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(add_83, full_int_array_1) + del add_83 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_55, parameter_237, False, False) + del parameter_237 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_84 = paddle._C_ops.add(matmul_73, parameter_236) + del matmul_73, parameter_236 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_55, parameter_235, False, False) + del parameter_235 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_85 = paddle._C_ops.add(matmul_74, parameter_234) + del matmul_74, parameter_234 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_84, full_int_array_1) + del add_84 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(add_85, full_int_array_1) + del add_85 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_10 = paddle._C_ops.scale(transpose_36, full_5, float("0"), True) + del transpose_36 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_75 = paddle._C_ops.matmul(scale_10, transpose_37, False, True) + del scale_10, transpose_37 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_86 = paddle._C_ops.add(matmul_75, unsqueeze_0) + del matmul_75 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_9 = paddle._C_ops.softmax(add_86, -1) + del add_86 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_76 = paddle._C_ops.matmul(dropout_56, transpose_38, False, False) + del dropout_56, transpose_38 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_76, [0, 2, 1, 3]) + del matmul_76 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_2) + del transpose_39 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_77 = paddle._C_ops.matmul(reshape_39, parameter_233, False, False) + del parameter_233, reshape_39 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_87 = paddle._C_ops.add(matmul_77, parameter_232) + del matmul_77, parameter_232 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_87, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_87 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_88 = paddle._C_ops.add(layer_norm_55, dropout_58) + del dropout_58, layer_norm_55 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_58, layer_norm_59, layer_norm_60 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_88, parameter_227, parameter_226, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_88, parameter_226, parameter_227 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_78 = paddle._C_ops.matmul(layer_norm_58, parameter_231, False, False) + del parameter_231 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_89 = paddle._C_ops.add(matmul_78, parameter_230) + del matmul_78, parameter_230 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_9 = paddle._C_ops.gelu(add_89, False) + del add_89 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_79 = paddle._C_ops.matmul(gelu_9, parameter_229, False, False) + del gelu_9, parameter_229 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_90 = paddle._C_ops.add(matmul_79, parameter_228) + del matmul_79, parameter_228 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_90, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_90 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_91 = paddle._C_ops.add(layer_norm_58, dropout_60) + del dropout_60, layer_norm_58 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_61, layer_norm_62, layer_norm_63 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_91, parameter_225, parameter_224, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_91, parameter_224, parameter_225 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_61, parameter_223, False, False) + del parameter_223 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_92 = paddle._C_ops.add(matmul_80, parameter_222) + del matmul_80, parameter_222 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(add_92, full_int_array_1) + del add_92 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_81 = paddle._C_ops.matmul(layer_norm_61, parameter_221, False, False) + del parameter_221 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_93 = paddle._C_ops.add(matmul_81, parameter_220) + del matmul_81, parameter_220 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_61, parameter_219, False, False) + del parameter_219 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_94 = paddle._C_ops.add(matmul_82, parameter_218) + del matmul_82, parameter_218 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_93, full_int_array_1) + del add_93 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(add_94, full_int_array_1) + del add_94 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_11 = paddle._C_ops.scale(transpose_40, full_5, float("0"), True) + del transpose_40 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_83 = paddle._C_ops.matmul(scale_11, transpose_41, False, True) + del scale_11, transpose_41 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_95 = paddle._C_ops.add(matmul_83, unsqueeze_0) + del matmul_83 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_10 = paddle._C_ops.softmax(add_95, -1) + del add_95 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_84 = paddle._C_ops.matmul(dropout_62, transpose_42, False, False) + del dropout_62, transpose_42 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_84, [0, 2, 1, 3]) + del matmul_84 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_2) + del transpose_43 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_85 = paddle._C_ops.matmul(reshape_43, parameter_217, False, False) + del parameter_217, reshape_43 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_96 = paddle._C_ops.add(matmul_85, parameter_216) + del matmul_85, parameter_216 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_96, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_96 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_97 = paddle._C_ops.add(layer_norm_61, dropout_64) + del dropout_64, layer_norm_61 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_64, layer_norm_65, layer_norm_66 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_97, parameter_211, parameter_210, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_97, parameter_210, parameter_211 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_86 = paddle._C_ops.matmul(layer_norm_64, parameter_215, False, False) + del parameter_215 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_98 = paddle._C_ops.add(matmul_86, parameter_214) + del matmul_86, parameter_214 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_10 = paddle._C_ops.gelu(add_98, False) + del add_98 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_87 = paddle._C_ops.matmul(gelu_10, parameter_213, False, False) + del gelu_10, parameter_213 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_99 = paddle._C_ops.add(matmul_87, parameter_212) + del matmul_87, parameter_212 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_99, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_99 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_100 = paddle._C_ops.add(layer_norm_64, dropout_66) + del dropout_66, layer_norm_64 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_67, layer_norm_68, layer_norm_69 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_100, parameter_209, parameter_208, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_100, parameter_208, parameter_209 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_88 = paddle._C_ops.matmul(layer_norm_67, parameter_207, False, False) + del parameter_207 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_101 = paddle._C_ops.add(matmul_88, parameter_206) + del matmul_88, parameter_206 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_101, full_int_array_1) + del add_101 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_89 = paddle._C_ops.matmul(layer_norm_67, parameter_205, False, False) + del parameter_205 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_102 = paddle._C_ops.add(matmul_89, parameter_204) + del matmul_89, parameter_204 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_67, parameter_203, False, False) + del parameter_203 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_103 = paddle._C_ops.add(matmul_90, parameter_202) + del matmul_90, parameter_202 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(add_102, full_int_array_1) + del add_102 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(add_103, full_int_array_1) + del add_103 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_12 = paddle._C_ops.scale(transpose_44, full_5, float("0"), True) + del transpose_44 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_91 = paddle._C_ops.matmul(scale_12, transpose_45, False, True) + del scale_12, transpose_45 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_104 = paddle._C_ops.add(matmul_91, unsqueeze_0) + del matmul_91 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_11 = paddle._C_ops.softmax(add_104, -1) + del add_104 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_92 = paddle._C_ops.matmul(dropout_68, transpose_46, False, False) + del dropout_68, transpose_46 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_92, [0, 2, 1, 3]) + del matmul_92 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_2) + del transpose_47 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_93 = paddle._C_ops.matmul(reshape_47, parameter_201, False, False) + del parameter_201, reshape_47 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_105 = paddle._C_ops.add(matmul_93, parameter_200) + del matmul_93, parameter_200 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_105, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_105 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_106 = paddle._C_ops.add(layer_norm_67, dropout_70) + del dropout_70, layer_norm_67 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_70, layer_norm_71, layer_norm_72 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_106, parameter_195, parameter_194, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_106, parameter_194, parameter_195 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_94 = paddle._C_ops.matmul(layer_norm_70, parameter_199, False, False) + del parameter_199 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_107 = paddle._C_ops.add(matmul_94, parameter_198) + del matmul_94, parameter_198 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_11 = paddle._C_ops.gelu(add_107, False) + del add_107 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_95 = paddle._C_ops.matmul(gelu_11, parameter_197, False, False) + del gelu_11, parameter_197 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_108 = paddle._C_ops.add(matmul_95, parameter_196) + del matmul_95, parameter_196 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_108, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_108 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_109 = paddle._C_ops.add(layer_norm_70, dropout_72) + del dropout_72, layer_norm_70 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_73, layer_norm_74, layer_norm_75 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_109, parameter_193, parameter_192, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_109, parameter_192, parameter_193 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_96 = paddle._C_ops.matmul(layer_norm_73, parameter_191, False, False) + del parameter_191 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_110 = paddle._C_ops.add(matmul_96, parameter_190) + del matmul_96, parameter_190 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(add_110, full_int_array_1) + del add_110 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_48 = paddle._C_ops.transpose(reshape_48, [0, 2, 1, 3]) + del reshape_48 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_97 = paddle._C_ops.matmul(layer_norm_73, parameter_189, False, False) + del parameter_189 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_111 = paddle._C_ops.add(matmul_97, parameter_188) + del matmul_97, parameter_188 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_98 = paddle._C_ops.matmul(layer_norm_73, parameter_187, False, False) + del parameter_187 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_112 = paddle._C_ops.add(matmul_98, parameter_186) + del matmul_98, parameter_186 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_49 = paddle._C_ops.reshape(add_111, full_int_array_1) + del add_111 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_49 = paddle._C_ops.transpose(reshape_49, [0, 2, 1, 3]) + del reshape_49 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(add_112, full_int_array_1) + del add_112 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_50 = paddle._C_ops.transpose(reshape_50, [0, 2, 1, 3]) + del reshape_50 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_13 = paddle._C_ops.scale(transpose_48, full_5, float("0"), True) + del transpose_48 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_99 = paddle._C_ops.matmul(scale_13, transpose_49, False, True) + del scale_13, transpose_49 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_113 = paddle._C_ops.add(matmul_99, unsqueeze_0) + del matmul_99 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_12 = paddle._C_ops.softmax(add_113, -1) + del add_113 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_74, dropout_75 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_12, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_12 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_100 = paddle._C_ops.matmul(dropout_74, transpose_50, False, False) + del dropout_74, transpose_50 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_51 = paddle._C_ops.transpose(matmul_100, [0, 2, 1, 3]) + del matmul_100 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_51 = paddle._C_ops.reshape(transpose_51, full_int_array_2) + del transpose_51 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_101 = paddle._C_ops.matmul(reshape_51, parameter_185, False, False) + del parameter_185, reshape_51 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_114 = paddle._C_ops.add(matmul_101, parameter_184) + del matmul_101, parameter_184 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_76, dropout_77 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_114, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_114 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_115 = paddle._C_ops.add(layer_norm_73, dropout_76) + del dropout_76, layer_norm_73 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_76, layer_norm_77, layer_norm_78 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_115, parameter_179, parameter_178, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_115, parameter_178, parameter_179 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_102 = paddle._C_ops.matmul(layer_norm_76, parameter_183, False, False) + del parameter_183 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_116 = paddle._C_ops.add(matmul_102, parameter_182) + del matmul_102, parameter_182 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_12 = paddle._C_ops.gelu(add_116, False) + del add_116 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_103 = paddle._C_ops.matmul(gelu_12, parameter_181, False, False) + del gelu_12, parameter_181 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_117 = paddle._C_ops.add(matmul_103, parameter_180) + del matmul_103, parameter_180 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_78, dropout_79 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_117, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_117 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_118 = paddle._C_ops.add(layer_norm_76, dropout_78) + del dropout_78, layer_norm_76 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_79, layer_norm_80, layer_norm_81 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_118, parameter_177, parameter_176, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_118, parameter_176, parameter_177 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_104 = paddle._C_ops.matmul(layer_norm_79, parameter_175, False, False) + del parameter_175 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_119 = paddle._C_ops.add(matmul_104, parameter_174) + del matmul_104, parameter_174 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_52 = paddle._C_ops.reshape(add_119, full_int_array_1) + del add_119 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_52 = paddle._C_ops.transpose(reshape_52, [0, 2, 1, 3]) + del reshape_52 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_105 = paddle._C_ops.matmul(layer_norm_79, parameter_173, False, False) + del parameter_173 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_120 = paddle._C_ops.add(matmul_105, parameter_172) + del matmul_105, parameter_172 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_106 = paddle._C_ops.matmul(layer_norm_79, parameter_171, False, False) + del parameter_171 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_121 = paddle._C_ops.add(matmul_106, parameter_170) + del matmul_106, parameter_170 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(add_120, full_int_array_1) + del add_120 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_53 = paddle._C_ops.transpose(reshape_53, [0, 2, 1, 3]) + del reshape_53 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(add_121, full_int_array_1) + del add_121 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_54 = paddle._C_ops.transpose(reshape_54, [0, 2, 1, 3]) + del reshape_54 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_14 = paddle._C_ops.scale(transpose_52, full_5, float("0"), True) + del transpose_52 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_107 = paddle._C_ops.matmul(scale_14, transpose_53, False, True) + del scale_14, transpose_53 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_122 = paddle._C_ops.add(matmul_107, unsqueeze_0) + del matmul_107 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_13 = paddle._C_ops.softmax(add_122, -1) + del add_122 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_80, dropout_81 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_13, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_13 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_108 = paddle._C_ops.matmul(dropout_80, transpose_54, False, False) + del dropout_80, transpose_54 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_55 = paddle._C_ops.transpose(matmul_108, [0, 2, 1, 3]) + del matmul_108 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(transpose_55, full_int_array_2) + del transpose_55 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_109 = paddle._C_ops.matmul(reshape_55, parameter_169, False, False) + del parameter_169, reshape_55 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_123 = paddle._C_ops.add(matmul_109, parameter_168) + del matmul_109, parameter_168 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_82, dropout_83 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_123, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_123 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_124 = paddle._C_ops.add(layer_norm_79, dropout_82) + del dropout_82, layer_norm_79 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_82, layer_norm_83, layer_norm_84 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_124, parameter_163, parameter_162, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_124, parameter_162, parameter_163 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_110 = paddle._C_ops.matmul(layer_norm_82, parameter_167, False, False) + del parameter_167 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_125 = paddle._C_ops.add(matmul_110, parameter_166) + del matmul_110, parameter_166 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_13 = paddle._C_ops.gelu(add_125, False) + del add_125 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_111 = paddle._C_ops.matmul(gelu_13, parameter_165, False, False) + del gelu_13, parameter_165 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_126 = paddle._C_ops.add(matmul_111, parameter_164) + del matmul_111, parameter_164 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_84, dropout_85 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_126, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_126 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_127 = paddle._C_ops.add(layer_norm_82, dropout_84) + del dropout_84, layer_norm_82 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_85, layer_norm_86, layer_norm_87 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_127, parameter_161, parameter_160, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_127, parameter_160, parameter_161 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_112 = paddle._C_ops.matmul(layer_norm_85, parameter_159, False, False) + del parameter_159 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_128 = paddle._C_ops.add(matmul_112, parameter_158) + del matmul_112, parameter_158 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(add_128, full_int_array_1) + del add_128 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_56 = paddle._C_ops.transpose(reshape_56, [0, 2, 1, 3]) + del reshape_56 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_113 = paddle._C_ops.matmul(layer_norm_85, parameter_157, False, False) + del parameter_157 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_129 = paddle._C_ops.add(matmul_113, parameter_156) + del matmul_113, parameter_156 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_114 = paddle._C_ops.matmul(layer_norm_85, parameter_155, False, False) + del parameter_155 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_130 = paddle._C_ops.add(matmul_114, parameter_154) + del matmul_114, parameter_154 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(add_129, full_int_array_1) + del add_129 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_57 = paddle._C_ops.transpose(reshape_57, [0, 2, 1, 3]) + del reshape_57 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(add_130, full_int_array_1) + del add_130 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_58 = paddle._C_ops.transpose(reshape_58, [0, 2, 1, 3]) + del reshape_58 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_15 = paddle._C_ops.scale(transpose_56, full_5, float("0"), True) + del transpose_56 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_115 = paddle._C_ops.matmul(scale_15, transpose_57, False, True) + del scale_15, transpose_57 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_131 = paddle._C_ops.add(matmul_115, unsqueeze_0) + del matmul_115 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_14 = paddle._C_ops.softmax(add_131, -1) + del add_131 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_86, dropout_87 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_14, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_14 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_116 = paddle._C_ops.matmul(dropout_86, transpose_58, False, False) + del dropout_86, transpose_58 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_59 = paddle._C_ops.transpose(matmul_116, [0, 2, 1, 3]) + del matmul_116 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_59 = paddle._C_ops.reshape(transpose_59, full_int_array_2) + del transpose_59 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_117 = paddle._C_ops.matmul(reshape_59, parameter_153, False, False) + del parameter_153, reshape_59 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_132 = paddle._C_ops.add(matmul_117, parameter_152) + del matmul_117, parameter_152 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_88, dropout_89 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_132, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_132 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_133 = paddle._C_ops.add(layer_norm_85, dropout_88) + del dropout_88, layer_norm_85 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_88, layer_norm_89, layer_norm_90 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_133, parameter_147, parameter_146, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_133, parameter_146, parameter_147 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_118 = paddle._C_ops.matmul(layer_norm_88, parameter_151, False, False) + del parameter_151 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_134 = paddle._C_ops.add(matmul_118, parameter_150) + del matmul_118, parameter_150 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_14 = paddle._C_ops.gelu(add_134, False) + del add_134 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_119 = paddle._C_ops.matmul(gelu_14, parameter_149, False, False) + del gelu_14, parameter_149 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_135 = paddle._C_ops.add(matmul_119, parameter_148) + del matmul_119, parameter_148 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_90, dropout_91 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_135, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_135 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_136 = paddle._C_ops.add(layer_norm_88, dropout_90) + del dropout_90, layer_norm_88 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_91, layer_norm_92, layer_norm_93 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_136, parameter_145, parameter_144, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_136, parameter_144, parameter_145 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_120 = paddle._C_ops.matmul(layer_norm_91, parameter_143, False, False) + del parameter_143 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_137 = paddle._C_ops.add(matmul_120, parameter_142) + del matmul_120, parameter_142 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(add_137, full_int_array_1) + del add_137 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_60 = paddle._C_ops.transpose(reshape_60, [0, 2, 1, 3]) + del reshape_60 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_121 = paddle._C_ops.matmul(layer_norm_91, parameter_141, False, False) + del parameter_141 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_138 = paddle._C_ops.add(matmul_121, parameter_140) + del matmul_121, parameter_140 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_122 = paddle._C_ops.matmul(layer_norm_91, parameter_139, False, False) + del parameter_139 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_139 = paddle._C_ops.add(matmul_122, parameter_138) + del matmul_122, parameter_138 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(add_138, full_int_array_1) + del add_138 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_61 = paddle._C_ops.transpose(reshape_61, [0, 2, 1, 3]) + del reshape_61 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(add_139, full_int_array_1) + del add_139 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_62 = paddle._C_ops.transpose(reshape_62, [0, 2, 1, 3]) + del reshape_62 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_16 = paddle._C_ops.scale(transpose_60, full_5, float("0"), True) + del transpose_60 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_123 = paddle._C_ops.matmul(scale_16, transpose_61, False, True) + del scale_16, transpose_61 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_140 = paddle._C_ops.add(matmul_123, unsqueeze_0) + del matmul_123 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_15 = paddle._C_ops.softmax(add_140, -1) + del add_140 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_92, dropout_93 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_15, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_15 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_124 = paddle._C_ops.matmul(dropout_92, transpose_62, False, False) + del dropout_92, transpose_62 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_63 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(transpose_63, full_int_array_2) + del transpose_63 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_125 = paddle._C_ops.matmul(reshape_63, parameter_137, False, False) + del parameter_137, reshape_63 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_141 = paddle._C_ops.add(matmul_125, parameter_136) + del matmul_125, parameter_136 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_94, dropout_95 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_141, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_141 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_142 = paddle._C_ops.add(layer_norm_91, dropout_94) + del dropout_94, layer_norm_91 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_94, layer_norm_95, layer_norm_96 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_142, parameter_131, parameter_130, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_142, parameter_130, parameter_131 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_126 = paddle._C_ops.matmul(layer_norm_94, parameter_135, False, False) + del parameter_135 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_143 = paddle._C_ops.add(matmul_126, parameter_134) + del matmul_126, parameter_134 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_15 = paddle._C_ops.gelu(add_143, False) + del add_143 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_127 = paddle._C_ops.matmul(gelu_15, parameter_133, False, False) + del gelu_15, parameter_133 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_144 = paddle._C_ops.add(matmul_127, parameter_132) + del matmul_127, parameter_132 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_96, dropout_97 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_144, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_144 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_145 = paddle._C_ops.add(layer_norm_94, dropout_96) + del dropout_96, layer_norm_94 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_97, layer_norm_98, layer_norm_99 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_145, parameter_129, parameter_128, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_145, parameter_128, parameter_129 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_128 = paddle._C_ops.matmul(layer_norm_97, parameter_127, False, False) + del parameter_127 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_146 = paddle._C_ops.add(matmul_128, parameter_126) + del matmul_128, parameter_126 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(add_146, full_int_array_1) + del add_146 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_64 = paddle._C_ops.transpose(reshape_64, [0, 2, 1, 3]) + del reshape_64 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_129 = paddle._C_ops.matmul(layer_norm_97, parameter_125, False, False) + del parameter_125 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_147 = paddle._C_ops.add(matmul_129, parameter_124) + del matmul_129, parameter_124 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_130 = paddle._C_ops.matmul(layer_norm_97, parameter_123, False, False) + del parameter_123 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_148 = paddle._C_ops.add(matmul_130, parameter_122) + del matmul_130, parameter_122 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(add_147, full_int_array_1) + del add_147 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_65 = paddle._C_ops.transpose(reshape_65, [0, 2, 1, 3]) + del reshape_65 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_66 = paddle._C_ops.reshape(add_148, full_int_array_1) + del add_148 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_66 = paddle._C_ops.transpose(reshape_66, [0, 2, 1, 3]) + del reshape_66 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_17 = paddle._C_ops.scale(transpose_64, full_5, float("0"), True) + del transpose_64 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_131 = paddle._C_ops.matmul(scale_17, transpose_65, False, True) + del scale_17, transpose_65 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_149 = paddle._C_ops.add(matmul_131, unsqueeze_0) + del matmul_131 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_16 = paddle._C_ops.softmax(add_149, -1) + del add_149 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_98, dropout_99 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_16, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_16 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_132 = paddle._C_ops.matmul(dropout_98, transpose_66, False, False) + del dropout_98, transpose_66 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_67 = paddle._C_ops.transpose(matmul_132, [0, 2, 1, 3]) + del matmul_132 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_67 = paddle._C_ops.reshape(transpose_67, full_int_array_2) + del transpose_67 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_133 = paddle._C_ops.matmul(reshape_67, parameter_121, False, False) + del parameter_121, reshape_67 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_150 = paddle._C_ops.add(matmul_133, parameter_120) + del matmul_133, parameter_120 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_100, dropout_101 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_150, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_150 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_151 = paddle._C_ops.add(layer_norm_97, dropout_100) + del dropout_100, layer_norm_97 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_100, layer_norm_101, layer_norm_102 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_151, parameter_115, parameter_114, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_151, parameter_114, parameter_115 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_134 = paddle._C_ops.matmul(layer_norm_100, parameter_119, False, False) + del parameter_119 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_152 = paddle._C_ops.add(matmul_134, parameter_118) + del matmul_134, parameter_118 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_16 = paddle._C_ops.gelu(add_152, False) + del add_152 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_135 = paddle._C_ops.matmul(gelu_16, parameter_117, False, False) + del gelu_16, parameter_117 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_153 = paddle._C_ops.add(matmul_135, parameter_116) + del matmul_135, parameter_116 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_102, dropout_103 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_153, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_153 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_154 = paddle._C_ops.add(layer_norm_100, dropout_102) + del dropout_102, layer_norm_100 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_103, layer_norm_104, layer_norm_105 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_154, parameter_113, parameter_112, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_154, parameter_112, parameter_113 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_136 = paddle._C_ops.matmul(layer_norm_103, parameter_111, False, False) + del parameter_111 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_155 = paddle._C_ops.add(matmul_136, parameter_110) + del matmul_136, parameter_110 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_68 = paddle._C_ops.reshape(add_155, full_int_array_1) + del add_155 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_68 = paddle._C_ops.transpose(reshape_68, [0, 2, 1, 3]) + del reshape_68 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_137 = paddle._C_ops.matmul(layer_norm_103, parameter_109, False, False) + del parameter_109 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_156 = paddle._C_ops.add(matmul_137, parameter_108) + del matmul_137, parameter_108 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_138 = paddle._C_ops.matmul(layer_norm_103, parameter_107, False, False) + del parameter_107 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_157 = paddle._C_ops.add(matmul_138, parameter_106) + del matmul_138, parameter_106 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_69 = paddle._C_ops.reshape(add_156, full_int_array_1) + del add_156 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_69 = paddle._C_ops.transpose(reshape_69, [0, 2, 1, 3]) + del reshape_69 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(add_157, full_int_array_1) + del add_157 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_70 = paddle._C_ops.transpose(reshape_70, [0, 2, 1, 3]) + del reshape_70 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_18 = paddle._C_ops.scale(transpose_68, full_5, float("0"), True) + del transpose_68 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_139 = paddle._C_ops.matmul(scale_18, transpose_69, False, True) + del scale_18, transpose_69 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_158 = paddle._C_ops.add(matmul_139, unsqueeze_0) + del matmul_139 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_17 = paddle._C_ops.softmax(add_158, -1) + del add_158 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_104, dropout_105 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_17, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_17 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_140 = paddle._C_ops.matmul(dropout_104, transpose_70, False, False) + del dropout_104, transpose_70 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_71 = paddle._C_ops.transpose(matmul_140, [0, 2, 1, 3]) + del matmul_140 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_71 = paddle._C_ops.reshape(transpose_71, full_int_array_2) + del transpose_71 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_141 = paddle._C_ops.matmul(reshape_71, parameter_105, False, False) + del parameter_105, reshape_71 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_159 = paddle._C_ops.add(matmul_141, parameter_104) + del matmul_141, parameter_104 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_106, dropout_107 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_159, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_159 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_160 = paddle._C_ops.add(layer_norm_103, dropout_106) + del dropout_106, layer_norm_103 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_106, layer_norm_107, layer_norm_108 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_160, parameter_99, parameter_98, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_160, parameter_98, parameter_99 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_142 = paddle._C_ops.matmul(layer_norm_106, parameter_103, False, False) + del parameter_103 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_161 = paddle._C_ops.add(matmul_142, parameter_102) + del matmul_142, parameter_102 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_17 = paddle._C_ops.gelu(add_161, False) + del add_161 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_143 = paddle._C_ops.matmul(gelu_17, parameter_101, False, False) + del gelu_17, parameter_101 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_162 = paddle._C_ops.add(matmul_143, parameter_100) + del matmul_143, parameter_100 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_108, dropout_109 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_162, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_162 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_163 = paddle._C_ops.add(layer_norm_106, dropout_108) + del dropout_108, layer_norm_106 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_109, layer_norm_110, layer_norm_111 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_163, parameter_97, parameter_96, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_163, parameter_96, parameter_97 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_144 = paddle._C_ops.matmul(layer_norm_109, parameter_95, False, False) + del parameter_95 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_164 = paddle._C_ops.add(matmul_144, parameter_94) + del matmul_144, parameter_94 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_72 = paddle._C_ops.reshape(add_164, full_int_array_1) + del add_164 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_72 = paddle._C_ops.transpose(reshape_72, [0, 2, 1, 3]) + del reshape_72 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_145 = paddle._C_ops.matmul(layer_norm_109, parameter_93, False, False) + del parameter_93 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_165 = paddle._C_ops.add(matmul_145, parameter_92) + del matmul_145, parameter_92 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_146 = paddle._C_ops.matmul(layer_norm_109, parameter_91, False, False) + del parameter_91 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_166 = paddle._C_ops.add(matmul_146, parameter_90) + del matmul_146, parameter_90 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(add_165, full_int_array_1) + del add_165 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_73 = paddle._C_ops.transpose(reshape_73, [0, 2, 1, 3]) + del reshape_73 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(add_166, full_int_array_1) + del add_166 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_74 = paddle._C_ops.transpose(reshape_74, [0, 2, 1, 3]) + del reshape_74 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_19 = paddle._C_ops.scale(transpose_72, full_5, float("0"), True) + del transpose_72 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_147 = paddle._C_ops.matmul(scale_19, transpose_73, False, True) + del scale_19, transpose_73 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_167 = paddle._C_ops.add(matmul_147, unsqueeze_0) + del matmul_147 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_18 = paddle._C_ops.softmax(add_167, -1) + del add_167 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_110, dropout_111 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_18, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_18 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_148 = paddle._C_ops.matmul(dropout_110, transpose_74, False, False) + del dropout_110, transpose_74 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_75 = paddle._C_ops.transpose(matmul_148, [0, 2, 1, 3]) + del matmul_148 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_75 = paddle._C_ops.reshape(transpose_75, full_int_array_2) + del transpose_75 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_149 = paddle._C_ops.matmul(reshape_75, parameter_89, False, False) + del parameter_89, reshape_75 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_168 = paddle._C_ops.add(matmul_149, parameter_88) + del matmul_149, parameter_88 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_112, dropout_113 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_168, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_168 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_169 = paddle._C_ops.add(layer_norm_109, dropout_112) + del dropout_112, layer_norm_109 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_112, layer_norm_113, layer_norm_114 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_169, parameter_83, parameter_82, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_169, parameter_82, parameter_83 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_150 = paddle._C_ops.matmul(layer_norm_112, parameter_87, False, False) + del parameter_87 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_170 = paddle._C_ops.add(matmul_150, parameter_86) + del matmul_150, parameter_86 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_18 = paddle._C_ops.gelu(add_170, False) + del add_170 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_151 = paddle._C_ops.matmul(gelu_18, parameter_85, False, False) + del gelu_18, parameter_85 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_171 = paddle._C_ops.add(matmul_151, parameter_84) + del matmul_151, parameter_84 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_114, dropout_115 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_171, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_171 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_172 = paddle._C_ops.add(layer_norm_112, dropout_114) + del dropout_114, layer_norm_112 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_115, layer_norm_116, layer_norm_117 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_172, parameter_81, parameter_80, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_172, parameter_80, parameter_81 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_152 = paddle._C_ops.matmul(layer_norm_115, parameter_79, False, False) + del parameter_79 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_173 = paddle._C_ops.add(matmul_152, parameter_78) + del matmul_152, parameter_78 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_76 = paddle._C_ops.reshape(add_173, full_int_array_1) + del add_173 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_76 = paddle._C_ops.transpose(reshape_76, [0, 2, 1, 3]) + del reshape_76 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_153 = paddle._C_ops.matmul(layer_norm_115, parameter_77, False, False) + del parameter_77 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_174 = paddle._C_ops.add(matmul_153, parameter_76) + del matmul_153, parameter_76 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_154 = paddle._C_ops.matmul(layer_norm_115, parameter_75, False, False) + del parameter_75 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_175 = paddle._C_ops.add(matmul_154, parameter_74) + del matmul_154, parameter_74 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_77 = paddle._C_ops.reshape(add_174, full_int_array_1) + del add_174 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_77 = paddle._C_ops.transpose(reshape_77, [0, 2, 1, 3]) + del reshape_77 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(add_175, full_int_array_1) + del add_175 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_78 = paddle._C_ops.transpose(reshape_78, [0, 2, 1, 3]) + del reshape_78 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_20 = paddle._C_ops.scale(transpose_76, full_5, float("0"), True) + del transpose_76 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_155 = paddle._C_ops.matmul(scale_20, transpose_77, False, True) + del scale_20, transpose_77 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_176 = paddle._C_ops.add(matmul_155, unsqueeze_0) + del matmul_155 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_19 = paddle._C_ops.softmax(add_176, -1) + del add_176 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_116, dropout_117 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_19, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_19 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_156 = paddle._C_ops.matmul(dropout_116, transpose_78, False, False) + del dropout_116, transpose_78 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_79 = paddle._C_ops.transpose(matmul_156, [0, 2, 1, 3]) + del matmul_156 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_79 = paddle._C_ops.reshape(transpose_79, full_int_array_2) + del transpose_79 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_157 = paddle._C_ops.matmul(reshape_79, parameter_73, False, False) + del parameter_73, reshape_79 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_177 = paddle._C_ops.add(matmul_157, parameter_72) + del matmul_157, parameter_72 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_118, dropout_119 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_177, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_177 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_178 = paddle._C_ops.add(layer_norm_115, dropout_118) + del dropout_118, layer_norm_115 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_118, layer_norm_119, layer_norm_120 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_178, parameter_67, parameter_66, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_178, parameter_66, parameter_67 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_158 = paddle._C_ops.matmul(layer_norm_118, parameter_71, False, False) + del parameter_71 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_179 = paddle._C_ops.add(matmul_158, parameter_70) + del matmul_158, parameter_70 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_19 = paddle._C_ops.gelu(add_179, False) + del add_179 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_159 = paddle._C_ops.matmul(gelu_19, parameter_69, False, False) + del gelu_19, parameter_69 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_180 = paddle._C_ops.add(matmul_159, parameter_68) + del matmul_159, parameter_68 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_120, dropout_121 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_180, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_180 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_181 = paddle._C_ops.add(layer_norm_118, dropout_120) + del dropout_120, layer_norm_118 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_121, layer_norm_122, layer_norm_123 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_181, parameter_65, parameter_64, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_181, parameter_64, parameter_65 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_160 = paddle._C_ops.matmul(layer_norm_121, parameter_63, False, False) + del parameter_63 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_182 = paddle._C_ops.add(matmul_160, parameter_62) + del matmul_160, parameter_62 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_80 = paddle._C_ops.reshape(add_182, full_int_array_1) + del add_182 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_80 = paddle._C_ops.transpose(reshape_80, [0, 2, 1, 3]) + del reshape_80 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_161 = paddle._C_ops.matmul(layer_norm_121, parameter_61, False, False) + del parameter_61 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_183 = paddle._C_ops.add(matmul_161, parameter_60) + del matmul_161, parameter_60 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_162 = paddle._C_ops.matmul(layer_norm_121, parameter_59, False, False) + del parameter_59 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_184 = paddle._C_ops.add(matmul_162, parameter_58) + del matmul_162, parameter_58 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(add_183, full_int_array_1) + del add_183 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_81 = paddle._C_ops.transpose(reshape_81, [0, 2, 1, 3]) + del reshape_81 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(add_184, full_int_array_1) + del add_184 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_82 = paddle._C_ops.transpose(reshape_82, [0, 2, 1, 3]) + del reshape_82 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_21 = paddle._C_ops.scale(transpose_80, full_5, float("0"), True) + del transpose_80 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_163 = paddle._C_ops.matmul(scale_21, transpose_81, False, True) + del scale_21, transpose_81 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_185 = paddle._C_ops.add(matmul_163, unsqueeze_0) + del matmul_163 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_20 = paddle._C_ops.softmax(add_185, -1) + del add_185 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_122, dropout_123 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_20, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_20 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_164 = paddle._C_ops.matmul(dropout_122, transpose_82, False, False) + del dropout_122, transpose_82 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_83 = paddle._C_ops.transpose(matmul_164, [0, 2, 1, 3]) + del matmul_164 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(transpose_83, full_int_array_2) + del transpose_83 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_165 = paddle._C_ops.matmul(reshape_83, parameter_57, False, False) + del parameter_57, reshape_83 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_186 = paddle._C_ops.add(matmul_165, parameter_56) + del matmul_165, parameter_56 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_124, dropout_125 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_186, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_186 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_187 = paddle._C_ops.add(layer_norm_121, dropout_124) + del dropout_124, layer_norm_121 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_124, layer_norm_125, layer_norm_126 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_187, parameter_51, parameter_50, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_187, parameter_50, parameter_51 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_166 = paddle._C_ops.matmul(layer_norm_124, parameter_55, False, False) + del parameter_55 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_188 = paddle._C_ops.add(matmul_166, parameter_54) + del matmul_166, parameter_54 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_20 = paddle._C_ops.gelu(add_188, False) + del add_188 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_167 = paddle._C_ops.matmul(gelu_20, parameter_53, False, False) + del gelu_20, parameter_53 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_189 = paddle._C_ops.add(matmul_167, parameter_52) + del matmul_167, parameter_52 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_126, dropout_127 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_189, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_189 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_190 = paddle._C_ops.add(layer_norm_124, dropout_126) + del dropout_126, layer_norm_124 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_127, layer_norm_128, layer_norm_129 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_190, parameter_49, parameter_48, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_190, parameter_48, parameter_49 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_168 = paddle._C_ops.matmul(layer_norm_127, parameter_47, False, False) + del parameter_47 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_191 = paddle._C_ops.add(matmul_168, parameter_46) + del matmul_168, parameter_46 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_84 = paddle._C_ops.reshape(add_191, full_int_array_1) + del add_191 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_84 = paddle._C_ops.transpose(reshape_84, [0, 2, 1, 3]) + del reshape_84 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_169 = paddle._C_ops.matmul(layer_norm_127, parameter_45, False, False) + del parameter_45 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_192 = paddle._C_ops.add(matmul_169, parameter_44) + del matmul_169, parameter_44 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_170 = paddle._C_ops.matmul(layer_norm_127, parameter_43, False, False) + del parameter_43 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_193 = paddle._C_ops.add(matmul_170, parameter_42) + del matmul_170, parameter_42 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_85 = paddle._C_ops.reshape(add_192, full_int_array_1) + del add_192 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_85 = paddle._C_ops.transpose(reshape_85, [0, 2, 1, 3]) + del reshape_85 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_86 = paddle._C_ops.reshape(add_193, full_int_array_1) + del add_193 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_86 = paddle._C_ops.transpose(reshape_86, [0, 2, 1, 3]) + del reshape_86 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_22 = paddle._C_ops.scale(transpose_84, full_5, float("0"), True) + del transpose_84 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_171 = paddle._C_ops.matmul(scale_22, transpose_85, False, True) + del scale_22, transpose_85 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_194 = paddle._C_ops.add(matmul_171, unsqueeze_0) + del matmul_171 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_21 = paddle._C_ops.softmax(add_194, -1) + del add_194 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_128, dropout_129 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_21, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_21 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_172 = paddle._C_ops.matmul(dropout_128, transpose_86, False, False) + del dropout_128, transpose_86 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_87 = paddle._C_ops.transpose(matmul_172, [0, 2, 1, 3]) + del matmul_172 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_87 = paddle._C_ops.reshape(transpose_87, full_int_array_2) + del transpose_87 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_173 = paddle._C_ops.matmul(reshape_87, parameter_41, False, False) + del parameter_41, reshape_87 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_195 = paddle._C_ops.add(matmul_173, parameter_40) + del matmul_173, parameter_40 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_130, dropout_131 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_195, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_195 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_196 = paddle._C_ops.add(layer_norm_127, dropout_130) + del dropout_130, layer_norm_127 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_130, layer_norm_131, layer_norm_132 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_196, parameter_35, parameter_34, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_196, parameter_34, parameter_35 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_174 = paddle._C_ops.matmul(layer_norm_130, parameter_39, False, False) + del parameter_39 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_197 = paddle._C_ops.add(matmul_174, parameter_38) + del matmul_174, parameter_38 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_21 = paddle._C_ops.gelu(add_197, False) + del add_197 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_175 = paddle._C_ops.matmul(gelu_21, parameter_37, False, False) + del gelu_21, parameter_37 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_198 = paddle._C_ops.add(matmul_175, parameter_36) + del matmul_175, parameter_36 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_132, dropout_133 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_198, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_198 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_199 = paddle._C_ops.add(layer_norm_130, dropout_132) + del dropout_132, layer_norm_130 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_133, layer_norm_134, layer_norm_135 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_199, parameter_33, parameter_32, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_199, parameter_32, parameter_33 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_176 = paddle._C_ops.matmul(layer_norm_133, parameter_31, False, False) + del parameter_31 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_200 = paddle._C_ops.add(matmul_176, parameter_30) + del matmul_176, parameter_30 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(add_200, full_int_array_1) + del add_200 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_88 = paddle._C_ops.transpose(reshape_88, [0, 2, 1, 3]) + del reshape_88 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_177 = paddle._C_ops.matmul(layer_norm_133, parameter_29, False, False) + del parameter_29 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_201 = paddle._C_ops.add(matmul_177, parameter_28) + del matmul_177, parameter_28 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_178 = paddle._C_ops.matmul(layer_norm_133, parameter_27, False, False) + del parameter_27 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_202 = paddle._C_ops.add(matmul_178, parameter_26) + del matmul_178, parameter_26 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_89 = paddle._C_ops.reshape(add_201, full_int_array_1) + del add_201 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_89 = paddle._C_ops.transpose(reshape_89, [0, 2, 1, 3]) + del reshape_89 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(add_202, full_int_array_1) + del add_202 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_90 = paddle._C_ops.transpose(reshape_90, [0, 2, 1, 3]) + del reshape_90 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_23 = paddle._C_ops.scale(transpose_88, full_5, float("0"), True) + del transpose_88 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_179 = paddle._C_ops.matmul(scale_23, transpose_89, False, True) + del scale_23, transpose_89 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_203 = paddle._C_ops.add(matmul_179, unsqueeze_0) + del matmul_179 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_22 = paddle._C_ops.softmax(add_203, -1) + del add_203 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_134, dropout_135 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_22, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_22 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_180 = paddle._C_ops.matmul(dropout_134, transpose_90, False, False) + del dropout_134, transpose_90 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_91 = paddle._C_ops.transpose(matmul_180, [0, 2, 1, 3]) + del matmul_180 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_91 = paddle._C_ops.reshape(transpose_91, full_int_array_2) + del transpose_91 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_181 = paddle._C_ops.matmul(reshape_91, parameter_25, False, False) + del parameter_25, reshape_91 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_204 = paddle._C_ops.add(matmul_181, parameter_24) + del matmul_181, parameter_24 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_136, dropout_137 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_204, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_204 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_205 = paddle._C_ops.add(layer_norm_133, dropout_136) + del dropout_136, layer_norm_133 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_136, layer_norm_137, layer_norm_138 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_205, parameter_19, parameter_18, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_205, parameter_18, parameter_19 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_182 = paddle._C_ops.matmul(layer_norm_136, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_206 = paddle._C_ops.add(matmul_182, parameter_22) + del matmul_182, parameter_22 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_22 = paddle._C_ops.gelu(add_206, False) + del add_206 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_183 = paddle._C_ops.matmul(gelu_22, parameter_21, False, False) + del gelu_22, parameter_21 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_207 = paddle._C_ops.add(matmul_183, parameter_20) + del matmul_183, parameter_20 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_138, dropout_139 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_207, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_207 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_208 = paddle._C_ops.add(layer_norm_136, dropout_138) + del dropout_138, layer_norm_136 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_139, layer_norm_140, layer_norm_141 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_208, parameter_17, parameter_16, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_208, parameter_16, parameter_17 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_184 = paddle._C_ops.matmul(layer_norm_139, parameter_15, False, False) + del parameter_15 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_209 = paddle._C_ops.add(matmul_184, parameter_14) + del matmul_184, parameter_14 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_92 = paddle._C_ops.reshape(add_209, full_int_array_1) + del add_209 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_92 = paddle._C_ops.transpose(reshape_92, [0, 2, 1, 3]) + del reshape_92 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_185 = paddle._C_ops.matmul(layer_norm_139, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_210 = paddle._C_ops.add(matmul_185, parameter_12) + del matmul_185, parameter_12 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_186 = paddle._C_ops.matmul(layer_norm_139, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_211 = paddle._C_ops.add(matmul_186, parameter_10) + del matmul_186, parameter_10 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_93 = paddle._C_ops.reshape(add_210, full_int_array_1) + del add_210 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_93 = paddle._C_ops.transpose(reshape_93, [0, 2, 1, 3]) + del reshape_93 + + # pd_op.reshape: (1x21x16x64xf32) <- (1x21x1024xf32, 4xi64) + reshape_94 = paddle._C_ops.reshape(add_211, full_int_array_1) + del add_211, full_int_array_1 + + # pd_op.transpose: (1x16x21x64xf32) <- (1x21x16x64xf32) + transpose_94 = paddle._C_ops.transpose(reshape_94, [0, 2, 1, 3]) + del reshape_94 + + # pd_op.scale: (1x16x21x64xf32) <- (1x16x21x64xf32, 1xf32) + scale_24 = paddle._C_ops.scale(transpose_92, full_5, float("0"), True) + del full_5, transpose_92 + + # pd_op.matmul: (1x16x21x21xf32) <- (1x16x21x64xf32, 1x16x21x64xf32) + matmul_187 = paddle._C_ops.matmul(scale_24, transpose_93, False, True) + del scale_24, transpose_93 + + # pd_op.add: (1x16x21x21xf32) <- (1x16x21x21xf32, 1x1x1x21xf32) + add_212 = paddle._C_ops.add(matmul_187, unsqueeze_0) + del matmul_187, unsqueeze_0 + + # pd_op.softmax: (1x16x21x21xf32) <- (1x16x21x21xf32) + softmax_23 = paddle._C_ops.softmax(add_212, -1) + del add_212 + + # pd_op.dropout: (1x16x21x21xf32, 1x16x21x21xui8) <- (1x16x21x21xf32, None, 1xf32) + dropout_140, dropout_141 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_23, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_23 + + # pd_op.matmul: (1x16x21x64xf32) <- (1x16x21x21xf32, 1x16x21x64xf32) + matmul_188 = paddle._C_ops.matmul(dropout_140, transpose_94, False, False) + del dropout_140, transpose_94 + + # pd_op.transpose: (1x21x16x64xf32) <- (1x16x21x64xf32) + transpose_95 = paddle._C_ops.transpose(matmul_188, [0, 2, 1, 3]) + del matmul_188 + + # pd_op.reshape: (1x21x1024xf32) <- (1x21x16x64xf32, 3xi64) + reshape_95 = paddle._C_ops.reshape(transpose_95, full_int_array_2) + del full_int_array_2, transpose_95 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x1024xf32, 1024x1024xf32) + matmul_189 = paddle._C_ops.matmul(reshape_95, parameter_9, False, False) + del parameter_9, reshape_95 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_213 = paddle._C_ops.add(matmul_189, parameter_8) + del matmul_189, parameter_8 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_142, dropout_143 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_213, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_213 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_214 = paddle._C_ops.add(layer_norm_139, dropout_142) + del dropout_142, layer_norm_139 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_142, layer_norm_143, layer_norm_144 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_214, parameter_3, parameter_2, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_214, parameter_2, parameter_3 + + # pd_op.matmul: (1x21x4096xf32) <- (1x21x1024xf32, 1024x4096xf32) + matmul_190 = paddle._C_ops.matmul(layer_norm_142, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (1x21x4096xf32) <- (1x21x4096xf32, 4096xf32) + add_215 = paddle._C_ops.add(matmul_190, parameter_6) + del matmul_190, parameter_6 + + # pd_op.gelu: (1x21x4096xf32) <- (1x21x4096xf32) + gelu_23 = paddle._C_ops.gelu(add_215, False) + del add_215 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x4096xf32, 4096x1024xf32) + matmul_191 = paddle._C_ops.matmul(gelu_23, parameter_5, False, False) + del gelu_23, parameter_5 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_216 = paddle._C_ops.add(matmul_191, parameter_4) + del matmul_191, parameter_4 + + # pd_op.dropout: (1x21x1024xf32, 1x21x1024xui8) <- (1x21x1024xf32, None, 1xf32) + dropout_144, dropout_145 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_216, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_216, full_4 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1x21x1024xf32) + add_217 = paddle._C_ops.add(layer_norm_142, dropout_144) + del dropout_144, layer_norm_142 + + # pd_op.layer_norm: (1x21x1024xf32, 1x21xf32, 1x21xf32) <- (1x21x1024xf32, 1024xf32, 1024xf32) + layer_norm_0, layer_norm_145, layer_norm_146 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_217, parameter_1, parameter_0, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_217, parameter_0, parameter_1 + + return layer_norm_0 diff --git a/paddle_samples/PaddleNLP/electra-large/weight_meta.py b/paddle_samples/PaddleNLP/electra-large/weight_meta.py new file mode 100644 index 000000000..026d42260 --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-large/weight_meta.py @@ -0,0 +1,3456 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.101024") + max_val = float("0.0996568") + mean = float("7.91738e-06") + std = float("0.0200089") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.102222") + max_val = float("0.105952") + mean = float("4.97701e-06") + std = float("0.0200014") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.10177") + max_val = float("0.0940287") + mean = float("-4.04233e-07") + std = float("0.0199921") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0933784") + max_val = float("0.0967114") + mean = float("-9.14002e-06") + std = float("0.020004") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0939795") + max_val = float("0.0948213") + mean = float("1.40832e-05") + std = float("0.0200067") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100487") + max_val = float("0.0999951") + mean = float("-1.03226e-05") + std = float("0.0200161") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0960376") + max_val = float("0.10324") + mean = float("1.96859e-05") + std = float("0.0199999") + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.105295") + max_val = float("0.0996869") + mean = float("7.16952e-06") + std = float("0.0200059") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0930148") + max_val = float("0.0921824") + mean = float("-1.56746e-05") + std = float("0.0200027") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.101346") + max_val = float("0.0941041") + mean = float("-1.00789e-05") + std = float("0.0199812") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.103122") + max_val = float("0.0941355") + mean = float("1.06138e-05") + std = float("0.0200098") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.116736") + max_val = float("0.0977742") + mean = float("-6.80129e-05") + std = float("0.0200096") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0997121") + max_val = float("0.103272") + mean = float("2.14878e-05") + std = float("0.0199946") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0969378") + max_val = float("0.095086") + mean = float("-1.6667e-05") + std = float("0.0200047") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0961546") + max_val = float("0.101277") + mean = float("-9.08123e-07") + std = float("0.0200241") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100256") + max_val = float("0.102231") + mean = float("1.14292e-05") + std = float("0.0199978") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0927109") + max_val = float("0.0915999") + mean = float("2.5427e-05") + std = float("0.020001") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0933872") + max_val = float("0.095345") + mean = float("-1.30901e-05") + std = float("0.0200051") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.10062") + max_val = float("0.106327") + mean = float("-1.75622e-06") + std = float("0.0200036") + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.100227") + max_val = float("0.101815") + mean = float("6.61959e-06") + std = float("0.0199992") + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.108988") + max_val = float("0.0928629") + mean = float("-3.63083e-06") + std = float("0.0199923") + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0981934") + max_val = float("0.0978936") + mean = float("2.33092e-05") + std = float("0.0200211") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0921629") + max_val = float("0.0946287") + mean = float("-5.47764e-06") + std = float("0.0200063") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0976839") + max_val = float("0.091436") + mean = float("-3.85916e-05") + std = float("0.020003") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.105164") + max_val = float("0.0981605") + mean = float("-1.94638e-06") + std = float("0.0200116") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.100258") + max_val = float("0.10403") + mean = float("-2.37825e-07") + std = float("0.0200032") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.104234") + max_val = float("0.107171") + mean = float("4.9785e-06") + std = float("0.0200128") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0942081") + max_val = float("0.104097") + mean = float("-2.22057e-05") + std = float("0.0199925") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.10032") + max_val = float("0.0951253") + mean = float("-1.45727e-05") + std = float("0.0199907") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.107363") + max_val = float("0.101099") + mean = float("8.49895e-06") + std = float("0.0200312") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0980016") + max_val = float("0.103153") + mean = float("5.15533e-06") + std = float("0.0199911") + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.100411") + max_val = float("0.100554") + mean = float("-1.04932e-05") + std = float("0.0200066") + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0964624") + max_val = float("0.0943479") + mean = float("-1.91967e-06") + std = float("0.020005") + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0916461") + max_val = float("0.101003") + mean = float("-4.46455e-05") + std = float("0.0200058") + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0950738") + max_val = float("0.0911693") + mean = float("1.99384e-05") + std = float("0.0199958") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.101071") + max_val = float("0.10099") + mean = float("-8.54916e-07") + std = float("0.0199965") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0987915") + max_val = float("0.105461") + mean = float("1.48772e-05") + std = float("0.0199935") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.105636") + max_val = float("0.106007") + mean = float("-1.8333e-07") + std = float("0.020004") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0967739") + max_val = float("0.0983523") + mean = float("-4.28609e-06") + std = float("0.0200152") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0982177") + max_val = float("0.108669") + mean = float("-6.22116e-06") + std = float("0.020003") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.101944") + max_val = float("0.108104") + mean = float("1.41679e-06") + std = float("0.0199866") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0975958") + max_val = float("0.0969667") + mean = float("3.1012e-06") + std = float("0.0200122") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.108365") + max_val = float("0.110338") + mean = float("-5.4938e-06") + std = float("0.0200027") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0991883") + max_val = float("0.104308") + mean = float("5.91969e-06") + std = float("0.0200165") + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0973095") + max_val = float("0.0965508") + mean = float("-1.43482e-06") + std = float("0.0199965") + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100309") + max_val = float("0.0955139") + mean = float("-1.19554e-05") + std = float("0.0199813") + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0979183") + max_val = float("0.0932543") + mean = float("-1.91321e-05") + std = float("0.0199751") + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0949482") + max_val = float("0.0943983") + mean = float("-2.1525e-05") + std = float("0.0199898") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.112197") + max_val = float("0.102659") + mean = float("-2.24975e-06") + std = float("0.0199888") + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.103548") + max_val = float("0.0991941") + mean = float("-1.89938e-05") + std = float("0.0199951") + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.104776") + max_val = float("0.0953147") + mean = float("1.58962e-05") + std = float("0.020005") + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0936792") + max_val = float("0.102495") + mean = float("1.08738e-05") + std = float("0.0200051") + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0972616") + max_val = float("0.0892398") + mean = float("-1.28953e-05") + std = float("0.0199976") + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0950761") + max_val = float("0.0986504") + mean = float("-1.68947e-05") + std = float("0.0200096") + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.111441") + max_val = float("0.0998872") + mean = float("-1.43684e-05") + std = float("0.020009") + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0957917") + max_val = float("0.0995322") + mean = float("1.06302e-05") + std = float("0.0199943") + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0955322") + max_val = float("0.0997123") + mean = float("3.30754e-06") + std = float("0.020023") + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0911856") + max_val = float("0.0899201") + mean = float("-1.63109e-06") + std = float("0.0200013") + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0997793") + max_val = float("0.0951418") + mean = float("-2.05218e-05") + std = float("0.0200092") + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0911784") + max_val = float("0.10182") + mean = float("-1.83781e-05") + std = float("0.0199805") + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0984256") + max_val = float("0.100259") + mean = float("-4.53413e-06") + std = float("0.0200071") + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.102666") + max_val = float("0.0964367") + mean = float("-1.40732e-05") + std = float("0.0199908") + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0934146") + max_val = float("0.0915575") + mean = float("5.00605e-06") + std = float("0.0199817") + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.09928") + max_val = float("0.097574") + mean = float("-1.40373e-06") + std = float("0.0200134") + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0959881") + max_val = float("0.102034") + mean = float("1.93249e-05") + std = float("0.0199823") + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0913679") + max_val = float("0.0950945") + mean = float("-1.29294e-05") + std = float("0.0199856") + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0962138") + max_val = float("0.104995") + mean = float("3.09728e-06") + std = float("0.0199942") + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.100574") + max_val = float("0.10119") + mean = float("5.86181e-06") + std = float("0.0200128") + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0950479") + max_val = float("0.0954443") + mean = float("2.21724e-06") + std = float("0.0200219") + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100817") + max_val = float("0.0939626") + mean = float("-2.39056e-06") + std = float("0.0199885") + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0909043") + max_val = float("0.1126") + mean = float("2.08148e-05") + std = float("0.0200083") + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0991872") + max_val = float("0.0951475") + mean = float("-3.03595e-05") + std = float("0.0200114") + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.103481") + max_val = float("0.108682") + mean = float("-1.08522e-05") + std = float("0.0199941") + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.10028") + max_val = float("0.099953") + mean = float("-1.08107e-06") + std = float("0.0199911") + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0998735") + max_val = float("0.0902037") + mean = float("-8.12361e-06") + std = float("0.0199937") + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0955451") + max_val = float("0.105333") + mean = float("7.51084e-06") + std = float("0.0200203") + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_205: + name = "parameter_205" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0955035") + max_val = float("0.105939") + mean = float("3.18838e-05") + std = float("0.0200036") + data = None + + +class Program_weight_tensor_parameter_206: + name = "parameter_206" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_207: + name = "parameter_207" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0952035") + max_val = float("0.0936923") + mean = float("8.32838e-06") + std = float("0.0199997") + data = None + + +class Program_weight_tensor_parameter_208: + name = "parameter_208" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_209: + name = "parameter_209" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_210: + name = "parameter_210" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_211: + name = "parameter_211" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_212: + name = "parameter_212" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_213: + name = "parameter_213" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.102455") + max_val = float("0.100239") + mean = float("-2.31832e-06") + std = float("0.0200087") + data = None + + +class Program_weight_tensor_parameter_214: + name = "parameter_214" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_215: + name = "parameter_215" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.103125") + max_val = float("0.110648") + mean = float("-2.13546e-05") + std = float("0.0199904") + data = None + + +class Program_weight_tensor_parameter_216: + name = "parameter_216" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_217: + name = "parameter_217" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0891126") + max_val = float("0.09302") + mean = float("2.48546e-06") + std = float("0.0199864") + data = None + + +class Program_weight_tensor_parameter_218: + name = "parameter_218" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_219: + name = "parameter_219" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100095") + max_val = float("0.091154") + mean = float("6.08949e-06") + std = float("0.0200021") + data = None + + +class Program_weight_tensor_parameter_220: + name = "parameter_220" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_221: + name = "parameter_221" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0941208") + max_val = float("0.106838") + mean = float("9.30511e-06") + std = float("0.0200097") + data = None + + +class Program_weight_tensor_parameter_222: + name = "parameter_222" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_223: + name = "parameter_223" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.10085") + max_val = float("0.0926805") + mean = float("-1.05217e-05") + std = float("0.0199965") + data = None + + +class Program_weight_tensor_parameter_224: + name = "parameter_224" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_225: + name = "parameter_225" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_226: + name = "parameter_226" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_227: + name = "parameter_227" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_228: + name = "parameter_228" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_229: + name = "parameter_229" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.112202") + max_val = float("0.0982251") + mean = float("-1.24947e-05") + std = float("0.0200028") + data = None + + +class Program_weight_tensor_parameter_230: + name = "parameter_230" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_231: + name = "parameter_231" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.117546") + max_val = float("0.0997155") + mean = float("4.63825e-06") + std = float("0.0200009") + data = None + + +class Program_weight_tensor_parameter_232: + name = "parameter_232" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_233: + name = "parameter_233" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0949573") + max_val = float("0.0987111") + mean = float("1.06365e-05") + std = float("0.0200084") + data = None + + +class Program_weight_tensor_parameter_234: + name = "parameter_234" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_235: + name = "parameter_235" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0991016") + max_val = float("0.0952928") + mean = float("-1.85336e-05") + std = float("0.0200211") + data = None + + +class Program_weight_tensor_parameter_236: + name = "parameter_236" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_237: + name = "parameter_237" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0981591") + max_val = float("0.0968659") + mean = float("4.17688e-06") + std = float("0.0199927") + data = None + + +class Program_weight_tensor_parameter_238: + name = "parameter_238" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_239: + name = "parameter_239" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0992943") + max_val = float("0.0971305") + mean = float("3.70269e-05") + std = float("0.0199815") + data = None + + +class Program_weight_tensor_parameter_240: + name = "parameter_240" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_241: + name = "parameter_241" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_242: + name = "parameter_242" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_243: + name = "parameter_243" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_244: + name = "parameter_244" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_245: + name = "parameter_245" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.107651") + max_val = float("0.110664") + mean = float("-3.27741e-06") + std = float("0.0199939") + data = None + + +class Program_weight_tensor_parameter_246: + name = "parameter_246" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_247: + name = "parameter_247" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.101359") + max_val = float("0.101609") + mean = float("-8.71073e-06") + std = float("0.0199887") + data = None + + +class Program_weight_tensor_parameter_248: + name = "parameter_248" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_249: + name = "parameter_249" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.096324") + max_val = float("0.102893") + mean = float("-3.50265e-05") + std = float("0.0200087") + data = None + + +class Program_weight_tensor_parameter_250: + name = "parameter_250" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_251: + name = "parameter_251" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0950433") + max_val = float("0.0948723") + mean = float("1.04623e-05") + std = float("0.0199896") + data = None + + +class Program_weight_tensor_parameter_252: + name = "parameter_252" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_253: + name = "parameter_253" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0914984") + max_val = float("0.0988369") + mean = float("3.31462e-06") + std = float("0.0199744") + data = None + + +class Program_weight_tensor_parameter_254: + name = "parameter_254" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_255: + name = "parameter_255" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0945651") + max_val = float("0.102084") + mean = float("-3.44749e-06") + std = float("0.0200089") + data = None + + +class Program_weight_tensor_parameter_256: + name = "parameter_256" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_257: + name = "parameter_257" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_258: + name = "parameter_258" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_259: + name = "parameter_259" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_260: + name = "parameter_260" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_261: + name = "parameter_261" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.100974") + max_val = float("0.102265") + mean = float("4.26867e-06") + std = float("0.020009") + data = None + + +class Program_weight_tensor_parameter_262: + name = "parameter_262" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_263: + name = "parameter_263" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.101585") + max_val = float("0.0934663") + mean = float("-1.5917e-05") + std = float("0.0199898") + data = None + + +class Program_weight_tensor_parameter_264: + name = "parameter_264" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_265: + name = "parameter_265" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.09419") + max_val = float("0.0972716") + mean = float("2.15896e-08") + std = float("0.0200158") + data = None + + +class Program_weight_tensor_parameter_266: + name = "parameter_266" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_267: + name = "parameter_267" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0918587") + max_val = float("0.0958336") + mean = float("4.23266e-05") + std = float("0.0200012") + data = None + + +class Program_weight_tensor_parameter_268: + name = "parameter_268" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_269: + name = "parameter_269" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0967065") + max_val = float("0.0871559") + mean = float("1.67458e-05") + std = float("0.019988") + data = None + + +class Program_weight_tensor_parameter_270: + name = "parameter_270" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_271: + name = "parameter_271" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.103325") + max_val = float("0.0966935") + mean = float("7.70402e-06") + std = float("0.0199889") + data = None + + +class Program_weight_tensor_parameter_272: + name = "parameter_272" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_273: + name = "parameter_273" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_274: + name = "parameter_274" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_275: + name = "parameter_275" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_276: + name = "parameter_276" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_277: + name = "parameter_277" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.101714") + max_val = float("0.0951662") + mean = float("-7.34203e-06") + std = float("0.0200036") + data = None + + +class Program_weight_tensor_parameter_278: + name = "parameter_278" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_279: + name = "parameter_279" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.101227") + max_val = float("0.100256") + mean = float("-1.01447e-05") + std = float("0.0200028") + data = None + + +class Program_weight_tensor_parameter_280: + name = "parameter_280" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_281: + name = "parameter_281" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100645") + max_val = float("0.101394") + mean = float("1.91943e-05") + std = float("0.0199948") + data = None + + +class Program_weight_tensor_parameter_282: + name = "parameter_282" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_283: + name = "parameter_283" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0935647") + max_val = float("0.0951403") + mean = float("2.39552e-05") + std = float("0.0200041") + data = None + + +class Program_weight_tensor_parameter_284: + name = "parameter_284" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_285: + name = "parameter_285" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0948083") + max_val = float("0.0945721") + mean = float("-4.36244e-05") + std = float("0.0200117") + data = None + + +class Program_weight_tensor_parameter_286: + name = "parameter_286" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_287: + name = "parameter_287" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.10125") + max_val = float("0.105984") + mean = float("-1.5154e-05") + std = float("0.0199826") + data = None + + +class Program_weight_tensor_parameter_288: + name = "parameter_288" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_289: + name = "parameter_289" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_290: + name = "parameter_290" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_291: + name = "parameter_291" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_292: + name = "parameter_292" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_293: + name = "parameter_293" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.105489") + max_val = float("0.0970206") + mean = float("-4.0272e-06") + std = float("0.019993") + data = None + + +class Program_weight_tensor_parameter_294: + name = "parameter_294" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_295: + name = "parameter_295" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.109938") + max_val = float("0.0962291") + mean = float("2.31217e-06") + std = float("0.0200025") + data = None + + +class Program_weight_tensor_parameter_296: + name = "parameter_296" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_297: + name = "parameter_297" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.103673") + max_val = float("0.0941239") + mean = float("-2.21075e-05") + std = float("0.0200097") + data = None + + +class Program_weight_tensor_parameter_298: + name = "parameter_298" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_299: + name = "parameter_299" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.103817") + max_val = float("0.0922414") + mean = float("1.38817e-06") + std = float("0.0199829") + data = None + + +class Program_weight_tensor_parameter_300: + name = "parameter_300" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_301: + name = "parameter_301" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0999624") + max_val = float("0.0968656") + mean = float("6.2417e-06") + std = float("0.0199768") + data = None + + +class Program_weight_tensor_parameter_302: + name = "parameter_302" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_303: + name = "parameter_303" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0929787") + max_val = float("0.102448") + mean = float("-7.695e-06") + std = float("0.0199963") + data = None + + +class Program_weight_tensor_parameter_304: + name = "parameter_304" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_305: + name = "parameter_305" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_306: + name = "parameter_306" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_307: + name = "parameter_307" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_308: + name = "parameter_308" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_309: + name = "parameter_309" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0973307") + max_val = float("0.0981055") + mean = float("1.78622e-06") + std = float("0.0199914") + data = None + + +class Program_weight_tensor_parameter_310: + name = "parameter_310" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_311: + name = "parameter_311" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0950832") + max_val = float("0.102809") + mean = float("-9.69448e-06") + std = float("0.0199968") + data = None + + +class Program_weight_tensor_parameter_312: + name = "parameter_312" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_313: + name = "parameter_313" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.100043") + max_val = float("0.0991401") + mean = float("1.63355e-05") + std = float("0.0199959") + data = None + + +class Program_weight_tensor_parameter_314: + name = "parameter_314" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_315: + name = "parameter_315" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.102857") + max_val = float("0.0994937") + mean = float("-2.46077e-05") + std = float("0.0200046") + data = None + + +class Program_weight_tensor_parameter_316: + name = "parameter_316" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_317: + name = "parameter_317" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0926927") + max_val = float("0.101478") + mean = float("3.80998e-05") + std = float("0.0199855") + data = None + + +class Program_weight_tensor_parameter_318: + name = "parameter_318" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_319: + name = "parameter_319" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0927712") + max_val = float("0.094872") + mean = float("-5.84154e-06") + std = float("0.0199985") + data = None + + +class Program_weight_tensor_parameter_320: + name = "parameter_320" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_321: + name = "parameter_321" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_322: + name = "parameter_322" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_323: + name = "parameter_323" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_324: + name = "parameter_324" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_325: + name = "parameter_325" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.101364") + max_val = float("0.0987481") + mean = float("-1.04383e-05") + std = float("0.0199978") + data = None + + +class Program_weight_tensor_parameter_326: + name = "parameter_326" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_327: + name = "parameter_327" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0984395") + max_val = float("0.114692") + mean = float("-7.58863e-06") + std = float("0.0199863") + data = None + + +class Program_weight_tensor_parameter_328: + name = "parameter_328" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_329: + name = "parameter_329" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0933822") + max_val = float("0.093067") + mean = float("-4.91644e-06") + std = float("0.019999") + data = None + + +class Program_weight_tensor_parameter_330: + name = "parameter_330" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_331: + name = "parameter_331" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0996301") + max_val = float("0.106063") + mean = float("9.30408e-06") + std = float("0.0200111") + data = None + + +class Program_weight_tensor_parameter_332: + name = "parameter_332" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_333: + name = "parameter_333" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0904877") + max_val = float("0.0940888") + mean = float("-1.41462e-05") + std = float("0.0199926") + data = None + + +class Program_weight_tensor_parameter_334: + name = "parameter_334" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_335: + name = "parameter_335" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0918694") + max_val = float("0.0934674") + mean = float("1.14706e-05") + std = float("0.0199906") + data = None + + +class Program_weight_tensor_parameter_336: + name = "parameter_336" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_337: + name = "parameter_337" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_338: + name = "parameter_338" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_339: + name = "parameter_339" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_340: + name = "parameter_340" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_341: + name = "parameter_341" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.101678") + max_val = float("0.109827") + mean = float("1.05289e-05") + std = float("0.0200057") + data = None + + +class Program_weight_tensor_parameter_342: + name = "parameter_342" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_343: + name = "parameter_343" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.103579") + max_val = float("0.0981863") + mean = float("1.11598e-05") + std = float("0.0199954") + data = None + + +class Program_weight_tensor_parameter_344: + name = "parameter_344" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_345: + name = "parameter_345" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0976172") + max_val = float("0.0945761") + mean = float("-4.23601e-05") + std = float("0.019981") + data = None + + +class Program_weight_tensor_parameter_346: + name = "parameter_346" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_347: + name = "parameter_347" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0923529") + max_val = float("0.0910175") + mean = float("-5.49981e-06") + std = float("0.0200086") + data = None + + +class Program_weight_tensor_parameter_348: + name = "parameter_348" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_349: + name = "parameter_349" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0969205") + max_val = float("0.106305") + mean = float("-8.18555e-06") + std = float("0.0199883") + data = None + + +class Program_weight_tensor_parameter_350: + name = "parameter_350" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_351: + name = "parameter_351" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0962243") + max_val = float("0.0928129") + mean = float("-2.59163e-05") + std = float("0.0199797") + data = None + + +class Program_weight_tensor_parameter_352: + name = "parameter_352" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_353: + name = "parameter_353" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_354: + name = "parameter_354" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_355: + name = "parameter_355" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_356: + name = "parameter_356" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_357: + name = "parameter_357" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.102966") + max_val = float("0.120958") + mean = float("9.83077e-06") + std = float("0.0200021") + data = None + + +class Program_weight_tensor_parameter_358: + name = "parameter_358" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_359: + name = "parameter_359" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.102961") + max_val = float("0.102509") + mean = float("-1.44663e-07") + std = float("0.0200013") + data = None + + +class Program_weight_tensor_parameter_360: + name = "parameter_360" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_361: + name = "parameter_361" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0960667") + max_val = float("0.0973655") + mean = float("1.37768e-05") + std = float("0.0200099") + data = None + + +class Program_weight_tensor_parameter_362: + name = "parameter_362" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_363: + name = "parameter_363" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0960439") + max_val = float("0.0992138") + mean = float("-1.79947e-05") + std = float("0.0199956") + data = None + + +class Program_weight_tensor_parameter_364: + name = "parameter_364" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_365: + name = "parameter_365" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0892008") + max_val = float("0.10303") + mean = float("-1.61993e-05") + std = float("0.0199913") + data = None + + +class Program_weight_tensor_parameter_366: + name = "parameter_366" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_367: + name = "parameter_367" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0966827") + max_val = float("0.0982516") + mean = float("-3.37853e-06") + std = float("0.0200001") + data = None + + +class Program_weight_tensor_parameter_368: + name = "parameter_368" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_369: + name = "parameter_369" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_370: + name = "parameter_370" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_371: + name = "parameter_371" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_372: + name = "parameter_372" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_373: + name = "parameter_373" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.102366") + max_val = float("0.0965679") + mean = float("1.18863e-05") + std = float("0.0199941") + data = None + + +class Program_weight_tensor_parameter_374: + name = "parameter_374" + shape = [4096] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_375: + name = "parameter_375" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0995519") + max_val = float("0.102959") + mean = float("1.20653e-06") + std = float("0.0199943") + data = None + + +class Program_weight_tensor_parameter_376: + name = "parameter_376" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_377: + name = "parameter_377" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0925929") + max_val = float("0.0958564") + mean = float("-1.79695e-05") + std = float("0.0200074") + data = None + + +class Program_weight_tensor_parameter_378: + name = "parameter_378" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_379: + name = "parameter_379" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0948622") + max_val = float("0.0957332") + mean = float("-1.74333e-05") + std = float("0.020001") + data = None + + +class Program_weight_tensor_parameter_380: + name = "parameter_380" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_381: + name = "parameter_381" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0993251") + max_val = float("0.096294") + mean = float("-2.26693e-05") + std = float("0.0200031") + data = None + + +class Program_weight_tensor_parameter_382: + name = "parameter_382" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_383: + name = "parameter_383" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.09569") + max_val = float("0.0910205") + mean = float("-8.24706e-06") + std = float("0.019999") + data = None + + +class Program_weight_tensor_parameter_384: + name = "parameter_384" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_385: + name = "parameter_385" + shape = [1024] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_386: + name = "parameter_386" + shape = [2, 1024] + dtype = "float32" + min_val = float("-0.0732863") + max_val = float("0.0652442") + mean = float("-0.000265454") + std = float("0.0201428") + data = None + + +class Program_weight_tensor_parameter_387: + name = "parameter_387" + shape = [512, 1024] + dtype = "float32" + min_val = float("-0.0985827") + max_val = float("0.0960425") + mean = float("9.6319e-06") + std = float("0.0199712") + data = None + + +class Program_weight_tensor_parameter_388: + name = "parameter_388" + shape = [30522, 1024] + dtype = "float32" + min_val = float("-0.114022") + max_val = float("0.109962") + mean = float("-1.03586e-06") + std = float("0.0199996") + data = None diff --git a/paddle_samples/PaddleNLP/electra-small/graph_hash.txt b/paddle_samples/PaddleNLP/electra-small/graph_hash.txt new file mode 100644 index 000000000..766a42342 --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-small/graph_hash.txt @@ -0,0 +1 @@ +0c541398cc137b5f9af13453965469489969b09bf4f9241b084c9f428bdc740c \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/electra-small/graph_net.json b/paddle_samples/PaddleNLP/electra-small/graph_net.json new file mode 100644 index 000000000..e93dd1b4f --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-small/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "electra-small", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/electra-small/input_meta.py b/paddle_samples/PaddleNLP/electra-small/input_meta.py new file mode 100644 index 000000000..fd1f7db6f --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-small/input_meta.py @@ -0,0 +1,34 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 21] + dtype = "int64" + data = [ + 101, + 7592, + 1010, + 2026, + 2171, + 2003, + 3960, + 1012, + 1045, + 2572, + 4083, + 2055, + 2312, + 2653, + 4275, + 1998, + 2037, + 4294, + 2015, + 1012, + 102, + ] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 21] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/electra-small/model.py b/paddle_samples/PaddleNLP/electra-small/model.py new file mode 100644 index 000000000..44e04e3df --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-small/model.py @@ -0,0 +1,2211 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + data_0, + data_1, + ): + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("0"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.equal: (1x21xb) <- (1x21xi64, xi64) + equal_0 = paddle._C_ops.equal(data_0, full_0) + del full_0 + + # pd_op.cast: (1x21xf32) <- (1x21xb) + cast_0 = paddle._C_ops.cast(equal_0, paddle.float32) + del equal_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x21xf32) <- (1x21xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_1, float("0"), True) + del cast_0, full_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_0 = [1, 2] + + # pd_op.unsqueeze: (1x1x1x21xf32) <- (1x21xf32, 2xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del full_int_array_0, scale_0 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full_like: (1x21xi64) <- (1x21xi64, 1xf32) + full_like_0 = paddle._C_ops.full_like( + data_0, full_2, paddle.int64, paddle.framework._current_expected_place() + ) + del full_2 + + # pd_op.full: (1xi32) <- () + full_3 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.cumsum: (1x21xi64) <- (1x21xi64, 1xi32) + cumsum_0 = paddle._C_ops.cumsum(full_like_0, full_3, False, False, False) + del full_3 + + # pd_op.subtract: (1x21xi64) <- (1x21xi64, 1x21xi64) + subtract_0 = paddle._C_ops.subtract(cumsum_0, full_like_0) + del cumsum_0, full_like_0 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 30522x128xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_198, -1, False) + del data_0, parameter_198 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 512x128xf32) + embedding_1 = paddle._C_ops.embedding(subtract_0, parameter_197, -1, False) + del parameter_197, subtract_0 + + # pd_op.embedding: (1x21x128xf32) <- (1x21xi64, 2x128xf32) + embedding_2 = paddle._C_ops.embedding(data_1, parameter_196, -1, False) + del data_1, parameter_196 + + # pd_op.add: (1x21x128xf32) <- (1x21x128xf32, 1x21x128xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.add: (1x21x128xf32) <- (1x21x128xf32, 1x21x128xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x21x128xf32, 1x21xf32, 1x21xf32) <- (1x21x128xf32, 128xf32, 128xf32) + layer_norm_1, layer_norm_2, layer_norm_3 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_195, parameter_194, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_194, parameter_195 + + # pd_op.full: (1xf32) <- () + full_4 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x21x128xf32, 1x21x128xui8) <- (1x21x128xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_1, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del layer_norm_1 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x128xf32, 128x256xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_193, False, False) + del dropout_0, parameter_193 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_192) + del matmul_0, parameter_192 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_1 = paddle._C_ops.matmul(add_2, parameter_191, False, False) + del parameter_191 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_190) + del matmul_1, parameter_190 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_1 = [0, 0, 4, 64] + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_3, full_int_array_1) + del add_3 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_2 = paddle._C_ops.matmul(add_2, parameter_189, False, False) + del parameter_189 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_188) + del matmul_2, parameter_188 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_3 = paddle._C_ops.matmul(add_2, parameter_187, False, False) + del parameter_187 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_5 = paddle._C_ops.add(matmul_3, parameter_186) + del matmul_3, parameter_186 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_4, full_int_array_1) + del add_4 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_5, full_int_array_1) + del add_5 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.full: (1xf32) <- () + full_5 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x4x21x64xf32) <- (1x4x21x64xf32, 1xf32) + scale_1 = paddle._C_ops.scale(transpose_0, full_5, float("0"), True) + del transpose_0 + + # pd_op.matmul: (1x4x21x21xf32) <- (1x4x21x64xf32, 1x4x21x64xf32) + matmul_4 = paddle._C_ops.matmul(scale_1, transpose_1, False, True) + del scale_1, transpose_1 + + # pd_op.add: (1x4x21x21xf32) <- (1x4x21x21xf32, 1x1x1x21xf32) + add_6 = paddle._C_ops.add(matmul_4, unsqueeze_0) + del matmul_4 + + # pd_op.softmax: (1x4x21x21xf32) <- (1x4x21x21xf32) + softmax_0 = paddle._C_ops.softmax(add_6, -1) + del add_6 + + # pd_op.dropout: (1x4x21x21xf32, 1x4x21x21xui8) <- (1x4x21x21xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x4x21x64xf32) <- (1x4x21x21xf32, 1x4x21x64xf32) + matmul_5 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x21x4x64xf32) <- (1x4x21x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_2 = [0, 0, 256] + + # pd_op.reshape: (1x21x256xf32) <- (1x21x4x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_2) + del transpose_3 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, parameter_185, False, False) + del parameter_185, reshape_3 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_7 = paddle._C_ops.add(matmul_6, parameter_184) + del matmul_6, parameter_184 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_7, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_7 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_8 = paddle._C_ops.add(add_2, dropout_4) + del add_2, dropout_4 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_4, layer_norm_5, layer_norm_6 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_8, parameter_179, parameter_178, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_8, parameter_178, parameter_179 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x256xf32, 256x1024xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_4, parameter_183, False, False) + del parameter_183 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_182) + del matmul_7, parameter_182 + + # pd_op.gelu: (1x21x1024xf32) <- (1x21x1024xf32) + gelu_0 = paddle._C_ops.gelu(add_9, False) + del add_9 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x1024xf32, 1024x256xf32) + matmul_8 = paddle._C_ops.matmul(gelu_0, parameter_181, False, False) + del gelu_0, parameter_181 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_10 = paddle._C_ops.add(matmul_8, parameter_180) + del matmul_8, parameter_180 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_10, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_10 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_11 = paddle._C_ops.add(layer_norm_4, dropout_6) + del dropout_6, layer_norm_4 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_7, layer_norm_8, layer_norm_9 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_11, parameter_177, parameter_176, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_11, parameter_176, parameter_177 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_7, parameter_175, False, False) + del parameter_175 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_174) + del matmul_9, parameter_174 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_12, full_int_array_1) + del add_12 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_7, parameter_173, False, False) + del parameter_173 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_13 = paddle._C_ops.add(matmul_10, parameter_172) + del matmul_10, parameter_172 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_11 = paddle._C_ops.matmul(layer_norm_7, parameter_171, False, False) + del parameter_171 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_14 = paddle._C_ops.add(matmul_11, parameter_170) + del matmul_11, parameter_170 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_13, full_int_array_1) + del add_13 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_14, full_int_array_1) + del add_14 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.scale: (1x4x21x64xf32) <- (1x4x21x64xf32, 1xf32) + scale_2 = paddle._C_ops.scale(transpose_4, full_5, float("0"), True) + del transpose_4 + + # pd_op.matmul: (1x4x21x21xf32) <- (1x4x21x64xf32, 1x4x21x64xf32) + matmul_12 = paddle._C_ops.matmul(scale_2, transpose_5, False, True) + del scale_2, transpose_5 + + # pd_op.add: (1x4x21x21xf32) <- (1x4x21x21xf32, 1x1x1x21xf32) + add_15 = paddle._C_ops.add(matmul_12, unsqueeze_0) + del matmul_12 + + # pd_op.softmax: (1x4x21x21xf32) <- (1x4x21x21xf32) + softmax_1 = paddle._C_ops.softmax(add_15, -1) + del add_15 + + # pd_op.dropout: (1x4x21x21xf32, 1x4x21x21xui8) <- (1x4x21x21xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x4x21x64xf32) <- (1x4x21x21xf32, 1x4x21x64xf32) + matmul_13 = paddle._C_ops.matmul(dropout_8, transpose_6, False, False) + del dropout_8, transpose_6 + + # pd_op.transpose: (1x21x4x64xf32) <- (1x4x21x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) + del matmul_13 + + # pd_op.reshape: (1x21x256xf32) <- (1x21x4x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_2) + del transpose_7 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_169, False, False) + del parameter_169, reshape_7 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_16 = paddle._C_ops.add(matmul_14, parameter_168) + del matmul_14, parameter_168 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_16, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_16 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_17 = paddle._C_ops.add(layer_norm_7, dropout_10) + del dropout_10, layer_norm_7 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_10, layer_norm_11, layer_norm_12 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_17, parameter_163, parameter_162, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_17, parameter_162, parameter_163 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x256xf32, 256x1024xf32) + matmul_15 = paddle._C_ops.matmul(layer_norm_10, parameter_167, False, False) + del parameter_167 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_18 = paddle._C_ops.add(matmul_15, parameter_166) + del matmul_15, parameter_166 + + # pd_op.gelu: (1x21x1024xf32) <- (1x21x1024xf32) + gelu_1 = paddle._C_ops.gelu(add_18, False) + del add_18 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x1024xf32, 1024x256xf32) + matmul_16 = paddle._C_ops.matmul(gelu_1, parameter_165, False, False) + del gelu_1, parameter_165 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_19 = paddle._C_ops.add(matmul_16, parameter_164) + del matmul_16, parameter_164 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_19, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_19 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_20 = paddle._C_ops.add(layer_norm_10, dropout_12) + del dropout_12, layer_norm_10 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_13, layer_norm_14, layer_norm_15 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_20, parameter_161, parameter_160, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_20, parameter_160, parameter_161 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_13, parameter_159, False, False) + del parameter_159 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_158) + del matmul_17, parameter_158 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_21, full_int_array_1) + del add_21 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_13, parameter_157, False, False) + del parameter_157 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_156) + del matmul_18, parameter_156 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_13, parameter_155, False, False) + del parameter_155 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_23 = paddle._C_ops.add(matmul_19, parameter_154) + del matmul_19, parameter_154 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_22, full_int_array_1) + del add_22 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_23, full_int_array_1) + del add_23 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.scale: (1x4x21x64xf32) <- (1x4x21x64xf32, 1xf32) + scale_3 = paddle._C_ops.scale(transpose_8, full_5, float("0"), True) + del transpose_8 + + # pd_op.matmul: (1x4x21x21xf32) <- (1x4x21x64xf32, 1x4x21x64xf32) + matmul_20 = paddle._C_ops.matmul(scale_3, transpose_9, False, True) + del scale_3, transpose_9 + + # pd_op.add: (1x4x21x21xf32) <- (1x4x21x21xf32, 1x1x1x21xf32) + add_24 = paddle._C_ops.add(matmul_20, unsqueeze_0) + del matmul_20 + + # pd_op.softmax: (1x4x21x21xf32) <- (1x4x21x21xf32) + softmax_2 = paddle._C_ops.softmax(add_24, -1) + del add_24 + + # pd_op.dropout: (1x4x21x21xf32, 1x4x21x21xui8) <- (1x4x21x21xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x4x21x64xf32) <- (1x4x21x21xf32, 1x4x21x64xf32) + matmul_21 = paddle._C_ops.matmul(dropout_14, transpose_10, False, False) + del dropout_14, transpose_10 + + # pd_op.transpose: (1x21x4x64xf32) <- (1x4x21x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x21x256xf32) <- (1x21x4x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_2) + del transpose_11 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_153, False, False) + del parameter_153, reshape_11 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_25 = paddle._C_ops.add(matmul_22, parameter_152) + del matmul_22, parameter_152 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_25, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_25 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_26 = paddle._C_ops.add(layer_norm_13, dropout_16) + del dropout_16, layer_norm_13 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_16, layer_norm_17, layer_norm_18 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_26, parameter_147, parameter_146, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_26, parameter_146, parameter_147 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x256xf32, 256x1024xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_16, parameter_151, False, False) + del parameter_151 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_27 = paddle._C_ops.add(matmul_23, parameter_150) + del matmul_23, parameter_150 + + # pd_op.gelu: (1x21x1024xf32) <- (1x21x1024xf32) + gelu_2 = paddle._C_ops.gelu(add_27, False) + del add_27 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x1024xf32, 1024x256xf32) + matmul_24 = paddle._C_ops.matmul(gelu_2, parameter_149, False, False) + del gelu_2, parameter_149 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_28 = paddle._C_ops.add(matmul_24, parameter_148) + del matmul_24, parameter_148 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_28, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_28 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_29 = paddle._C_ops.add(layer_norm_16, dropout_18) + del dropout_18, layer_norm_16 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_19, layer_norm_20, layer_norm_21 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_29, parameter_145, parameter_144, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_29, parameter_144, parameter_145 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_19, parameter_143, False, False) + del parameter_143 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_30 = paddle._C_ops.add(matmul_25, parameter_142) + del matmul_25, parameter_142 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_30, full_int_array_1) + del add_30 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_19, parameter_141, False, False) + del parameter_141 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_31 = paddle._C_ops.add(matmul_26, parameter_140) + del matmul_26, parameter_140 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_27 = paddle._C_ops.matmul(layer_norm_19, parameter_139, False, False) + del parameter_139 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_32 = paddle._C_ops.add(matmul_27, parameter_138) + del matmul_27, parameter_138 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_31, full_int_array_1) + del add_31 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_32, full_int_array_1) + del add_32 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.scale: (1x4x21x64xf32) <- (1x4x21x64xf32, 1xf32) + scale_4 = paddle._C_ops.scale(transpose_12, full_5, float("0"), True) + del transpose_12 + + # pd_op.matmul: (1x4x21x21xf32) <- (1x4x21x64xf32, 1x4x21x64xf32) + matmul_28 = paddle._C_ops.matmul(scale_4, transpose_13, False, True) + del scale_4, transpose_13 + + # pd_op.add: (1x4x21x21xf32) <- (1x4x21x21xf32, 1x1x1x21xf32) + add_33 = paddle._C_ops.add(matmul_28, unsqueeze_0) + del matmul_28 + + # pd_op.softmax: (1x4x21x21xf32) <- (1x4x21x21xf32) + softmax_3 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.dropout: (1x4x21x21xf32, 1x4x21x21xui8) <- (1x4x21x21xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x4x21x64xf32) <- (1x4x21x21xf32, 1x4x21x64xf32) + matmul_29 = paddle._C_ops.matmul(dropout_20, transpose_14, False, False) + del dropout_20, transpose_14 + + # pd_op.transpose: (1x21x4x64xf32) <- (1x4x21x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x21x256xf32) <- (1x21x4x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_2) + del transpose_15 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_137, False, False) + del parameter_137, reshape_15 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_34 = paddle._C_ops.add(matmul_30, parameter_136) + del matmul_30, parameter_136 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_34, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_34 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_35 = paddle._C_ops.add(layer_norm_19, dropout_22) + del dropout_22, layer_norm_19 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_22, layer_norm_23, layer_norm_24 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_35, parameter_131, parameter_130, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_35, parameter_130, parameter_131 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x256xf32, 256x1024xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_22, parameter_135, False, False) + del parameter_135 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_36 = paddle._C_ops.add(matmul_31, parameter_134) + del matmul_31, parameter_134 + + # pd_op.gelu: (1x21x1024xf32) <- (1x21x1024xf32) + gelu_3 = paddle._C_ops.gelu(add_36, False) + del add_36 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x1024xf32, 1024x256xf32) + matmul_32 = paddle._C_ops.matmul(gelu_3, parameter_133, False, False) + del gelu_3, parameter_133 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_37 = paddle._C_ops.add(matmul_32, parameter_132) + del matmul_32, parameter_132 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_37, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_37 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_38 = paddle._C_ops.add(layer_norm_22, dropout_24) + del dropout_24, layer_norm_22 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_25, layer_norm_26, layer_norm_27 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_38, parameter_129, parameter_128, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_38, parameter_128, parameter_129 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_25, parameter_127, False, False) + del parameter_127 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_39 = paddle._C_ops.add(matmul_33, parameter_126) + del matmul_33, parameter_126 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_39, full_int_array_1) + del add_39 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_25, parameter_125, False, False) + del parameter_125 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_40 = paddle._C_ops.add(matmul_34, parameter_124) + del matmul_34, parameter_124 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_25, parameter_123, False, False) + del parameter_123 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_41 = paddle._C_ops.add(matmul_35, parameter_122) + del matmul_35, parameter_122 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_40, full_int_array_1) + del add_40 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_41, full_int_array_1) + del add_41 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.scale: (1x4x21x64xf32) <- (1x4x21x64xf32, 1xf32) + scale_5 = paddle._C_ops.scale(transpose_16, full_5, float("0"), True) + del transpose_16 + + # pd_op.matmul: (1x4x21x21xf32) <- (1x4x21x64xf32, 1x4x21x64xf32) + matmul_36 = paddle._C_ops.matmul(scale_5, transpose_17, False, True) + del scale_5, transpose_17 + + # pd_op.add: (1x4x21x21xf32) <- (1x4x21x21xf32, 1x1x1x21xf32) + add_42 = paddle._C_ops.add(matmul_36, unsqueeze_0) + del matmul_36 + + # pd_op.softmax: (1x4x21x21xf32) <- (1x4x21x21xf32) + softmax_4 = paddle._C_ops.softmax(add_42, -1) + del add_42 + + # pd_op.dropout: (1x4x21x21xf32, 1x4x21x21xui8) <- (1x4x21x21xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x4x21x64xf32) <- (1x4x21x21xf32, 1x4x21x64xf32) + matmul_37 = paddle._C_ops.matmul(dropout_26, transpose_18, False, False) + del dropout_26, transpose_18 + + # pd_op.transpose: (1x21x4x64xf32) <- (1x4x21x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_37, [0, 2, 1, 3]) + del matmul_37 + + # pd_op.reshape: (1x21x256xf32) <- (1x21x4x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_2) + del transpose_19 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_38 = paddle._C_ops.matmul(reshape_19, parameter_121, False, False) + del parameter_121, reshape_19 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_43 = paddle._C_ops.add(matmul_38, parameter_120) + del matmul_38, parameter_120 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_43, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_43 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_44 = paddle._C_ops.add(layer_norm_25, dropout_28) + del dropout_28, layer_norm_25 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_28, layer_norm_29, layer_norm_30 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_44, parameter_115, parameter_114, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_44, parameter_114, parameter_115 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x256xf32, 256x1024xf32) + matmul_39 = paddle._C_ops.matmul(layer_norm_28, parameter_119, False, False) + del parameter_119 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_45 = paddle._C_ops.add(matmul_39, parameter_118) + del matmul_39, parameter_118 + + # pd_op.gelu: (1x21x1024xf32) <- (1x21x1024xf32) + gelu_4 = paddle._C_ops.gelu(add_45, False) + del add_45 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x1024xf32, 1024x256xf32) + matmul_40 = paddle._C_ops.matmul(gelu_4, parameter_117, False, False) + del gelu_4, parameter_117 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_46 = paddle._C_ops.add(matmul_40, parameter_116) + del matmul_40, parameter_116 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_46, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_46 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_47 = paddle._C_ops.add(layer_norm_28, dropout_30) + del dropout_30, layer_norm_28 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_31, layer_norm_32, layer_norm_33 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_47, parameter_113, parameter_112, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_47, parameter_112, parameter_113 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_31, parameter_111, False, False) + del parameter_111 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_48 = paddle._C_ops.add(matmul_41, parameter_110) + del matmul_41, parameter_110 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_48, full_int_array_1) + del add_48 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_31, parameter_109, False, False) + del parameter_109 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_49 = paddle._C_ops.add(matmul_42, parameter_108) + del matmul_42, parameter_108 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_31, parameter_107, False, False) + del parameter_107 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_50 = paddle._C_ops.add(matmul_43, parameter_106) + del matmul_43, parameter_106 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_49, full_int_array_1) + del add_49 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_50, full_int_array_1) + del add_50 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.scale: (1x4x21x64xf32) <- (1x4x21x64xf32, 1xf32) + scale_6 = paddle._C_ops.scale(transpose_20, full_5, float("0"), True) + del transpose_20 + + # pd_op.matmul: (1x4x21x21xf32) <- (1x4x21x64xf32, 1x4x21x64xf32) + matmul_44 = paddle._C_ops.matmul(scale_6, transpose_21, False, True) + del scale_6, transpose_21 + + # pd_op.add: (1x4x21x21xf32) <- (1x4x21x21xf32, 1x1x1x21xf32) + add_51 = paddle._C_ops.add(matmul_44, unsqueeze_0) + del matmul_44 + + # pd_op.softmax: (1x4x21x21xf32) <- (1x4x21x21xf32) + softmax_5 = paddle._C_ops.softmax(add_51, -1) + del add_51 + + # pd_op.dropout: (1x4x21x21xf32, 1x4x21x21xui8) <- (1x4x21x21xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x4x21x64xf32) <- (1x4x21x21xf32, 1x4x21x64xf32) + matmul_45 = paddle._C_ops.matmul(dropout_32, transpose_22, False, False) + del dropout_32, transpose_22 + + # pd_op.transpose: (1x21x4x64xf32) <- (1x4x21x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_45, [0, 2, 1, 3]) + del matmul_45 + + # pd_op.reshape: (1x21x256xf32) <- (1x21x4x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_2) + del transpose_23 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_46 = paddle._C_ops.matmul(reshape_23, parameter_105, False, False) + del parameter_105, reshape_23 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_52 = paddle._C_ops.add(matmul_46, parameter_104) + del matmul_46, parameter_104 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_52, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_52 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_53 = paddle._C_ops.add(layer_norm_31, dropout_34) + del dropout_34, layer_norm_31 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_34, layer_norm_35, layer_norm_36 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_53, parameter_99, parameter_98, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_53, parameter_98, parameter_99 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x256xf32, 256x1024xf32) + matmul_47 = paddle._C_ops.matmul(layer_norm_34, parameter_103, False, False) + del parameter_103 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_54 = paddle._C_ops.add(matmul_47, parameter_102) + del matmul_47, parameter_102 + + # pd_op.gelu: (1x21x1024xf32) <- (1x21x1024xf32) + gelu_5 = paddle._C_ops.gelu(add_54, False) + del add_54 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x1024xf32, 1024x256xf32) + matmul_48 = paddle._C_ops.matmul(gelu_5, parameter_101, False, False) + del gelu_5, parameter_101 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_55 = paddle._C_ops.add(matmul_48, parameter_100) + del matmul_48, parameter_100 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_55, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_55 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_56 = paddle._C_ops.add(layer_norm_34, dropout_36) + del dropout_36, layer_norm_34 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_37, layer_norm_38, layer_norm_39 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_56, parameter_97, parameter_96, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_56, parameter_96, parameter_97 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_37, parameter_95, False, False) + del parameter_95 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_57 = paddle._C_ops.add(matmul_49, parameter_94) + del matmul_49, parameter_94 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_57, full_int_array_1) + del add_57 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_37, parameter_93, False, False) + del parameter_93 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_58 = paddle._C_ops.add(matmul_50, parameter_92) + del matmul_50, parameter_92 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_51 = paddle._C_ops.matmul(layer_norm_37, parameter_91, False, False) + del parameter_91 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_59 = paddle._C_ops.add(matmul_51, parameter_90) + del matmul_51, parameter_90 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_58, full_int_array_1) + del add_58 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_59, full_int_array_1) + del add_59 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.scale: (1x4x21x64xf32) <- (1x4x21x64xf32, 1xf32) + scale_7 = paddle._C_ops.scale(transpose_24, full_5, float("0"), True) + del transpose_24 + + # pd_op.matmul: (1x4x21x21xf32) <- (1x4x21x64xf32, 1x4x21x64xf32) + matmul_52 = paddle._C_ops.matmul(scale_7, transpose_25, False, True) + del scale_7, transpose_25 + + # pd_op.add: (1x4x21x21xf32) <- (1x4x21x21xf32, 1x1x1x21xf32) + add_60 = paddle._C_ops.add(matmul_52, unsqueeze_0) + del matmul_52 + + # pd_op.softmax: (1x4x21x21xf32) <- (1x4x21x21xf32) + softmax_6 = paddle._C_ops.softmax(add_60, -1) + del add_60 + + # pd_op.dropout: (1x4x21x21xf32, 1x4x21x21xui8) <- (1x4x21x21xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # pd_op.matmul: (1x4x21x64xf32) <- (1x4x21x21xf32, 1x4x21x64xf32) + matmul_53 = paddle._C_ops.matmul(dropout_38, transpose_26, False, False) + del dropout_38, transpose_26 + + # pd_op.transpose: (1x21x4x64xf32) <- (1x4x21x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_53, [0, 2, 1, 3]) + del matmul_53 + + # pd_op.reshape: (1x21x256xf32) <- (1x21x4x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_2) + del transpose_27 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_54 = paddle._C_ops.matmul(reshape_27, parameter_89, False, False) + del parameter_89, reshape_27 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_61 = paddle._C_ops.add(matmul_54, parameter_88) + del matmul_54, parameter_88 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_61, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_61 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_62 = paddle._C_ops.add(layer_norm_37, dropout_40) + del dropout_40, layer_norm_37 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_40, layer_norm_41, layer_norm_42 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_62, parameter_83, parameter_82, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_62, parameter_82, parameter_83 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x256xf32, 256x1024xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_40, parameter_87, False, False) + del parameter_87 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_63 = paddle._C_ops.add(matmul_55, parameter_86) + del matmul_55, parameter_86 + + # pd_op.gelu: (1x21x1024xf32) <- (1x21x1024xf32) + gelu_6 = paddle._C_ops.gelu(add_63, False) + del add_63 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x1024xf32, 1024x256xf32) + matmul_56 = paddle._C_ops.matmul(gelu_6, parameter_85, False, False) + del gelu_6, parameter_85 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_64 = paddle._C_ops.add(matmul_56, parameter_84) + del matmul_56, parameter_84 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_64, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_64 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_65 = paddle._C_ops.add(layer_norm_40, dropout_42) + del dropout_42, layer_norm_40 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_43, layer_norm_44, layer_norm_45 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_65, parameter_81, parameter_80, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_65, parameter_80, parameter_81 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_57 = paddle._C_ops.matmul(layer_norm_43, parameter_79, False, False) + del parameter_79 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_66 = paddle._C_ops.add(matmul_57, parameter_78) + del matmul_57, parameter_78 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(add_66, full_int_array_1) + del add_66 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_43, parameter_77, False, False) + del parameter_77 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_67 = paddle._C_ops.add(matmul_58, parameter_76) + del matmul_58, parameter_76 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_59 = paddle._C_ops.matmul(layer_norm_43, parameter_75, False, False) + del parameter_75 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_68 = paddle._C_ops.add(matmul_59, parameter_74) + del matmul_59, parameter_74 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(add_67, full_int_array_1) + del add_67 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_68, full_int_array_1) + del add_68 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.scale: (1x4x21x64xf32) <- (1x4x21x64xf32, 1xf32) + scale_8 = paddle._C_ops.scale(transpose_28, full_5, float("0"), True) + del transpose_28 + + # pd_op.matmul: (1x4x21x21xf32) <- (1x4x21x64xf32, 1x4x21x64xf32) + matmul_60 = paddle._C_ops.matmul(scale_8, transpose_29, False, True) + del scale_8, transpose_29 + + # pd_op.add: (1x4x21x21xf32) <- (1x4x21x21xf32, 1x1x1x21xf32) + add_69 = paddle._C_ops.add(matmul_60, unsqueeze_0) + del matmul_60 + + # pd_op.softmax: (1x4x21x21xf32) <- (1x4x21x21xf32) + softmax_7 = paddle._C_ops.softmax(add_69, -1) + del add_69 + + # pd_op.dropout: (1x4x21x21xf32, 1x4x21x21xui8) <- (1x4x21x21xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # pd_op.matmul: (1x4x21x64xf32) <- (1x4x21x21xf32, 1x4x21x64xf32) + matmul_61 = paddle._C_ops.matmul(dropout_44, transpose_30, False, False) + del dropout_44, transpose_30 + + # pd_op.transpose: (1x21x4x64xf32) <- (1x4x21x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_61, [0, 2, 1, 3]) + del matmul_61 + + # pd_op.reshape: (1x21x256xf32) <- (1x21x4x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_2) + del transpose_31 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_62 = paddle._C_ops.matmul(reshape_31, parameter_73, False, False) + del parameter_73, reshape_31 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_70 = paddle._C_ops.add(matmul_62, parameter_72) + del matmul_62, parameter_72 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_70, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_70 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_71 = paddle._C_ops.add(layer_norm_43, dropout_46) + del dropout_46, layer_norm_43 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_46, layer_norm_47, layer_norm_48 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_71, parameter_67, parameter_66, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_71, parameter_66, parameter_67 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x256xf32, 256x1024xf32) + matmul_63 = paddle._C_ops.matmul(layer_norm_46, parameter_71, False, False) + del parameter_71 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_72 = paddle._C_ops.add(matmul_63, parameter_70) + del matmul_63, parameter_70 + + # pd_op.gelu: (1x21x1024xf32) <- (1x21x1024xf32) + gelu_7 = paddle._C_ops.gelu(add_72, False) + del add_72 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x1024xf32, 1024x256xf32) + matmul_64 = paddle._C_ops.matmul(gelu_7, parameter_69, False, False) + del gelu_7, parameter_69 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_73 = paddle._C_ops.add(matmul_64, parameter_68) + del matmul_64, parameter_68 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_73, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_73 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_74 = paddle._C_ops.add(layer_norm_46, dropout_48) + del dropout_48, layer_norm_46 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_49, layer_norm_50, layer_norm_51 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_74, parameter_65, parameter_64, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_74, parameter_64, parameter_65 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_49, parameter_63, False, False) + del parameter_63 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_75 = paddle._C_ops.add(matmul_65, parameter_62) + del matmul_65, parameter_62 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(add_75, full_int_array_1) + del add_75 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_49, parameter_61, False, False) + del parameter_61 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_76 = paddle._C_ops.add(matmul_66, parameter_60) + del matmul_66, parameter_60 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_49, parameter_59, False, False) + del parameter_59 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_77 = paddle._C_ops.add(matmul_67, parameter_58) + del matmul_67, parameter_58 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(add_76, full_int_array_1) + del add_76 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_77, full_int_array_1) + del add_77 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.scale: (1x4x21x64xf32) <- (1x4x21x64xf32, 1xf32) + scale_9 = paddle._C_ops.scale(transpose_32, full_5, float("0"), True) + del transpose_32 + + # pd_op.matmul: (1x4x21x21xf32) <- (1x4x21x64xf32, 1x4x21x64xf32) + matmul_68 = paddle._C_ops.matmul(scale_9, transpose_33, False, True) + del scale_9, transpose_33 + + # pd_op.add: (1x4x21x21xf32) <- (1x4x21x21xf32, 1x1x1x21xf32) + add_78 = paddle._C_ops.add(matmul_68, unsqueeze_0) + del matmul_68 + + # pd_op.softmax: (1x4x21x21xf32) <- (1x4x21x21xf32) + softmax_8 = paddle._C_ops.softmax(add_78, -1) + del add_78 + + # pd_op.dropout: (1x4x21x21xf32, 1x4x21x21xui8) <- (1x4x21x21xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # pd_op.matmul: (1x4x21x64xf32) <- (1x4x21x21xf32, 1x4x21x64xf32) + matmul_69 = paddle._C_ops.matmul(dropout_50, transpose_34, False, False) + del dropout_50, transpose_34 + + # pd_op.transpose: (1x21x4x64xf32) <- (1x4x21x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_69, [0, 2, 1, 3]) + del matmul_69 + + # pd_op.reshape: (1x21x256xf32) <- (1x21x4x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_2) + del transpose_35 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_70 = paddle._C_ops.matmul(reshape_35, parameter_57, False, False) + del parameter_57, reshape_35 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_79 = paddle._C_ops.add(matmul_70, parameter_56) + del matmul_70, parameter_56 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_79, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_79 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_80 = paddle._C_ops.add(layer_norm_49, dropout_52) + del dropout_52, layer_norm_49 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_52, layer_norm_53, layer_norm_54 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_80, parameter_51, parameter_50, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_80, parameter_50, parameter_51 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x256xf32, 256x1024xf32) + matmul_71 = paddle._C_ops.matmul(layer_norm_52, parameter_55, False, False) + del parameter_55 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_81 = paddle._C_ops.add(matmul_71, parameter_54) + del matmul_71, parameter_54 + + # pd_op.gelu: (1x21x1024xf32) <- (1x21x1024xf32) + gelu_8 = paddle._C_ops.gelu(add_81, False) + del add_81 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x1024xf32, 1024x256xf32) + matmul_72 = paddle._C_ops.matmul(gelu_8, parameter_53, False, False) + del gelu_8, parameter_53 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_82 = paddle._C_ops.add(matmul_72, parameter_52) + del matmul_72, parameter_52 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_82, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_82 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_83 = paddle._C_ops.add(layer_norm_52, dropout_54) + del dropout_54, layer_norm_52 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_55, layer_norm_56, layer_norm_57 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_83, parameter_49, parameter_48, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_83, parameter_48, parameter_49 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_55, parameter_47, False, False) + del parameter_47 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_84 = paddle._C_ops.add(matmul_73, parameter_46) + del matmul_73, parameter_46 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(add_84, full_int_array_1) + del add_84 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_55, parameter_45, False, False) + del parameter_45 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_85 = paddle._C_ops.add(matmul_74, parameter_44) + del matmul_74, parameter_44 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_55, parameter_43, False, False) + del parameter_43 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_86 = paddle._C_ops.add(matmul_75, parameter_42) + del matmul_75, parameter_42 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_85, full_int_array_1) + del add_85 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(add_86, full_int_array_1) + del add_86 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.scale: (1x4x21x64xf32) <- (1x4x21x64xf32, 1xf32) + scale_10 = paddle._C_ops.scale(transpose_36, full_5, float("0"), True) + del transpose_36 + + # pd_op.matmul: (1x4x21x21xf32) <- (1x4x21x64xf32, 1x4x21x64xf32) + matmul_76 = paddle._C_ops.matmul(scale_10, transpose_37, False, True) + del scale_10, transpose_37 + + # pd_op.add: (1x4x21x21xf32) <- (1x4x21x21xf32, 1x1x1x21xf32) + add_87 = paddle._C_ops.add(matmul_76, unsqueeze_0) + del matmul_76 + + # pd_op.softmax: (1x4x21x21xf32) <- (1x4x21x21xf32) + softmax_9 = paddle._C_ops.softmax(add_87, -1) + del add_87 + + # pd_op.dropout: (1x4x21x21xf32, 1x4x21x21xui8) <- (1x4x21x21xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # pd_op.matmul: (1x4x21x64xf32) <- (1x4x21x21xf32, 1x4x21x64xf32) + matmul_77 = paddle._C_ops.matmul(dropout_56, transpose_38, False, False) + del dropout_56, transpose_38 + + # pd_op.transpose: (1x21x4x64xf32) <- (1x4x21x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_77, [0, 2, 1, 3]) + del matmul_77 + + # pd_op.reshape: (1x21x256xf32) <- (1x21x4x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_2) + del transpose_39 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_78 = paddle._C_ops.matmul(reshape_39, parameter_41, False, False) + del parameter_41, reshape_39 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_88 = paddle._C_ops.add(matmul_78, parameter_40) + del matmul_78, parameter_40 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_88, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_88 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_89 = paddle._C_ops.add(layer_norm_55, dropout_58) + del dropout_58, layer_norm_55 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_58, layer_norm_59, layer_norm_60 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_89, parameter_35, parameter_34, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_89, parameter_34, parameter_35 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x256xf32, 256x1024xf32) + matmul_79 = paddle._C_ops.matmul(layer_norm_58, parameter_39, False, False) + del parameter_39 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_90 = paddle._C_ops.add(matmul_79, parameter_38) + del matmul_79, parameter_38 + + # pd_op.gelu: (1x21x1024xf32) <- (1x21x1024xf32) + gelu_9 = paddle._C_ops.gelu(add_90, False) + del add_90 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x1024xf32, 1024x256xf32) + matmul_80 = paddle._C_ops.matmul(gelu_9, parameter_37, False, False) + del gelu_9, parameter_37 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_91 = paddle._C_ops.add(matmul_80, parameter_36) + del matmul_80, parameter_36 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_91, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_91 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_92 = paddle._C_ops.add(layer_norm_58, dropout_60) + del dropout_60, layer_norm_58 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_61, layer_norm_62, layer_norm_63 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_92, parameter_33, parameter_32, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_92, parameter_32, parameter_33 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_81 = paddle._C_ops.matmul(layer_norm_61, parameter_31, False, False) + del parameter_31 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_93 = paddle._C_ops.add(matmul_81, parameter_30) + del matmul_81, parameter_30 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(add_93, full_int_array_1) + del add_93 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_61, parameter_29, False, False) + del parameter_29 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_94 = paddle._C_ops.add(matmul_82, parameter_28) + del matmul_82, parameter_28 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_83 = paddle._C_ops.matmul(layer_norm_61, parameter_27, False, False) + del parameter_27 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_95 = paddle._C_ops.add(matmul_83, parameter_26) + del matmul_83, parameter_26 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_94, full_int_array_1) + del add_94 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(add_95, full_int_array_1) + del add_95 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.scale: (1x4x21x64xf32) <- (1x4x21x64xf32, 1xf32) + scale_11 = paddle._C_ops.scale(transpose_40, full_5, float("0"), True) + del transpose_40 + + # pd_op.matmul: (1x4x21x21xf32) <- (1x4x21x64xf32, 1x4x21x64xf32) + matmul_84 = paddle._C_ops.matmul(scale_11, transpose_41, False, True) + del scale_11, transpose_41 + + # pd_op.add: (1x4x21x21xf32) <- (1x4x21x21xf32, 1x1x1x21xf32) + add_96 = paddle._C_ops.add(matmul_84, unsqueeze_0) + del matmul_84 + + # pd_op.softmax: (1x4x21x21xf32) <- (1x4x21x21xf32) + softmax_10 = paddle._C_ops.softmax(add_96, -1) + del add_96 + + # pd_op.dropout: (1x4x21x21xf32, 1x4x21x21xui8) <- (1x4x21x21xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # pd_op.matmul: (1x4x21x64xf32) <- (1x4x21x21xf32, 1x4x21x64xf32) + matmul_85 = paddle._C_ops.matmul(dropout_62, transpose_42, False, False) + del dropout_62, transpose_42 + + # pd_op.transpose: (1x21x4x64xf32) <- (1x4x21x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_85, [0, 2, 1, 3]) + del matmul_85 + + # pd_op.reshape: (1x21x256xf32) <- (1x21x4x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_2) + del transpose_43 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_86 = paddle._C_ops.matmul(reshape_43, parameter_25, False, False) + del parameter_25, reshape_43 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_97 = paddle._C_ops.add(matmul_86, parameter_24) + del matmul_86, parameter_24 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_97, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_97 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_98 = paddle._C_ops.add(layer_norm_61, dropout_64) + del dropout_64, layer_norm_61 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_64, layer_norm_65, layer_norm_66 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_98, parameter_19, parameter_18, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_98, parameter_18, parameter_19 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x256xf32, 256x1024xf32) + matmul_87 = paddle._C_ops.matmul(layer_norm_64, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_99 = paddle._C_ops.add(matmul_87, parameter_22) + del matmul_87, parameter_22 + + # pd_op.gelu: (1x21x1024xf32) <- (1x21x1024xf32) + gelu_10 = paddle._C_ops.gelu(add_99, False) + del add_99 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x1024xf32, 1024x256xf32) + matmul_88 = paddle._C_ops.matmul(gelu_10, parameter_21, False, False) + del gelu_10, parameter_21 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_100 = paddle._C_ops.add(matmul_88, parameter_20) + del matmul_88, parameter_20 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_100, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_100 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_101 = paddle._C_ops.add(layer_norm_64, dropout_66) + del dropout_66, layer_norm_64 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_67, layer_norm_68, layer_norm_69 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_101, parameter_17, parameter_16, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_101, parameter_16, parameter_17 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_89 = paddle._C_ops.matmul(layer_norm_67, parameter_15, False, False) + del parameter_15 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_102 = paddle._C_ops.add(matmul_89, parameter_14) + del matmul_89, parameter_14 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_102, full_int_array_1) + del add_102 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_67, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_103 = paddle._C_ops.add(matmul_90, parameter_12) + del matmul_90, parameter_12 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_91 = paddle._C_ops.matmul(layer_norm_67, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_104 = paddle._C_ops.add(matmul_91, parameter_10) + del matmul_91, parameter_10 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(add_103, full_int_array_1) + del add_103 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x21x4x64xf32) <- (1x21x256xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(add_104, full_int_array_1) + del add_104, full_int_array_1 + + # pd_op.transpose: (1x4x21x64xf32) <- (1x21x4x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.scale: (1x4x21x64xf32) <- (1x4x21x64xf32, 1xf32) + scale_12 = paddle._C_ops.scale(transpose_44, full_5, float("0"), True) + del full_5, transpose_44 + + # pd_op.matmul: (1x4x21x21xf32) <- (1x4x21x64xf32, 1x4x21x64xf32) + matmul_92 = paddle._C_ops.matmul(scale_12, transpose_45, False, True) + del scale_12, transpose_45 + + # pd_op.add: (1x4x21x21xf32) <- (1x4x21x21xf32, 1x1x1x21xf32) + add_105 = paddle._C_ops.add(matmul_92, unsqueeze_0) + del matmul_92, unsqueeze_0 + + # pd_op.softmax: (1x4x21x21xf32) <- (1x4x21x21xf32) + softmax_11 = paddle._C_ops.softmax(add_105, -1) + del add_105 + + # pd_op.dropout: (1x4x21x21xf32, 1x4x21x21xui8) <- (1x4x21x21xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # pd_op.matmul: (1x4x21x64xf32) <- (1x4x21x21xf32, 1x4x21x64xf32) + matmul_93 = paddle._C_ops.matmul(dropout_68, transpose_46, False, False) + del dropout_68, transpose_46 + + # pd_op.transpose: (1x21x4x64xf32) <- (1x4x21x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_93, [0, 2, 1, 3]) + del matmul_93 + + # pd_op.reshape: (1x21x256xf32) <- (1x21x4x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_2) + del full_int_array_2, transpose_47 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x256xf32, 256x256xf32) + matmul_94 = paddle._C_ops.matmul(reshape_47, parameter_9, False, False) + del parameter_9, reshape_47 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_106 = paddle._C_ops.add(matmul_94, parameter_8) + del matmul_94, parameter_8 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_106, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_106 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_107 = paddle._C_ops.add(layer_norm_67, dropout_70) + del dropout_70, layer_norm_67 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_70, layer_norm_71, layer_norm_72 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_107, parameter_3, parameter_2, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_107, parameter_2, parameter_3 + + # pd_op.matmul: (1x21x1024xf32) <- (1x21x256xf32, 256x1024xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_70, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (1x21x1024xf32) <- (1x21x1024xf32, 1024xf32) + add_108 = paddle._C_ops.add(matmul_95, parameter_6) + del matmul_95, parameter_6 + + # pd_op.gelu: (1x21x1024xf32) <- (1x21x1024xf32) + gelu_11 = paddle._C_ops.gelu(add_108, False) + del add_108 + + # pd_op.matmul: (1x21x256xf32) <- (1x21x1024xf32, 1024x256xf32) + matmul_96 = paddle._C_ops.matmul(gelu_11, parameter_5, False, False) + del gelu_11, parameter_5 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 256xf32) + add_109 = paddle._C_ops.add(matmul_96, parameter_4) + del matmul_96, parameter_4 + + # pd_op.dropout: (1x21x256xf32, 1x21x256xui8) <- (1x21x256xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_109, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_109, full_4 + + # pd_op.add: (1x21x256xf32) <- (1x21x256xf32, 1x21x256xf32) + add_110 = paddle._C_ops.add(layer_norm_70, dropout_72) + del dropout_72, layer_norm_70 + + # pd_op.layer_norm: (1x21x256xf32, 1x21xf32, 1x21xf32) <- (1x21x256xf32, 256xf32, 256xf32) + layer_norm_0, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_110, parameter_1, parameter_0, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_110, parameter_0, parameter_1 + + return layer_norm_0 diff --git a/paddle_samples/PaddleNLP/electra-small/weight_meta.py b/paddle_samples/PaddleNLP/electra-small/weight_meta.py new file mode 100644 index 000000000..0a8222861 --- /dev/null +++ b/paddle_samples/PaddleNLP/electra-small/weight_meta.py @@ -0,0 +1,1770 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [1024, 256] + dtype = "float32" + min_val = float("-0.0929945") + max_val = float("0.0940746") + mean = float("0.000123354") + std = float("0.0199939") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [256, 1024] + dtype = "float32" + min_val = float("-0.0910894") + max_val = float("0.0923413") + mean = float("-3.89802e-05") + std = float("0.019977") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0873198") + max_val = float("0.0921436") + mean = float("-6.9577e-05") + std = float("0.0199938") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0853417") + max_val = float("0.0845899") + mean = float("0.000111344") + std = float("0.0199393") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0906781") + max_val = float("0.0779754") + mean = float("-1.50112e-05") + std = float("0.020079") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0844199") + max_val = float("0.085236") + mean = float("-7.84817e-05") + std = float("0.0199876") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [1024, 256] + dtype = "float32" + min_val = float("-0.0991516") + max_val = float("0.0930038") + mean = float("1.37436e-05") + std = float("0.0199822") + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [256, 1024] + dtype = "float32" + min_val = float("-0.0933972") + max_val = float("0.0904012") + mean = float("5.46858e-05") + std = float("0.0200012") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0890345") + max_val = float("0.0901724") + mean = float("-3.22802e-05") + std = float("0.0200891") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0830772") + max_val = float("0.0825421") + mean = float("8.11444e-05") + std = float("0.0200276") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0845826") + max_val = float("0.0848353") + mean = float("-2.60814e-06") + std = float("0.0199588") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0829131") + max_val = float("0.0809821") + mean = float("6.19652e-05") + std = float("0.0200435") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [1024, 256] + dtype = "float32" + min_val = float("-0.101205") + max_val = float("0.100646") + mean = float("5.56225e-05") + std = float("0.0200255") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [256, 1024] + dtype = "float32" + min_val = float("-0.0847212") + max_val = float("0.094118") + mean = float("5.77995e-05") + std = float("0.0199801") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0895892") + max_val = float("0.0874706") + mean = float("-7.60244e-05") + std = float("0.0199274") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0922891") + max_val = float("0.0891664") + mean = float("-3.33649e-05") + std = float("0.0199609") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0843779") + max_val = float("0.0835105") + mean = float("0.000120908") + std = float("0.0200001") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0875678") + max_val = float("0.0966042") + mean = float("-9.69519e-05") + std = float("0.0199923") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [1024, 256] + dtype = "float32" + min_val = float("-0.0887305") + max_val = float("0.0944978") + mean = float("1.82048e-07") + std = float("0.0199701") + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [256, 1024] + dtype = "float32" + min_val = float("-0.0987456") + max_val = float("0.0848846") + mean = float("-3.4266e-05") + std = float("0.0200043") + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0857028") + max_val = float("0.0804035") + mean = float("-9.80719e-05") + std = float("0.0200301") + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0854193") + max_val = float("0.0854086") + mean = float("1.3848e-05") + std = float("0.0199978") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0926117") + max_val = float("0.083608") + mean = float("-6.83631e-05") + std = float("0.0199781") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0875507") + max_val = float("0.090627") + mean = float("0.000117304") + std = float("0.0200068") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [1024, 256] + dtype = "float32" + min_val = float("-0.0966386") + max_val = float("0.0891313") + mean = float("-3.10438e-05") + std = float("0.0199916") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [256, 1024] + dtype = "float32" + min_val = float("-0.0938048") + max_val = float("0.0857828") + mean = float("-1.32661e-05") + std = float("0.0200019") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0839814") + max_val = float("0.0870426") + mean = float("-8.16294e-06") + std = float("0.0199103") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0883969") + max_val = float("0.0870695") + mean = float("-0.000205522") + std = float("0.0199731") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0942998") + max_val = float("0.0878098") + mean = float("5.85464e-05") + std = float("0.0199583") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.07834") + max_val = float("0.0861157") + mean = float("3.6729e-05") + std = float("0.0199864") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [1024, 256] + dtype = "float32" + min_val = float("-0.0988853") + max_val = float("0.0888091") + mean = float("-7.44819e-06") + std = float("0.0199728") + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [256, 1024] + dtype = "float32" + min_val = float("-0.0895679") + max_val = float("0.0843347") + mean = float("-1.91256e-05") + std = float("0.0200168") + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0857427") + max_val = float("0.0851083") + mean = float("0.000116156") + std = float("0.020029") + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0781572") + max_val = float("0.0833352") + mean = float("-2.14896e-06") + std = float("0.0199354") + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0877092") + max_val = float("0.0843934") + mean = float("0.000106905") + std = float("0.0199651") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0822875") + max_val = float("0.0914724") + mean = float("2.61144e-05") + std = float("0.0199404") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [1024, 256] + dtype = "float32" + min_val = float("-0.0969061") + max_val = float("0.0883214") + mean = float("-2.32346e-05") + std = float("0.019995") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [256, 1024] + dtype = "float32" + min_val = float("-0.0894631") + max_val = float("0.0916864") + mean = float("8.55553e-05") + std = float("0.0200058") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0815028") + max_val = float("0.08481") + mean = float("6.90502e-05") + std = float("0.0200096") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0877703") + max_val = float("0.0878524") + mean = float("7.41985e-05") + std = float("0.0199234") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0835444") + max_val = float("0.0810637") + mean = float("-6.51042e-05") + std = float("0.0200145") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0796764") + max_val = float("0.0776683") + mean = float("-5.75949e-05") + std = float("0.019914") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [1024, 256] + dtype = "float32" + min_val = float("-0.0884092") + max_val = float("0.0914756") + mean = float("4.18137e-05") + std = float("0.0200142") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [256, 1024] + dtype = "float32" + min_val = float("-0.0906037") + max_val = float("0.0899752") + mean = float("-1.52011e-05") + std = float("0.0199893") + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0825674") + max_val = float("0.0900583") + mean = float("4.07071e-05") + std = float("0.0199846") + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0892479") + max_val = float("0.079615") + mean = float("0.000177854") + std = float("0.0200151") + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.113258") + max_val = float("0.0867464") + mean = float("-3.73485e-05") + std = float("0.0199746") + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0858093") + max_val = float("0.0796421") + mean = float("3.67268e-05") + std = float("0.0199718") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [1024, 256] + dtype = "float32" + min_val = float("-0.0887048") + max_val = float("0.0849174") + mean = float("0.000150995") + std = float("0.0199901") + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [256, 1024] + dtype = "float32" + min_val = float("-0.0958811") + max_val = float("0.09424") + mean = float("4.3002e-05") + std = float("0.0200304") + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0934658") + max_val = float("0.090466") + mean = float("-8.67823e-05") + std = float("0.0200771") + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0849934") + max_val = float("0.083863") + mean = float("2.44045e-05") + std = float("0.0200104") + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0858913") + max_val = float("0.0875675") + mean = float("7.88015e-05") + std = float("0.0200333") + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0870827") + max_val = float("0.0826644") + mean = float("-2.05482e-05") + std = float("0.0201242") + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [1024, 256] + dtype = "float32" + min_val = float("-0.0941032") + max_val = float("0.0954322") + mean = float("-7.11082e-05") + std = float("0.0200199") + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [256, 1024] + dtype = "float32" + min_val = float("-0.0878246") + max_val = float("0.0919332") + mean = float("7.8638e-06") + std = float("0.0199992") + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0801872") + max_val = float("0.0857553") + mean = float("-4.2129e-05") + std = float("0.0200287") + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0892028") + max_val = float("0.0916614") + mean = float("0.0001113") + std = float("0.0200058") + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0991396") + max_val = float("0.082638") + mean = float("-8.56341e-05") + std = float("0.0199301") + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0844577") + max_val = float("0.0827848") + mean = float("4.38257e-05") + std = float("0.0200001") + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [1024, 256] + dtype = "float32" + min_val = float("-0.0923979") + max_val = float("0.0904005") + mean = float("-3.77005e-05") + std = float("0.0199908") + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [256, 1024] + dtype = "float32" + min_val = float("-0.0882584") + max_val = float("0.0877762") + mean = float("-1.75656e-05") + std = float("0.0200201") + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0822925") + max_val = float("0.0875768") + mean = float("1.52994e-05") + std = float("0.0199601") + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0895438") + max_val = float("0.0859235") + mean = float("-5.44366e-06") + std = float("0.0201604") + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0890748") + max_val = float("0.0862449") + mean = float("7.97982e-05") + std = float("0.0200604") + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.087208") + max_val = float("0.0786357") + mean = float("-4.88752e-05") + std = float("0.0200425") + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [256] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [1024, 256] + dtype = "float32" + min_val = float("-0.0866046") + max_val = float("0.0924769") + mean = float("-4.01066e-05") + std = float("0.0200228") + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [1024] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [256, 1024] + dtype = "float32" + min_val = float("-0.0940549") + max_val = float("0.109") + mean = float("5.60555e-05") + std = float("0.0199651") + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0829032") + max_val = float("0.0876175") + mean = float("4.96584e-06") + std = float("0.0199533") + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0831773") + max_val = float("0.0865148") + mean = float("5.39856e-05") + std = float("0.0200013") + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0810151") + max_val = float("0.086267") + mean = float("0.000125149") + std = float("0.0200685") + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [256, 256] + dtype = "float32" + min_val = float("-0.0841221") + max_val = float("0.0972278") + mean = float("-7.86e-05") + std = float("0.0199391") + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [256] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [128, 256] + dtype = "float32" + min_val = float("-0.0780359") + max_val = float("0.095123") + mean = float("-5.76041e-05") + std = float("0.0200838") + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [128] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [128] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [2, 128] + dtype = "float32" + min_val = float("-0.052416") + max_val = float("0.0509609") + mean = float("0.000415213") + std = float("0.0193658") + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [512, 128] + dtype = "float32" + min_val = float("-0.0830311") + max_val = float("0.0854718") + mean = float("5.67462e-05") + std = float("0.0200551") + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [30522, 128] + dtype = "float32" + min_val = float("-0.106418") + max_val = float("0.106936") + mean = float("-9.99232e-06") + std = float("0.0199994") + data = None diff --git a/paddle_samples/PaddleNLP/ernie-ctm/graph_hash.txt b/paddle_samples/PaddleNLP/ernie-ctm/graph_hash.txt new file mode 100644 index 000000000..3d0be90df --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-ctm/graph_hash.txt @@ -0,0 +1 @@ +d459e0fab438bcc391197bb9a3ac31ceb9a284c18ff00c20188782a13e30062a \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/ernie-ctm/graph_net.json b/paddle_samples/PaddleNLP/ernie-ctm/graph_net.json new file mode 100644 index 000000000..2f1cf7342 --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-ctm/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "ernie-ctm", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/ernie-ctm/input_meta.py b/paddle_samples/PaddleNLP/ernie-ctm/input_meta.py new file mode 100644 index 000000000..331389962 --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-ctm/input_meta.py @@ -0,0 +1,16 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 93] + dtype = "int64" + min_val = 98 + max_val = 170 + data = None + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 93] + dtype = "int64" + min_val = 0 + max_val = 0 + data = None diff --git a/paddle_samples/PaddleNLP/ernie-ctm/model.py b/paddle_samples/PaddleNLP/ernie-ctm/model.py new file mode 100644 index 000000000..dc5bd512e --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-ctm/model.py @@ -0,0 +1,2309 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + parameter_199, + parameter_200, + parameter_201, + parameter_202, + parameter_203, + parameter_204, + data_0, + data_1, + ): + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("0"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.equal: (1x93xb) <- (1x93xi64, xi64) + equal_0 = paddle._C_ops.equal(data_0, full_0) + del full_0 + + # pd_op.cast: (1x93xf32) <- (1x93xb) + cast_0 = paddle._C_ops.cast(equal_0, paddle.float32) + del equal_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x93xf32) <- (1x93xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_1, float("0"), True) + del cast_0, full_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_0 = [1, 2] + + # pd_op.unsqueeze: (1x1x1x93xf32) <- (1x93xf32, 2xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del full_int_array_0, scale_0 + + # pd_op.full: (2xi64) <- () + full_2 = paddle._C_ops.full( + [2], float("0"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.full: (1xi64) <- () + full_3 = paddle._C_ops.full( + [1], float("1"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xi64) <- () + full_4 = paddle._C_ops.full( + [1], float("91"), paddle.int64, paddle.core.CPUPlace() + ) + + # pd_op.full: (1xi32) <- () + full_5 = paddle._C_ops.full( + [1], float("91"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.linspace: (-1xi64) <- (1xi64, 1xi64, 1xi32) + linspace_0 = paddle._C_ops.linspace( + full_3, + full_4, + full_5, + paddle.int64, + paddle.framework._current_expected_place(), + ) + del full_3, full_4, full_5 + + # pd_op.full: (1xi32) <- () + full_6 = paddle._C_ops.full( + [1], float("0"), paddle.int32, paddle.core.CPUPlace() + ) + + # builtin.combine: ([2xi64, -1xi64]) <- (2xi64, -1xi64) + combine_0 = [full_2, linspace_0] + del full_2, linspace_0 + + # pd_op.concat: (-1xi64) <- ([2xi64, -1xi64], 1xi32) + concat_0 = paddle._C_ops.concat(combine_0, full_6) + del combine_0, full_6 + + # pd_op.embedding: (1x93x128xf32) <- (1x93xi64, 23000x128xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_204, 0, False) + del data_0, parameter_204 + + # pd_op.embedding: (-1x128xf32) <- (-1xi64, 512x128xf32) + embedding_1 = paddle._C_ops.embedding(concat_0, parameter_203, -1, False) + del concat_0, parameter_203 + + # pd_op.embedding: (1x93x128xf32) <- (1x93xi64, 2x128xf32) + embedding_2 = paddle._C_ops.embedding(data_1, parameter_202, -1, False) + del data_1, parameter_202 + + # pd_op.add: (1x93x128xf32) <- (1x93x128xf32, 1x93x128xf32) + add_1 = paddle._C_ops.add(embedding_0, embedding_2) + del embedding_0, embedding_2 + + # pd_op.add: (1x93x128xf32) <- (1x93x128xf32, -1x128xf32) + add_2 = paddle._C_ops.add(add_1, embedding_1) + del add_1, embedding_1 + + # pd_op.layer_norm: (1x93x128xf32, 1x93xf32, 1x93xf32) <- (1x93x128xf32, 128xf32, 128xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_2, parameter_201, parameter_200, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_2, parameter_200, parameter_201 + + # pd_op.full: (1xf32) <- () + full_7 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x93x128xf32, 1x93x128xui8) <- (1x93x128xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_0, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del layer_norm_0 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x128xf32, 128x768xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_199, False, False) + del dropout_0, parameter_199 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_3 = paddle._C_ops.add(matmul_0, parameter_198) + del matmul_0, parameter_198 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(add_3, parameter_197, False, False) + del parameter_197 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_4 = paddle._C_ops.add(matmul_1, parameter_196) + del matmul_1, parameter_196 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_1 = [0, 0, 12, 64] + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_4, full_int_array_1) + del add_4 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(add_3, parameter_195, False, False) + del parameter_195 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_5 = paddle._C_ops.add(matmul_2, parameter_194) + del matmul_2, parameter_194 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_3 = paddle._C_ops.matmul(add_3, parameter_193, False, False) + del parameter_193 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_6 = paddle._C_ops.add(matmul_3, parameter_192) + del matmul_3, parameter_192 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_5, full_int_array_1) + del add_5 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_6, full_int_array_1) + del add_6 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.full: (1xf32) <- () + full_8 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x93x64xf32) <- (1x12x93x64xf32, 1xf32) + scale_1 = paddle._C_ops.scale(transpose_0, full_8, float("0"), True) + del transpose_0 + + # pd_op.matmul: (1x12x93x93xf32) <- (1x12x93x64xf32, 1x12x93x64xf32) + matmul_4 = paddle._C_ops.matmul(scale_1, transpose_1, False, True) + del scale_1, transpose_1 + + # pd_op.add: (1x12x93x93xf32) <- (1x12x93x93xf32, 1x1x1x93xf32) + add_7 = paddle._C_ops.add(matmul_4, unsqueeze_0) + del matmul_4 + + # pd_op.softmax: (1x12x93x93xf32) <- (1x12x93x93xf32) + softmax_0 = paddle._C_ops.softmax(add_7, -1) + del add_7 + + # pd_op.dropout: (1x12x93x93xf32, 1x12x93x93xui8) <- (1x12x93x93xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x12x93x64xf32) <- (1x12x93x93xf32, 1x12x93x64xf32) + matmul_5 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x93x12x64xf32) <- (1x12x93x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_5, [0, 2, 1, 3]) + del matmul_5 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_2 = [0, 0, 768] + + # pd_op.reshape: (1x93x768xf32) <- (1x93x12x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_2) + del transpose_3 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_6 = paddle._C_ops.matmul(reshape_3, parameter_191, False, False) + del parameter_191, reshape_3 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_8 = paddle._C_ops.add(matmul_6, parameter_190) + del matmul_6, parameter_190 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_8, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_8 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_9 = paddle._C_ops.add(add_3, dropout_4) + del add_3, dropout_4 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_9, parameter_185, parameter_184, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_9, parameter_184, parameter_185 + + # pd_op.matmul: (1x93x3072xf32) <- (1x93x768xf32, 768x3072xf32) + matmul_7 = paddle._C_ops.matmul(layer_norm_3, parameter_189, False, False) + del parameter_189 + + # pd_op.add: (1x93x3072xf32) <- (1x93x3072xf32, 3072xf32) + add_10 = paddle._C_ops.add(matmul_7, parameter_188) + del matmul_7, parameter_188 + + # pd_op.gelu: (1x93x3072xf32) <- (1x93x3072xf32) + gelu_0 = paddle._C_ops.gelu(add_10, True) + del add_10 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x3072xf32, 3072x768xf32) + matmul_8 = paddle._C_ops.matmul(gelu_0, parameter_187, False, False) + del gelu_0, parameter_187 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_11 = paddle._C_ops.add(matmul_8, parameter_186) + del matmul_8, parameter_186 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_11, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_11 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_12 = paddle._C_ops.add(layer_norm_3, dropout_6) + del dropout_6, layer_norm_3 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_12, parameter_183, parameter_182, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_12, parameter_182, parameter_183 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_6, parameter_181, False, False) + del parameter_181 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_13 = paddle._C_ops.add(matmul_9, parameter_180) + del matmul_9, parameter_180 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_13, full_int_array_1) + del add_13 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_179, False, False) + del parameter_179 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_14 = paddle._C_ops.add(matmul_10, parameter_178) + del matmul_10, parameter_178 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_11 = paddle._C_ops.matmul(layer_norm_6, parameter_177, False, False) + del parameter_177 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_15 = paddle._C_ops.add(matmul_11, parameter_176) + del matmul_11, parameter_176 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_14, full_int_array_1) + del add_14 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_15, full_int_array_1) + del add_15 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.scale: (1x12x93x64xf32) <- (1x12x93x64xf32, 1xf32) + scale_2 = paddle._C_ops.scale(transpose_4, full_8, float("0"), True) + del transpose_4 + + # pd_op.matmul: (1x12x93x93xf32) <- (1x12x93x64xf32, 1x12x93x64xf32) + matmul_12 = paddle._C_ops.matmul(scale_2, transpose_5, False, True) + del scale_2, transpose_5 + + # pd_op.add: (1x12x93x93xf32) <- (1x12x93x93xf32, 1x1x1x93xf32) + add_16 = paddle._C_ops.add(matmul_12, unsqueeze_0) + del matmul_12 + + # pd_op.softmax: (1x12x93x93xf32) <- (1x12x93x93xf32) + softmax_1 = paddle._C_ops.softmax(add_16, -1) + del add_16 + + # pd_op.dropout: (1x12x93x93xf32, 1x12x93x93xui8) <- (1x12x93x93xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x12x93x64xf32) <- (1x12x93x93xf32, 1x12x93x64xf32) + matmul_13 = paddle._C_ops.matmul(dropout_8, transpose_6, False, False) + del dropout_8, transpose_6 + + # pd_op.transpose: (1x93x12x64xf32) <- (1x12x93x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_13, [0, 2, 1, 3]) + del matmul_13 + + # pd_op.reshape: (1x93x768xf32) <- (1x93x12x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_2) + del transpose_7 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_14 = paddle._C_ops.matmul(reshape_7, parameter_175, False, False) + del parameter_175, reshape_7 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_17 = paddle._C_ops.add(matmul_14, parameter_174) + del matmul_14, parameter_174 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_17, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_17 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_18 = paddle._C_ops.add(layer_norm_6, dropout_10) + del dropout_10, layer_norm_6 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_18, parameter_169, parameter_168, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_18, parameter_168, parameter_169 + + # pd_op.matmul: (1x93x3072xf32) <- (1x93x768xf32, 768x3072xf32) + matmul_15 = paddle._C_ops.matmul(layer_norm_9, parameter_173, False, False) + del parameter_173 + + # pd_op.add: (1x93x3072xf32) <- (1x93x3072xf32, 3072xf32) + add_19 = paddle._C_ops.add(matmul_15, parameter_172) + del matmul_15, parameter_172 + + # pd_op.gelu: (1x93x3072xf32) <- (1x93x3072xf32) + gelu_1 = paddle._C_ops.gelu(add_19, False) + del add_19 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x3072xf32, 3072x768xf32) + matmul_16 = paddle._C_ops.matmul(gelu_1, parameter_171, False, False) + del gelu_1, parameter_171 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_20 = paddle._C_ops.add(matmul_16, parameter_170) + del matmul_16, parameter_170 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_20, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_20 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_21 = paddle._C_ops.add(layer_norm_9, dropout_12) + del dropout_12, layer_norm_9 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_21, parameter_167, parameter_166, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_21, parameter_166, parameter_167 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_12, parameter_165, False, False) + del parameter_165 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_22 = paddle._C_ops.add(matmul_17, parameter_164) + del matmul_17, parameter_164 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_22, full_int_array_1) + del add_22 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_12, parameter_163, False, False) + del parameter_163 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_23 = paddle._C_ops.add(matmul_18, parameter_162) + del matmul_18, parameter_162 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_19 = paddle._C_ops.matmul(layer_norm_12, parameter_161, False, False) + del parameter_161 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_24 = paddle._C_ops.add(matmul_19, parameter_160) + del matmul_19, parameter_160 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_23, full_int_array_1) + del add_23 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_24, full_int_array_1) + del add_24 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.scale: (1x12x93x64xf32) <- (1x12x93x64xf32, 1xf32) + scale_3 = paddle._C_ops.scale(transpose_8, full_8, float("0"), True) + del transpose_8 + + # pd_op.matmul: (1x12x93x93xf32) <- (1x12x93x64xf32, 1x12x93x64xf32) + matmul_20 = paddle._C_ops.matmul(scale_3, transpose_9, False, True) + del scale_3, transpose_9 + + # pd_op.add: (1x12x93x93xf32) <- (1x12x93x93xf32, 1x1x1x93xf32) + add_25 = paddle._C_ops.add(matmul_20, unsqueeze_0) + del matmul_20 + + # pd_op.softmax: (1x12x93x93xf32) <- (1x12x93x93xf32) + softmax_2 = paddle._C_ops.softmax(add_25, -1) + del add_25 + + # pd_op.dropout: (1x12x93x93xf32, 1x12x93x93xui8) <- (1x12x93x93xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x12x93x64xf32) <- (1x12x93x93xf32, 1x12x93x64xf32) + matmul_21 = paddle._C_ops.matmul(dropout_14, transpose_10, False, False) + del dropout_14, transpose_10 + + # pd_op.transpose: (1x93x12x64xf32) <- (1x12x93x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_21, [0, 2, 1, 3]) + del matmul_21 + + # pd_op.reshape: (1x93x768xf32) <- (1x93x12x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_2) + del transpose_11 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_22 = paddle._C_ops.matmul(reshape_11, parameter_159, False, False) + del parameter_159, reshape_11 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_26 = paddle._C_ops.add(matmul_22, parameter_158) + del matmul_22, parameter_158 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_26, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_26 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_27 = paddle._C_ops.add(layer_norm_12, dropout_16) + del dropout_16, layer_norm_12 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_27, parameter_153, parameter_152, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_27, parameter_152, parameter_153 + + # pd_op.matmul: (1x93x3072xf32) <- (1x93x768xf32, 768x3072xf32) + matmul_23 = paddle._C_ops.matmul(layer_norm_15, parameter_157, False, False) + del parameter_157 + + # pd_op.add: (1x93x3072xf32) <- (1x93x3072xf32, 3072xf32) + add_28 = paddle._C_ops.add(matmul_23, parameter_156) + del matmul_23, parameter_156 + + # pd_op.gelu: (1x93x3072xf32) <- (1x93x3072xf32) + gelu_2 = paddle._C_ops.gelu(add_28, False) + del add_28 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x3072xf32, 3072x768xf32) + matmul_24 = paddle._C_ops.matmul(gelu_2, parameter_155, False, False) + del gelu_2, parameter_155 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_29 = paddle._C_ops.add(matmul_24, parameter_154) + del matmul_24, parameter_154 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_29, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_29 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_30 = paddle._C_ops.add(layer_norm_15, dropout_18) + del dropout_18, layer_norm_15 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_30, parameter_151, parameter_150, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_30, parameter_150, parameter_151 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_18, parameter_149, False, False) + del parameter_149 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_31 = paddle._C_ops.add(matmul_25, parameter_148) + del matmul_25, parameter_148 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_31, full_int_array_1) + del add_31 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_18, parameter_147, False, False) + del parameter_147 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_32 = paddle._C_ops.add(matmul_26, parameter_146) + del matmul_26, parameter_146 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_27 = paddle._C_ops.matmul(layer_norm_18, parameter_145, False, False) + del parameter_145 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_33 = paddle._C_ops.add(matmul_27, parameter_144) + del matmul_27, parameter_144 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_32, full_int_array_1) + del add_32 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_33, full_int_array_1) + del add_33 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.scale: (1x12x93x64xf32) <- (1x12x93x64xf32, 1xf32) + scale_4 = paddle._C_ops.scale(transpose_12, full_8, float("0"), True) + del transpose_12 + + # pd_op.matmul: (1x12x93x93xf32) <- (1x12x93x64xf32, 1x12x93x64xf32) + matmul_28 = paddle._C_ops.matmul(scale_4, transpose_13, False, True) + del scale_4, transpose_13 + + # pd_op.add: (1x12x93x93xf32) <- (1x12x93x93xf32, 1x1x1x93xf32) + add_34 = paddle._C_ops.add(matmul_28, unsqueeze_0) + del matmul_28 + + # pd_op.softmax: (1x12x93x93xf32) <- (1x12x93x93xf32) + softmax_3 = paddle._C_ops.softmax(add_34, -1) + del add_34 + + # pd_op.dropout: (1x12x93x93xf32, 1x12x93x93xui8) <- (1x12x93x93xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x12x93x64xf32) <- (1x12x93x93xf32, 1x12x93x64xf32) + matmul_29 = paddle._C_ops.matmul(dropout_20, transpose_14, False, False) + del dropout_20, transpose_14 + + # pd_op.transpose: (1x93x12x64xf32) <- (1x12x93x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_29, [0, 2, 1, 3]) + del matmul_29 + + # pd_op.reshape: (1x93x768xf32) <- (1x93x12x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_2) + del transpose_15 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_30 = paddle._C_ops.matmul(reshape_15, parameter_143, False, False) + del parameter_143, reshape_15 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_35 = paddle._C_ops.add(matmul_30, parameter_142) + del matmul_30, parameter_142 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_35, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_35 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_36 = paddle._C_ops.add(layer_norm_18, dropout_22) + del dropout_22, layer_norm_18 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_36, parameter_137, parameter_136, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_36, parameter_136, parameter_137 + + # pd_op.matmul: (1x93x3072xf32) <- (1x93x768xf32, 768x3072xf32) + matmul_31 = paddle._C_ops.matmul(layer_norm_21, parameter_141, False, False) + del parameter_141 + + # pd_op.add: (1x93x3072xf32) <- (1x93x3072xf32, 3072xf32) + add_37 = paddle._C_ops.add(matmul_31, parameter_140) + del matmul_31, parameter_140 + + # pd_op.gelu: (1x93x3072xf32) <- (1x93x3072xf32) + gelu_3 = paddle._C_ops.gelu(add_37, False) + del add_37 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x3072xf32, 3072x768xf32) + matmul_32 = paddle._C_ops.matmul(gelu_3, parameter_139, False, False) + del gelu_3, parameter_139 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_38 = paddle._C_ops.add(matmul_32, parameter_138) + del matmul_32, parameter_138 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_38, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_38 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_39 = paddle._C_ops.add(layer_norm_21, dropout_24) + del dropout_24, layer_norm_21 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_39, parameter_135, parameter_134, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_39, parameter_134, parameter_135 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_24, parameter_133, False, False) + del parameter_133 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_40 = paddle._C_ops.add(matmul_33, parameter_132) + del matmul_33, parameter_132 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_40, full_int_array_1) + del add_40 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_24, parameter_131, False, False) + del parameter_131 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_41 = paddle._C_ops.add(matmul_34, parameter_130) + del matmul_34, parameter_130 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_35 = paddle._C_ops.matmul(layer_norm_24, parameter_129, False, False) + del parameter_129 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_42 = paddle._C_ops.add(matmul_35, parameter_128) + del matmul_35, parameter_128 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_41, full_int_array_1) + del add_41 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_42, full_int_array_1) + del add_42 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.scale: (1x12x93x64xf32) <- (1x12x93x64xf32, 1xf32) + scale_5 = paddle._C_ops.scale(transpose_16, full_8, float("0"), True) + del transpose_16 + + # pd_op.matmul: (1x12x93x93xf32) <- (1x12x93x64xf32, 1x12x93x64xf32) + matmul_36 = paddle._C_ops.matmul(scale_5, transpose_17, False, True) + del scale_5, transpose_17 + + # pd_op.add: (1x12x93x93xf32) <- (1x12x93x93xf32, 1x1x1x93xf32) + add_43 = paddle._C_ops.add(matmul_36, unsqueeze_0) + del matmul_36 + + # pd_op.softmax: (1x12x93x93xf32) <- (1x12x93x93xf32) + softmax_4 = paddle._C_ops.softmax(add_43, -1) + del add_43 + + # pd_op.dropout: (1x12x93x93xf32, 1x12x93x93xui8) <- (1x12x93x93xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x12x93x64xf32) <- (1x12x93x93xf32, 1x12x93x64xf32) + matmul_37 = paddle._C_ops.matmul(dropout_26, transpose_18, False, False) + del dropout_26, transpose_18 + + # pd_op.transpose: (1x93x12x64xf32) <- (1x12x93x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_37, [0, 2, 1, 3]) + del matmul_37 + + # pd_op.reshape: (1x93x768xf32) <- (1x93x12x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_2) + del transpose_19 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_38 = paddle._C_ops.matmul(reshape_19, parameter_127, False, False) + del parameter_127, reshape_19 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_44 = paddle._C_ops.add(matmul_38, parameter_126) + del matmul_38, parameter_126 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_44, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_44 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_45 = paddle._C_ops.add(layer_norm_24, dropout_28) + del dropout_28, layer_norm_24 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_45, parameter_121, parameter_120, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_45, parameter_120, parameter_121 + + # pd_op.matmul: (1x93x3072xf32) <- (1x93x768xf32, 768x3072xf32) + matmul_39 = paddle._C_ops.matmul(layer_norm_27, parameter_125, False, False) + del parameter_125 + + # pd_op.add: (1x93x3072xf32) <- (1x93x3072xf32, 3072xf32) + add_46 = paddle._C_ops.add(matmul_39, parameter_124) + del matmul_39, parameter_124 + + # pd_op.gelu: (1x93x3072xf32) <- (1x93x3072xf32) + gelu_4 = paddle._C_ops.gelu(add_46, False) + del add_46 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x3072xf32, 3072x768xf32) + matmul_40 = paddle._C_ops.matmul(gelu_4, parameter_123, False, False) + del gelu_4, parameter_123 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_47 = paddle._C_ops.add(matmul_40, parameter_122) + del matmul_40, parameter_122 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_47, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_47 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_48 = paddle._C_ops.add(layer_norm_27, dropout_30) + del dropout_30, layer_norm_27 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_48, parameter_119, parameter_118, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_48, parameter_118, parameter_119 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_30, parameter_117, False, False) + del parameter_117 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_49 = paddle._C_ops.add(matmul_41, parameter_116) + del matmul_41, parameter_116 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_49, full_int_array_1) + del add_49 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_30, parameter_115, False, False) + del parameter_115 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_50 = paddle._C_ops.add(matmul_42, parameter_114) + del matmul_42, parameter_114 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_43 = paddle._C_ops.matmul(layer_norm_30, parameter_113, False, False) + del parameter_113 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_51 = paddle._C_ops.add(matmul_43, parameter_112) + del matmul_43, parameter_112 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_50, full_int_array_1) + del add_50 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_51, full_int_array_1) + del add_51 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.scale: (1x12x93x64xf32) <- (1x12x93x64xf32, 1xf32) + scale_6 = paddle._C_ops.scale(transpose_20, full_8, float("0"), True) + del transpose_20 + + # pd_op.matmul: (1x12x93x93xf32) <- (1x12x93x64xf32, 1x12x93x64xf32) + matmul_44 = paddle._C_ops.matmul(scale_6, transpose_21, False, True) + del scale_6, transpose_21 + + # pd_op.add: (1x12x93x93xf32) <- (1x12x93x93xf32, 1x1x1x93xf32) + add_52 = paddle._C_ops.add(matmul_44, unsqueeze_0) + del matmul_44 + + # pd_op.softmax: (1x12x93x93xf32) <- (1x12x93x93xf32) + softmax_5 = paddle._C_ops.softmax(add_52, -1) + del add_52 + + # pd_op.dropout: (1x12x93x93xf32, 1x12x93x93xui8) <- (1x12x93x93xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x12x93x64xf32) <- (1x12x93x93xf32, 1x12x93x64xf32) + matmul_45 = paddle._C_ops.matmul(dropout_32, transpose_22, False, False) + del dropout_32, transpose_22 + + # pd_op.transpose: (1x93x12x64xf32) <- (1x12x93x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_45, [0, 2, 1, 3]) + del matmul_45 + + # pd_op.reshape: (1x93x768xf32) <- (1x93x12x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_2) + del transpose_23 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_46 = paddle._C_ops.matmul(reshape_23, parameter_111, False, False) + del parameter_111, reshape_23 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_53 = paddle._C_ops.add(matmul_46, parameter_110) + del matmul_46, parameter_110 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_53, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_53 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_54 = paddle._C_ops.add(layer_norm_30, dropout_34) + del dropout_34, layer_norm_30 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_54, parameter_105, parameter_104, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_54, parameter_104, parameter_105 + + # pd_op.matmul: (1x93x3072xf32) <- (1x93x768xf32, 768x3072xf32) + matmul_47 = paddle._C_ops.matmul(layer_norm_33, parameter_109, False, False) + del parameter_109 + + # pd_op.add: (1x93x3072xf32) <- (1x93x3072xf32, 3072xf32) + add_55 = paddle._C_ops.add(matmul_47, parameter_108) + del matmul_47, parameter_108 + + # pd_op.gelu: (1x93x3072xf32) <- (1x93x3072xf32) + gelu_5 = paddle._C_ops.gelu(add_55, False) + del add_55 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x3072xf32, 3072x768xf32) + matmul_48 = paddle._C_ops.matmul(gelu_5, parameter_107, False, False) + del gelu_5, parameter_107 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_56 = paddle._C_ops.add(matmul_48, parameter_106) + del matmul_48, parameter_106 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_56, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_56 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_57 = paddle._C_ops.add(layer_norm_33, dropout_36) + del dropout_36, layer_norm_33 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_57, parameter_103, parameter_102, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_57, parameter_102, parameter_103 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_36, parameter_101, False, False) + del parameter_101 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_58 = paddle._C_ops.add(matmul_49, parameter_100) + del matmul_49, parameter_100 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_58, full_int_array_1) + del add_58 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_36, parameter_99, False, False) + del parameter_99 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_59 = paddle._C_ops.add(matmul_50, parameter_98) + del matmul_50, parameter_98 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_51 = paddle._C_ops.matmul(layer_norm_36, parameter_97, False, False) + del parameter_97 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_60 = paddle._C_ops.add(matmul_51, parameter_96) + del matmul_51, parameter_96 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_59, full_int_array_1) + del add_59 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_60, full_int_array_1) + del add_60 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.scale: (1x12x93x64xf32) <- (1x12x93x64xf32, 1xf32) + scale_7 = paddle._C_ops.scale(transpose_24, full_8, float("0"), True) + del transpose_24 + + # pd_op.matmul: (1x12x93x93xf32) <- (1x12x93x64xf32, 1x12x93x64xf32) + matmul_52 = paddle._C_ops.matmul(scale_7, transpose_25, False, True) + del scale_7, transpose_25 + + # pd_op.add: (1x12x93x93xf32) <- (1x12x93x93xf32, 1x1x1x93xf32) + add_61 = paddle._C_ops.add(matmul_52, unsqueeze_0) + del matmul_52 + + # pd_op.softmax: (1x12x93x93xf32) <- (1x12x93x93xf32) + softmax_6 = paddle._C_ops.softmax(add_61, -1) + del add_61 + + # pd_op.dropout: (1x12x93x93xf32, 1x12x93x93xui8) <- (1x12x93x93xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # pd_op.matmul: (1x12x93x64xf32) <- (1x12x93x93xf32, 1x12x93x64xf32) + matmul_53 = paddle._C_ops.matmul(dropout_38, transpose_26, False, False) + del dropout_38, transpose_26 + + # pd_op.transpose: (1x93x12x64xf32) <- (1x12x93x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_53, [0, 2, 1, 3]) + del matmul_53 + + # pd_op.reshape: (1x93x768xf32) <- (1x93x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_2) + del transpose_27 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_54 = paddle._C_ops.matmul(reshape_27, parameter_95, False, False) + del parameter_95, reshape_27 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_62 = paddle._C_ops.add(matmul_54, parameter_94) + del matmul_54, parameter_94 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_62, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_62 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_63 = paddle._C_ops.add(layer_norm_36, dropout_40) + del dropout_40, layer_norm_36 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_63, parameter_89, parameter_88, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_63, parameter_88, parameter_89 + + # pd_op.matmul: (1x93x3072xf32) <- (1x93x768xf32, 768x3072xf32) + matmul_55 = paddle._C_ops.matmul(layer_norm_39, parameter_93, False, False) + del parameter_93 + + # pd_op.add: (1x93x3072xf32) <- (1x93x3072xf32, 3072xf32) + add_64 = paddle._C_ops.add(matmul_55, parameter_92) + del matmul_55, parameter_92 + + # pd_op.gelu: (1x93x3072xf32) <- (1x93x3072xf32) + gelu_6 = paddle._C_ops.gelu(add_64, False) + del add_64 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x3072xf32, 3072x768xf32) + matmul_56 = paddle._C_ops.matmul(gelu_6, parameter_91, False, False) + del gelu_6, parameter_91 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_65 = paddle._C_ops.add(matmul_56, parameter_90) + del matmul_56, parameter_90 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_65, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_65 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_66 = paddle._C_ops.add(layer_norm_39, dropout_42) + del dropout_42, layer_norm_39 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_66, parameter_87, parameter_86, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_66, parameter_86, parameter_87 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(layer_norm_42, parameter_85, False, False) + del parameter_85 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_67 = paddle._C_ops.add(matmul_57, parameter_84) + del matmul_57, parameter_84 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(add_67, full_int_array_1) + del add_67 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_42, parameter_83, False, False) + del parameter_83 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_68 = paddle._C_ops.add(matmul_58, parameter_82) + del matmul_58, parameter_82 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_59 = paddle._C_ops.matmul(layer_norm_42, parameter_81, False, False) + del parameter_81 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_69 = paddle._C_ops.add(matmul_59, parameter_80) + del matmul_59, parameter_80 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(add_68, full_int_array_1) + del add_68 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_69, full_int_array_1) + del add_69 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.scale: (1x12x93x64xf32) <- (1x12x93x64xf32, 1xf32) + scale_8 = paddle._C_ops.scale(transpose_28, full_8, float("0"), True) + del transpose_28 + + # pd_op.matmul: (1x12x93x93xf32) <- (1x12x93x64xf32, 1x12x93x64xf32) + matmul_60 = paddle._C_ops.matmul(scale_8, transpose_29, False, True) + del scale_8, transpose_29 + + # pd_op.add: (1x12x93x93xf32) <- (1x12x93x93xf32, 1x1x1x93xf32) + add_70 = paddle._C_ops.add(matmul_60, unsqueeze_0) + del matmul_60 + + # pd_op.softmax: (1x12x93x93xf32) <- (1x12x93x93xf32) + softmax_7 = paddle._C_ops.softmax(add_70, -1) + del add_70 + + # pd_op.dropout: (1x12x93x93xf32, 1x12x93x93xui8) <- (1x12x93x93xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # pd_op.matmul: (1x12x93x64xf32) <- (1x12x93x93xf32, 1x12x93x64xf32) + matmul_61 = paddle._C_ops.matmul(dropout_44, transpose_30, False, False) + del dropout_44, transpose_30 + + # pd_op.transpose: (1x93x12x64xf32) <- (1x12x93x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_61, [0, 2, 1, 3]) + del matmul_61 + + # pd_op.reshape: (1x93x768xf32) <- (1x93x12x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_2) + del transpose_31 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_62 = paddle._C_ops.matmul(reshape_31, parameter_79, False, False) + del parameter_79, reshape_31 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_71 = paddle._C_ops.add(matmul_62, parameter_78) + del matmul_62, parameter_78 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_71, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_71 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_72 = paddle._C_ops.add(layer_norm_42, dropout_46) + del dropout_46, layer_norm_42 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_72, parameter_73, parameter_72, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_72, parameter_72, parameter_73 + + # pd_op.matmul: (1x93x3072xf32) <- (1x93x768xf32, 768x3072xf32) + matmul_63 = paddle._C_ops.matmul(layer_norm_45, parameter_77, False, False) + del parameter_77 + + # pd_op.add: (1x93x3072xf32) <- (1x93x3072xf32, 3072xf32) + add_73 = paddle._C_ops.add(matmul_63, parameter_76) + del matmul_63, parameter_76 + + # pd_op.gelu: (1x93x3072xf32) <- (1x93x3072xf32) + gelu_7 = paddle._C_ops.gelu(add_73, False) + del add_73 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x3072xf32, 3072x768xf32) + matmul_64 = paddle._C_ops.matmul(gelu_7, parameter_75, False, False) + del gelu_7, parameter_75 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_74 = paddle._C_ops.add(matmul_64, parameter_74) + del matmul_64, parameter_74 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_74, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_74 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_75 = paddle._C_ops.add(layer_norm_45, dropout_48) + del dropout_48, layer_norm_45 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_75, parameter_71, parameter_70, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_75, parameter_70, parameter_71 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_48, parameter_69, False, False) + del parameter_69 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_76 = paddle._C_ops.add(matmul_65, parameter_68) + del matmul_65, parameter_68 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(add_76, full_int_array_1) + del add_76 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_48, parameter_67, False, False) + del parameter_67 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_77 = paddle._C_ops.add(matmul_66, parameter_66) + del matmul_66, parameter_66 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_67 = paddle._C_ops.matmul(layer_norm_48, parameter_65, False, False) + del parameter_65 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_78 = paddle._C_ops.add(matmul_67, parameter_64) + del matmul_67, parameter_64 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(add_77, full_int_array_1) + del add_77 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_78, full_int_array_1) + del add_78 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.scale: (1x12x93x64xf32) <- (1x12x93x64xf32, 1xf32) + scale_9 = paddle._C_ops.scale(transpose_32, full_8, float("0"), True) + del transpose_32 + + # pd_op.matmul: (1x12x93x93xf32) <- (1x12x93x64xf32, 1x12x93x64xf32) + matmul_68 = paddle._C_ops.matmul(scale_9, transpose_33, False, True) + del scale_9, transpose_33 + + # pd_op.add: (1x12x93x93xf32) <- (1x12x93x93xf32, 1x1x1x93xf32) + add_79 = paddle._C_ops.add(matmul_68, unsqueeze_0) + del matmul_68 + + # pd_op.softmax: (1x12x93x93xf32) <- (1x12x93x93xf32) + softmax_8 = paddle._C_ops.softmax(add_79, -1) + del add_79 + + # pd_op.dropout: (1x12x93x93xf32, 1x12x93x93xui8) <- (1x12x93x93xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # pd_op.matmul: (1x12x93x64xf32) <- (1x12x93x93xf32, 1x12x93x64xf32) + matmul_69 = paddle._C_ops.matmul(dropout_50, transpose_34, False, False) + del dropout_50, transpose_34 + + # pd_op.transpose: (1x93x12x64xf32) <- (1x12x93x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_69, [0, 2, 1, 3]) + del matmul_69 + + # pd_op.reshape: (1x93x768xf32) <- (1x93x12x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_2) + del transpose_35 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_70 = paddle._C_ops.matmul(reshape_35, parameter_63, False, False) + del parameter_63, reshape_35 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_80 = paddle._C_ops.add(matmul_70, parameter_62) + del matmul_70, parameter_62 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_80, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_80 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_81 = paddle._C_ops.add(layer_norm_48, dropout_52) + del dropout_52, layer_norm_48 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_81, parameter_57, parameter_56, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_81, parameter_56, parameter_57 + + # pd_op.matmul: (1x93x3072xf32) <- (1x93x768xf32, 768x3072xf32) + matmul_71 = paddle._C_ops.matmul(layer_norm_51, parameter_61, False, False) + del parameter_61 + + # pd_op.add: (1x93x3072xf32) <- (1x93x3072xf32, 3072xf32) + add_82 = paddle._C_ops.add(matmul_71, parameter_60) + del matmul_71, parameter_60 + + # pd_op.gelu: (1x93x3072xf32) <- (1x93x3072xf32) + gelu_8 = paddle._C_ops.gelu(add_82, False) + del add_82 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x3072xf32, 3072x768xf32) + matmul_72 = paddle._C_ops.matmul(gelu_8, parameter_59, False, False) + del gelu_8, parameter_59 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_83 = paddle._C_ops.add(matmul_72, parameter_58) + del matmul_72, parameter_58 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_83, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_83 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_84 = paddle._C_ops.add(layer_norm_51, dropout_54) + del dropout_54, layer_norm_51 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_84, parameter_55, parameter_54, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_84, parameter_54, parameter_55 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_54, parameter_53, False, False) + del parameter_53 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_85 = paddle._C_ops.add(matmul_73, parameter_52) + del matmul_73, parameter_52 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(add_85, full_int_array_1) + del add_85 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_54, parameter_51, False, False) + del parameter_51 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_86 = paddle._C_ops.add(matmul_74, parameter_50) + del matmul_74, parameter_50 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_75 = paddle._C_ops.matmul(layer_norm_54, parameter_49, False, False) + del parameter_49 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_87 = paddle._C_ops.add(matmul_75, parameter_48) + del matmul_75, parameter_48 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_86, full_int_array_1) + del add_86 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(add_87, full_int_array_1) + del add_87 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.scale: (1x12x93x64xf32) <- (1x12x93x64xf32, 1xf32) + scale_10 = paddle._C_ops.scale(transpose_36, full_8, float("0"), True) + del transpose_36 + + # pd_op.matmul: (1x12x93x93xf32) <- (1x12x93x64xf32, 1x12x93x64xf32) + matmul_76 = paddle._C_ops.matmul(scale_10, transpose_37, False, True) + del scale_10, transpose_37 + + # pd_op.add: (1x12x93x93xf32) <- (1x12x93x93xf32, 1x1x1x93xf32) + add_88 = paddle._C_ops.add(matmul_76, unsqueeze_0) + del matmul_76 + + # pd_op.softmax: (1x12x93x93xf32) <- (1x12x93x93xf32) + softmax_9 = paddle._C_ops.softmax(add_88, -1) + del add_88 + + # pd_op.dropout: (1x12x93x93xf32, 1x12x93x93xui8) <- (1x12x93x93xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # pd_op.matmul: (1x12x93x64xf32) <- (1x12x93x93xf32, 1x12x93x64xf32) + matmul_77 = paddle._C_ops.matmul(dropout_56, transpose_38, False, False) + del dropout_56, transpose_38 + + # pd_op.transpose: (1x93x12x64xf32) <- (1x12x93x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_77, [0, 2, 1, 3]) + del matmul_77 + + # pd_op.reshape: (1x93x768xf32) <- (1x93x12x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_2) + del transpose_39 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_78 = paddle._C_ops.matmul(reshape_39, parameter_47, False, False) + del parameter_47, reshape_39 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_89 = paddle._C_ops.add(matmul_78, parameter_46) + del matmul_78, parameter_46 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_89, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_89 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_90 = paddle._C_ops.add(layer_norm_54, dropout_58) + del dropout_58, layer_norm_54 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_90, parameter_41, parameter_40, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_90, parameter_40, parameter_41 + + # pd_op.matmul: (1x93x3072xf32) <- (1x93x768xf32, 768x3072xf32) + matmul_79 = paddle._C_ops.matmul(layer_norm_57, parameter_45, False, False) + del parameter_45 + + # pd_op.add: (1x93x3072xf32) <- (1x93x3072xf32, 3072xf32) + add_91 = paddle._C_ops.add(matmul_79, parameter_44) + del matmul_79, parameter_44 + + # pd_op.gelu: (1x93x3072xf32) <- (1x93x3072xf32) + gelu_9 = paddle._C_ops.gelu(add_91, False) + del add_91 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x3072xf32, 3072x768xf32) + matmul_80 = paddle._C_ops.matmul(gelu_9, parameter_43, False, False) + del gelu_9, parameter_43 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_92 = paddle._C_ops.add(matmul_80, parameter_42) + del matmul_80, parameter_42 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_92, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_92 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_93 = paddle._C_ops.add(layer_norm_57, dropout_60) + del dropout_60, layer_norm_57 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_93, parameter_39, parameter_38, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_93, parameter_38, parameter_39 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_81 = paddle._C_ops.matmul(layer_norm_60, parameter_37, False, False) + del parameter_37 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_81, parameter_36) + del matmul_81, parameter_36 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(add_94, full_int_array_1) + del add_94 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_60, parameter_35, False, False) + del parameter_35 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_95 = paddle._C_ops.add(matmul_82, parameter_34) + del matmul_82, parameter_34 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_83 = paddle._C_ops.matmul(layer_norm_60, parameter_33, False, False) + del parameter_33 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_96 = paddle._C_ops.add(matmul_83, parameter_32) + del matmul_83, parameter_32 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_95, full_int_array_1) + del add_95 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(add_96, full_int_array_1) + del add_96 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.scale: (1x12x93x64xf32) <- (1x12x93x64xf32, 1xf32) + scale_11 = paddle._C_ops.scale(transpose_40, full_8, float("0"), True) + del transpose_40 + + # pd_op.matmul: (1x12x93x93xf32) <- (1x12x93x64xf32, 1x12x93x64xf32) + matmul_84 = paddle._C_ops.matmul(scale_11, transpose_41, False, True) + del scale_11, transpose_41 + + # pd_op.add: (1x12x93x93xf32) <- (1x12x93x93xf32, 1x1x1x93xf32) + add_97 = paddle._C_ops.add(matmul_84, unsqueeze_0) + del matmul_84 + + # pd_op.softmax: (1x12x93x93xf32) <- (1x12x93x93xf32) + softmax_10 = paddle._C_ops.softmax(add_97, -1) + del add_97 + + # pd_op.dropout: (1x12x93x93xf32, 1x12x93x93xui8) <- (1x12x93x93xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # pd_op.matmul: (1x12x93x64xf32) <- (1x12x93x93xf32, 1x12x93x64xf32) + matmul_85 = paddle._C_ops.matmul(dropout_62, transpose_42, False, False) + del dropout_62, transpose_42 + + # pd_op.transpose: (1x93x12x64xf32) <- (1x12x93x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_85, [0, 2, 1, 3]) + del matmul_85 + + # pd_op.reshape: (1x93x768xf32) <- (1x93x12x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_2) + del transpose_43 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_86 = paddle._C_ops.matmul(reshape_43, parameter_31, False, False) + del parameter_31, reshape_43 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_98 = paddle._C_ops.add(matmul_86, parameter_30) + del matmul_86, parameter_30 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_98, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_98 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_99 = paddle._C_ops.add(layer_norm_60, dropout_64) + del dropout_64, layer_norm_60 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_99, parameter_25, parameter_24, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_99, parameter_24, parameter_25 + + # pd_op.matmul: (1x93x3072xf32) <- (1x93x768xf32, 768x3072xf32) + matmul_87 = paddle._C_ops.matmul(layer_norm_63, parameter_29, False, False) + del parameter_29 + + # pd_op.add: (1x93x3072xf32) <- (1x93x3072xf32, 3072xf32) + add_100 = paddle._C_ops.add(matmul_87, parameter_28) + del matmul_87, parameter_28 + + # pd_op.gelu: (1x93x3072xf32) <- (1x93x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_100, False) + del add_100 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x3072xf32, 3072x768xf32) + matmul_88 = paddle._C_ops.matmul(gelu_10, parameter_27, False, False) + del gelu_10, parameter_27 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_101 = paddle._C_ops.add(matmul_88, parameter_26) + del matmul_88, parameter_26 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_101, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_101 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_102 = paddle._C_ops.add(layer_norm_63, dropout_66) + del dropout_66, layer_norm_63 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_102, parameter_23, parameter_22, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_102, parameter_22, parameter_23 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(layer_norm_66, parameter_21, False, False) + del parameter_21 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_89, parameter_20) + del matmul_89, parameter_20 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_103, full_int_array_1) + del add_103 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_66, parameter_19, False, False) + del parameter_19 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_104 = paddle._C_ops.add(matmul_90, parameter_18) + del matmul_90, parameter_18 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_91 = paddle._C_ops.matmul(layer_norm_66, parameter_17, False, False) + del parameter_17 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_105 = paddle._C_ops.add(matmul_91, parameter_16) + del matmul_91, parameter_16 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(add_104, full_int_array_1) + del add_104 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x93x12x64xf32) <- (1x93x768xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(add_105, full_int_array_1) + del add_105, full_int_array_1 + + # pd_op.transpose: (1x12x93x64xf32) <- (1x93x12x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.scale: (1x12x93x64xf32) <- (1x12x93x64xf32, 1xf32) + scale_12 = paddle._C_ops.scale(transpose_44, full_8, float("0"), True) + del full_8, transpose_44 + + # pd_op.matmul: (1x12x93x93xf32) <- (1x12x93x64xf32, 1x12x93x64xf32) + matmul_92 = paddle._C_ops.matmul(scale_12, transpose_45, False, True) + del scale_12, transpose_45 + + # pd_op.add: (1x12x93x93xf32) <- (1x12x93x93xf32, 1x1x1x93xf32) + add_106 = paddle._C_ops.add(matmul_92, unsqueeze_0) + del matmul_92, unsqueeze_0 + + # pd_op.softmax: (1x12x93x93xf32) <- (1x12x93x93xf32) + softmax_11 = paddle._C_ops.softmax(add_106, -1) + del add_106 + + # pd_op.dropout: (1x12x93x93xf32, 1x12x93x93xui8) <- (1x12x93x93xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # pd_op.matmul: (1x12x93x64xf32) <- (1x12x93x93xf32, 1x12x93x64xf32) + matmul_93 = paddle._C_ops.matmul(dropout_68, transpose_46, False, False) + del dropout_68, transpose_46 + + # pd_op.transpose: (1x93x12x64xf32) <- (1x12x93x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_93, [0, 2, 1, 3]) + del matmul_93 + + # pd_op.reshape: (1x93x768xf32) <- (1x93x12x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_2) + del full_int_array_2, transpose_47 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x768xf32, 768x768xf32) + matmul_94 = paddle._C_ops.matmul(reshape_47, parameter_15, False, False) + del parameter_15, reshape_47 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_107 = paddle._C_ops.add(matmul_94, parameter_14) + del matmul_94, parameter_14 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_107, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_107 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_108 = paddle._C_ops.add(layer_norm_66, dropout_70) + del dropout_70, layer_norm_66 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_108, parameter_9, parameter_8, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_108, parameter_8, parameter_9 + + # pd_op.matmul: (1x93x3072xf32) <- (1x93x768xf32, 768x3072xf32) + matmul_95 = paddle._C_ops.matmul(layer_norm_69, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (1x93x3072xf32) <- (1x93x3072xf32, 3072xf32) + add_109 = paddle._C_ops.add(matmul_95, parameter_12) + del matmul_95, parameter_12 + + # pd_op.gelu: (1x93x3072xf32) <- (1x93x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_109, False) + del add_109 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x3072xf32, 3072x768xf32) + matmul_96 = paddle._C_ops.matmul(gelu_11, parameter_11, False, False) + del gelu_11, parameter_11 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_110 = paddle._C_ops.add(matmul_96, parameter_10) + del matmul_96, parameter_10 + + # pd_op.dropout: (1x93x768xf32, 1x93x768xui8) <- (1x93x768xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_110, None, full_7, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_110, full_7 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 1x93x768xf32) + add_111 = paddle._C_ops.add(layer_norm_69, dropout_72) + del dropout_72, layer_norm_69 + + # pd_op.layer_norm: (1x93x768xf32, 1x93xf32, 1x93xf32) <- (1x93x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_111, parameter_7, parameter_6, float("1e-12"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_111, parameter_6, parameter_7 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_4 = [1] + + # pd_op.slice: (1x768xf32) <- (1x93x768xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + layer_norm_72, [1], full_int_array_3, full_int_array_4, [1], [1] + ) + del full_int_array_3 + + # pd_op.matmul: (1x768xf32) <- (1x768xf32, 768x768xf32) + matmul_97 = paddle._C_ops.matmul(slice_0, parameter_5, False, False) + del parameter_5, slice_0 + + # pd_op.add: (1x768xf32) <- (1x768xf32, 768xf32) + add_112 = paddle._C_ops.add(matmul_97, parameter_4) + del matmul_97, parameter_4 + + # pd_op.tanh: (1x768xf32) <- (1x768xf32) + tanh_0 = paddle._C_ops.tanh(add_112) + del add_112 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_5 = [2] + + # pd_op.slice: (1x768xf32) <- (1x93x768xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + layer_norm_72, [1], full_int_array_4, full_int_array_5, [1], [1] + ) + del full_int_array_5 + + # pd_op.unsqueeze: (1x1x768xf32) <- (1x768xf32, 1xi64) + unsqueeze_1 = paddle._C_ops.unsqueeze(slice_1, full_int_array_4) + del full_int_array_4, slice_1 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_6 = [1, 93, 768] + + # pd_op.expand: (1x93x768xf32) <- (1x1x768xf32, 3xi64) + expand_0 = paddle._C_ops.expand(unsqueeze_1, full_int_array_6) + del full_int_array_6, unsqueeze_1 + + # pd_op.full: (1xi32) <- () + full_9 = paddle._C_ops.full( + [1], float("2"), paddle.int32, paddle.core.CPUPlace() + ) + + # builtin.combine: ([1x93x768xf32, 1x93x768xf32]) <- (1x93x768xf32, 1x93x768xf32) + combine_1 = [layer_norm_72, expand_0] + del layer_norm_72 + + # pd_op.concat: (1x93x1536xf32) <- ([1x93x768xf32, 1x93x768xf32], 1xi32) + concat_1 = paddle._C_ops.concat(combine_1, full_9) + del combine_1, full_9 + + # pd_op.matmul: (1x93x3072xf32) <- (1x93x1536xf32, 1536x3072xf32) + matmul_98 = paddle._C_ops.matmul(concat_1, parameter_3, False, False) + del concat_1, parameter_3 + + # pd_op.add: (1x93x3072xf32) <- (1x93x3072xf32, 3072xf32) + add_113 = paddle._C_ops.add(matmul_98, parameter_2) + del matmul_98, parameter_2 + + # pd_op.matmul: (1x93x768xf32) <- (1x93x3072xf32, 3072x768xf32) + matmul_99 = paddle._C_ops.matmul(add_113, parameter_1, False, False) + del add_113, parameter_1 + + # pd_op.add: (1x93x768xf32) <- (1x93x768xf32, 768xf32) + add_0 = paddle._C_ops.add(matmul_99, parameter_0) + del expand_0, matmul_99, parameter_0 + + return add_0, tanh_0 diff --git a/paddle_samples/PaddleNLP/ernie-ctm/weight_meta.py b/paddle_samples/PaddleNLP/ernie-ctm/weight_meta.py new file mode 100644 index 000000000..81739acac --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-ctm/weight_meta.py @@ -0,0 +1,1824 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.100677") + max_val = float("0.0990477") + mean = float("3.78378e-06") + std = float("0.0199946") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [1536, 3072] + dtype = "float32" + min_val = float("-0.11274") + max_val = float("0.104661") + mean = float("-1.7358e-05") + std = float("0.019997") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.10639") + max_val = float("0.0952795") + mean = float("4.00056e-05") + std = float("0.0200022") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.10291") + max_val = float("0.0956335") + mean = float("-8.81711e-06") + std = float("0.0200037") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.103084") + max_val = float("0.10384") + mean = float("-1.85919e-05") + std = float("0.0199937") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0906918") + max_val = float("0.0925839") + mean = float("-2.21858e-05") + std = float("0.0200113") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0999938") + max_val = float("0.0907395") + mean = float("3.66501e-05") + std = float("0.0199921") + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0914937") + max_val = float("0.0948347") + mean = float("2.17193e-05") + std = float("0.0200057") + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0902838") + max_val = float("0.0959529") + mean = float("-2.68245e-05") + std = float("0.0200077") + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0953044") + max_val = float("0.0988985") + mean = float("-4.99307e-07") + std = float("0.0200185") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.107138") + max_val = float("0.0968174") + mean = float("1.85069e-05") + std = float("0.0200009") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0953081") + max_val = float("0.0879186") + mean = float("3.66298e-05") + std = float("0.0200262") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0919527") + max_val = float("0.0886559") + mean = float("6.45068e-06") + std = float("0.0200242") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0996471") + max_val = float("0.0904262") + mean = float("4.48326e-05") + std = float("0.0200094") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0948469") + max_val = float("0.0969067") + mean = float("1.81627e-05") + std = float("0.019993") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.100499") + max_val = float("0.103792") + mean = float("1.22675e-05") + std = float("0.0200077") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0996765") + max_val = float("0.0961822") + mean = float("2.68019e-05") + std = float("0.0199909") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.10085") + max_val = float("0.0971578") + mean = float("-1.3161e-05") + std = float("0.0199973") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0918916") + max_val = float("0.0946076") + mean = float("-6.11713e-06") + std = float("0.0200031") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0987784") + max_val = float("0.0900638") + mean = float("-1.39678e-05") + std = float("0.0200097") + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0921494") + max_val = float("0.0918088") + mean = float("-1.53373e-05") + std = float("0.0200102") + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0998782") + max_val = float("0.110628") + mean = float("-4.74923e-06") + std = float("0.0199895") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0991749") + max_val = float("0.0999238") + mean = float("2.47725e-05") + std = float("0.0200055") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0872345") + max_val = float("0.0951828") + mean = float("2.1155e-05") + std = float("0.0199576") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0942368") + max_val = float("0.101678") + mean = float("-8.1635e-06") + std = float("0.0199941") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0982251") + max_val = float("0.0950276") + mean = float("-2.64649e-05") + std = float("0.020011") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0910418") + max_val = float("0.0947895") + mean = float("-3.57002e-05") + std = float("0.0200252") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0987136") + max_val = float("0.0974542") + mean = float("-4.15601e-05") + std = float("0.0200005") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.100247") + max_val = float("0.10304") + mean = float("-6.29197e-06") + std = float("0.0199987") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0947961") + max_val = float("0.0909334") + mean = float("-4.25004e-05") + std = float("0.019992") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0915061") + max_val = float("0.0922597") + mean = float("4.89863e-05") + std = float("0.020003") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0902696") + max_val = float("0.0987771") + mean = float("-2.35637e-05") + std = float("0.0199667") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0920389") + max_val = float("0.0967754") + mean = float("-4.72734e-05") + std = float("0.0199681") + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0999639") + max_val = float("0.100037") + mean = float("-1.34425e-05") + std = float("0.0199944") + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.101671") + max_val = float("0.102812") + mean = float("4.58826e-06") + std = float("0.0199886") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0880888") + max_val = float("0.0942423") + mean = float("-1.79859e-05") + std = float("0.0199986") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0923793") + max_val = float("0.0905648") + mean = float("-1.76195e-05") + std = float("0.020004") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.085552") + max_val = float("0.0972841") + mean = float("4.28535e-05") + std = float("0.0200407") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0919455") + max_val = float("0.101761") + mean = float("-1.41767e-06") + std = float("0.0200085") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0948803") + max_val = float("0.103711") + mean = float("2.1837e-05") + std = float("0.0200073") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.105335") + max_val = float("0.0968467") + mean = float("-2.8995e-06") + std = float("0.0200153") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0960928") + max_val = float("0.0904254") + mean = float("2.73888e-05") + std = float("0.0200064") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0965505") + max_val = float("0.0967066") + mean = float("4.95441e-06") + std = float("0.0199662") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0942086") + max_val = float("0.0949561") + mean = float("-9.88654e-06") + std = float("0.0199787") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0916935") + max_val = float("0.092474") + mean = float("-3.12797e-05") + std = float("0.0199915") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0962105") + max_val = float("0.0934882") + mean = float("-1.88661e-06") + std = float("0.0199911") + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0988319") + max_val = float("0.103595") + mean = float("3.68369e-06") + std = float("0.0200119") + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0918376") + max_val = float("0.1089") + mean = float("-1.80137e-05") + std = float("0.0200312") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.091416") + max_val = float("0.0963752") + mean = float("-1.77608e-05") + std = float("0.020041") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.102912") + max_val = float("0.0980264") + mean = float("-1.11452e-05") + std = float("0.0200166") + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0870133") + max_val = float("0.102589") + mean = float("-7.30883e-06") + std = float("0.0199923") + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.105249") + max_val = float("0.0980139") + mean = float("-1.21053e-06") + std = float("0.0200094") + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0954325") + max_val = float("0.0970396") + mean = float("2.96343e-07") + std = float("0.0199921") + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0893702") + max_val = float("0.0928326") + mean = float("7.15342e-06") + std = float("0.020021") + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.107207") + max_val = float("0.0929387") + mean = float("-2.33798e-05") + std = float("0.020003") + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.101201") + max_val = float("0.112451") + mean = float("-2.23732e-05") + std = float("0.0199967") + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0963177") + max_val = float("0.0970858") + mean = float("-1.2978e-05") + std = float("0.0200092") + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.099477") + max_val = float("0.0987894") + mean = float("1.47888e-05") + std = float("0.0199982") + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.101134") + max_val = float("0.100252") + mean = float("-2.09072e-05") + std = float("0.0199948") + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0858202") + max_val = float("0.0956743") + mean = float("2.65366e-05") + std = float("0.0200289") + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0941286") + max_val = float("0.105916") + mean = float("-4.54664e-05") + std = float("0.0200043") + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0950527") + max_val = float("0.0953847") + mean = float("2.44363e-05") + std = float("0.0199937") + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.102228") + max_val = float("0.092131") + mean = float("1.33433e-06") + std = float("0.0199798") + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.105763") + max_val = float("0.0974083") + mean = float("3.30872e-07") + std = float("0.0200033") + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.102685") + max_val = float("0.102409") + mean = float("-5.46151e-06") + std = float("0.0200032") + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0864148") + max_val = float("0.109471") + mean = float("-8.38497e-06") + std = float("0.0199996") + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0954966") + max_val = float("0.0927692") + mean = float("-2.37909e-06") + std = float("0.0200157") + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0948879") + max_val = float("0.11081") + mean = float("2.8939e-05") + std = float("0.0199765") + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.086991") + max_val = float("0.0896926") + mean = float("-1.27053e-05") + std = float("0.0199721") + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.104928") + max_val = float("0.102829") + mean = float("-2.54071e-06") + std = float("0.0200067") + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.10489") + max_val = float("0.0950402") + mean = float("-8.14132e-06") + std = float("0.020018") + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0885931") + max_val = float("0.0870393") + mean = float("-6.03537e-05") + std = float("0.0199722") + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0952406") + max_val = float("0.105954") + mean = float("4.45018e-05") + std = float("0.0200017") + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0971826") + max_val = float("0.099243") + mean = float("-3.47508e-05") + std = float("0.0200035") + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.088388") + max_val = float("0.08974") + mean = float("3.32492e-05") + std = float("0.0200055") + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_199: + name = "parameter_199" + shape = [128, 768] + dtype = "float32" + min_val = float("-0.0946447") + max_val = float("0.0792513") + mean = float("6.92627e-05") + std = float("0.020008") + data = None + + +class Program_weight_tensor_parameter_200: + name = "parameter_200" + shape = [128] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_201: + name = "parameter_201" + shape = [128] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_202: + name = "parameter_202" + shape = [2, 128] + dtype = "float32" + min_val = float("-0.061231") + max_val = float("0.0491345") + mean = float("-0.000736232") + std = float("0.0201544") + data = None + + +class Program_weight_tensor_parameter_203: + name = "parameter_203" + shape = [512, 128] + dtype = "float32" + min_val = float("-0.0804825") + max_val = float("0.0814414") + mean = float("7.82948e-05") + std = float("0.0199542") + data = None + + +class Program_weight_tensor_parameter_204: + name = "parameter_204" + shape = [23000, 128] + dtype = "float32" + min_val = float("-0.107211") + max_val = float("0.106111") + mean = float("4.06582e-06") + std = float("0.0199964") + data = None diff --git a/paddle_samples/PaddleNLP/ernie-gram-zh/graph_hash.txt b/paddle_samples/PaddleNLP/ernie-gram-zh/graph_hash.txt new file mode 100644 index 000000000..efdf6fca8 --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-gram-zh/graph_hash.txt @@ -0,0 +1 @@ +ffa40f527e044a5367cf6be1c8817b149ed5c60e6e46d5fec121981f50e84f73 \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/ernie-gram-zh/graph_net.json b/paddle_samples/PaddleNLP/ernie-gram-zh/graph_net.json new file mode 100644 index 000000000..3e2cb5b44 --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-gram-zh/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "ernie-gram-zh", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/ernie-gram-zh/input_meta.py b/paddle_samples/PaddleNLP/ernie-gram-zh/input_meta.py new file mode 100644 index 000000000..97eb8a799 --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-gram-zh/input_meta.py @@ -0,0 +1,12 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 11] + dtype = "int64" + data = [1, 811, 1257, 175, 29, 502, 130, 706, 3619, 12046, 2] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 11] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/ernie-gram-zh/model.py b/paddle_samples/PaddleNLP/ernie-gram-zh/model.py new file mode 100644 index 000000000..418bf364c --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-gram-zh/model.py @@ -0,0 +1,2224 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + parameter_197, + parameter_198, + data_0, + data_1, + ): + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("0"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.equal: (1x11xb) <- (1x11xi64, xi64) + equal_0 = paddle._C_ops.equal(data_0, full_0) + del full_0 + + # pd_op.cast: (1x11xf32) <- (1x11xb) + cast_0 = paddle._C_ops.cast(equal_0, paddle.float32) + del equal_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x11xf32) <- (1x11xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_1, float("0"), True) + del cast_0, full_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_0 = [1, 2] + + # pd_op.unsqueeze: (1x1x1x11xf32) <- (1x11xf32, 2xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del full_int_array_0, scale_0 + + # pd_op.embedding: (1x11x768xf32) <- (1x11xi64, 18018x768xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_198, 0, False) + del data_0, parameter_198 + + # pd_op.full: (1x11xi64) <- () + full_2 = paddle._C_ops.full( + [1, 11], + float("1"), + paddle.int64, + paddle.framework._current_expected_place(), + ) + + # pd_op.full: (1xi32) <- () + full_3 = paddle._C_ops.full( + [1], float("1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.cumsum: (1x11xi64) <- (1x11xi64, 1xi32) + cumsum_0 = paddle._C_ops.cumsum(full_2, full_3, False, False, False) + del full_3 + + # pd_op.subtract: (1x11xi64) <- (1x11xi64, 1x11xi64) + subtract_0 = paddle._C_ops.subtract(cumsum_0, full_2) + del cumsum_0, full_2 + + # pd_op.embedding: (1x11x768xf32) <- (1x11xi64, 512x768xf32) + embedding_1 = paddle._C_ops.embedding(subtract_0, parameter_197, -1, False) + del parameter_197, subtract_0 + + # pd_op.embedding: (1x11x768xf32) <- (1x11xi64, 2x768xf32) + embedding_2 = paddle._C_ops.embedding(data_1, parameter_196, -1, False) + del data_1, parameter_196 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_0, layer_norm_1, layer_norm_2 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_195, parameter_194, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_194, parameter_195 + + # pd_op.full: (1xf32) <- () + full_4 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_0, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del layer_norm_0 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_193, False, False) + del parameter_193 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_192) + del matmul_0, parameter_192 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_1 = [0, 0, 12, 64] + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_2, full_int_array_1) + del add_2 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(dropout_0, parameter_191, False, False) + del parameter_191 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_190) + del matmul_1, parameter_190 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(dropout_0, parameter_189, False, False) + del parameter_189 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_188) + del matmul_2, parameter_188 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_3, full_int_array_1) + del add_3 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_4, full_int_array_1) + del add_4 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.full: (1xf32) <- () + full_5 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_1 = paddle._C_ops.scale(transpose_0, full_5, float("0"), True) + del transpose_0 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_3 = paddle._C_ops.matmul(scale_1, transpose_1, False, True) + del scale_1, transpose_1 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_5 = paddle._C_ops.add(matmul_3, unsqueeze_0) + del matmul_3 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_0 = paddle._C_ops.softmax(add_5, -1) + del add_5 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_4 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_4, [0, 2, 1, 3]) + del matmul_4 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_2 = [0, 0, 768] + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_2) + del transpose_3 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_5 = paddle._C_ops.matmul(reshape_3, parameter_187, False, False) + del parameter_187, reshape_3 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_6 = paddle._C_ops.add(matmul_5, parameter_186) + del matmul_5, parameter_186 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_6, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_6 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_7 = paddle._C_ops.add(dropout_0, dropout_4) + del dropout_0, dropout_4 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_3, layer_norm_4, layer_norm_5 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_7, parameter_181, parameter_180, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_7, parameter_180, parameter_181 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_6 = paddle._C_ops.matmul(layer_norm_3, parameter_185, False, False) + del parameter_185 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_8 = paddle._C_ops.add(matmul_6, parameter_184) + del matmul_6, parameter_184 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_0 = paddle._C_ops.gelu(add_8, False) + del add_8 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_7 = paddle._C_ops.matmul(gelu_0, parameter_183, False, False) + del gelu_0, parameter_183 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_182) + del matmul_7, parameter_182 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_9, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_9 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_10 = paddle._C_ops.add(layer_norm_3, dropout_6) + del dropout_6, layer_norm_3 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_6, layer_norm_7, layer_norm_8 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_10, parameter_179, parameter_178, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_10, parameter_178, parameter_179 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_6, parameter_177, False, False) + del parameter_177 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_11 = paddle._C_ops.add(matmul_8, parameter_176) + del matmul_8, parameter_176 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_11, full_int_array_1) + del add_11 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_6, parameter_175, False, False) + del parameter_175 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_174) + del matmul_9, parameter_174 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_6, parameter_173, False, False) + del parameter_173 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_13 = paddle._C_ops.add(matmul_10, parameter_172) + del matmul_10, parameter_172 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_12, full_int_array_1) + del add_12 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_13, full_int_array_1) + del add_13 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_2 = paddle._C_ops.scale(transpose_4, full_5, float("0"), True) + del transpose_4 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_11 = paddle._C_ops.matmul(scale_2, transpose_5, False, True) + del scale_2, transpose_5 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_14 = paddle._C_ops.add(matmul_11, unsqueeze_0) + del matmul_11 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_1 = paddle._C_ops.softmax(add_14, -1) + del add_14 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_12 = paddle._C_ops.matmul(dropout_8, transpose_6, False, False) + del dropout_8, transpose_6 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_12, [0, 2, 1, 3]) + del matmul_12 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_2) + del transpose_7 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_13 = paddle._C_ops.matmul(reshape_7, parameter_171, False, False) + del parameter_171, reshape_7 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_15 = paddle._C_ops.add(matmul_13, parameter_170) + del matmul_13, parameter_170 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_15, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_15 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_16 = paddle._C_ops.add(layer_norm_6, dropout_10) + del dropout_10, layer_norm_6 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_9, layer_norm_10, layer_norm_11 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_16, parameter_165, parameter_164, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_16, parameter_164, parameter_165 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_9, parameter_169, False, False) + del parameter_169 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_17 = paddle._C_ops.add(matmul_14, parameter_168) + del matmul_14, parameter_168 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_1 = paddle._C_ops.gelu(add_17, False) + del add_17 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_15 = paddle._C_ops.matmul(gelu_1, parameter_167, False, False) + del gelu_1, parameter_167 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_18 = paddle._C_ops.add(matmul_15, parameter_166) + del matmul_15, parameter_166 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_18, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_18 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_19 = paddle._C_ops.add(layer_norm_9, dropout_12) + del dropout_12, layer_norm_9 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_12, layer_norm_13, layer_norm_14 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_19, parameter_163, parameter_162, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_19, parameter_162, parameter_163 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_16 = paddle._C_ops.matmul(layer_norm_12, parameter_161, False, False) + del parameter_161 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_20 = paddle._C_ops.add(matmul_16, parameter_160) + del matmul_16, parameter_160 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_20, full_int_array_1) + del add_20 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_12, parameter_159, False, False) + del parameter_159 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_158) + del matmul_17, parameter_158 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_12, parameter_157, False, False) + del parameter_157 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_156) + del matmul_18, parameter_156 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_21, full_int_array_1) + del add_21 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_22, full_int_array_1) + del add_22 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_3 = paddle._C_ops.scale(transpose_8, full_5, float("0"), True) + del transpose_8 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_19 = paddle._C_ops.matmul(scale_3, transpose_9, False, True) + del scale_3, transpose_9 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_23 = paddle._C_ops.add(matmul_19, unsqueeze_0) + del matmul_19 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_2 = paddle._C_ops.softmax(add_23, -1) + del add_23 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_20 = paddle._C_ops.matmul(dropout_14, transpose_10, False, False) + del dropout_14, transpose_10 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_20, [0, 2, 1, 3]) + del matmul_20 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_2) + del transpose_11 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_21 = paddle._C_ops.matmul(reshape_11, parameter_155, False, False) + del parameter_155, reshape_11 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_24 = paddle._C_ops.add(matmul_21, parameter_154) + del matmul_21, parameter_154 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_24, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_24 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_25 = paddle._C_ops.add(layer_norm_12, dropout_16) + del dropout_16, layer_norm_12 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_15, layer_norm_16, layer_norm_17 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_149, parameter_148, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_25, parameter_148, parameter_149 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_22 = paddle._C_ops.matmul(layer_norm_15, parameter_153, False, False) + del parameter_153 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_26 = paddle._C_ops.add(matmul_22, parameter_152) + del matmul_22, parameter_152 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_2 = paddle._C_ops.gelu(add_26, False) + del add_26 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_23 = paddle._C_ops.matmul(gelu_2, parameter_151, False, False) + del gelu_2, parameter_151 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_27 = paddle._C_ops.add(matmul_23, parameter_150) + del matmul_23, parameter_150 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_27, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_27 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_28 = paddle._C_ops.add(layer_norm_15, dropout_18) + del dropout_18, layer_norm_15 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_18, layer_norm_19, layer_norm_20 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_28, parameter_147, parameter_146, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_28, parameter_146, parameter_147 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_24 = paddle._C_ops.matmul(layer_norm_18, parameter_145, False, False) + del parameter_145 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_29 = paddle._C_ops.add(matmul_24, parameter_144) + del matmul_24, parameter_144 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_29, full_int_array_1) + del add_29 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_18, parameter_143, False, False) + del parameter_143 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_30 = paddle._C_ops.add(matmul_25, parameter_142) + del matmul_25, parameter_142 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_18, parameter_141, False, False) + del parameter_141 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_31 = paddle._C_ops.add(matmul_26, parameter_140) + del matmul_26, parameter_140 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_30, full_int_array_1) + del add_30 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_31, full_int_array_1) + del add_31 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_4 = paddle._C_ops.scale(transpose_12, full_5, float("0"), True) + del transpose_12 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_27 = paddle._C_ops.matmul(scale_4, transpose_13, False, True) + del scale_4, transpose_13 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_32 = paddle._C_ops.add(matmul_27, unsqueeze_0) + del matmul_27 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_3 = paddle._C_ops.softmax(add_32, -1) + del add_32 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_28 = paddle._C_ops.matmul(dropout_20, transpose_14, False, False) + del dropout_20, transpose_14 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_2) + del transpose_15 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_29 = paddle._C_ops.matmul(reshape_15, parameter_139, False, False) + del parameter_139, reshape_15 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_33 = paddle._C_ops.add(matmul_29, parameter_138) + del matmul_29, parameter_138 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_33, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_33 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_34 = paddle._C_ops.add(layer_norm_18, dropout_22) + del dropout_22, layer_norm_18 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_21, layer_norm_22, layer_norm_23 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_133, parameter_132, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_34, parameter_132, parameter_133 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_21, parameter_137, False, False) + del parameter_137 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_35 = paddle._C_ops.add(matmul_30, parameter_136) + del matmul_30, parameter_136 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_3 = paddle._C_ops.gelu(add_35, False) + del add_35 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_31 = paddle._C_ops.matmul(gelu_3, parameter_135, False, False) + del gelu_3, parameter_135 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_36 = paddle._C_ops.add(matmul_31, parameter_134) + del matmul_31, parameter_134 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_36, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_36 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_37 = paddle._C_ops.add(layer_norm_21, dropout_24) + del dropout_24, layer_norm_21 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_24, layer_norm_25, layer_norm_26 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_37, parameter_131, parameter_130, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_37, parameter_130, parameter_131 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_32 = paddle._C_ops.matmul(layer_norm_24, parameter_129, False, False) + del parameter_129 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_38 = paddle._C_ops.add(matmul_32, parameter_128) + del matmul_32, parameter_128 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_38, full_int_array_1) + del add_38 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_24, parameter_127, False, False) + del parameter_127 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_39 = paddle._C_ops.add(matmul_33, parameter_126) + del matmul_33, parameter_126 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_24, parameter_125, False, False) + del parameter_125 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_40 = paddle._C_ops.add(matmul_34, parameter_124) + del matmul_34, parameter_124 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_39, full_int_array_1) + del add_39 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_40, full_int_array_1) + del add_40 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_5 = paddle._C_ops.scale(transpose_16, full_5, float("0"), True) + del transpose_16 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_35 = paddle._C_ops.matmul(scale_5, transpose_17, False, True) + del scale_5, transpose_17 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_41 = paddle._C_ops.add(matmul_35, unsqueeze_0) + del matmul_35 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_4 = paddle._C_ops.softmax(add_41, -1) + del add_41 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_36 = paddle._C_ops.matmul(dropout_26, transpose_18, False, False) + del dropout_26, transpose_18 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_36, [0, 2, 1, 3]) + del matmul_36 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_2) + del transpose_19 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_37 = paddle._C_ops.matmul(reshape_19, parameter_123, False, False) + del parameter_123, reshape_19 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_42 = paddle._C_ops.add(matmul_37, parameter_122) + del matmul_37, parameter_122 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_42, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_42 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_43 = paddle._C_ops.add(layer_norm_24, dropout_28) + del dropout_28, layer_norm_24 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_27, layer_norm_28, layer_norm_29 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_43, parameter_117, parameter_116, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_43, parameter_116, parameter_117 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_38 = paddle._C_ops.matmul(layer_norm_27, parameter_121, False, False) + del parameter_121 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_44 = paddle._C_ops.add(matmul_38, parameter_120) + del matmul_38, parameter_120 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_4 = paddle._C_ops.gelu(add_44, False) + del add_44 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_39 = paddle._C_ops.matmul(gelu_4, parameter_119, False, False) + del gelu_4, parameter_119 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_45 = paddle._C_ops.add(matmul_39, parameter_118) + del matmul_39, parameter_118 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_45, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_45 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_46 = paddle._C_ops.add(layer_norm_27, dropout_30) + del dropout_30, layer_norm_27 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_30, layer_norm_31, layer_norm_32 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_46, parameter_115, parameter_114, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_46, parameter_114, parameter_115 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_30, parameter_113, False, False) + del parameter_113 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_47 = paddle._C_ops.add(matmul_40, parameter_112) + del matmul_40, parameter_112 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_47, full_int_array_1) + del add_47 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_30, parameter_111, False, False) + del parameter_111 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_48 = paddle._C_ops.add(matmul_41, parameter_110) + del matmul_41, parameter_110 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_30, parameter_109, False, False) + del parameter_109 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_49 = paddle._C_ops.add(matmul_42, parameter_108) + del matmul_42, parameter_108 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_48, full_int_array_1) + del add_48 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_49, full_int_array_1) + del add_49 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_6 = paddle._C_ops.scale(transpose_20, full_5, float("0"), True) + del transpose_20 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_43 = paddle._C_ops.matmul(scale_6, transpose_21, False, True) + del scale_6, transpose_21 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_50 = paddle._C_ops.add(matmul_43, unsqueeze_0) + del matmul_43 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_5 = paddle._C_ops.softmax(add_50, -1) + del add_50 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_44 = paddle._C_ops.matmul(dropout_32, transpose_22, False, False) + del dropout_32, transpose_22 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_44, [0, 2, 1, 3]) + del matmul_44 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_2) + del transpose_23 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_45 = paddle._C_ops.matmul(reshape_23, parameter_107, False, False) + del parameter_107, reshape_23 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_51 = paddle._C_ops.add(matmul_45, parameter_106) + del matmul_45, parameter_106 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_51, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_51 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_52 = paddle._C_ops.add(layer_norm_30, dropout_34) + del dropout_34, layer_norm_30 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_33, layer_norm_34, layer_norm_35 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_52, parameter_101, parameter_100, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_52, parameter_100, parameter_101 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_46 = paddle._C_ops.matmul(layer_norm_33, parameter_105, False, False) + del parameter_105 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_53 = paddle._C_ops.add(matmul_46, parameter_104) + del matmul_46, parameter_104 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_5 = paddle._C_ops.gelu(add_53, False) + del add_53 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_47 = paddle._C_ops.matmul(gelu_5, parameter_103, False, False) + del gelu_5, parameter_103 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_54 = paddle._C_ops.add(matmul_47, parameter_102) + del matmul_47, parameter_102 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_54, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_54 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_55 = paddle._C_ops.add(layer_norm_33, dropout_36) + del dropout_36, layer_norm_33 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_36, layer_norm_37, layer_norm_38 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_55, parameter_99, parameter_98, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_55, parameter_98, parameter_99 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_36, parameter_97, False, False) + del parameter_97 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_56 = paddle._C_ops.add(matmul_48, parameter_96) + del matmul_48, parameter_96 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_56, full_int_array_1) + del add_56 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_36, parameter_95, False, False) + del parameter_95 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_57 = paddle._C_ops.add(matmul_49, parameter_94) + del matmul_49, parameter_94 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_36, parameter_93, False, False) + del parameter_93 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_58 = paddle._C_ops.add(matmul_50, parameter_92) + del matmul_50, parameter_92 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_57, full_int_array_1) + del add_57 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_58, full_int_array_1) + del add_58 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_7 = paddle._C_ops.scale(transpose_24, full_5, float("0"), True) + del transpose_24 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_51 = paddle._C_ops.matmul(scale_7, transpose_25, False, True) + del scale_7, transpose_25 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_59 = paddle._C_ops.add(matmul_51, unsqueeze_0) + del matmul_51 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_6 = paddle._C_ops.softmax(add_59, -1) + del add_59 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_52 = paddle._C_ops.matmul(dropout_38, transpose_26, False, False) + del dropout_38, transpose_26 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_52, [0, 2, 1, 3]) + del matmul_52 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_2) + del transpose_27 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_53 = paddle._C_ops.matmul(reshape_27, parameter_91, False, False) + del parameter_91, reshape_27 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_60 = paddle._C_ops.add(matmul_53, parameter_90) + del matmul_53, parameter_90 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_60, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_60 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_61 = paddle._C_ops.add(layer_norm_36, dropout_40) + del dropout_40, layer_norm_36 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_39, layer_norm_40, layer_norm_41 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_61, parameter_85, parameter_84, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_61, parameter_84, parameter_85 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_39, parameter_89, False, False) + del parameter_89 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_62 = paddle._C_ops.add(matmul_54, parameter_88) + del matmul_54, parameter_88 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_6 = paddle._C_ops.gelu(add_62, False) + del add_62 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_55 = paddle._C_ops.matmul(gelu_6, parameter_87, False, False) + del gelu_6, parameter_87 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_63 = paddle._C_ops.add(matmul_55, parameter_86) + del matmul_55, parameter_86 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_63, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_63 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_64 = paddle._C_ops.add(layer_norm_39, dropout_42) + del dropout_42, layer_norm_39 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_42, layer_norm_43, layer_norm_44 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_64, parameter_83, parameter_82, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_64, parameter_82, parameter_83 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_56 = paddle._C_ops.matmul(layer_norm_42, parameter_81, False, False) + del parameter_81 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_65 = paddle._C_ops.add(matmul_56, parameter_80) + del matmul_56, parameter_80 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(add_65, full_int_array_1) + del add_65 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(layer_norm_42, parameter_79, False, False) + del parameter_79 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_66 = paddle._C_ops.add(matmul_57, parameter_78) + del matmul_57, parameter_78 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_42, parameter_77, False, False) + del parameter_77 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_67 = paddle._C_ops.add(matmul_58, parameter_76) + del matmul_58, parameter_76 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(add_66, full_int_array_1) + del add_66 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_67, full_int_array_1) + del add_67 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_8 = paddle._C_ops.scale(transpose_28, full_5, float("0"), True) + del transpose_28 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_59 = paddle._C_ops.matmul(scale_8, transpose_29, False, True) + del scale_8, transpose_29 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_68 = paddle._C_ops.add(matmul_59, unsqueeze_0) + del matmul_59 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_7 = paddle._C_ops.softmax(add_68, -1) + del add_68 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_60 = paddle._C_ops.matmul(dropout_44, transpose_30, False, False) + del dropout_44, transpose_30 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_60, [0, 2, 1, 3]) + del matmul_60 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_2) + del transpose_31 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_61 = paddle._C_ops.matmul(reshape_31, parameter_75, False, False) + del parameter_75, reshape_31 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_69 = paddle._C_ops.add(matmul_61, parameter_74) + del matmul_61, parameter_74 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_69, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_69 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_70 = paddle._C_ops.add(layer_norm_42, dropout_46) + del dropout_46, layer_norm_42 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_45, layer_norm_46, layer_norm_47 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_70, parameter_69, parameter_68, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_70, parameter_68, parameter_69 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_62 = paddle._C_ops.matmul(layer_norm_45, parameter_73, False, False) + del parameter_73 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_71 = paddle._C_ops.add(matmul_62, parameter_72) + del matmul_62, parameter_72 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_7 = paddle._C_ops.gelu(add_71, False) + del add_71 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_63 = paddle._C_ops.matmul(gelu_7, parameter_71, False, False) + del gelu_7, parameter_71 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_72 = paddle._C_ops.add(matmul_63, parameter_70) + del matmul_63, parameter_70 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_72, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_72 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_73 = paddle._C_ops.add(layer_norm_45, dropout_48) + del dropout_48, layer_norm_45 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_48, layer_norm_49, layer_norm_50 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_67, parameter_66, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_73, parameter_66, parameter_67 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_64 = paddle._C_ops.matmul(layer_norm_48, parameter_65, False, False) + del parameter_65 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_74 = paddle._C_ops.add(matmul_64, parameter_64) + del matmul_64, parameter_64 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(add_74, full_int_array_1) + del add_74 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_48, parameter_63, False, False) + del parameter_63 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_65, parameter_62) + del matmul_65, parameter_62 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_48, parameter_61, False, False) + del parameter_61 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_76 = paddle._C_ops.add(matmul_66, parameter_60) + del matmul_66, parameter_60 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(add_75, full_int_array_1) + del add_75 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_76, full_int_array_1) + del add_76 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_9 = paddle._C_ops.scale(transpose_32, full_5, float("0"), True) + del transpose_32 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_67 = paddle._C_ops.matmul(scale_9, transpose_33, False, True) + del scale_9, transpose_33 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_77 = paddle._C_ops.add(matmul_67, unsqueeze_0) + del matmul_67 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_8 = paddle._C_ops.softmax(add_77, -1) + del add_77 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_68 = paddle._C_ops.matmul(dropout_50, transpose_34, False, False) + del dropout_50, transpose_34 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_68, [0, 2, 1, 3]) + del matmul_68 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_2) + del transpose_35 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_69 = paddle._C_ops.matmul(reshape_35, parameter_59, False, False) + del parameter_59, reshape_35 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_78 = paddle._C_ops.add(matmul_69, parameter_58) + del matmul_69, parameter_58 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_78, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_78 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_79 = paddle._C_ops.add(layer_norm_48, dropout_52) + del dropout_52, layer_norm_48 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_51, layer_norm_52, layer_norm_53 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_79, parameter_53, parameter_52, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_79, parameter_52, parameter_53 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_51, parameter_57, False, False) + del parameter_57 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_80 = paddle._C_ops.add(matmul_70, parameter_56) + del matmul_70, parameter_56 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_8 = paddle._C_ops.gelu(add_80, False) + del add_80 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_71 = paddle._C_ops.matmul(gelu_8, parameter_55, False, False) + del gelu_8, parameter_55 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_81 = paddle._C_ops.add(matmul_71, parameter_54) + del matmul_71, parameter_54 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_81, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_81 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_82 = paddle._C_ops.add(layer_norm_51, dropout_54) + del dropout_54, layer_norm_51 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_54, layer_norm_55, layer_norm_56 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_82, parameter_51, parameter_50, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_82, parameter_50, parameter_51 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_72 = paddle._C_ops.matmul(layer_norm_54, parameter_49, False, False) + del parameter_49 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_83 = paddle._C_ops.add(matmul_72, parameter_48) + del matmul_72, parameter_48 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(add_83, full_int_array_1) + del add_83 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_54, parameter_47, False, False) + del parameter_47 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_84 = paddle._C_ops.add(matmul_73, parameter_46) + del matmul_73, parameter_46 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_54, parameter_45, False, False) + del parameter_45 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_85 = paddle._C_ops.add(matmul_74, parameter_44) + del matmul_74, parameter_44 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_84, full_int_array_1) + del add_84 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(add_85, full_int_array_1) + del add_85 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_10 = paddle._C_ops.scale(transpose_36, full_5, float("0"), True) + del transpose_36 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_75 = paddle._C_ops.matmul(scale_10, transpose_37, False, True) + del scale_10, transpose_37 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_86 = paddle._C_ops.add(matmul_75, unsqueeze_0) + del matmul_75 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_9 = paddle._C_ops.softmax(add_86, -1) + del add_86 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_76 = paddle._C_ops.matmul(dropout_56, transpose_38, False, False) + del dropout_56, transpose_38 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_76, [0, 2, 1, 3]) + del matmul_76 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_2) + del transpose_39 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_77 = paddle._C_ops.matmul(reshape_39, parameter_43, False, False) + del parameter_43, reshape_39 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_87 = paddle._C_ops.add(matmul_77, parameter_42) + del matmul_77, parameter_42 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_87, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_87 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_88 = paddle._C_ops.add(layer_norm_54, dropout_58) + del dropout_58, layer_norm_54 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_57, layer_norm_58, layer_norm_59 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_88, parameter_37, parameter_36, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_88, parameter_36, parameter_37 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_78 = paddle._C_ops.matmul(layer_norm_57, parameter_41, False, False) + del parameter_41 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_89 = paddle._C_ops.add(matmul_78, parameter_40) + del matmul_78, parameter_40 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_9 = paddle._C_ops.gelu(add_89, False) + del add_89 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_79 = paddle._C_ops.matmul(gelu_9, parameter_39, False, False) + del gelu_9, parameter_39 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_90 = paddle._C_ops.add(matmul_79, parameter_38) + del matmul_79, parameter_38 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_90, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_90 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_91 = paddle._C_ops.add(layer_norm_57, dropout_60) + del dropout_60, layer_norm_57 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_60, layer_norm_61, layer_norm_62 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_91, parameter_35, parameter_34, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_91, parameter_34, parameter_35 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_60, parameter_33, False, False) + del parameter_33 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_92 = paddle._C_ops.add(matmul_80, parameter_32) + del matmul_80, parameter_32 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(add_92, full_int_array_1) + del add_92 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_81 = paddle._C_ops.matmul(layer_norm_60, parameter_31, False, False) + del parameter_31 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_93 = paddle._C_ops.add(matmul_81, parameter_30) + del matmul_81, parameter_30 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_60, parameter_29, False, False) + del parameter_29 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_82, parameter_28) + del matmul_82, parameter_28 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_93, full_int_array_1) + del add_93 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(add_94, full_int_array_1) + del add_94 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_11 = paddle._C_ops.scale(transpose_40, full_5, float("0"), True) + del transpose_40 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_83 = paddle._C_ops.matmul(scale_11, transpose_41, False, True) + del scale_11, transpose_41 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_95 = paddle._C_ops.add(matmul_83, unsqueeze_0) + del matmul_83 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_10 = paddle._C_ops.softmax(add_95, -1) + del add_95 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_84 = paddle._C_ops.matmul(dropout_62, transpose_42, False, False) + del dropout_62, transpose_42 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_84, [0, 2, 1, 3]) + del matmul_84 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_2) + del transpose_43 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_85 = paddle._C_ops.matmul(reshape_43, parameter_27, False, False) + del parameter_27, reshape_43 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_96 = paddle._C_ops.add(matmul_85, parameter_26) + del matmul_85, parameter_26 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_96, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_96 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_97 = paddle._C_ops.add(layer_norm_60, dropout_64) + del dropout_64, layer_norm_60 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_63, layer_norm_64, layer_norm_65 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_97, parameter_21, parameter_20, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_97, parameter_20, parameter_21 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_86 = paddle._C_ops.matmul(layer_norm_63, parameter_25, False, False) + del parameter_25 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_98 = paddle._C_ops.add(matmul_86, parameter_24) + del matmul_86, parameter_24 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_98, False) + del add_98 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_87 = paddle._C_ops.matmul(gelu_10, parameter_23, False, False) + del gelu_10, parameter_23 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_99 = paddle._C_ops.add(matmul_87, parameter_22) + del matmul_87, parameter_22 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_99, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_99 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_100 = paddle._C_ops.add(layer_norm_63, dropout_66) + del dropout_66, layer_norm_63 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_66, layer_norm_67, layer_norm_68 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_100, parameter_19, parameter_18, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_100, parameter_18, parameter_19 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_88 = paddle._C_ops.matmul(layer_norm_66, parameter_17, False, False) + del parameter_17 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_101 = paddle._C_ops.add(matmul_88, parameter_16) + del matmul_88, parameter_16 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_101, full_int_array_1) + del add_101 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(layer_norm_66, parameter_15, False, False) + del parameter_15 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_102 = paddle._C_ops.add(matmul_89, parameter_14) + del matmul_89, parameter_14 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_66, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_90, parameter_12) + del matmul_90, parameter_12 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(add_102, full_int_array_1) + del add_102 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(add_103, full_int_array_1) + del add_103, full_int_array_1 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_12 = paddle._C_ops.scale(transpose_44, full_5, float("0"), True) + del full_5, transpose_44 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_91 = paddle._C_ops.matmul(scale_12, transpose_45, False, True) + del scale_12, transpose_45 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_104 = paddle._C_ops.add(matmul_91, unsqueeze_0) + del matmul_91, unsqueeze_0 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_11 = paddle._C_ops.softmax(add_104, -1) + del add_104 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_92 = paddle._C_ops.matmul(dropout_68, transpose_46, False, False) + del dropout_68, transpose_46 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_92, [0, 2, 1, 3]) + del matmul_92 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_2) + del full_int_array_2, transpose_47 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_93 = paddle._C_ops.matmul(reshape_47, parameter_11, False, False) + del parameter_11, reshape_47 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_105 = paddle._C_ops.add(matmul_93, parameter_10) + del matmul_93, parameter_10 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_105, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_105 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_106 = paddle._C_ops.add(layer_norm_66, dropout_70) + del dropout_70, layer_norm_66 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_69, layer_norm_70, layer_norm_71 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_106, parameter_5, parameter_4, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_106, parameter_4, parameter_5 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_94 = paddle._C_ops.matmul(layer_norm_69, parameter_9, False, False) + del parameter_9 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_107 = paddle._C_ops.add(matmul_94, parameter_8) + del matmul_94, parameter_8 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_107, False) + del add_107 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_95 = paddle._C_ops.matmul(gelu_11, parameter_7, False, False) + del gelu_11, parameter_7 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_108 = paddle._C_ops.add(matmul_95, parameter_6) + del matmul_95, parameter_6 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_108, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_108, full_4 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_109 = paddle._C_ops.add(layer_norm_69, dropout_72) + del dropout_72, layer_norm_69 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_72, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_109, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_109, parameter_2, parameter_3 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_4 = [1] + + # pd_op.slice: (1x768xf32) <- (1x11x768xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + layer_norm_72, [1], full_int_array_3, full_int_array_4, [1], [1] + ) + del full_int_array_3, full_int_array_4 + + # pd_op.matmul: (1x768xf32) <- (1x768xf32, 768x768xf32) + matmul_96 = paddle._C_ops.matmul(slice_0, parameter_1, False, False) + del parameter_1, slice_0 + + # pd_op.add: (1x768xf32) <- (1x768xf32, 768xf32) + add_110 = paddle._C_ops.add(matmul_96, parameter_0) + del matmul_96, parameter_0 + + # pd_op.tanh: (1x768xf32) <- (1x768xf32) + tanh_0 = paddle._C_ops.tanh(add_110) + del add_110, layer_norm_72 + + return tanh_0 diff --git a/paddle_samples/PaddleNLP/ernie-gram-zh/weight_meta.py b/paddle_samples/PaddleNLP/ernie-gram-zh/weight_meta.py new file mode 100644 index 000000000..58120ef98 --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-gram-zh/weight_meta.py @@ -0,0 +1,1770 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0888121") + max_val = float("0.0880472") + mean = float("1.62074e-05") + std = float("0.0199506") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0997563") + max_val = float("0.0935391") + mean = float("-1.54512e-05") + std = float("0.0199899") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0972375") + max_val = float("0.100367") + mean = float("5.26718e-06") + std = float("0.0200041") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.115282") + max_val = float("0.0880326") + mean = float("-5.66501e-06") + std = float("0.0199711") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0905731") + max_val = float("0.0906429") + mean = float("-7.73268e-06") + std = float("0.0200481") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0900394") + max_val = float("0.0929846") + mean = float("-3.38269e-05") + std = float("0.0199784") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0915604") + max_val = float("0.0957816") + mean = float("-1.35644e-06") + std = float("0.0200033") + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0986318") + max_val = float("0.0994095") + mean = float("-1.60123e-06") + std = float("0.0199932") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.104593") + max_val = float("0.102425") + mean = float("-8.65881e-06") + std = float("0.0200152") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0901251") + max_val = float("0.096399") + mean = float("-3.57921e-05") + std = float("0.0200069") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0928503") + max_val = float("0.0939488") + mean = float("-1.21843e-05") + std = float("0.0199934") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0972346") + max_val = float("0.102535") + mean = float("-1.21455e-06") + std = float("0.0199838") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0905138") + max_val = float("0.100944") + mean = float("3.78929e-05") + std = float("0.019979") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.101564") + max_val = float("0.0956425") + mean = float("8.55257e-06") + std = float("0.0200096") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.102917") + max_val = float("0.102208") + mean = float("7.95049e-07") + std = float("0.0200008") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0900881") + max_val = float("0.10106") + mean = float("-6.21459e-06") + std = float("0.0199999") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0919531") + max_val = float("0.0908819") + mean = float("-1.52962e-05") + std = float("0.0200191") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0957036") + max_val = float("0.09592") + mean = float("2.81344e-05") + std = float("0.0199851") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100397") + max_val = float("0.0971732") + mean = float("-1.56729e-05") + std = float("0.0200039") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0972447") + max_val = float("0.108281") + mean = float("-1.1584e-05") + std = float("0.019999") + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.106463") + max_val = float("0.102685") + mean = float("-2.00892e-05") + std = float("0.0200088") + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0921487") + max_val = float("0.0978615") + mean = float("-1.75836e-05") + std = float("0.0200032") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0896329") + max_val = float("0.0940948") + mean = float("2.10358e-05") + std = float("0.0200042") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0988143") + max_val = float("0.088909") + mean = float("-2.55087e-05") + std = float("0.0200006") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0924013") + max_val = float("0.0915012") + mean = float("5.44326e-05") + std = float("0.0200392") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0974268") + max_val = float("0.0966983") + mean = float("-1.30598e-05") + std = float("0.0199984") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.110017") + max_val = float("0.0990718") + mean = float("-9.77441e-06") + std = float("0.0200027") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0880099") + max_val = float("0.107238") + mean = float("-3.18655e-05") + std = float("0.020008") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0941442") + max_val = float("0.0925118") + mean = float("6.58423e-05") + std = float("0.0200186") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0925122") + max_val = float("0.0912297") + mean = float("4.75043e-05") + std = float("0.0200069") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0926295") + max_val = float("0.101095") + mean = float("-2.29628e-05") + std = float("0.0199769") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.103857") + max_val = float("0.1023") + mean = float("-1.35451e-05") + std = float("0.0199918") + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.103046") + max_val = float("0.101894") + mean = float("-9.36662e-06") + std = float("0.0199897") + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0910526") + max_val = float("0.0908537") + mean = float("3.97507e-06") + std = float("0.0199922") + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0950187") + max_val = float("0.104339") + mean = float("-2.14693e-05") + std = float("0.0200418") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0924853") + max_val = float("0.100047") + mean = float("-4.1643e-05") + std = float("0.0199926") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0956022") + max_val = float("0.0912834") + mean = float("-1.88756e-05") + std = float("0.0199878") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.10136") + max_val = float("0.0992792") + mean = float("-5.86826e-06") + std = float("0.0199928") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.103741") + max_val = float("0.10835") + mean = float("1.3642e-05") + std = float("0.0199954") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0964836") + max_val = float("0.0895779") + mean = float("-7.31771e-06") + std = float("0.0200275") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0910589") + max_val = float("0.0922254") + mean = float("-2.24479e-06") + std = float("0.0200163") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.105058") + max_val = float("0.0986385") + mean = float("2.41645e-05") + std = float("0.0200163") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.089869") + max_val = float("0.106751") + mean = float("1.38282e-05") + std = float("0.0200116") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0988304") + max_val = float("0.100557") + mean = float("-9.2662e-06") + std = float("0.0199967") + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.10579") + max_val = float("0.0941784") + mean = float("-2.67396e-06") + std = float("0.0199938") + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.107744") + max_val = float("0.0898887") + mean = float("3.66545e-05") + std = float("0.0200133") + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0899738") + max_val = float("0.0958153") + mean = float("2.50443e-05") + std = float("0.0199915") + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0950905") + max_val = float("0.0946626") + mean = float("-3.89119e-05") + std = float("0.0199732") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0953153") + max_val = float("0.0918745") + mean = float("-9.85787e-06") + std = float("0.0199888") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0947203") + max_val = float("0.0976485") + mean = float("4.51611e-06") + std = float("0.0199753") + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0996598") + max_val = float("0.098573") + mean = float("8.31306e-06") + std = float("0.0200164") + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0981243") + max_val = float("0.0921272") + mean = float("1.99392e-05") + std = float("0.0199899") + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0977595") + max_val = float("0.0908841") + mean = float("2.49358e-05") + std = float("0.0199684") + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0940697") + max_val = float("0.10518") + mean = float("-1.01569e-05") + std = float("0.0200063") + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.105592") + max_val = float("0.0944129") + mean = float("-2.57319e-05") + std = float("0.0199786") + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0995278") + max_val = float("0.100069") + mean = float("-1.96541e-05") + std = float("0.0199956") + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0989112") + max_val = float("0.0925988") + mean = float("1.81555e-06") + std = float("0.0200081") + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0955542") + max_val = float("0.092858") + mean = float("-8.00356e-06") + std = float("0.0200386") + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0894128") + max_val = float("0.0963778") + mean = float("3.06534e-05") + std = float("0.0200003") + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0925576") + max_val = float("0.101936") + mean = float("-4.76003e-06") + std = float("0.0199842") + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0927511") + max_val = float("0.0938363") + mean = float("-3.26922e-05") + std = float("0.0199862") + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0963259") + max_val = float("0.104654") + mean = float("7.02479e-06") + std = float("0.0200105") + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.099152") + max_val = float("0.103503") + mean = float("1.43268e-07") + std = float("0.020005") + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0946687") + max_val = float("0.0940675") + mean = float("-5.76674e-06") + std = float("0.0200095") + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.106899") + max_val = float("0.0900412") + mean = float("-4.23075e-05") + std = float("0.0200222") + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0962878") + max_val = float("0.0921862") + mean = float("4.48585e-06") + std = float("0.0199581") + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.088952") + max_val = float("0.088294") + mean = float("-1.43014e-05") + std = float("0.0199625") + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0996871") + max_val = float("0.0975421") + mean = float("-1.29844e-05") + std = float("0.0200118") + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0955995") + max_val = float("0.0990562") + mean = float("5.10856e-06") + std = float("0.0199808") + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.090602") + max_val = float("0.0917768") + mean = float("-3.8517e-05") + std = float("0.0200176") + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.109577") + max_val = float("0.0878127") + mean = float("1.1274e-05") + std = float("0.0199866") + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0989113") + max_val = float("0.0862598") + mean = float("7.88497e-06") + std = float("0.0199618") + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0950673") + max_val = float("0.10026") + mean = float("-3.89659e-05") + std = float("0.0199851") + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [2, 768] + dtype = "float32" + min_val = float("-0.0673018") + max_val = float("0.0671388") + mean = float("0.000128192") + std = float("0.0202725") + data = None + + +class Program_weight_tensor_parameter_197: + name = "parameter_197" + shape = [512, 768] + dtype = "float32" + min_val = float("-0.0982403") + max_val = float("0.0916195") + mean = float("-3.67761e-06") + std = float("0.0199772") + data = None + + +class Program_weight_tensor_parameter_198: + name = "parameter_198" + shape = [18018, 768] + dtype = "float32" + min_val = float("-0.0986178") + max_val = float("0.108491") + mean = float("2.91324e-06") + std = float("0.0199986") + data = None diff --git a/paddle_samples/PaddleNLP/ernie-health-chinese/graph_hash.txt b/paddle_samples/PaddleNLP/ernie-health-chinese/graph_hash.txt new file mode 100644 index 000000000..bc320368e --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-health-chinese/graph_hash.txt @@ -0,0 +1 @@ +f9c18eae1b452cd264536e374d2221cc6717fd9019f0fb6f92b722d9a7f2b98b \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/ernie-health-chinese/graph_net.json b/paddle_samples/PaddleNLP/ernie-health-chinese/graph_net.json new file mode 100644 index 000000000..e6d0c03d7 --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-health-chinese/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "ernie-health-chinese", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/ernie-health-chinese/input_meta.py b/paddle_samples/PaddleNLP/ernie-health-chinese/input_meta.py new file mode 100644 index 000000000..bf0849df7 --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-health-chinese/input_meta.py @@ -0,0 +1,12 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 11] + dtype = "int64" + data = [2, 1034, 1435, 349, 29, 738, 244, 1436, 5273, 117, 3] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 11] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/ernie-health-chinese/model.py b/paddle_samples/PaddleNLP/ernie-health-chinese/model.py new file mode 100644 index 000000000..0cce867e4 --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-health-chinese/model.py @@ -0,0 +1,2201 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + parameter_194, + parameter_195, + parameter_196, + data_0, + data_1, + ): + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("0"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.equal: (1x11xb) <- (1x11xi64, xi64) + equal_0 = paddle._C_ops.equal(data_0, full_0) + del full_0 + + # pd_op.cast: (1x11xf32) <- (1x11xb) + cast_0 = paddle._C_ops.cast(equal_0, paddle.float32) + del equal_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x11xf32) <- (1x11xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_1, float("0"), True) + del cast_0, full_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_0 = [1, 2] + + # pd_op.unsqueeze: (1x1x1x11xf32) <- (1x11xf32, 2xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del full_int_array_0, scale_0 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.full_like: (1x11xi64) <- (1x11xi64, 1xf32) + full_like_0 = paddle._C_ops.full_like( + data_0, full_2, paddle.int64, paddle.framework._current_expected_place() + ) + del full_2 + + # pd_op.full: (1xi32) <- () + full_3 = paddle._C_ops.full( + [1], float("-1"), paddle.int32, paddle.core.CPUPlace() + ) + + # pd_op.cumsum: (1x11xi64) <- (1x11xi64, 1xi32) + cumsum_0 = paddle._C_ops.cumsum(full_like_0, full_3, False, False, False) + del full_3 + + # pd_op.subtract: (1x11xi64) <- (1x11xi64, 1x11xi64) + subtract_0 = paddle._C_ops.subtract(cumsum_0, full_like_0) + del cumsum_0, full_like_0 + + # pd_op.embedding: (1x11x768xf32) <- (1x11xi64, 22608x768xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_196, -1, False) + del data_0, parameter_196 + + # pd_op.embedding: (1x11x768xf32) <- (1x11xi64, 512x768xf32) + embedding_1 = paddle._C_ops.embedding(subtract_0, parameter_195, -1, False) + del parameter_195, subtract_0 + + # pd_op.embedding: (1x11x768xf32) <- (1x11xi64, 2x768xf32) + embedding_2 = paddle._C_ops.embedding(data_1, parameter_194, -1, False) + del data_1, parameter_194 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_1 = paddle._C_ops.add(add_0, embedding_2) + del add_0, embedding_2 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_1, layer_norm_2, layer_norm_3 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_1, parameter_193, parameter_192, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_1, parameter_192, parameter_193 + + # pd_op.full: (1xf32) <- () + full_4 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + layer_norm_1, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del layer_norm_1 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_191, False, False) + del parameter_191 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_2 = paddle._C_ops.add(matmul_0, parameter_190) + del matmul_0, parameter_190 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_1 = [0, 0, 12, 64] + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(add_2, full_int_array_1) + del add_2 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(dropout_0, parameter_189, False, False) + del parameter_189 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_3 = paddle._C_ops.add(matmul_1, parameter_188) + del matmul_1, parameter_188 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(dropout_0, parameter_187, False, False) + del parameter_187 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_4 = paddle._C_ops.add(matmul_2, parameter_186) + del matmul_2, parameter_186 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(add_3, full_int_array_1) + del add_3 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(add_4, full_int_array_1) + del add_4 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.full: (1xf32) <- () + full_5 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_1 = paddle._C_ops.scale(transpose_0, full_5, float("0"), True) + del transpose_0 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_3 = paddle._C_ops.matmul(scale_1, transpose_1, False, True) + del scale_1, transpose_1 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_5 = paddle._C_ops.add(matmul_3, unsqueeze_0) + del matmul_3 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_0 = paddle._C_ops.softmax(add_5, -1) + del add_5 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_4 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_4, [0, 2, 1, 3]) + del matmul_4 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_2 = [0, 0, 768] + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_2) + del transpose_3 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_5 = paddle._C_ops.matmul(reshape_3, parameter_185, False, False) + del parameter_185, reshape_3 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_6 = paddle._C_ops.add(matmul_5, parameter_184) + del matmul_5, parameter_184 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_6, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_6 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_7 = paddle._C_ops.add(dropout_0, dropout_4) + del dropout_0, dropout_4 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_4, layer_norm_5, layer_norm_6 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_7, parameter_179, parameter_178, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_7, parameter_178, parameter_179 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_6 = paddle._C_ops.matmul(layer_norm_4, parameter_183, False, False) + del parameter_183 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_8 = paddle._C_ops.add(matmul_6, parameter_182) + del matmul_6, parameter_182 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_0 = paddle._C_ops.gelu(add_8, False) + del add_8 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_7 = paddle._C_ops.matmul(gelu_0, parameter_181, False, False) + del gelu_0, parameter_181 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_9 = paddle._C_ops.add(matmul_7, parameter_180) + del matmul_7, parameter_180 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_9, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_9 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_10 = paddle._C_ops.add(layer_norm_4, dropout_6) + del dropout_6, layer_norm_4 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_7, layer_norm_8, layer_norm_9 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_10, parameter_177, parameter_176, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_10, parameter_176, parameter_177 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_8 = paddle._C_ops.matmul(layer_norm_7, parameter_175, False, False) + del parameter_175 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_11 = paddle._C_ops.add(matmul_8, parameter_174) + del matmul_8, parameter_174 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(add_11, full_int_array_1) + del add_11 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(layer_norm_7, parameter_173, False, False) + del parameter_173 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_12 = paddle._C_ops.add(matmul_9, parameter_172) + del matmul_9, parameter_172 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_10 = paddle._C_ops.matmul(layer_norm_7, parameter_171, False, False) + del parameter_171 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_13 = paddle._C_ops.add(matmul_10, parameter_170) + del matmul_10, parameter_170 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(add_12, full_int_array_1) + del add_12 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(add_13, full_int_array_1) + del add_13 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_2 = paddle._C_ops.scale(transpose_4, full_5, float("0"), True) + del transpose_4 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_11 = paddle._C_ops.matmul(scale_2, transpose_5, False, True) + del scale_2, transpose_5 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_14 = paddle._C_ops.add(matmul_11, unsqueeze_0) + del matmul_11 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_1 = paddle._C_ops.softmax(add_14, -1) + del add_14 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_12 = paddle._C_ops.matmul(dropout_8, transpose_6, False, False) + del dropout_8, transpose_6 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_12, [0, 2, 1, 3]) + del matmul_12 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_2) + del transpose_7 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_13 = paddle._C_ops.matmul(reshape_7, parameter_169, False, False) + del parameter_169, reshape_7 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_15 = paddle._C_ops.add(matmul_13, parameter_168) + del matmul_13, parameter_168 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_15, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_15 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_16 = paddle._C_ops.add(layer_norm_7, dropout_10) + del dropout_10, layer_norm_7 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_10, layer_norm_11, layer_norm_12 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_16, parameter_163, parameter_162, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_16, parameter_162, parameter_163 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_14 = paddle._C_ops.matmul(layer_norm_10, parameter_167, False, False) + del parameter_167 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_17 = paddle._C_ops.add(matmul_14, parameter_166) + del matmul_14, parameter_166 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_1 = paddle._C_ops.gelu(add_17, False) + del add_17 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_15 = paddle._C_ops.matmul(gelu_1, parameter_165, False, False) + del gelu_1, parameter_165 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_18 = paddle._C_ops.add(matmul_15, parameter_164) + del matmul_15, parameter_164 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_18, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_18 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_19 = paddle._C_ops.add(layer_norm_10, dropout_12) + del dropout_12, layer_norm_10 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_13, layer_norm_14, layer_norm_15 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_19, parameter_161, parameter_160, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_19, parameter_160, parameter_161 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_16 = paddle._C_ops.matmul(layer_norm_13, parameter_159, False, False) + del parameter_159 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_20 = paddle._C_ops.add(matmul_16, parameter_158) + del matmul_16, parameter_158 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(add_20, full_int_array_1) + del add_20 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_17 = paddle._C_ops.matmul(layer_norm_13, parameter_157, False, False) + del parameter_157 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_21 = paddle._C_ops.add(matmul_17, parameter_156) + del matmul_17, parameter_156 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(layer_norm_13, parameter_155, False, False) + del parameter_155 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_22 = paddle._C_ops.add(matmul_18, parameter_154) + del matmul_18, parameter_154 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(add_21, full_int_array_1) + del add_21 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(add_22, full_int_array_1) + del add_22 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_3 = paddle._C_ops.scale(transpose_8, full_5, float("0"), True) + del transpose_8 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_19 = paddle._C_ops.matmul(scale_3, transpose_9, False, True) + del scale_3, transpose_9 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_23 = paddle._C_ops.add(matmul_19, unsqueeze_0) + del matmul_19 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_2 = paddle._C_ops.softmax(add_23, -1) + del add_23 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_20 = paddle._C_ops.matmul(dropout_14, transpose_10, False, False) + del dropout_14, transpose_10 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_20, [0, 2, 1, 3]) + del matmul_20 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_2) + del transpose_11 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_21 = paddle._C_ops.matmul(reshape_11, parameter_153, False, False) + del parameter_153, reshape_11 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_24 = paddle._C_ops.add(matmul_21, parameter_152) + del matmul_21, parameter_152 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_24, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_24 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_25 = paddle._C_ops.add(layer_norm_13, dropout_16) + del dropout_16, layer_norm_13 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_16, layer_norm_17, layer_norm_18 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_25, parameter_147, parameter_146, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_25, parameter_146, parameter_147 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_22 = paddle._C_ops.matmul(layer_norm_16, parameter_151, False, False) + del parameter_151 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_26 = paddle._C_ops.add(matmul_22, parameter_150) + del matmul_22, parameter_150 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_2 = paddle._C_ops.gelu(add_26, False) + del add_26 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_23 = paddle._C_ops.matmul(gelu_2, parameter_149, False, False) + del gelu_2, parameter_149 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_27 = paddle._C_ops.add(matmul_23, parameter_148) + del matmul_23, parameter_148 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_27, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_27 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_28 = paddle._C_ops.add(layer_norm_16, dropout_18) + del dropout_18, layer_norm_16 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_19, layer_norm_20, layer_norm_21 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_28, parameter_145, parameter_144, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_28, parameter_144, parameter_145 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_24 = paddle._C_ops.matmul(layer_norm_19, parameter_143, False, False) + del parameter_143 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_29 = paddle._C_ops.add(matmul_24, parameter_142) + del matmul_24, parameter_142 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(add_29, full_int_array_1) + del add_29 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(layer_norm_19, parameter_141, False, False) + del parameter_141 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_30 = paddle._C_ops.add(matmul_25, parameter_140) + del matmul_25, parameter_140 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(layer_norm_19, parameter_139, False, False) + del parameter_139 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_31 = paddle._C_ops.add(matmul_26, parameter_138) + del matmul_26, parameter_138 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(add_30, full_int_array_1) + del add_30 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(add_31, full_int_array_1) + del add_31 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_4 = paddle._C_ops.scale(transpose_12, full_5, float("0"), True) + del transpose_12 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_27 = paddle._C_ops.matmul(scale_4, transpose_13, False, True) + del scale_4, transpose_13 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_32 = paddle._C_ops.add(matmul_27, unsqueeze_0) + del matmul_27 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_3 = paddle._C_ops.softmax(add_32, -1) + del add_32 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_28 = paddle._C_ops.matmul(dropout_20, transpose_14, False, False) + del dropout_20, transpose_14 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_2) + del transpose_15 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_29 = paddle._C_ops.matmul(reshape_15, parameter_137, False, False) + del parameter_137, reshape_15 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_33 = paddle._C_ops.add(matmul_29, parameter_136) + del matmul_29, parameter_136 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_33, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_33 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_34 = paddle._C_ops.add(layer_norm_19, dropout_22) + del dropout_22, layer_norm_19 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_22, layer_norm_23, layer_norm_24 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_34, parameter_131, parameter_130, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_34, parameter_130, parameter_131 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_30 = paddle._C_ops.matmul(layer_norm_22, parameter_135, False, False) + del parameter_135 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_35 = paddle._C_ops.add(matmul_30, parameter_134) + del matmul_30, parameter_134 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_3 = paddle._C_ops.gelu(add_35, False) + del add_35 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_31 = paddle._C_ops.matmul(gelu_3, parameter_133, False, False) + del gelu_3, parameter_133 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_36 = paddle._C_ops.add(matmul_31, parameter_132) + del matmul_31, parameter_132 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_36, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_36 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_37 = paddle._C_ops.add(layer_norm_22, dropout_24) + del dropout_24, layer_norm_22 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_25, layer_norm_26, layer_norm_27 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_37, parameter_129, parameter_128, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_37, parameter_128, parameter_129 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_32 = paddle._C_ops.matmul(layer_norm_25, parameter_127, False, False) + del parameter_127 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_38 = paddle._C_ops.add(matmul_32, parameter_126) + del matmul_32, parameter_126 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(add_38, full_int_array_1) + del add_38 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(layer_norm_25, parameter_125, False, False) + del parameter_125 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_39 = paddle._C_ops.add(matmul_33, parameter_124) + del matmul_33, parameter_124 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(layer_norm_25, parameter_123, False, False) + del parameter_123 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_40 = paddle._C_ops.add(matmul_34, parameter_122) + del matmul_34, parameter_122 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(add_39, full_int_array_1) + del add_39 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(add_40, full_int_array_1) + del add_40 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_5 = paddle._C_ops.scale(transpose_16, full_5, float("0"), True) + del transpose_16 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_35 = paddle._C_ops.matmul(scale_5, transpose_17, False, True) + del scale_5, transpose_17 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_41 = paddle._C_ops.add(matmul_35, unsqueeze_0) + del matmul_35 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_4 = paddle._C_ops.softmax(add_41, -1) + del add_41 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_36 = paddle._C_ops.matmul(dropout_26, transpose_18, False, False) + del dropout_26, transpose_18 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_36, [0, 2, 1, 3]) + del matmul_36 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_2) + del transpose_19 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_37 = paddle._C_ops.matmul(reshape_19, parameter_121, False, False) + del parameter_121, reshape_19 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_42 = paddle._C_ops.add(matmul_37, parameter_120) + del matmul_37, parameter_120 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_42, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_42 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_43 = paddle._C_ops.add(layer_norm_25, dropout_28) + del dropout_28, layer_norm_25 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_28, layer_norm_29, layer_norm_30 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_43, parameter_115, parameter_114, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_43, parameter_114, parameter_115 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_38 = paddle._C_ops.matmul(layer_norm_28, parameter_119, False, False) + del parameter_119 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_44 = paddle._C_ops.add(matmul_38, parameter_118) + del matmul_38, parameter_118 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_4 = paddle._C_ops.gelu(add_44, False) + del add_44 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_39 = paddle._C_ops.matmul(gelu_4, parameter_117, False, False) + del gelu_4, parameter_117 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_45 = paddle._C_ops.add(matmul_39, parameter_116) + del matmul_39, parameter_116 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_45, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_45 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_46 = paddle._C_ops.add(layer_norm_28, dropout_30) + del dropout_30, layer_norm_28 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_31, layer_norm_32, layer_norm_33 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_46, parameter_113, parameter_112, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_46, parameter_112, parameter_113 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_40 = paddle._C_ops.matmul(layer_norm_31, parameter_111, False, False) + del parameter_111 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_47 = paddle._C_ops.add(matmul_40, parameter_110) + del matmul_40, parameter_110 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(add_47, full_int_array_1) + del add_47 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(layer_norm_31, parameter_109, False, False) + del parameter_109 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_48 = paddle._C_ops.add(matmul_41, parameter_108) + del matmul_41, parameter_108 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(layer_norm_31, parameter_107, False, False) + del parameter_107 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_49 = paddle._C_ops.add(matmul_42, parameter_106) + del matmul_42, parameter_106 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(add_48, full_int_array_1) + del add_48 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(add_49, full_int_array_1) + del add_49 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_6 = paddle._C_ops.scale(transpose_20, full_5, float("0"), True) + del transpose_20 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_43 = paddle._C_ops.matmul(scale_6, transpose_21, False, True) + del scale_6, transpose_21 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_50 = paddle._C_ops.add(matmul_43, unsqueeze_0) + del matmul_43 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_5 = paddle._C_ops.softmax(add_50, -1) + del add_50 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_44 = paddle._C_ops.matmul(dropout_32, transpose_22, False, False) + del dropout_32, transpose_22 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_44, [0, 2, 1, 3]) + del matmul_44 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_2) + del transpose_23 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_45 = paddle._C_ops.matmul(reshape_23, parameter_105, False, False) + del parameter_105, reshape_23 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_51 = paddle._C_ops.add(matmul_45, parameter_104) + del matmul_45, parameter_104 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_51, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_51 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_52 = paddle._C_ops.add(layer_norm_31, dropout_34) + del dropout_34, layer_norm_31 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_34, layer_norm_35, layer_norm_36 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_52, parameter_99, parameter_98, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_52, parameter_98, parameter_99 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_46 = paddle._C_ops.matmul(layer_norm_34, parameter_103, False, False) + del parameter_103 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_53 = paddle._C_ops.add(matmul_46, parameter_102) + del matmul_46, parameter_102 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_5 = paddle._C_ops.gelu(add_53, False) + del add_53 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_47 = paddle._C_ops.matmul(gelu_5, parameter_101, False, False) + del gelu_5, parameter_101 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_54 = paddle._C_ops.add(matmul_47, parameter_100) + del matmul_47, parameter_100 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_54, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_54 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_55 = paddle._C_ops.add(layer_norm_34, dropout_36) + del dropout_36, layer_norm_34 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_37, layer_norm_38, layer_norm_39 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_55, parameter_97, parameter_96, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_55, parameter_96, parameter_97 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_48 = paddle._C_ops.matmul(layer_norm_37, parameter_95, False, False) + del parameter_95 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_56 = paddle._C_ops.add(matmul_48, parameter_94) + del matmul_48, parameter_94 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(add_56, full_int_array_1) + del add_56 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(layer_norm_37, parameter_93, False, False) + del parameter_93 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_57 = paddle._C_ops.add(matmul_49, parameter_92) + del matmul_49, parameter_92 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(layer_norm_37, parameter_91, False, False) + del parameter_91 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_58 = paddle._C_ops.add(matmul_50, parameter_90) + del matmul_50, parameter_90 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(add_57, full_int_array_1) + del add_57 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(add_58, full_int_array_1) + del add_58 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_7 = paddle._C_ops.scale(transpose_24, full_5, float("0"), True) + del transpose_24 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_51 = paddle._C_ops.matmul(scale_7, transpose_25, False, True) + del scale_7, transpose_25 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_59 = paddle._C_ops.add(matmul_51, unsqueeze_0) + del matmul_51 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_6 = paddle._C_ops.softmax(add_59, -1) + del add_59 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_52 = paddle._C_ops.matmul(dropout_38, transpose_26, False, False) + del dropout_38, transpose_26 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_52, [0, 2, 1, 3]) + del matmul_52 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_2) + del transpose_27 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_53 = paddle._C_ops.matmul(reshape_27, parameter_89, False, False) + del parameter_89, reshape_27 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_60 = paddle._C_ops.add(matmul_53, parameter_88) + del matmul_53, parameter_88 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_60, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_60 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_61 = paddle._C_ops.add(layer_norm_37, dropout_40) + del dropout_40, layer_norm_37 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_40, layer_norm_41, layer_norm_42 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_61, parameter_83, parameter_82, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_61, parameter_82, parameter_83 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_54 = paddle._C_ops.matmul(layer_norm_40, parameter_87, False, False) + del parameter_87 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_62 = paddle._C_ops.add(matmul_54, parameter_86) + del matmul_54, parameter_86 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_6 = paddle._C_ops.gelu(add_62, False) + del add_62 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_55 = paddle._C_ops.matmul(gelu_6, parameter_85, False, False) + del gelu_6, parameter_85 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_63 = paddle._C_ops.add(matmul_55, parameter_84) + del matmul_55, parameter_84 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_63, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_63 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_64 = paddle._C_ops.add(layer_norm_40, dropout_42) + del dropout_42, layer_norm_40 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_43, layer_norm_44, layer_norm_45 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_64, parameter_81, parameter_80, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_64, parameter_80, parameter_81 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_56 = paddle._C_ops.matmul(layer_norm_43, parameter_79, False, False) + del parameter_79 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_65 = paddle._C_ops.add(matmul_56, parameter_78) + del matmul_56, parameter_78 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(add_65, full_int_array_1) + del add_65 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(layer_norm_43, parameter_77, False, False) + del parameter_77 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_66 = paddle._C_ops.add(matmul_57, parameter_76) + del matmul_57, parameter_76 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_58 = paddle._C_ops.matmul(layer_norm_43, parameter_75, False, False) + del parameter_75 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_67 = paddle._C_ops.add(matmul_58, parameter_74) + del matmul_58, parameter_74 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(add_66, full_int_array_1) + del add_66 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(add_67, full_int_array_1) + del add_67 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_8 = paddle._C_ops.scale(transpose_28, full_5, float("0"), True) + del transpose_28 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_59 = paddle._C_ops.matmul(scale_8, transpose_29, False, True) + del scale_8, transpose_29 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_68 = paddle._C_ops.add(matmul_59, unsqueeze_0) + del matmul_59 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_7 = paddle._C_ops.softmax(add_68, -1) + del add_68 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_60 = paddle._C_ops.matmul(dropout_44, transpose_30, False, False) + del dropout_44, transpose_30 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_60, [0, 2, 1, 3]) + del matmul_60 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_2) + del transpose_31 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_61 = paddle._C_ops.matmul(reshape_31, parameter_73, False, False) + del parameter_73, reshape_31 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_69 = paddle._C_ops.add(matmul_61, parameter_72) + del matmul_61, parameter_72 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_69, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_69 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_70 = paddle._C_ops.add(layer_norm_43, dropout_46) + del dropout_46, layer_norm_43 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_46, layer_norm_47, layer_norm_48 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_70, parameter_67, parameter_66, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_70, parameter_66, parameter_67 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_62 = paddle._C_ops.matmul(layer_norm_46, parameter_71, False, False) + del parameter_71 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_71 = paddle._C_ops.add(matmul_62, parameter_70) + del matmul_62, parameter_70 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_7 = paddle._C_ops.gelu(add_71, False) + del add_71 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_63 = paddle._C_ops.matmul(gelu_7, parameter_69, False, False) + del gelu_7, parameter_69 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_72 = paddle._C_ops.add(matmul_63, parameter_68) + del matmul_63, parameter_68 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_72, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_72 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_73 = paddle._C_ops.add(layer_norm_46, dropout_48) + del dropout_48, layer_norm_46 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_49, layer_norm_50, layer_norm_51 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_73, parameter_65, parameter_64, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_73, parameter_64, parameter_65 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_64 = paddle._C_ops.matmul(layer_norm_49, parameter_63, False, False) + del parameter_63 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_74 = paddle._C_ops.add(matmul_64, parameter_62) + del matmul_64, parameter_62 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(add_74, full_int_array_1) + del add_74 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_65 = paddle._C_ops.matmul(layer_norm_49, parameter_61, False, False) + del parameter_61 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_75 = paddle._C_ops.add(matmul_65, parameter_60) + del matmul_65, parameter_60 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(layer_norm_49, parameter_59, False, False) + del parameter_59 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_76 = paddle._C_ops.add(matmul_66, parameter_58) + del matmul_66, parameter_58 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(add_75, full_int_array_1) + del add_75 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(add_76, full_int_array_1) + del add_76 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_9 = paddle._C_ops.scale(transpose_32, full_5, float("0"), True) + del transpose_32 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_67 = paddle._C_ops.matmul(scale_9, transpose_33, False, True) + del scale_9, transpose_33 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_77 = paddle._C_ops.add(matmul_67, unsqueeze_0) + del matmul_67 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_8 = paddle._C_ops.softmax(add_77, -1) + del add_77 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_68 = paddle._C_ops.matmul(dropout_50, transpose_34, False, False) + del dropout_50, transpose_34 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_68, [0, 2, 1, 3]) + del matmul_68 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_2) + del transpose_35 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_69 = paddle._C_ops.matmul(reshape_35, parameter_57, False, False) + del parameter_57, reshape_35 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_78 = paddle._C_ops.add(matmul_69, parameter_56) + del matmul_69, parameter_56 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_78, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_78 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_79 = paddle._C_ops.add(layer_norm_49, dropout_52) + del dropout_52, layer_norm_49 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_52, layer_norm_53, layer_norm_54 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_79, parameter_51, parameter_50, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_79, parameter_50, parameter_51 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_70 = paddle._C_ops.matmul(layer_norm_52, parameter_55, False, False) + del parameter_55 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_80 = paddle._C_ops.add(matmul_70, parameter_54) + del matmul_70, parameter_54 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_8 = paddle._C_ops.gelu(add_80, False) + del add_80 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_71 = paddle._C_ops.matmul(gelu_8, parameter_53, False, False) + del gelu_8, parameter_53 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_81 = paddle._C_ops.add(matmul_71, parameter_52) + del matmul_71, parameter_52 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_81, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_81 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_82 = paddle._C_ops.add(layer_norm_52, dropout_54) + del dropout_54, layer_norm_52 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_55, layer_norm_56, layer_norm_57 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_82, parameter_49, parameter_48, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_82, parameter_48, parameter_49 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_72 = paddle._C_ops.matmul(layer_norm_55, parameter_47, False, False) + del parameter_47 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_83 = paddle._C_ops.add(matmul_72, parameter_46) + del matmul_72, parameter_46 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(add_83, full_int_array_1) + del add_83 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_73 = paddle._C_ops.matmul(layer_norm_55, parameter_45, False, False) + del parameter_45 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_84 = paddle._C_ops.add(matmul_73, parameter_44) + del matmul_73, parameter_44 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(layer_norm_55, parameter_43, False, False) + del parameter_43 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_85 = paddle._C_ops.add(matmul_74, parameter_42) + del matmul_74, parameter_42 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(add_84, full_int_array_1) + del add_84 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(add_85, full_int_array_1) + del add_85 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_10 = paddle._C_ops.scale(transpose_36, full_5, float("0"), True) + del transpose_36 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_75 = paddle._C_ops.matmul(scale_10, transpose_37, False, True) + del scale_10, transpose_37 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_86 = paddle._C_ops.add(matmul_75, unsqueeze_0) + del matmul_75 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_9 = paddle._C_ops.softmax(add_86, -1) + del add_86 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_76 = paddle._C_ops.matmul(dropout_56, transpose_38, False, False) + del dropout_56, transpose_38 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_76, [0, 2, 1, 3]) + del matmul_76 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_2) + del transpose_39 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_77 = paddle._C_ops.matmul(reshape_39, parameter_41, False, False) + del parameter_41, reshape_39 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_87 = paddle._C_ops.add(matmul_77, parameter_40) + del matmul_77, parameter_40 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_87, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_87 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_88 = paddle._C_ops.add(layer_norm_55, dropout_58) + del dropout_58, layer_norm_55 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_58, layer_norm_59, layer_norm_60 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_88, parameter_35, parameter_34, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_88, parameter_34, parameter_35 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_78 = paddle._C_ops.matmul(layer_norm_58, parameter_39, False, False) + del parameter_39 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_89 = paddle._C_ops.add(matmul_78, parameter_38) + del matmul_78, parameter_38 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_9 = paddle._C_ops.gelu(add_89, False) + del add_89 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_79 = paddle._C_ops.matmul(gelu_9, parameter_37, False, False) + del gelu_9, parameter_37 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_90 = paddle._C_ops.add(matmul_79, parameter_36) + del matmul_79, parameter_36 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_90, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_90 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_91 = paddle._C_ops.add(layer_norm_58, dropout_60) + del dropout_60, layer_norm_58 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_61, layer_norm_62, layer_norm_63 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_91, parameter_33, parameter_32, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_91, parameter_32, parameter_33 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_80 = paddle._C_ops.matmul(layer_norm_61, parameter_31, False, False) + del parameter_31 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_92 = paddle._C_ops.add(matmul_80, parameter_30) + del matmul_80, parameter_30 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(add_92, full_int_array_1) + del add_92 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_81 = paddle._C_ops.matmul(layer_norm_61, parameter_29, False, False) + del parameter_29 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_93 = paddle._C_ops.add(matmul_81, parameter_28) + del matmul_81, parameter_28 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_82 = paddle._C_ops.matmul(layer_norm_61, parameter_27, False, False) + del parameter_27 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_94 = paddle._C_ops.add(matmul_82, parameter_26) + del matmul_82, parameter_26 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(add_93, full_int_array_1) + del add_93 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(add_94, full_int_array_1) + del add_94 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_11 = paddle._C_ops.scale(transpose_40, full_5, float("0"), True) + del transpose_40 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_83 = paddle._C_ops.matmul(scale_11, transpose_41, False, True) + del scale_11, transpose_41 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_95 = paddle._C_ops.add(matmul_83, unsqueeze_0) + del matmul_83 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_10 = paddle._C_ops.softmax(add_95, -1) + del add_95 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_84 = paddle._C_ops.matmul(dropout_62, transpose_42, False, False) + del dropout_62, transpose_42 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_84, [0, 2, 1, 3]) + del matmul_84 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_2) + del transpose_43 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_85 = paddle._C_ops.matmul(reshape_43, parameter_25, False, False) + del parameter_25, reshape_43 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_96 = paddle._C_ops.add(matmul_85, parameter_24) + del matmul_85, parameter_24 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_96, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_96 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_97 = paddle._C_ops.add(layer_norm_61, dropout_64) + del dropout_64, layer_norm_61 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_64, layer_norm_65, layer_norm_66 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_97, parameter_19, parameter_18, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_97, parameter_18, parameter_19 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_86 = paddle._C_ops.matmul(layer_norm_64, parameter_23, False, False) + del parameter_23 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_98 = paddle._C_ops.add(matmul_86, parameter_22) + del matmul_86, parameter_22 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_10 = paddle._C_ops.gelu(add_98, False) + del add_98 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_87 = paddle._C_ops.matmul(gelu_10, parameter_21, False, False) + del gelu_10, parameter_21 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_99 = paddle._C_ops.add(matmul_87, parameter_20) + del matmul_87, parameter_20 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_99, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_99 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_100 = paddle._C_ops.add(layer_norm_64, dropout_66) + del dropout_66, layer_norm_64 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_67, layer_norm_68, layer_norm_69 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_100, parameter_17, parameter_16, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_100, parameter_16, parameter_17 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_88 = paddle._C_ops.matmul(layer_norm_67, parameter_15, False, False) + del parameter_15 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_101 = paddle._C_ops.add(matmul_88, parameter_14) + del matmul_88, parameter_14 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(add_101, full_int_array_1) + del add_101 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(layer_norm_67, parameter_13, False, False) + del parameter_13 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_102 = paddle._C_ops.add(matmul_89, parameter_12) + del matmul_89, parameter_12 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_90 = paddle._C_ops.matmul(layer_norm_67, parameter_11, False, False) + del parameter_11 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_103 = paddle._C_ops.add(matmul_90, parameter_10) + del matmul_90, parameter_10 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(add_102, full_int_array_1) + del add_102 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(add_103, full_int_array_1) + del add_103, full_int_array_1 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.scale: (1x12x11x64xf32) <- (1x12x11x64xf32, 1xf32) + scale_12 = paddle._C_ops.scale(transpose_44, full_5, float("0"), True) + del full_5, transpose_44 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_91 = paddle._C_ops.matmul(scale_12, transpose_45, False, True) + del scale_12, transpose_45 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_104 = paddle._C_ops.add(matmul_91, unsqueeze_0) + del matmul_91, unsqueeze_0 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_11 = paddle._C_ops.softmax(add_104, -1) + del add_104 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_92 = paddle._C_ops.matmul(dropout_68, transpose_46, False, False) + del dropout_68, transpose_46 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_92, [0, 2, 1, 3]) + del matmul_92 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_2) + del full_int_array_2, transpose_47 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_93 = paddle._C_ops.matmul(reshape_47, parameter_9, False, False) + del parameter_9, reshape_47 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_105 = paddle._C_ops.add(matmul_93, parameter_8) + del matmul_93, parameter_8 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_105, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_105 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_106 = paddle._C_ops.add(layer_norm_67, dropout_70) + del dropout_70, layer_norm_67 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_70, layer_norm_71, layer_norm_72 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_106, parameter_3, parameter_2, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_106, parameter_2, parameter_3 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_94 = paddle._C_ops.matmul(layer_norm_70, parameter_7, False, False) + del parameter_7 + + # pd_op.add: (1x11x3072xf32) <- (1x11x3072xf32, 3072xf32) + add_107 = paddle._C_ops.add(matmul_94, parameter_6) + del matmul_94, parameter_6 + + # pd_op.gelu: (1x11x3072xf32) <- (1x11x3072xf32) + gelu_11 = paddle._C_ops.gelu(add_107, False) + del add_107 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_95 = paddle._C_ops.matmul(gelu_11, parameter_5, False, False) + del gelu_11, parameter_5 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 768xf32) + add_108 = paddle._C_ops.add(matmul_95, parameter_4) + del matmul_95, parameter_4 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + add_108, None, full_4, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del add_108, full_4 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_109 = paddle._C_ops.add(layer_norm_70, dropout_72) + del dropout_72, layer_norm_70 + + # pd_op.layer_norm: (1x11x768xf32, 1x11xf32, 1x11xf32) <- (1x11x768xf32, 768xf32, 768xf32) + layer_norm_0, layer_norm_73, layer_norm_74 = (lambda x, f: f(x))( + paddle._C_ops.layer_norm( + add_109, parameter_1, parameter_0, float("1e-05"), 2 + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None, None), + ) + del add_109, parameter_0, parameter_1 + + return layer_norm_0 diff --git a/paddle_samples/PaddleNLP/ernie-health-chinese/weight_meta.py b/paddle_samples/PaddleNLP/ernie-health-chinese/weight_meta.py new file mode 100644 index 000000000..6c9173dca --- /dev/null +++ b/paddle_samples/PaddleNLP/ernie-health-chinese/weight_meta.py @@ -0,0 +1,1752 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.102741") + max_val = float("0.108021") + mean = float("7.82573e-06") + std = float("0.0200099") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.101641") + max_val = float("0.0998405") + mean = float("-6.74568e-06") + std = float("0.020007") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.105647") + max_val = float("0.0956017") + mean = float("-2.04315e-05") + std = float("0.0199501") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0930537") + max_val = float("0.100718") + mean = float("-6.52971e-05") + std = float("0.0200187") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0980123") + max_val = float("0.0970038") + mean = float("-8.83202e-06") + std = float("0.0200107") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0982096") + max_val = float("0.0893774") + mean = float("-1.09361e-05") + std = float("0.0199883") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.104239") + max_val = float("0.107141") + mean = float("-3.87967e-07") + std = float("0.0199939") + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0912149") + max_val = float("0.101363") + mean = float("9.31909e-06") + std = float("0.0199994") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.095212") + max_val = float("0.0996964") + mean = float("3.89532e-05") + std = float("0.0199739") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0917693") + max_val = float("0.0931138") + mean = float("-2.58082e-05") + std = float("0.0199951") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.1007") + max_val = float("0.089315") + mean = float("9.11158e-06") + std = float("0.020017") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0909005") + max_val = float("0.0994712") + mean = float("3.21886e-05") + std = float("0.0200158") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.094897") + max_val = float("0.100287") + mean = float("6.08814e-06") + std = float("0.0200013") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0973849") + max_val = float("0.096274") + mean = float("-6.94582e-06") + std = float("0.0200046") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0908507") + max_val = float("0.0912806") + mean = float("2.39093e-05") + std = float("0.0199989") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0890082") + max_val = float("0.0929461") + mean = float("1.4216e-05") + std = float("0.0200094") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0885005") + max_val = float("0.0881785") + mean = float("2.05705e-05") + std = float("0.0199874") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0958292") + max_val = float("0.09569") + mean = float("1.2428e-05") + std = float("0.0199827") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.100346") + max_val = float("0.0965611") + mean = float("2.152e-05") + std = float("0.0200234") + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0963534") + max_val = float("0.105343") + mean = float("-2.26338e-05") + std = float("0.0200023") + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0959521") + max_val = float("0.0879162") + mean = float("3.19995e-05") + std = float("0.0199946") + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0984922") + max_val = float("0.0893779") + mean = float("-1.70907e-05") + std = float("0.019997") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0941832") + max_val = float("0.089415") + mean = float("-4.35686e-05") + std = float("0.0199825") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.101551") + max_val = float("0.0999757") + mean = float("-4.32624e-06") + std = float("0.0200085") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0972448") + max_val = float("0.105746") + mean = float("2.20299e-06") + std = float("0.0199923") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0964745") + max_val = float("0.0961339") + mean = float("1.32737e-05") + std = float("0.0200004") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0870778") + max_val = float("0.097045") + mean = float("4.86557e-06") + std = float("0.0200064") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0966711") + max_val = float("0.0870196") + mean = float("-1.63356e-05") + std = float("0.020023") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0904189") + max_val = float("0.0922386") + mean = float("-9.62666e-06") + std = float("0.0200022") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100244") + max_val = float("0.103521") + mean = float("-1.05173e-05") + std = float("0.0199965") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0965895") + max_val = float("0.101087") + mean = float("-7.52185e-06") + std = float("0.0199928") + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.102316") + max_val = float("0.0962738") + mean = float("5.55572e-06") + std = float("0.0200026") + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0948441") + max_val = float("0.099547") + mean = float("-1.46343e-05") + std = float("0.0199746") + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0935317") + max_val = float("0.0921286") + mean = float("-3.39984e-05") + std = float("0.0199904") + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0971379") + max_val = float("0.0934431") + mean = float("5.85866e-05") + std = float("0.0199898") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0957619") + max_val = float("0.0966452") + mean = float("-1.87755e-05") + std = float("0.0199563") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0982416") + max_val = float("0.1036") + mean = float("-2.65179e-06") + std = float("0.0200006") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.108084") + max_val = float("0.0972697") + mean = float("-5.14484e-06") + std = float("0.0199914") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0868353") + max_val = float("0.0909208") + mean = float("-1.8347e-06") + std = float("0.0199853") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0995418") + max_val = float("0.0944594") + mean = float("5.72209e-05") + std = float("0.019973") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0925599") + max_val = float("0.0976668") + mean = float("-1.27959e-05") + std = float("0.0199656") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0888991") + max_val = float("0.0958887") + mean = float("-3.29339e-05") + std = float("0.0199961") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0972573") + max_val = float("0.10228") + mean = float("7.3421e-06") + std = float("0.019992") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0946717") + max_val = float("0.101567") + mean = float("8.41063e-06") + std = float("0.0200056") + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0922118") + max_val = float("0.102564") + mean = float("-5.85984e-06") + std = float("0.01998") + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0963435") + max_val = float("0.10249") + mean = float("-4.04958e-05") + std = float("0.019978") + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0920979") + max_val = float("0.103924") + mean = float("-2.64696e-05") + std = float("0.0200089") + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0873958") + max_val = float("0.0951119") + mean = float("6.50542e-07") + std = float("0.0200027") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0980613") + max_val = float("0.105667") + mean = float("5.53548e-07") + std = float("0.0200045") + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0987146") + max_val = float("0.0986359") + mean = float("4.36449e-06") + std = float("0.0200014") + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0920514") + max_val = float("0.0889926") + mean = float("5.01891e-05") + std = float("0.0200083") + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0998734") + max_val = float("0.0949259") + mean = float("-1.96167e-05") + std = float("0.0199563") + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.102744") + max_val = float("0.0984038") + mean = float("2.25635e-05") + std = float("0.020019") + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.100534") + max_val = float("0.098306") + mean = float("-9.40017e-06") + std = float("0.0200185") + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.103373") + max_val = float("0.107834") + mean = float("7.58503e-06") + std = float("0.0199933") + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0983029") + max_val = float("0.0970882") + mean = float("-9.7085e-06") + std = float("0.0200067") + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0950486") + max_val = float("0.111857") + mean = float("1.06808e-05") + std = float("0.0200312") + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0952129") + max_val = float("0.0977506") + mean = float("-7.84295e-06") + std = float("0.020005") + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0992048") + max_val = float("0.109224") + mean = float("1.01304e-05") + std = float("0.020035") + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.092863") + max_val = float("0.097509") + mean = float("-6.82756e-06") + std = float("0.0199901") + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.102447") + max_val = float("0.099937") + mean = float("-4.25578e-06") + std = float("0.0199881") + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0976201") + max_val = float("0.0964299") + mean = float("-2.44031e-06") + std = float("0.0199825") + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0887422") + max_val = float("0.0944412") + mean = float("7.0904e-05") + std = float("0.0200133") + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0893754") + max_val = float("0.101748") + mean = float("-1.1841e-05") + std = float("0.0200159") + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.107605") + max_val = float("0.102168") + mean = float("-1.34912e-05") + std = float("0.0200075") + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0956196") + max_val = float("0.0969442") + mean = float("2.16026e-05") + std = float("0.0200288") + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0936633") + max_val = float("0.111719") + mean = float("6.49177e-06") + std = float("0.0199822") + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [3072] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.100067") + max_val = float("0.0956011") + mean = float("-3.41217e-06") + std = float("0.0200104") + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.093104") + max_val = float("0.0893299") + mean = float("1.18698e-05") + std = float("0.0199679") + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0898396") + max_val = float("0.0958932") + mean = float("2.4909e-05") + std = float("0.0199433") + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0902695") + max_val = float("0.0919827") + mean = float("6.93048e-06") + std = float("0.0199703") + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0913218") + max_val = float("0.0921588") + mean = float("8.66777e-06") + std = float("0.0200154") + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [768] + dtype = "float32" + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [768] + dtype = "float32" + min_val = float("1.0") + max_val = float("1.0") + mean = float("1.0") + data = None + + +class Program_weight_tensor_parameter_194: + name = "parameter_194" + shape = [2, 768] + dtype = "float32" + min_val = float("-0.0650292") + max_val = float("0.0591865") + mean = float("-3.44731e-05") + std = float("0.0201622") + data = None + + +class Program_weight_tensor_parameter_195: + name = "parameter_195" + shape = [512, 768] + dtype = "float32" + min_val = float("-0.096647") + max_val = float("0.0909206") + mean = float("1.09519e-05") + std = float("0.0200033") + data = None + + +class Program_weight_tensor_parameter_196: + name = "parameter_196" + shape = [22608, 768] + dtype = "float32" + min_val = float("-0.103867") + max_val = float("0.101789") + mean = float("6.00804e-07") + std = float("0.0200026") + data = None diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/graph_hash.txt b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/graph_hash.txt new file mode 100644 index 000000000..32c69ccc2 --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/graph_hash.txt @@ -0,0 +1 @@ +c00a88e54e10901ed03295d9c3f3738863de084bd8f6a57e5c2f39c322d57121 \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/graph_net.json b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/graph_net.json new file mode 100644 index 000000000..99729c9fb --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "roformer_v2_chinese_char_base", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/input_meta.py b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/input_meta.py new file mode 100644 index 000000000..b6791c33c --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/input_meta.py @@ -0,0 +1,12 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 11] + dtype = "int64" + data = [101, 3223, 6500, 421, 4179, 4331, 2008, 7263, 3055, 106, 102] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 11] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/model.py b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/model.py new file mode 100644 index 000000000..9d4933910 --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/model.py @@ -0,0 +1,3280 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + data_0, + data_1, + ): + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("0"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.equal: (1x11xb) <- (1x11xi64, xi64) + equal_0 = paddle._C_ops.equal(data_0, full_0) + del full_0 + + # pd_op.cast: (1x11xf32) <- (1x11xb) + cast_0 = paddle._C_ops.cast(equal_0, paddle.float32) + del equal_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x11xf32) <- (1x11xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_1, float("0"), True) + del cast_0, full_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_0 = [1, 2] + + # pd_op.unsqueeze: (1x1x1x11xf32) <- (1x11xf32, 2xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del full_int_array_0, scale_0 + + # pd_op.embedding: (1x11x768xf32) <- (1x11xi64, 12000x768xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_97, -1, False) + del data_0, parameter_97 + + # pd_op.embedding: (1x11x768xf32) <- (1x11xi64, 2x768xf32) + embedding_1 = paddle._C_ops.embedding(data_1, parameter_96, -1, False) + del data_1, parameter_96 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_0 = paddle._C_ops.square(add_0) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [-1] + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_0 = paddle._C_ops.mean(square_0, full_int_array_1, True) + del square_0 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_1 = paddle._C_ops.scale(mean_0, full_2, float("1e-12"), True) + del mean_0 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_0 = paddle._C_ops.sqrt(scale_1) + del scale_1 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_1 = paddle._C_ops.divide(add_0, sqrt_0) + del add_0, sqrt_0 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + divide_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del divide_1 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_95, False, False) + del parameter_95 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_1 = paddle._C_ops.matmul(dropout_0, parameter_94, False, False) + del parameter_94 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_2 = paddle._C_ops.matmul(dropout_0, parameter_93, False, False) + del parameter_93 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_2 = [0, 0, 12, 64] + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(matmul_0, full_int_array_2) + del matmul_0 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(matmul_1, full_int_array_2) + del matmul_1 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(matmul_2, full_int_array_2) + del matmul_2 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_4 = [11] + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + parameter_23, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_23 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + parameter_22, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_22 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_5 = [2147483647] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_6 = [2] + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + transpose_0, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_7 = [1] + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + transpose_0, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_0 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_0 = paddle._C_ops.multiply(strided_slice_0, slice_1) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_1 = paddle._C_ops.multiply(strided_slice_1, slice_0) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_0 = paddle._C_ops.subtract(multiply_0, multiply_1) + del multiply_0, multiply_1 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_2 = paddle._C_ops.multiply(strided_slice_0, slice_0) + del strided_slice_0 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_3 = paddle._C_ops.multiply(strided_slice_1, slice_1) + del strided_slice_1 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_1 = paddle._C_ops.add(multiply_2, multiply_3) + del multiply_2, multiply_3 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_0 = [subtract_0, add_1] + del add_1, subtract_0 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_0 = paddle._C_ops.stack(combine_0, -1) + del combine_0 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_0 = paddle._C_ops.flatten(stack_0, 3, 4) + del stack_0 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + transpose_1, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + transpose_1, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_1 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_4 = paddle._C_ops.multiply(strided_slice_2, slice_1) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_5 = paddle._C_ops.multiply(strided_slice_3, slice_0) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_1 = paddle._C_ops.subtract(multiply_4, multiply_5) + del multiply_4, multiply_5 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_6 = paddle._C_ops.multiply(strided_slice_2, slice_0) + del slice_0, strided_slice_2 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_7 = paddle._C_ops.multiply(strided_slice_3, slice_1) + del slice_1, strided_slice_3 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_2 = paddle._C_ops.add(multiply_6, multiply_7) + del multiply_6, multiply_7 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_1 = [subtract_1, add_2] + del add_2, subtract_1 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_1 = paddle._C_ops.stack(combine_1, -1) + del combine_1 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_1 = paddle._C_ops.flatten(stack_1, 3, 4) + del stack_1 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_3 = paddle._C_ops.matmul(flatten_0, flatten_1, False, True) + del flatten_0, flatten_1 + + # pd_op.full: (1xf32) <- () + full_4 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_2 = paddle._C_ops.scale(matmul_3, full_4, float("0"), True) + del matmul_3 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_3 = paddle._C_ops.add(scale_2, unsqueeze_0) + del scale_2 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_0 = paddle._C_ops.softmax(add_3, -1) + del add_3 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_4 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_4, [0, 2, 1, 3]) + del matmul_4 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_8 = [0, 0, 768] + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_8) + del transpose_3 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_5 = paddle._C_ops.matmul(reshape_3, parameter_92, False, False) + del parameter_92, reshape_3 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_5 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_4 = paddle._C_ops.add(dropout_0, dropout_4) + del dropout_0, dropout_4 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_1 = paddle._C_ops.square(add_4) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_1 = paddle._C_ops.mean(square_1, full_int_array_1, True) + del square_1 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_3 = paddle._C_ops.scale(mean_1, full_2, float("1e-12"), True) + del mean_1 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_1 = paddle._C_ops.sqrt(scale_3) + del scale_3 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_2 = paddle._C_ops.divide(add_4, sqrt_1) + del add_4, sqrt_1 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_6 = paddle._C_ops.matmul(divide_2, parameter_91, False, False) + del parameter_91 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_0 = paddle._C_ops.relu(matmul_6) + del matmul_6 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_7 = paddle._C_ops.matmul(relu_0, parameter_90, False, False) + del parameter_90, relu_0 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_7 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_5 = paddle._C_ops.add(divide_2, dropout_6) + del divide_2, dropout_6 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_2 = paddle._C_ops.square(add_5) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_2 = paddle._C_ops.mean(square_2, full_int_array_1, True) + del square_2 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_4 = paddle._C_ops.scale(mean_2, full_2, float("1e-12"), True) + del mean_2 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_2 = paddle._C_ops.sqrt(scale_4) + del scale_4 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_3 = paddle._C_ops.divide(add_5, sqrt_2) + del add_5, sqrt_2 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_8 = paddle._C_ops.matmul(divide_3, parameter_89, False, False) + del parameter_89 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_9 = paddle._C_ops.matmul(divide_3, parameter_88, False, False) + del parameter_88 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_10 = paddle._C_ops.matmul(divide_3, parameter_87, False, False) + del parameter_87 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(matmul_8, full_int_array_2) + del matmul_8 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(matmul_9, full_int_array_2) + del matmul_9 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(matmul_10, full_int_array_2) + del matmul_10 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + parameter_21, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_21 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + parameter_20, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_20 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + transpose_4, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + transpose_4, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_4 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_8 = paddle._C_ops.multiply(strided_slice_4, slice_3) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_9 = paddle._C_ops.multiply(strided_slice_5, slice_2) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_2 = paddle._C_ops.subtract(multiply_8, multiply_9) + del multiply_8, multiply_9 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_10 = paddle._C_ops.multiply(strided_slice_4, slice_2) + del strided_slice_4 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_11 = paddle._C_ops.multiply(strided_slice_5, slice_3) + del strided_slice_5 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_6 = paddle._C_ops.add(multiply_10, multiply_11) + del multiply_10, multiply_11 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_2 = [subtract_2, add_6] + del add_6, subtract_2 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_2 = paddle._C_ops.stack(combine_2, -1) + del combine_2 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_2 = paddle._C_ops.flatten(stack_2, 3, 4) + del stack_2 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + transpose_5, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + transpose_5, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_5 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_12 = paddle._C_ops.multiply(strided_slice_6, slice_3) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_13 = paddle._C_ops.multiply(strided_slice_7, slice_2) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_3 = paddle._C_ops.subtract(multiply_12, multiply_13) + del multiply_12, multiply_13 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_14 = paddle._C_ops.multiply(strided_slice_6, slice_2) + del slice_2, strided_slice_6 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_15 = paddle._C_ops.multiply(strided_slice_7, slice_3) + del slice_3, strided_slice_7 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_7 = paddle._C_ops.add(multiply_14, multiply_15) + del multiply_14, multiply_15 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_3 = [subtract_3, add_7] + del add_7, subtract_3 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_3 = paddle._C_ops.stack(combine_3, -1) + del combine_3 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_3 = paddle._C_ops.flatten(stack_3, 3, 4) + del stack_3 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_11 = paddle._C_ops.matmul(flatten_2, flatten_3, False, True) + del flatten_2, flatten_3 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_5 = paddle._C_ops.scale(matmul_11, full_4, float("0"), True) + del matmul_11 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_8 = paddle._C_ops.add(scale_5, unsqueeze_0) + del scale_5 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_1 = paddle._C_ops.softmax(add_8, -1) + del add_8 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_12 = paddle._C_ops.matmul(dropout_8, transpose_6, False, False) + del dropout_8, transpose_6 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_12, [0, 2, 1, 3]) + del matmul_12 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_8) + del transpose_7 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_13 = paddle._C_ops.matmul(reshape_7, parameter_86, False, False) + del parameter_86, reshape_7 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_13, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_13 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_9 = paddle._C_ops.add(divide_3, dropout_10) + del divide_3, dropout_10 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_3 = paddle._C_ops.square(add_9) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_3 = paddle._C_ops.mean(square_3, full_int_array_1, True) + del square_3 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_6 = paddle._C_ops.scale(mean_3, full_2, float("1e-12"), True) + del mean_3 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_3 = paddle._C_ops.sqrt(scale_6) + del scale_6 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_4 = paddle._C_ops.divide(add_9, sqrt_3) + del add_9, sqrt_3 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_14 = paddle._C_ops.matmul(divide_4, parameter_85, False, False) + del parameter_85 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_1 = paddle._C_ops.relu(matmul_14) + del matmul_14 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_15 = paddle._C_ops.matmul(relu_1, parameter_84, False, False) + del parameter_84, relu_1 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_15, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_15 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_10 = paddle._C_ops.add(divide_4, dropout_12) + del divide_4, dropout_12 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_4 = paddle._C_ops.square(add_10) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_4 = paddle._C_ops.mean(square_4, full_int_array_1, True) + del square_4 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_7 = paddle._C_ops.scale(mean_4, full_2, float("1e-12"), True) + del mean_4 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_4 = paddle._C_ops.sqrt(scale_7) + del scale_7 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_5 = paddle._C_ops.divide(add_10, sqrt_4) + del add_10, sqrt_4 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_16 = paddle._C_ops.matmul(divide_5, parameter_83, False, False) + del parameter_83 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_17 = paddle._C_ops.matmul(divide_5, parameter_82, False, False) + del parameter_82 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_18 = paddle._C_ops.matmul(divide_5, parameter_81, False, False) + del parameter_81 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(matmul_16, full_int_array_2) + del matmul_16 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(matmul_17, full_int_array_2) + del matmul_17 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(matmul_18, full_int_array_2) + del matmul_18 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + parameter_19, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_19 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + parameter_18, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_18 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + transpose_8, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + transpose_8, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_8 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_16 = paddle._C_ops.multiply(strided_slice_8, slice_5) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_17 = paddle._C_ops.multiply(strided_slice_9, slice_4) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_4 = paddle._C_ops.subtract(multiply_16, multiply_17) + del multiply_16, multiply_17 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_18 = paddle._C_ops.multiply(strided_slice_8, slice_4) + del strided_slice_8 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_19 = paddle._C_ops.multiply(strided_slice_9, slice_5) + del strided_slice_9 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_11 = paddle._C_ops.add(multiply_18, multiply_19) + del multiply_18, multiply_19 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_4 = [subtract_4, add_11] + del add_11, subtract_4 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_4 = paddle._C_ops.stack(combine_4, -1) + del combine_4 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_4 = paddle._C_ops.flatten(stack_4, 3, 4) + del stack_4 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + transpose_9, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + transpose_9, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_9 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_20 = paddle._C_ops.multiply(strided_slice_10, slice_5) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_21 = paddle._C_ops.multiply(strided_slice_11, slice_4) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_5 = paddle._C_ops.subtract(multiply_20, multiply_21) + del multiply_20, multiply_21 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_22 = paddle._C_ops.multiply(strided_slice_10, slice_4) + del slice_4, strided_slice_10 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_23 = paddle._C_ops.multiply(strided_slice_11, slice_5) + del slice_5, strided_slice_11 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_12 = paddle._C_ops.add(multiply_22, multiply_23) + del multiply_22, multiply_23 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_5 = [subtract_5, add_12] + del add_12, subtract_5 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_5 = paddle._C_ops.stack(combine_5, -1) + del combine_5 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_5 = paddle._C_ops.flatten(stack_5, 3, 4) + del stack_5 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_19 = paddle._C_ops.matmul(flatten_4, flatten_5, False, True) + del flatten_4, flatten_5 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_8 = paddle._C_ops.scale(matmul_19, full_4, float("0"), True) + del matmul_19 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_13 = paddle._C_ops.add(scale_8, unsqueeze_0) + del scale_8 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_2 = paddle._C_ops.softmax(add_13, -1) + del add_13 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_20 = paddle._C_ops.matmul(dropout_14, transpose_10, False, False) + del dropout_14, transpose_10 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_20, [0, 2, 1, 3]) + del matmul_20 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_8) + del transpose_11 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_21 = paddle._C_ops.matmul(reshape_11, parameter_80, False, False) + del parameter_80, reshape_11 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_21, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_21 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_14 = paddle._C_ops.add(divide_5, dropout_16) + del divide_5, dropout_16 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_5 = paddle._C_ops.square(add_14) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_5 = paddle._C_ops.mean(square_5, full_int_array_1, True) + del square_5 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_9 = paddle._C_ops.scale(mean_5, full_2, float("1e-12"), True) + del mean_5 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_5 = paddle._C_ops.sqrt(scale_9) + del scale_9 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_6 = paddle._C_ops.divide(add_14, sqrt_5) + del add_14, sqrt_5 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_22 = paddle._C_ops.matmul(divide_6, parameter_79, False, False) + del parameter_79 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_2 = paddle._C_ops.relu(matmul_22) + del matmul_22 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_23 = paddle._C_ops.matmul(relu_2, parameter_78, False, False) + del parameter_78, relu_2 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_23, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_23 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_15 = paddle._C_ops.add(divide_6, dropout_18) + del divide_6, dropout_18 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_6 = paddle._C_ops.square(add_15) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_6 = paddle._C_ops.mean(square_6, full_int_array_1, True) + del square_6 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_10 = paddle._C_ops.scale(mean_6, full_2, float("1e-12"), True) + del mean_6 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_6 = paddle._C_ops.sqrt(scale_10) + del scale_10 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_7 = paddle._C_ops.divide(add_15, sqrt_6) + del add_15, sqrt_6 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_24 = paddle._C_ops.matmul(divide_7, parameter_77, False, False) + del parameter_77 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_25 = paddle._C_ops.matmul(divide_7, parameter_76, False, False) + del parameter_76 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_26 = paddle._C_ops.matmul(divide_7, parameter_75, False, False) + del parameter_75 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(matmul_24, full_int_array_2) + del matmul_24 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(matmul_25, full_int_array_2) + del matmul_25 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(matmul_26, full_int_array_2) + del matmul_26 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + parameter_17, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_17 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + parameter_16, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_16 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_12 = paddle._C_ops.strided_slice( + transpose_12, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_13 = paddle._C_ops.strided_slice( + transpose_12, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_12 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_24 = paddle._C_ops.multiply(strided_slice_12, slice_7) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_25 = paddle._C_ops.multiply(strided_slice_13, slice_6) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_6 = paddle._C_ops.subtract(multiply_24, multiply_25) + del multiply_24, multiply_25 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_26 = paddle._C_ops.multiply(strided_slice_12, slice_6) + del strided_slice_12 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_27 = paddle._C_ops.multiply(strided_slice_13, slice_7) + del strided_slice_13 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_16 = paddle._C_ops.add(multiply_26, multiply_27) + del multiply_26, multiply_27 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_6 = [subtract_6, add_16] + del add_16, subtract_6 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_6 = paddle._C_ops.stack(combine_6, -1) + del combine_6 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_6 = paddle._C_ops.flatten(stack_6, 3, 4) + del stack_6 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_14 = paddle._C_ops.strided_slice( + transpose_13, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_15 = paddle._C_ops.strided_slice( + transpose_13, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_13 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_28 = paddle._C_ops.multiply(strided_slice_14, slice_7) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_29 = paddle._C_ops.multiply(strided_slice_15, slice_6) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_7 = paddle._C_ops.subtract(multiply_28, multiply_29) + del multiply_28, multiply_29 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_30 = paddle._C_ops.multiply(strided_slice_14, slice_6) + del slice_6, strided_slice_14 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_31 = paddle._C_ops.multiply(strided_slice_15, slice_7) + del slice_7, strided_slice_15 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_17 = paddle._C_ops.add(multiply_30, multiply_31) + del multiply_30, multiply_31 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_7 = [subtract_7, add_17] + del add_17, subtract_7 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_7 = paddle._C_ops.stack(combine_7, -1) + del combine_7 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_7 = paddle._C_ops.flatten(stack_7, 3, 4) + del stack_7 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_27 = paddle._C_ops.matmul(flatten_6, flatten_7, False, True) + del flatten_6, flatten_7 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_11 = paddle._C_ops.scale(matmul_27, full_4, float("0"), True) + del matmul_27 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_18 = paddle._C_ops.add(scale_11, unsqueeze_0) + del scale_11 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_3 = paddle._C_ops.softmax(add_18, -1) + del add_18 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_28 = paddle._C_ops.matmul(dropout_20, transpose_14, False, False) + del dropout_20, transpose_14 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_8) + del transpose_15 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_29 = paddle._C_ops.matmul(reshape_15, parameter_74, False, False) + del parameter_74, reshape_15 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_29, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_29 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_19 = paddle._C_ops.add(divide_7, dropout_22) + del divide_7, dropout_22 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_7 = paddle._C_ops.square(add_19) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_7 = paddle._C_ops.mean(square_7, full_int_array_1, True) + del square_7 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_12 = paddle._C_ops.scale(mean_7, full_2, float("1e-12"), True) + del mean_7 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_7 = paddle._C_ops.sqrt(scale_12) + del scale_12 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_8 = paddle._C_ops.divide(add_19, sqrt_7) + del add_19, sqrt_7 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_30 = paddle._C_ops.matmul(divide_8, parameter_73, False, False) + del parameter_73 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_3 = paddle._C_ops.relu(matmul_30) + del matmul_30 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_31 = paddle._C_ops.matmul(relu_3, parameter_72, False, False) + del parameter_72, relu_3 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_31, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_31 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_20 = paddle._C_ops.add(divide_8, dropout_24) + del divide_8, dropout_24 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_8 = paddle._C_ops.square(add_20) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_8 = paddle._C_ops.mean(square_8, full_int_array_1, True) + del square_8 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_13 = paddle._C_ops.scale(mean_8, full_2, float("1e-12"), True) + del mean_8 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_8 = paddle._C_ops.sqrt(scale_13) + del scale_13 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_9 = paddle._C_ops.divide(add_20, sqrt_8) + del add_20, sqrt_8 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_32 = paddle._C_ops.matmul(divide_9, parameter_71, False, False) + del parameter_71 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_33 = paddle._C_ops.matmul(divide_9, parameter_70, False, False) + del parameter_70 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_34 = paddle._C_ops.matmul(divide_9, parameter_69, False, False) + del parameter_69 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(matmul_32, full_int_array_2) + del matmul_32 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(matmul_33, full_int_array_2) + del matmul_33 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(matmul_34, full_int_array_2) + del matmul_34 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + parameter_15, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_15 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + parameter_14, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_14 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_16 = paddle._C_ops.strided_slice( + transpose_16, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_17 = paddle._C_ops.strided_slice( + transpose_16, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_16 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_32 = paddle._C_ops.multiply(strided_slice_16, slice_9) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_33 = paddle._C_ops.multiply(strided_slice_17, slice_8) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_8 = paddle._C_ops.subtract(multiply_32, multiply_33) + del multiply_32, multiply_33 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_34 = paddle._C_ops.multiply(strided_slice_16, slice_8) + del strided_slice_16 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_35 = paddle._C_ops.multiply(strided_slice_17, slice_9) + del strided_slice_17 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_21 = paddle._C_ops.add(multiply_34, multiply_35) + del multiply_34, multiply_35 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_8 = [subtract_8, add_21] + del add_21, subtract_8 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_8 = paddle._C_ops.stack(combine_8, -1) + del combine_8 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_8 = paddle._C_ops.flatten(stack_8, 3, 4) + del stack_8 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_18 = paddle._C_ops.strided_slice( + transpose_17, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_19 = paddle._C_ops.strided_slice( + transpose_17, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_17 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_36 = paddle._C_ops.multiply(strided_slice_18, slice_9) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_37 = paddle._C_ops.multiply(strided_slice_19, slice_8) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_9 = paddle._C_ops.subtract(multiply_36, multiply_37) + del multiply_36, multiply_37 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_38 = paddle._C_ops.multiply(strided_slice_18, slice_8) + del slice_8, strided_slice_18 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_39 = paddle._C_ops.multiply(strided_slice_19, slice_9) + del slice_9, strided_slice_19 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_22 = paddle._C_ops.add(multiply_38, multiply_39) + del multiply_38, multiply_39 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_9 = [subtract_9, add_22] + del add_22, subtract_9 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_9 = paddle._C_ops.stack(combine_9, -1) + del combine_9 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_9 = paddle._C_ops.flatten(stack_9, 3, 4) + del stack_9 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_35 = paddle._C_ops.matmul(flatten_8, flatten_9, False, True) + del flatten_8, flatten_9 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_14 = paddle._C_ops.scale(matmul_35, full_4, float("0"), True) + del matmul_35 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_23 = paddle._C_ops.add(scale_14, unsqueeze_0) + del scale_14 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_4 = paddle._C_ops.softmax(add_23, -1) + del add_23 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_36 = paddle._C_ops.matmul(dropout_26, transpose_18, False, False) + del dropout_26, transpose_18 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_36, [0, 2, 1, 3]) + del matmul_36 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_8) + del transpose_19 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_37 = paddle._C_ops.matmul(reshape_19, parameter_68, False, False) + del parameter_68, reshape_19 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_37, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_37 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_24 = paddle._C_ops.add(divide_9, dropout_28) + del divide_9, dropout_28 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_9 = paddle._C_ops.square(add_24) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_9 = paddle._C_ops.mean(square_9, full_int_array_1, True) + del square_9 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_15 = paddle._C_ops.scale(mean_9, full_2, float("1e-12"), True) + del mean_9 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_9 = paddle._C_ops.sqrt(scale_15) + del scale_15 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_10 = paddle._C_ops.divide(add_24, sqrt_9) + del add_24, sqrt_9 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_38 = paddle._C_ops.matmul(divide_10, parameter_67, False, False) + del parameter_67 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_4 = paddle._C_ops.relu(matmul_38) + del matmul_38 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_39 = paddle._C_ops.matmul(relu_4, parameter_66, False, False) + del parameter_66, relu_4 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_39, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_39 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_25 = paddle._C_ops.add(divide_10, dropout_30) + del divide_10, dropout_30 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_10 = paddle._C_ops.square(add_25) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_10 = paddle._C_ops.mean(square_10, full_int_array_1, True) + del square_10 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_16 = paddle._C_ops.scale(mean_10, full_2, float("1e-12"), True) + del mean_10 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_10 = paddle._C_ops.sqrt(scale_16) + del scale_16 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_11 = paddle._C_ops.divide(add_25, sqrt_10) + del add_25, sqrt_10 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_40 = paddle._C_ops.matmul(divide_11, parameter_65, False, False) + del parameter_65 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_41 = paddle._C_ops.matmul(divide_11, parameter_64, False, False) + del parameter_64 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_42 = paddle._C_ops.matmul(divide_11, parameter_63, False, False) + del parameter_63 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(matmul_40, full_int_array_2) + del matmul_40 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(matmul_41, full_int_array_2) + del matmul_41 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(matmul_42, full_int_array_2) + del matmul_42 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + parameter_13, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_13 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + parameter_12, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_12 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_20 = paddle._C_ops.strided_slice( + transpose_20, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_21 = paddle._C_ops.strided_slice( + transpose_20, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_20 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_40 = paddle._C_ops.multiply(strided_slice_20, slice_11) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_41 = paddle._C_ops.multiply(strided_slice_21, slice_10) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_10 = paddle._C_ops.subtract(multiply_40, multiply_41) + del multiply_40, multiply_41 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_42 = paddle._C_ops.multiply(strided_slice_20, slice_10) + del strided_slice_20 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_43 = paddle._C_ops.multiply(strided_slice_21, slice_11) + del strided_slice_21 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_26 = paddle._C_ops.add(multiply_42, multiply_43) + del multiply_42, multiply_43 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_10 = [subtract_10, add_26] + del add_26, subtract_10 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_10 = paddle._C_ops.stack(combine_10, -1) + del combine_10 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_10 = paddle._C_ops.flatten(stack_10, 3, 4) + del stack_10 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_22 = paddle._C_ops.strided_slice( + transpose_21, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_23 = paddle._C_ops.strided_slice( + transpose_21, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_21 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_44 = paddle._C_ops.multiply(strided_slice_22, slice_11) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_45 = paddle._C_ops.multiply(strided_slice_23, slice_10) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_11 = paddle._C_ops.subtract(multiply_44, multiply_45) + del multiply_44, multiply_45 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_46 = paddle._C_ops.multiply(strided_slice_22, slice_10) + del slice_10, strided_slice_22 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_47 = paddle._C_ops.multiply(strided_slice_23, slice_11) + del slice_11, strided_slice_23 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_27 = paddle._C_ops.add(multiply_46, multiply_47) + del multiply_46, multiply_47 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_11 = [subtract_11, add_27] + del add_27, subtract_11 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_11 = paddle._C_ops.stack(combine_11, -1) + del combine_11 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_11 = paddle._C_ops.flatten(stack_11, 3, 4) + del stack_11 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_43 = paddle._C_ops.matmul(flatten_10, flatten_11, False, True) + del flatten_10, flatten_11 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_17 = paddle._C_ops.scale(matmul_43, full_4, float("0"), True) + del matmul_43 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_28 = paddle._C_ops.add(scale_17, unsqueeze_0) + del scale_17 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_5 = paddle._C_ops.softmax(add_28, -1) + del add_28 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_44 = paddle._C_ops.matmul(dropout_32, transpose_22, False, False) + del dropout_32, transpose_22 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_44, [0, 2, 1, 3]) + del matmul_44 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_8) + del transpose_23 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_45 = paddle._C_ops.matmul(reshape_23, parameter_62, False, False) + del parameter_62, reshape_23 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_45, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_45 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_29 = paddle._C_ops.add(divide_11, dropout_34) + del divide_11, dropout_34 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_11 = paddle._C_ops.square(add_29) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_11 = paddle._C_ops.mean(square_11, full_int_array_1, True) + del square_11 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_18 = paddle._C_ops.scale(mean_11, full_2, float("1e-12"), True) + del mean_11 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_11 = paddle._C_ops.sqrt(scale_18) + del scale_18 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_12 = paddle._C_ops.divide(add_29, sqrt_11) + del add_29, sqrt_11 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_46 = paddle._C_ops.matmul(divide_12, parameter_61, False, False) + del parameter_61 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_5 = paddle._C_ops.relu(matmul_46) + del matmul_46 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_47 = paddle._C_ops.matmul(relu_5, parameter_60, False, False) + del parameter_60, relu_5 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_47, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_47 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_30 = paddle._C_ops.add(divide_12, dropout_36) + del divide_12, dropout_36 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_12 = paddle._C_ops.square(add_30) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_12 = paddle._C_ops.mean(square_12, full_int_array_1, True) + del square_12 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_19 = paddle._C_ops.scale(mean_12, full_2, float("1e-12"), True) + del mean_12 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_12 = paddle._C_ops.sqrt(scale_19) + del scale_19 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_13 = paddle._C_ops.divide(add_30, sqrt_12) + del add_30, sqrt_12 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_48 = paddle._C_ops.matmul(divide_13, parameter_59, False, False) + del parameter_59 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_49 = paddle._C_ops.matmul(divide_13, parameter_58, False, False) + del parameter_58 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_50 = paddle._C_ops.matmul(divide_13, parameter_57, False, False) + del parameter_57 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(matmul_48, full_int_array_2) + del matmul_48 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(matmul_49, full_int_array_2) + del matmul_49 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(matmul_50, full_int_array_2) + del matmul_50 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + parameter_11, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_11 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + parameter_10, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_10 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_24 = paddle._C_ops.strided_slice( + transpose_24, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_25 = paddle._C_ops.strided_slice( + transpose_24, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_24 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_48 = paddle._C_ops.multiply(strided_slice_24, slice_13) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_49 = paddle._C_ops.multiply(strided_slice_25, slice_12) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_12 = paddle._C_ops.subtract(multiply_48, multiply_49) + del multiply_48, multiply_49 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_50 = paddle._C_ops.multiply(strided_slice_24, slice_12) + del strided_slice_24 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_51 = paddle._C_ops.multiply(strided_slice_25, slice_13) + del strided_slice_25 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_31 = paddle._C_ops.add(multiply_50, multiply_51) + del multiply_50, multiply_51 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_12 = [subtract_12, add_31] + del add_31, subtract_12 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_12 = paddle._C_ops.stack(combine_12, -1) + del combine_12 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_12 = paddle._C_ops.flatten(stack_12, 3, 4) + del stack_12 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_26 = paddle._C_ops.strided_slice( + transpose_25, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_27 = paddle._C_ops.strided_slice( + transpose_25, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_25 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_52 = paddle._C_ops.multiply(strided_slice_26, slice_13) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_53 = paddle._C_ops.multiply(strided_slice_27, slice_12) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_13 = paddle._C_ops.subtract(multiply_52, multiply_53) + del multiply_52, multiply_53 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_54 = paddle._C_ops.multiply(strided_slice_26, slice_12) + del slice_12, strided_slice_26 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_55 = paddle._C_ops.multiply(strided_slice_27, slice_13) + del slice_13, strided_slice_27 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_32 = paddle._C_ops.add(multiply_54, multiply_55) + del multiply_54, multiply_55 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_13 = [subtract_13, add_32] + del add_32, subtract_13 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_13 = paddle._C_ops.stack(combine_13, -1) + del combine_13 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_13 = paddle._C_ops.flatten(stack_13, 3, 4) + del stack_13 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_51 = paddle._C_ops.matmul(flatten_12, flatten_13, False, True) + del flatten_12, flatten_13 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_20 = paddle._C_ops.scale(matmul_51, full_4, float("0"), True) + del matmul_51 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_33 = paddle._C_ops.add(scale_20, unsqueeze_0) + del scale_20 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_6 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_52 = paddle._C_ops.matmul(dropout_38, transpose_26, False, False) + del dropout_38, transpose_26 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_52, [0, 2, 1, 3]) + del matmul_52 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_8) + del transpose_27 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_53 = paddle._C_ops.matmul(reshape_27, parameter_56, False, False) + del parameter_56, reshape_27 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_53, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_53 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_34 = paddle._C_ops.add(divide_13, dropout_40) + del divide_13, dropout_40 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_13 = paddle._C_ops.square(add_34) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_13 = paddle._C_ops.mean(square_13, full_int_array_1, True) + del square_13 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_21 = paddle._C_ops.scale(mean_13, full_2, float("1e-12"), True) + del mean_13 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_13 = paddle._C_ops.sqrt(scale_21) + del scale_21 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_14 = paddle._C_ops.divide(add_34, sqrt_13) + del add_34, sqrt_13 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_54 = paddle._C_ops.matmul(divide_14, parameter_55, False, False) + del parameter_55 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_6 = paddle._C_ops.relu(matmul_54) + del matmul_54 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_55 = paddle._C_ops.matmul(relu_6, parameter_54, False, False) + del parameter_54, relu_6 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_55, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_55 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_35 = paddle._C_ops.add(divide_14, dropout_42) + del divide_14, dropout_42 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_14 = paddle._C_ops.square(add_35) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_14 = paddle._C_ops.mean(square_14, full_int_array_1, True) + del square_14 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_22 = paddle._C_ops.scale(mean_14, full_2, float("1e-12"), True) + del mean_14 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_14 = paddle._C_ops.sqrt(scale_22) + del scale_22 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_15 = paddle._C_ops.divide(add_35, sqrt_14) + del add_35, sqrt_14 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_56 = paddle._C_ops.matmul(divide_15, parameter_53, False, False) + del parameter_53 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_57 = paddle._C_ops.matmul(divide_15, parameter_52, False, False) + del parameter_52 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_58 = paddle._C_ops.matmul(divide_15, parameter_51, False, False) + del parameter_51 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(matmul_56, full_int_array_2) + del matmul_56 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(matmul_57, full_int_array_2) + del matmul_57 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(matmul_58, full_int_array_2) + del matmul_58 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + parameter_9, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_9 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + parameter_8, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_8 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_28 = paddle._C_ops.strided_slice( + transpose_28, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_29 = paddle._C_ops.strided_slice( + transpose_28, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_28 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_56 = paddle._C_ops.multiply(strided_slice_28, slice_15) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_57 = paddle._C_ops.multiply(strided_slice_29, slice_14) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_14 = paddle._C_ops.subtract(multiply_56, multiply_57) + del multiply_56, multiply_57 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_58 = paddle._C_ops.multiply(strided_slice_28, slice_14) + del strided_slice_28 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_59 = paddle._C_ops.multiply(strided_slice_29, slice_15) + del strided_slice_29 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_36 = paddle._C_ops.add(multiply_58, multiply_59) + del multiply_58, multiply_59 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_14 = [subtract_14, add_36] + del add_36, subtract_14 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_14 = paddle._C_ops.stack(combine_14, -1) + del combine_14 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_14 = paddle._C_ops.flatten(stack_14, 3, 4) + del stack_14 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_30 = paddle._C_ops.strided_slice( + transpose_29, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_31 = paddle._C_ops.strided_slice( + transpose_29, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_29 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_60 = paddle._C_ops.multiply(strided_slice_30, slice_15) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_61 = paddle._C_ops.multiply(strided_slice_31, slice_14) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_15 = paddle._C_ops.subtract(multiply_60, multiply_61) + del multiply_60, multiply_61 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_62 = paddle._C_ops.multiply(strided_slice_30, slice_14) + del slice_14, strided_slice_30 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_63 = paddle._C_ops.multiply(strided_slice_31, slice_15) + del slice_15, strided_slice_31 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_37 = paddle._C_ops.add(multiply_62, multiply_63) + del multiply_62, multiply_63 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_15 = [subtract_15, add_37] + del add_37, subtract_15 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_15 = paddle._C_ops.stack(combine_15, -1) + del combine_15 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_15 = paddle._C_ops.flatten(stack_15, 3, 4) + del stack_15 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_59 = paddle._C_ops.matmul(flatten_14, flatten_15, False, True) + del flatten_14, flatten_15 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_23 = paddle._C_ops.scale(matmul_59, full_4, float("0"), True) + del matmul_59 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_38 = paddle._C_ops.add(scale_23, unsqueeze_0) + del scale_23 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_7 = paddle._C_ops.softmax(add_38, -1) + del add_38 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_60 = paddle._C_ops.matmul(dropout_44, transpose_30, False, False) + del dropout_44, transpose_30 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_60, [0, 2, 1, 3]) + del matmul_60 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_8) + del transpose_31 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_61 = paddle._C_ops.matmul(reshape_31, parameter_50, False, False) + del parameter_50, reshape_31 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_61, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_61 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_39 = paddle._C_ops.add(divide_15, dropout_46) + del divide_15, dropout_46 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_15 = paddle._C_ops.square(add_39) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_15 = paddle._C_ops.mean(square_15, full_int_array_1, True) + del square_15 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_24 = paddle._C_ops.scale(mean_15, full_2, float("1e-12"), True) + del mean_15 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_15 = paddle._C_ops.sqrt(scale_24) + del scale_24 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_16 = paddle._C_ops.divide(add_39, sqrt_15) + del add_39, sqrt_15 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_62 = paddle._C_ops.matmul(divide_16, parameter_49, False, False) + del parameter_49 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_7 = paddle._C_ops.relu(matmul_62) + del matmul_62 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_63 = paddle._C_ops.matmul(relu_7, parameter_48, False, False) + del parameter_48, relu_7 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_63, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_63 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_40 = paddle._C_ops.add(divide_16, dropout_48) + del divide_16, dropout_48 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_16 = paddle._C_ops.square(add_40) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_16 = paddle._C_ops.mean(square_16, full_int_array_1, True) + del square_16 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_25 = paddle._C_ops.scale(mean_16, full_2, float("1e-12"), True) + del mean_16 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_16 = paddle._C_ops.sqrt(scale_25) + del scale_25 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_17 = paddle._C_ops.divide(add_40, sqrt_16) + del add_40, sqrt_16 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_64 = paddle._C_ops.matmul(divide_17, parameter_47, False, False) + del parameter_47 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_65 = paddle._C_ops.matmul(divide_17, parameter_46, False, False) + del parameter_46 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_66 = paddle._C_ops.matmul(divide_17, parameter_45, False, False) + del parameter_45 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(matmul_64, full_int_array_2) + del matmul_64 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(matmul_65, full_int_array_2) + del matmul_65 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(matmul_66, full_int_array_2) + del matmul_66 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + parameter_7, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_7 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + parameter_6, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_6 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_32 = paddle._C_ops.strided_slice( + transpose_32, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_33 = paddle._C_ops.strided_slice( + transpose_32, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_32 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_64 = paddle._C_ops.multiply(strided_slice_32, slice_17) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_65 = paddle._C_ops.multiply(strided_slice_33, slice_16) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_16 = paddle._C_ops.subtract(multiply_64, multiply_65) + del multiply_64, multiply_65 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_66 = paddle._C_ops.multiply(strided_slice_32, slice_16) + del strided_slice_32 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_67 = paddle._C_ops.multiply(strided_slice_33, slice_17) + del strided_slice_33 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_41 = paddle._C_ops.add(multiply_66, multiply_67) + del multiply_66, multiply_67 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_16 = [subtract_16, add_41] + del add_41, subtract_16 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_16 = paddle._C_ops.stack(combine_16, -1) + del combine_16 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_16 = paddle._C_ops.flatten(stack_16, 3, 4) + del stack_16 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_34 = paddle._C_ops.strided_slice( + transpose_33, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_35 = paddle._C_ops.strided_slice( + transpose_33, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_33 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_68 = paddle._C_ops.multiply(strided_slice_34, slice_17) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_69 = paddle._C_ops.multiply(strided_slice_35, slice_16) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_17 = paddle._C_ops.subtract(multiply_68, multiply_69) + del multiply_68, multiply_69 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_70 = paddle._C_ops.multiply(strided_slice_34, slice_16) + del slice_16, strided_slice_34 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_71 = paddle._C_ops.multiply(strided_slice_35, slice_17) + del slice_17, strided_slice_35 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_42 = paddle._C_ops.add(multiply_70, multiply_71) + del multiply_70, multiply_71 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_17 = [subtract_17, add_42] + del add_42, subtract_17 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_17 = paddle._C_ops.stack(combine_17, -1) + del combine_17 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_17 = paddle._C_ops.flatten(stack_17, 3, 4) + del stack_17 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_67 = paddle._C_ops.matmul(flatten_16, flatten_17, False, True) + del flatten_16, flatten_17 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_26 = paddle._C_ops.scale(matmul_67, full_4, float("0"), True) + del matmul_67 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_43 = paddle._C_ops.add(scale_26, unsqueeze_0) + del scale_26 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_8 = paddle._C_ops.softmax(add_43, -1) + del add_43 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_68 = paddle._C_ops.matmul(dropout_50, transpose_34, False, False) + del dropout_50, transpose_34 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_68, [0, 2, 1, 3]) + del matmul_68 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_8) + del transpose_35 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_69 = paddle._C_ops.matmul(reshape_35, parameter_44, False, False) + del parameter_44, reshape_35 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_69, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_69 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_44 = paddle._C_ops.add(divide_17, dropout_52) + del divide_17, dropout_52 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_17 = paddle._C_ops.square(add_44) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_17 = paddle._C_ops.mean(square_17, full_int_array_1, True) + del square_17 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_27 = paddle._C_ops.scale(mean_17, full_2, float("1e-12"), True) + del mean_17 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_17 = paddle._C_ops.sqrt(scale_27) + del scale_27 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_18 = paddle._C_ops.divide(add_44, sqrt_17) + del add_44, sqrt_17 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_70 = paddle._C_ops.matmul(divide_18, parameter_43, False, False) + del parameter_43 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_8 = paddle._C_ops.relu(matmul_70) + del matmul_70 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_71 = paddle._C_ops.matmul(relu_8, parameter_42, False, False) + del parameter_42, relu_8 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_71, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_71 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_45 = paddle._C_ops.add(divide_18, dropout_54) + del divide_18, dropout_54 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_18 = paddle._C_ops.square(add_45) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_18 = paddle._C_ops.mean(square_18, full_int_array_1, True) + del square_18 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_28 = paddle._C_ops.scale(mean_18, full_2, float("1e-12"), True) + del mean_18 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_18 = paddle._C_ops.sqrt(scale_28) + del scale_28 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_19 = paddle._C_ops.divide(add_45, sqrt_18) + del add_45, sqrt_18 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_72 = paddle._C_ops.matmul(divide_19, parameter_41, False, False) + del parameter_41 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_73 = paddle._C_ops.matmul(divide_19, parameter_40, False, False) + del parameter_40 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_74 = paddle._C_ops.matmul(divide_19, parameter_39, False, False) + del parameter_39 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(matmul_72, full_int_array_2) + del matmul_72 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(matmul_73, full_int_array_2) + del matmul_73 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(matmul_74, full_int_array_2) + del matmul_74 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + parameter_5, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_5 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + parameter_4, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_4 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_36 = paddle._C_ops.strided_slice( + transpose_36, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_37 = paddle._C_ops.strided_slice( + transpose_36, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_36 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_72 = paddle._C_ops.multiply(strided_slice_36, slice_19) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_73 = paddle._C_ops.multiply(strided_slice_37, slice_18) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_18 = paddle._C_ops.subtract(multiply_72, multiply_73) + del multiply_72, multiply_73 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_74 = paddle._C_ops.multiply(strided_slice_36, slice_18) + del strided_slice_36 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_75 = paddle._C_ops.multiply(strided_slice_37, slice_19) + del strided_slice_37 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_46 = paddle._C_ops.add(multiply_74, multiply_75) + del multiply_74, multiply_75 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_18 = [subtract_18, add_46] + del add_46, subtract_18 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_18 = paddle._C_ops.stack(combine_18, -1) + del combine_18 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_18 = paddle._C_ops.flatten(stack_18, 3, 4) + del stack_18 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_38 = paddle._C_ops.strided_slice( + transpose_37, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_39 = paddle._C_ops.strided_slice( + transpose_37, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_37 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_76 = paddle._C_ops.multiply(strided_slice_38, slice_19) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_77 = paddle._C_ops.multiply(strided_slice_39, slice_18) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_19 = paddle._C_ops.subtract(multiply_76, multiply_77) + del multiply_76, multiply_77 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_78 = paddle._C_ops.multiply(strided_slice_38, slice_18) + del slice_18, strided_slice_38 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_79 = paddle._C_ops.multiply(strided_slice_39, slice_19) + del slice_19, strided_slice_39 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_47 = paddle._C_ops.add(multiply_78, multiply_79) + del multiply_78, multiply_79 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_19 = [subtract_19, add_47] + del add_47, subtract_19 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_19 = paddle._C_ops.stack(combine_19, -1) + del combine_19 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_19 = paddle._C_ops.flatten(stack_19, 3, 4) + del stack_19 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_75 = paddle._C_ops.matmul(flatten_18, flatten_19, False, True) + del flatten_18, flatten_19 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_29 = paddle._C_ops.scale(matmul_75, full_4, float("0"), True) + del matmul_75 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_48 = paddle._C_ops.add(scale_29, unsqueeze_0) + del scale_29 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_9 = paddle._C_ops.softmax(add_48, -1) + del add_48 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_76 = paddle._C_ops.matmul(dropout_56, transpose_38, False, False) + del dropout_56, transpose_38 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_76, [0, 2, 1, 3]) + del matmul_76 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_8) + del transpose_39 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_77 = paddle._C_ops.matmul(reshape_39, parameter_38, False, False) + del parameter_38, reshape_39 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_77, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_77 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_49 = paddle._C_ops.add(divide_19, dropout_58) + del divide_19, dropout_58 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_19 = paddle._C_ops.square(add_49) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_19 = paddle._C_ops.mean(square_19, full_int_array_1, True) + del square_19 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_30 = paddle._C_ops.scale(mean_19, full_2, float("1e-12"), True) + del mean_19 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_19 = paddle._C_ops.sqrt(scale_30) + del scale_30 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_20 = paddle._C_ops.divide(add_49, sqrt_19) + del add_49, sqrt_19 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_78 = paddle._C_ops.matmul(divide_20, parameter_37, False, False) + del parameter_37 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_9 = paddle._C_ops.relu(matmul_78) + del matmul_78 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_79 = paddle._C_ops.matmul(relu_9, parameter_36, False, False) + del parameter_36, relu_9 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_79, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_79 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_50 = paddle._C_ops.add(divide_20, dropout_60) + del divide_20, dropout_60 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_20 = paddle._C_ops.square(add_50) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_20 = paddle._C_ops.mean(square_20, full_int_array_1, True) + del square_20 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_31 = paddle._C_ops.scale(mean_20, full_2, float("1e-12"), True) + del mean_20 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_20 = paddle._C_ops.sqrt(scale_31) + del scale_31 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_21 = paddle._C_ops.divide(add_50, sqrt_20) + del add_50, sqrt_20 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_80 = paddle._C_ops.matmul(divide_21, parameter_35, False, False) + del parameter_35 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_81 = paddle._C_ops.matmul(divide_21, parameter_34, False, False) + del parameter_34 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_82 = paddle._C_ops.matmul(divide_21, parameter_33, False, False) + del parameter_33 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(matmul_80, full_int_array_2) + del matmul_80 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(matmul_81, full_int_array_2) + del matmul_81 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(matmul_82, full_int_array_2) + del matmul_82 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + parameter_3, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_3 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + parameter_2, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_2 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_40 = paddle._C_ops.strided_slice( + transpose_40, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_41 = paddle._C_ops.strided_slice( + transpose_40, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_40 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_80 = paddle._C_ops.multiply(strided_slice_40, slice_21) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_81 = paddle._C_ops.multiply(strided_slice_41, slice_20) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_20 = paddle._C_ops.subtract(multiply_80, multiply_81) + del multiply_80, multiply_81 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_82 = paddle._C_ops.multiply(strided_slice_40, slice_20) + del strided_slice_40 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_83 = paddle._C_ops.multiply(strided_slice_41, slice_21) + del strided_slice_41 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_51 = paddle._C_ops.add(multiply_82, multiply_83) + del multiply_82, multiply_83 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_20 = [subtract_20, add_51] + del add_51, subtract_20 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_20 = paddle._C_ops.stack(combine_20, -1) + del combine_20 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_20 = paddle._C_ops.flatten(stack_20, 3, 4) + del stack_20 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_42 = paddle._C_ops.strided_slice( + transpose_41, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_43 = paddle._C_ops.strided_slice( + transpose_41, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_41 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_84 = paddle._C_ops.multiply(strided_slice_42, slice_21) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_85 = paddle._C_ops.multiply(strided_slice_43, slice_20) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_21 = paddle._C_ops.subtract(multiply_84, multiply_85) + del multiply_84, multiply_85 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_86 = paddle._C_ops.multiply(strided_slice_42, slice_20) + del slice_20, strided_slice_42 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_87 = paddle._C_ops.multiply(strided_slice_43, slice_21) + del slice_21, strided_slice_43 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_52 = paddle._C_ops.add(multiply_86, multiply_87) + del multiply_86, multiply_87 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_21 = [subtract_21, add_52] + del add_52, subtract_21 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_21 = paddle._C_ops.stack(combine_21, -1) + del combine_21 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_21 = paddle._C_ops.flatten(stack_21, 3, 4) + del stack_21 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_83 = paddle._C_ops.matmul(flatten_20, flatten_21, False, True) + del flatten_20, flatten_21 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_32 = paddle._C_ops.scale(matmul_83, full_4, float("0"), True) + del matmul_83 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_53 = paddle._C_ops.add(scale_32, unsqueeze_0) + del scale_32 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_10 = paddle._C_ops.softmax(add_53, -1) + del add_53 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_84 = paddle._C_ops.matmul(dropout_62, transpose_42, False, False) + del dropout_62, transpose_42 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_84, [0, 2, 1, 3]) + del matmul_84 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_8) + del transpose_43 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_85 = paddle._C_ops.matmul(reshape_43, parameter_32, False, False) + del parameter_32, reshape_43 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_85, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_85 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_54 = paddle._C_ops.add(divide_21, dropout_64) + del divide_21, dropout_64 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_21 = paddle._C_ops.square(add_54) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_21 = paddle._C_ops.mean(square_21, full_int_array_1, True) + del square_21 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_33 = paddle._C_ops.scale(mean_21, full_2, float("1e-12"), True) + del mean_21 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_21 = paddle._C_ops.sqrt(scale_33) + del scale_33 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_22 = paddle._C_ops.divide(add_54, sqrt_21) + del add_54, sqrt_21 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_86 = paddle._C_ops.matmul(divide_22, parameter_31, False, False) + del parameter_31 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_10 = paddle._C_ops.relu(matmul_86) + del matmul_86 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_87 = paddle._C_ops.matmul(relu_10, parameter_30, False, False) + del parameter_30, relu_10 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_87, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_87 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_55 = paddle._C_ops.add(divide_22, dropout_66) + del divide_22, dropout_66 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_22 = paddle._C_ops.square(add_55) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_22 = paddle._C_ops.mean(square_22, full_int_array_1, True) + del square_22 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_34 = paddle._C_ops.scale(mean_22, full_2, float("1e-12"), True) + del mean_22 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_22 = paddle._C_ops.sqrt(scale_34) + del scale_34 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_23 = paddle._C_ops.divide(add_55, sqrt_22) + del add_55, sqrt_22 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_88 = paddle._C_ops.matmul(divide_23, parameter_29, False, False) + del parameter_29 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_89 = paddle._C_ops.matmul(divide_23, parameter_28, False, False) + del parameter_28 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_90 = paddle._C_ops.matmul(divide_23, parameter_27, False, False) + del parameter_27 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(matmul_88, full_int_array_2) + del matmul_88 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(matmul_89, full_int_array_2) + del matmul_89 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x11x12x64xf32) <- (1x11x768xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(matmul_90, full_int_array_2) + del full_int_array_2, matmul_90 + + # pd_op.transpose: (1x12x11x64xf32) <- (1x11x12x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + parameter_1, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_1 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + parameter_0, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del full_int_array_4, parameter_0 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_44 = paddle._C_ops.strided_slice( + transpose_44, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_45 = paddle._C_ops.strided_slice( + transpose_44, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_44 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_88 = paddle._C_ops.multiply(strided_slice_44, slice_23) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_89 = paddle._C_ops.multiply(strided_slice_45, slice_22) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_22 = paddle._C_ops.subtract(multiply_88, multiply_89) + del multiply_88, multiply_89 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_90 = paddle._C_ops.multiply(strided_slice_44, slice_22) + del strided_slice_44 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_91 = paddle._C_ops.multiply(strided_slice_45, slice_23) + del strided_slice_45 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_56 = paddle._C_ops.add(multiply_90, multiply_91) + del multiply_90, multiply_91 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_22 = [subtract_22, add_56] + del add_56, subtract_22 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_22 = paddle._C_ops.stack(combine_22, -1) + del combine_22 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_22 = paddle._C_ops.flatten(stack_22, 3, 4) + del stack_22 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_46 = paddle._C_ops.strided_slice( + transpose_45, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + del full_int_array_3 + + # pd_op.strided_slice: (1x12x11x32xf32) <- (1x12x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_47 = paddle._C_ops.strided_slice( + transpose_45, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del full_int_array_5, full_int_array_6, full_int_array_7, transpose_45 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_92 = paddle._C_ops.multiply(strided_slice_46, slice_23) + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_93 = paddle._C_ops.multiply(strided_slice_47, slice_22) + + # pd_op.subtract: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + subtract_23 = paddle._C_ops.subtract(multiply_92, multiply_93) + del multiply_92, multiply_93 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_94 = paddle._C_ops.multiply(strided_slice_46, slice_22) + del slice_22, strided_slice_46 + + # pd_op.multiply: (1x12x11x32xf32) <- (1x12x11x32xf32, 11x32xf32) + multiply_95 = paddle._C_ops.multiply(strided_slice_47, slice_23) + del slice_23, strided_slice_47 + + # pd_op.add: (1x12x11x32xf32) <- (1x12x11x32xf32, 1x12x11x32xf32) + add_57 = paddle._C_ops.add(multiply_94, multiply_95) + del multiply_94, multiply_95 + + # builtin.combine: ([1x12x11x32xf32, 1x12x11x32xf32]) <- (1x12x11x32xf32, 1x12x11x32xf32) + combine_23 = [subtract_23, add_57] + del add_57, subtract_23 + + # pd_op.stack: (1x12x11x32x2xf32) <- ([1x12x11x32xf32, 1x12x11x32xf32]) + stack_23 = paddle._C_ops.stack(combine_23, -1) + del combine_23 + + # pd_op.flatten: (1x12x11x64xf32) <- (1x12x11x32x2xf32) + flatten_23 = paddle._C_ops.flatten(stack_23, 3, 4) + del stack_23 + + # pd_op.matmul: (1x12x11x11xf32) <- (1x12x11x64xf32, 1x12x11x64xf32) + matmul_91 = paddle._C_ops.matmul(flatten_22, flatten_23, False, True) + del flatten_22, flatten_23 + + # pd_op.scale: (1x12x11x11xf32) <- (1x12x11x11xf32, 1xf32) + scale_35 = paddle._C_ops.scale(matmul_91, full_4, float("0"), True) + del full_4, matmul_91 + + # pd_op.add: (1x12x11x11xf32) <- (1x12x11x11xf32, 1x1x1x11xf32) + add_58 = paddle._C_ops.add(scale_35, unsqueeze_0) + del scale_35, unsqueeze_0 + + # pd_op.softmax: (1x12x11x11xf32) <- (1x12x11x11xf32) + softmax_11 = paddle._C_ops.softmax(add_58, -1) + del add_58 + + # pd_op.dropout: (1x12x11x11xf32, 1x12x11x11xui8) <- (1x12x11x11xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # pd_op.matmul: (1x12x11x64xf32) <- (1x12x11x11xf32, 1x12x11x64xf32) + matmul_92 = paddle._C_ops.matmul(dropout_68, transpose_46, False, False) + del dropout_68, transpose_46 + + # pd_op.transpose: (1x11x12x64xf32) <- (1x12x11x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_92, [0, 2, 1, 3]) + del matmul_92 + + # pd_op.reshape: (1x11x768xf32) <- (1x11x12x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_8) + del full_int_array_8, transpose_47 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x768xf32, 768x768xf32) + matmul_93 = paddle._C_ops.matmul(reshape_47, parameter_26, False, False) + del parameter_26, reshape_47 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_93, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_93 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_59 = paddle._C_ops.add(divide_23, dropout_70) + del divide_23, dropout_70 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_23 = paddle._C_ops.square(add_59) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_23 = paddle._C_ops.mean(square_23, full_int_array_1, True) + del square_23 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_36 = paddle._C_ops.scale(mean_23, full_2, float("1e-12"), True) + del mean_23 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_23 = paddle._C_ops.sqrt(scale_36) + del scale_36 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_24 = paddle._C_ops.divide(add_59, sqrt_23) + del add_59, sqrt_23 + + # pd_op.matmul: (1x11x3072xf32) <- (1x11x768xf32, 768x3072xf32) + matmul_94 = paddle._C_ops.matmul(divide_24, parameter_25, False, False) + del parameter_25 + + # pd_op.relu: (1x11x3072xf32) <- (1x11x3072xf32) + relu_11 = paddle._C_ops.relu(matmul_94) + del matmul_94 + + # pd_op.matmul: (1x11x768xf32) <- (1x11x3072xf32, 3072x768xf32) + matmul_95 = paddle._C_ops.matmul(relu_11, parameter_24, False, False) + del parameter_24, relu_11 + + # pd_op.dropout: (1x11x768xf32, 1x11x768xui8) <- (1x11x768xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_95, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del full_3, matmul_95 + + # pd_op.add: (1x11x768xf32) <- (1x11x768xf32, 1x11x768xf32) + add_60 = paddle._C_ops.add(divide_24, dropout_72) + del divide_24, dropout_72 + + # pd_op.square: (1x11x768xf32) <- (1x11x768xf32) + square_24 = paddle._C_ops.square(add_60) + + # pd_op.mean: (1x11x1xf32) <- (1x11x768xf32, 1xi64) + mean_24 = paddle._C_ops.mean(square_24, full_int_array_1, True) + del full_int_array_1, square_24 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_37 = paddle._C_ops.scale(mean_24, full_2, float("1e-12"), True) + del full_2, mean_24 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_24 = paddle._C_ops.sqrt(scale_37) + del scale_37 + + # pd_op.divide: (1x11x768xf32) <- (1x11x768xf32, 1x11x1xf32) + divide_0 = paddle._C_ops.divide(add_60, sqrt_24) + del add_60, sqrt_24 + + return divide_0 diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/weight_meta.py b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/weight_meta.py new file mode 100644 index 000000000..b785f7160 --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_base/weight_meta.py @@ -0,0 +1,1076 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0395285") + max_val = float("0.0395284") + mean = float("1.13426e-05") + std = float("0.0228167") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0395284") + max_val = float("0.0395285") + mean = float("-5.25268e-06") + std = float("0.0228268") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624997") + max_val = float("0.0624999") + mean = float("-5.39878e-05") + std = float("0.0360806") + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624999") + mean = float("-1.09891e-05") + std = float("0.0360856") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0625") + mean = float("-1.2814e-05") + std = float("0.036088") + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624999") + mean = float("7.22076e-05") + std = float("0.0361164") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0395284") + max_val = float("0.0395285") + mean = float("9.72124e-06") + std = float("0.0228176") + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0395284") + max_val = float("0.0395284") + mean = float("6.65912e-06") + std = float("0.0228225") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624998") + max_val = float("0.0624999") + mean = float("-4.45009e-05") + std = float("0.0360536") + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0625") + max_val = float("0.0624998") + mean = float("3.77434e-05") + std = float("0.0360737") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624995") + max_val = float("0.0624999") + mean = float("-1.88929e-05") + std = float("0.0360864") + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624994") + max_val = float("0.0624998") + mean = float("-6.78034e-05") + std = float("0.036086") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0395284") + max_val = float("0.0395284") + mean = float("5.02629e-06") + std = float("0.0228184") + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0395285") + max_val = float("0.0395285") + mean = float("-1.67528e-06") + std = float("0.0228274") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0625") + max_val = float("0.0624999") + mean = float("-2.54649e-05") + std = float("0.0360794") + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624998") + max_val = float("0.0625") + mean = float("-3.85251e-05") + std = float("0.0361117") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0625") + max_val = float("0.0624999") + mean = float("3.43287e-05") + std = float("0.0360758") + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624998") + mean = float("-2.73458e-05") + std = float("0.0360868") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0395285") + max_val = float("0.0395285") + mean = float("3.55745e-05") + std = float("0.0228226") + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0395284") + max_val = float("0.0395285") + mean = float("3.84547e-06") + std = float("0.0228181") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624997") + max_val = float("0.0624997") + mean = float("1.80691e-05") + std = float("0.0360809") + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624998") + max_val = float("0.0624994") + mean = float("-5.31637e-06") + std = float("0.0360749") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0625") + mean = float("-3.3499e-05") + std = float("0.0360776") + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624998") + mean = float("-3.11013e-05") + std = float("0.0361021") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0395284") + max_val = float("0.0395284") + mean = float("2.1219e-05") + std = float("0.0228199") + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0395284") + max_val = float("0.0395284") + mean = float("2.9588e-05") + std = float("0.0228215") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0625") + max_val = float("0.0624998") + mean = float("-6.77279e-05") + std = float("0.036045") + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624994") + mean = float("2.88554e-05") + std = float("0.0361064") + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624992") + max_val = float("0.0624999") + mean = float("-2.08459e-05") + std = float("0.0360712") + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624998") + max_val = float("0.0624997") + mean = float("-0.000117026") + std = float("0.0361156") + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0395285") + max_val = float("0.0395285") + mean = float("-1.03245e-05") + std = float("0.0228246") + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0395285") + max_val = float("0.0395284") + mean = float("3.3744e-05") + std = float("0.0228196") + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624999") + mean = float("-5.90678e-06") + std = float("0.0361063") + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624999") + mean = float("7.06287e-06") + std = float("0.036098") + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624995") + max_val = float("0.0625") + mean = float("4.46207e-05") + std = float("0.036074") + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624997") + max_val = float("0.0624996") + mean = float("5.28125e-05") + std = float("0.0360855") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0395284") + max_val = float("0.0395285") + mean = float("-7.43705e-06") + std = float("0.0228173") + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0395284") + max_val = float("0.0395284") + mean = float("-4.60809e-06") + std = float("0.0228351") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624994") + max_val = float("0.0625") + mean = float("9.21371e-06") + std = float("0.0360874") + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624998") + max_val = float("0.0624995") + mean = float("7.19146e-05") + std = float("0.0361123") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624996") + max_val = float("0.0624998") + mean = float("7.27561e-05") + std = float("0.0360691") + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0625") + max_val = float("0.0625") + mean = float("-7.87738e-05") + std = float("0.0361093") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0395284") + max_val = float("0.0395285") + mean = float("2.49948e-05") + std = float("0.0228228") + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0395285") + max_val = float("0.0395285") + mean = float("-1.23643e-05") + std = float("0.0228186") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0625") + max_val = float("0.0624999") + mean = float("3.26686e-05") + std = float("0.0360618") + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624997") + mean = float("-5.25524e-05") + std = float("0.0360597") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624998") + max_val = float("0.0624994") + mean = float("-1.12868e-05") + std = float("0.0361122") + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624998") + mean = float("5.7049e-05") + std = float("0.0360835") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0395285") + max_val = float("0.0395284") + mean = float("1.21428e-06") + std = float("0.0228164") + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0395284") + max_val = float("0.0395285") + mean = float("-1.55906e-05") + std = float("0.0228161") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624999") + mean = float("-2.01802e-05") + std = float("0.0360989") + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624989") + max_val = float("0.0624993") + mean = float("-6.85602e-06") + std = float("0.0361141") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624995") + max_val = float("0.0624998") + mean = float("-6.96434e-05") + std = float("0.0360804") + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0625") + max_val = float("0.0624998") + mean = float("4.9293e-05") + std = float("0.0361318") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0395285") + max_val = float("0.0395285") + mean = float("-4.79489e-06") + std = float("0.0228224") + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0395285") + max_val = float("0.0395285") + mean = float("1.27755e-05") + std = float("0.0228229") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624997") + mean = float("0.000119071") + std = float("0.0360898") + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624996") + max_val = float("0.0624997") + mean = float("-5.06956e-05") + std = float("0.0360536") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0625") + max_val = float("0.0624999") + mean = float("3.90921e-06") + std = float("0.0361107") + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624998") + mean = float("-6.17378e-05") + std = float("0.0360687") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0395285") + max_val = float("0.0395284") + mean = float("9.40166e-06") + std = float("0.0228278") + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0395285") + max_val = float("0.0395285") + mean = float("1.03596e-05") + std = float("0.0228248") + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624997") + mean = float("-3.66526e-05") + std = float("0.0360391") + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624997") + max_val = float("0.0624996") + mean = float("-3.80437e-05") + std = float("0.0360945") + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0625") + max_val = float("0.0624998") + mean = float("-2.27478e-06") + std = float("0.0360823") + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0625") + max_val = float("0.0624997") + mean = float("5.51583e-05") + std = float("0.0360848") + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [3072, 768] + dtype = "float32" + min_val = float("-0.0395284") + max_val = float("0.0395284") + mean = float("-1.09022e-05") + std = float("0.0228238") + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [768, 3072] + dtype = "float32" + min_val = float("-0.0395285") + max_val = float("0.0395285") + mean = float("-1.22675e-05") + std = float("0.0228108") + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0625") + mean = float("-7.75606e-05") + std = float("0.036113") + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624993") + max_val = float("0.0624998") + mean = float("-3.3427e-05") + std = float("0.0360793") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624999") + max_val = float("0.0624999") + mean = float("1.87338e-05") + std = float("0.036101") + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [768, 768] + dtype = "float32" + min_val = float("-0.0624997") + max_val = float("0.0624998") + mean = float("1.06708e-05") + std = float("0.0360391") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [2, 768] + dtype = "float32" + min_val = float("-0.0881995") + max_val = float("0.0881802") + mean = float("0.000648857") + std = float("0.0506212") + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [12000, 768] + dtype = "float32" + min_val = float("-0.0216777") + max_val = float("0.0216777") + mean = float("7.39003e-06") + std = float("0.0125124") + data = None diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/graph_hash.txt b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/graph_hash.txt new file mode 100644 index 000000000..c1290e57b --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/graph_hash.txt @@ -0,0 +1 @@ +21538a6607cebd864efa7f783af24eee36f7da4dd0fa85942c9e481cb270c743 \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/graph_net.json b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/graph_net.json new file mode 100644 index 000000000..815456bac --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "roformer_v2_chinese_char_large", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/input_meta.py b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/input_meta.py new file mode 100644 index 000000000..b6791c33c --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/input_meta.py @@ -0,0 +1,12 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 11] + dtype = "int64" + data = [101, 3223, 6500, 421, 4179, 4331, 2008, 7263, 3055, 106, 102] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 11] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/model.py b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/model.py new file mode 100644 index 000000000..218656aa3 --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/model.py @@ -0,0 +1,6436 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + parameter_50, + parameter_51, + parameter_52, + parameter_53, + parameter_54, + parameter_55, + parameter_56, + parameter_57, + parameter_58, + parameter_59, + parameter_60, + parameter_61, + parameter_62, + parameter_63, + parameter_64, + parameter_65, + parameter_66, + parameter_67, + parameter_68, + parameter_69, + parameter_70, + parameter_71, + parameter_72, + parameter_73, + parameter_74, + parameter_75, + parameter_76, + parameter_77, + parameter_78, + parameter_79, + parameter_80, + parameter_81, + parameter_82, + parameter_83, + parameter_84, + parameter_85, + parameter_86, + parameter_87, + parameter_88, + parameter_89, + parameter_90, + parameter_91, + parameter_92, + parameter_93, + parameter_94, + parameter_95, + parameter_96, + parameter_97, + parameter_98, + parameter_99, + parameter_100, + parameter_101, + parameter_102, + parameter_103, + parameter_104, + parameter_105, + parameter_106, + parameter_107, + parameter_108, + parameter_109, + parameter_110, + parameter_111, + parameter_112, + parameter_113, + parameter_114, + parameter_115, + parameter_116, + parameter_117, + parameter_118, + parameter_119, + parameter_120, + parameter_121, + parameter_122, + parameter_123, + parameter_124, + parameter_125, + parameter_126, + parameter_127, + parameter_128, + parameter_129, + parameter_130, + parameter_131, + parameter_132, + parameter_133, + parameter_134, + parameter_135, + parameter_136, + parameter_137, + parameter_138, + parameter_139, + parameter_140, + parameter_141, + parameter_142, + parameter_143, + parameter_144, + parameter_145, + parameter_146, + parameter_147, + parameter_148, + parameter_149, + parameter_150, + parameter_151, + parameter_152, + parameter_153, + parameter_154, + parameter_155, + parameter_156, + parameter_157, + parameter_158, + parameter_159, + parameter_160, + parameter_161, + parameter_162, + parameter_163, + parameter_164, + parameter_165, + parameter_166, + parameter_167, + parameter_168, + parameter_169, + parameter_170, + parameter_171, + parameter_172, + parameter_173, + parameter_174, + parameter_175, + parameter_176, + parameter_177, + parameter_178, + parameter_179, + parameter_180, + parameter_181, + parameter_182, + parameter_183, + parameter_184, + parameter_185, + parameter_186, + parameter_187, + parameter_188, + parameter_189, + parameter_190, + parameter_191, + parameter_192, + parameter_193, + data_0, + data_1, + ): + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("0"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.equal: (1x11xb) <- (1x11xi64, xi64) + equal_0 = paddle._C_ops.equal(data_0, full_0) + del full_0 + + # pd_op.cast: (1x11xf32) <- (1x11xb) + cast_0 = paddle._C_ops.cast(equal_0, paddle.float32) + del equal_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x11xf32) <- (1x11xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_1, float("0"), True) + del cast_0, full_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_0 = [1, 2] + + # pd_op.unsqueeze: (1x1x1x11xf32) <- (1x11xf32, 2xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del full_int_array_0, scale_0 + + # pd_op.embedding: (1x11x1024xf32) <- (1x11xi64, 12000x1024xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_193, -1, False) + del data_0, parameter_193 + + # pd_op.embedding: (1x11x1024xf32) <- (1x11xi64, 2x1024xf32) + embedding_1 = paddle._C_ops.embedding(data_1, parameter_192, -1, False) + del data_1, parameter_192 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_0 = paddle._C_ops.square(add_0) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [-1] + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_0 = paddle._C_ops.mean(square_0, full_int_array_1, True) + del square_0 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_1 = paddle._C_ops.scale(mean_0, full_2, float("1e-12"), True) + del mean_0 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_0 = paddle._C_ops.sqrt(scale_1) + del scale_1 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_1 = paddle._C_ops.divide(add_0, sqrt_0) + del add_0, sqrt_0 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + divide_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del divide_1 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_191, False, False) + del parameter_191 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_1 = paddle._C_ops.matmul(dropout_0, parameter_190, False, False) + del parameter_190 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_2 = paddle._C_ops.matmul(dropout_0, parameter_189, False, False) + del parameter_189 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_2 = [0, 0, 16, 64] + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(matmul_0, full_int_array_2) + del matmul_0 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(matmul_1, full_int_array_2) + del matmul_1 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(matmul_2, full_int_array_2) + del matmul_2 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_4 = [11] + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + parameter_47, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_47 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + parameter_46, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_46 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_5 = [2147483647] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_6 = [2] + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + transpose_0, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_7 = [1] + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + transpose_0, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_0 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_0 = paddle._C_ops.multiply(strided_slice_0, slice_1) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_1 = paddle._C_ops.multiply(strided_slice_1, slice_0) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_0 = paddle._C_ops.subtract(multiply_0, multiply_1) + del multiply_0, multiply_1 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_2 = paddle._C_ops.multiply(strided_slice_0, slice_0) + del strided_slice_0 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_3 = paddle._C_ops.multiply(strided_slice_1, slice_1) + del strided_slice_1 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_1 = paddle._C_ops.add(multiply_2, multiply_3) + del multiply_2, multiply_3 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_0 = [subtract_0, add_1] + del add_1, subtract_0 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_0 = paddle._C_ops.stack(combine_0, -1) + del combine_0 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_0 = paddle._C_ops.flatten(stack_0, 3, 4) + del stack_0 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + transpose_1, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + transpose_1, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_1 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_4 = paddle._C_ops.multiply(strided_slice_2, slice_1) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_5 = paddle._C_ops.multiply(strided_slice_3, slice_0) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_1 = paddle._C_ops.subtract(multiply_4, multiply_5) + del multiply_4, multiply_5 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_6 = paddle._C_ops.multiply(strided_slice_2, slice_0) + del slice_0, strided_slice_2 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_7 = paddle._C_ops.multiply(strided_slice_3, slice_1) + del slice_1, strided_slice_3 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_2 = paddle._C_ops.add(multiply_6, multiply_7) + del multiply_6, multiply_7 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_1 = [subtract_1, add_2] + del add_2, subtract_1 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_1 = paddle._C_ops.stack(combine_1, -1) + del combine_1 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_1 = paddle._C_ops.flatten(stack_1, 3, 4) + del stack_1 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_3 = paddle._C_ops.matmul(flatten_0, flatten_1, False, True) + del flatten_0, flatten_1 + + # pd_op.full: (1xf32) <- () + full_4 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_2 = paddle._C_ops.scale(matmul_3, full_4, float("0"), True) + del matmul_3 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_3 = paddle._C_ops.add(scale_2, unsqueeze_0) + del scale_2 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_0 = paddle._C_ops.softmax(add_3, -1) + del add_3 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_4 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_4, [0, 2, 1, 3]) + del matmul_4 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_8 = [0, 0, 1024] + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_8) + del transpose_3 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_5 = paddle._C_ops.matmul(reshape_3, parameter_188, False, False) + del parameter_188, reshape_3 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_5 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_4 = paddle._C_ops.add(dropout_0, dropout_4) + del dropout_0, dropout_4 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_1 = paddle._C_ops.square(add_4) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_1 = paddle._C_ops.mean(square_1, full_int_array_1, True) + del square_1 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_3 = paddle._C_ops.scale(mean_1, full_2, float("1e-12"), True) + del mean_1 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_1 = paddle._C_ops.sqrt(scale_3) + del scale_3 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_2 = paddle._C_ops.divide(add_4, sqrt_1) + del add_4, sqrt_1 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_6 = paddle._C_ops.matmul(divide_2, parameter_187, False, False) + del parameter_187 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_0 = paddle._C_ops.relu(matmul_6) + del matmul_6 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_7 = paddle._C_ops.matmul(relu_0, parameter_186, False, False) + del parameter_186, relu_0 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_7 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_5 = paddle._C_ops.add(divide_2, dropout_6) + del divide_2, dropout_6 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_2 = paddle._C_ops.square(add_5) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_2 = paddle._C_ops.mean(square_2, full_int_array_1, True) + del square_2 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_4 = paddle._C_ops.scale(mean_2, full_2, float("1e-12"), True) + del mean_2 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_2 = paddle._C_ops.sqrt(scale_4) + del scale_4 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_3 = paddle._C_ops.divide(add_5, sqrt_2) + del add_5, sqrt_2 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_8 = paddle._C_ops.matmul(divide_3, parameter_185, False, False) + del parameter_185 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_9 = paddle._C_ops.matmul(divide_3, parameter_184, False, False) + del parameter_184 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_10 = paddle._C_ops.matmul(divide_3, parameter_183, False, False) + del parameter_183 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(matmul_8, full_int_array_2) + del matmul_8 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(matmul_9, full_int_array_2) + del matmul_9 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(matmul_10, full_int_array_2) + del matmul_10 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + parameter_45, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_45 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + parameter_44, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_44 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + transpose_4, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + transpose_4, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_4 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_8 = paddle._C_ops.multiply(strided_slice_4, slice_3) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_9 = paddle._C_ops.multiply(strided_slice_5, slice_2) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_2 = paddle._C_ops.subtract(multiply_8, multiply_9) + del multiply_8, multiply_9 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_10 = paddle._C_ops.multiply(strided_slice_4, slice_2) + del strided_slice_4 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_11 = paddle._C_ops.multiply(strided_slice_5, slice_3) + del strided_slice_5 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_6 = paddle._C_ops.add(multiply_10, multiply_11) + del multiply_10, multiply_11 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_2 = [subtract_2, add_6] + del add_6, subtract_2 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_2 = paddle._C_ops.stack(combine_2, -1) + del combine_2 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_2 = paddle._C_ops.flatten(stack_2, 3, 4) + del stack_2 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + transpose_5, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + transpose_5, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_5 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_12 = paddle._C_ops.multiply(strided_slice_6, slice_3) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_13 = paddle._C_ops.multiply(strided_slice_7, slice_2) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_3 = paddle._C_ops.subtract(multiply_12, multiply_13) + del multiply_12, multiply_13 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_14 = paddle._C_ops.multiply(strided_slice_6, slice_2) + del slice_2, strided_slice_6 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_15 = paddle._C_ops.multiply(strided_slice_7, slice_3) + del slice_3, strided_slice_7 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_7 = paddle._C_ops.add(multiply_14, multiply_15) + del multiply_14, multiply_15 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_3 = [subtract_3, add_7] + del add_7, subtract_3 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_3 = paddle._C_ops.stack(combine_3, -1) + del combine_3 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_3 = paddle._C_ops.flatten(stack_3, 3, 4) + del stack_3 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_11 = paddle._C_ops.matmul(flatten_2, flatten_3, False, True) + del flatten_2, flatten_3 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_5 = paddle._C_ops.scale(matmul_11, full_4, float("0"), True) + del matmul_11 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_8 = paddle._C_ops.add(scale_5, unsqueeze_0) + del scale_5 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_1 = paddle._C_ops.softmax(add_8, -1) + del add_8 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_12 = paddle._C_ops.matmul(dropout_8, transpose_6, False, False) + del dropout_8, transpose_6 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_12, [0, 2, 1, 3]) + del matmul_12 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_8) + del transpose_7 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_13 = paddle._C_ops.matmul(reshape_7, parameter_182, False, False) + del parameter_182, reshape_7 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_13, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_13 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_9 = paddle._C_ops.add(divide_3, dropout_10) + del divide_3, dropout_10 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_3 = paddle._C_ops.square(add_9) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_3 = paddle._C_ops.mean(square_3, full_int_array_1, True) + del square_3 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_6 = paddle._C_ops.scale(mean_3, full_2, float("1e-12"), True) + del mean_3 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_3 = paddle._C_ops.sqrt(scale_6) + del scale_6 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_4 = paddle._C_ops.divide(add_9, sqrt_3) + del add_9, sqrt_3 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_14 = paddle._C_ops.matmul(divide_4, parameter_181, False, False) + del parameter_181 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_1 = paddle._C_ops.relu(matmul_14) + del matmul_14 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_15 = paddle._C_ops.matmul(relu_1, parameter_180, False, False) + del parameter_180, relu_1 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_15, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_15 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_10 = paddle._C_ops.add(divide_4, dropout_12) + del divide_4, dropout_12 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_4 = paddle._C_ops.square(add_10) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_4 = paddle._C_ops.mean(square_4, full_int_array_1, True) + del square_4 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_7 = paddle._C_ops.scale(mean_4, full_2, float("1e-12"), True) + del mean_4 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_4 = paddle._C_ops.sqrt(scale_7) + del scale_7 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_5 = paddle._C_ops.divide(add_10, sqrt_4) + del add_10, sqrt_4 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_16 = paddle._C_ops.matmul(divide_5, parameter_179, False, False) + del parameter_179 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_17 = paddle._C_ops.matmul(divide_5, parameter_178, False, False) + del parameter_178 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_18 = paddle._C_ops.matmul(divide_5, parameter_177, False, False) + del parameter_177 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(matmul_16, full_int_array_2) + del matmul_16 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(matmul_17, full_int_array_2) + del matmul_17 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(matmul_18, full_int_array_2) + del matmul_18 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + parameter_43, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_43 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + parameter_42, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_42 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + transpose_8, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + transpose_8, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_8 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_16 = paddle._C_ops.multiply(strided_slice_8, slice_5) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_17 = paddle._C_ops.multiply(strided_slice_9, slice_4) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_4 = paddle._C_ops.subtract(multiply_16, multiply_17) + del multiply_16, multiply_17 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_18 = paddle._C_ops.multiply(strided_slice_8, slice_4) + del strided_slice_8 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_19 = paddle._C_ops.multiply(strided_slice_9, slice_5) + del strided_slice_9 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_11 = paddle._C_ops.add(multiply_18, multiply_19) + del multiply_18, multiply_19 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_4 = [subtract_4, add_11] + del add_11, subtract_4 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_4 = paddle._C_ops.stack(combine_4, -1) + del combine_4 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_4 = paddle._C_ops.flatten(stack_4, 3, 4) + del stack_4 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + transpose_9, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + transpose_9, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_9 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_20 = paddle._C_ops.multiply(strided_slice_10, slice_5) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_21 = paddle._C_ops.multiply(strided_slice_11, slice_4) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_5 = paddle._C_ops.subtract(multiply_20, multiply_21) + del multiply_20, multiply_21 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_22 = paddle._C_ops.multiply(strided_slice_10, slice_4) + del slice_4, strided_slice_10 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_23 = paddle._C_ops.multiply(strided_slice_11, slice_5) + del slice_5, strided_slice_11 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_12 = paddle._C_ops.add(multiply_22, multiply_23) + del multiply_22, multiply_23 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_5 = [subtract_5, add_12] + del add_12, subtract_5 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_5 = paddle._C_ops.stack(combine_5, -1) + del combine_5 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_5 = paddle._C_ops.flatten(stack_5, 3, 4) + del stack_5 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_19 = paddle._C_ops.matmul(flatten_4, flatten_5, False, True) + del flatten_4, flatten_5 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_8 = paddle._C_ops.scale(matmul_19, full_4, float("0"), True) + del matmul_19 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_13 = paddle._C_ops.add(scale_8, unsqueeze_0) + del scale_8 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_2 = paddle._C_ops.softmax(add_13, -1) + del add_13 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_20 = paddle._C_ops.matmul(dropout_14, transpose_10, False, False) + del dropout_14, transpose_10 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_20, [0, 2, 1, 3]) + del matmul_20 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_8) + del transpose_11 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_21 = paddle._C_ops.matmul(reshape_11, parameter_176, False, False) + del parameter_176, reshape_11 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_21, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_21 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_14 = paddle._C_ops.add(divide_5, dropout_16) + del divide_5, dropout_16 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_5 = paddle._C_ops.square(add_14) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_5 = paddle._C_ops.mean(square_5, full_int_array_1, True) + del square_5 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_9 = paddle._C_ops.scale(mean_5, full_2, float("1e-12"), True) + del mean_5 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_5 = paddle._C_ops.sqrt(scale_9) + del scale_9 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_6 = paddle._C_ops.divide(add_14, sqrt_5) + del add_14, sqrt_5 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_22 = paddle._C_ops.matmul(divide_6, parameter_175, False, False) + del parameter_175 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_2 = paddle._C_ops.relu(matmul_22) + del matmul_22 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_23 = paddle._C_ops.matmul(relu_2, parameter_174, False, False) + del parameter_174, relu_2 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_23, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_23 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_15 = paddle._C_ops.add(divide_6, dropout_18) + del divide_6, dropout_18 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_6 = paddle._C_ops.square(add_15) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_6 = paddle._C_ops.mean(square_6, full_int_array_1, True) + del square_6 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_10 = paddle._C_ops.scale(mean_6, full_2, float("1e-12"), True) + del mean_6 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_6 = paddle._C_ops.sqrt(scale_10) + del scale_10 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_7 = paddle._C_ops.divide(add_15, sqrt_6) + del add_15, sqrt_6 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_24 = paddle._C_ops.matmul(divide_7, parameter_173, False, False) + del parameter_173 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_25 = paddle._C_ops.matmul(divide_7, parameter_172, False, False) + del parameter_172 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_26 = paddle._C_ops.matmul(divide_7, parameter_171, False, False) + del parameter_171 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(matmul_24, full_int_array_2) + del matmul_24 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(matmul_25, full_int_array_2) + del matmul_25 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(matmul_26, full_int_array_2) + del matmul_26 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + parameter_41, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_41 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + parameter_40, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_40 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_12 = paddle._C_ops.strided_slice( + transpose_12, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_13 = paddle._C_ops.strided_slice( + transpose_12, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_12 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_24 = paddle._C_ops.multiply(strided_slice_12, slice_7) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_25 = paddle._C_ops.multiply(strided_slice_13, slice_6) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_6 = paddle._C_ops.subtract(multiply_24, multiply_25) + del multiply_24, multiply_25 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_26 = paddle._C_ops.multiply(strided_slice_12, slice_6) + del strided_slice_12 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_27 = paddle._C_ops.multiply(strided_slice_13, slice_7) + del strided_slice_13 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_16 = paddle._C_ops.add(multiply_26, multiply_27) + del multiply_26, multiply_27 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_6 = [subtract_6, add_16] + del add_16, subtract_6 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_6 = paddle._C_ops.stack(combine_6, -1) + del combine_6 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_6 = paddle._C_ops.flatten(stack_6, 3, 4) + del stack_6 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_14 = paddle._C_ops.strided_slice( + transpose_13, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_15 = paddle._C_ops.strided_slice( + transpose_13, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_13 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_28 = paddle._C_ops.multiply(strided_slice_14, slice_7) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_29 = paddle._C_ops.multiply(strided_slice_15, slice_6) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_7 = paddle._C_ops.subtract(multiply_28, multiply_29) + del multiply_28, multiply_29 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_30 = paddle._C_ops.multiply(strided_slice_14, slice_6) + del slice_6, strided_slice_14 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_31 = paddle._C_ops.multiply(strided_slice_15, slice_7) + del slice_7, strided_slice_15 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_17 = paddle._C_ops.add(multiply_30, multiply_31) + del multiply_30, multiply_31 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_7 = [subtract_7, add_17] + del add_17, subtract_7 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_7 = paddle._C_ops.stack(combine_7, -1) + del combine_7 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_7 = paddle._C_ops.flatten(stack_7, 3, 4) + del stack_7 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_27 = paddle._C_ops.matmul(flatten_6, flatten_7, False, True) + del flatten_6, flatten_7 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_11 = paddle._C_ops.scale(matmul_27, full_4, float("0"), True) + del matmul_27 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_18 = paddle._C_ops.add(scale_11, unsqueeze_0) + del scale_11 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_3 = paddle._C_ops.softmax(add_18, -1) + del add_18 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_28 = paddle._C_ops.matmul(dropout_20, transpose_14, False, False) + del dropout_20, transpose_14 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_8) + del transpose_15 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_29 = paddle._C_ops.matmul(reshape_15, parameter_170, False, False) + del parameter_170, reshape_15 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_29, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_29 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_19 = paddle._C_ops.add(divide_7, dropout_22) + del divide_7, dropout_22 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_7 = paddle._C_ops.square(add_19) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_7 = paddle._C_ops.mean(square_7, full_int_array_1, True) + del square_7 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_12 = paddle._C_ops.scale(mean_7, full_2, float("1e-12"), True) + del mean_7 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_7 = paddle._C_ops.sqrt(scale_12) + del scale_12 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_8 = paddle._C_ops.divide(add_19, sqrt_7) + del add_19, sqrt_7 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_30 = paddle._C_ops.matmul(divide_8, parameter_169, False, False) + del parameter_169 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_3 = paddle._C_ops.relu(matmul_30) + del matmul_30 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_31 = paddle._C_ops.matmul(relu_3, parameter_168, False, False) + del parameter_168, relu_3 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_31, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_31 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_20 = paddle._C_ops.add(divide_8, dropout_24) + del divide_8, dropout_24 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_8 = paddle._C_ops.square(add_20) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_8 = paddle._C_ops.mean(square_8, full_int_array_1, True) + del square_8 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_13 = paddle._C_ops.scale(mean_8, full_2, float("1e-12"), True) + del mean_8 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_8 = paddle._C_ops.sqrt(scale_13) + del scale_13 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_9 = paddle._C_ops.divide(add_20, sqrt_8) + del add_20, sqrt_8 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_32 = paddle._C_ops.matmul(divide_9, parameter_167, False, False) + del parameter_167 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_33 = paddle._C_ops.matmul(divide_9, parameter_166, False, False) + del parameter_166 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_34 = paddle._C_ops.matmul(divide_9, parameter_165, False, False) + del parameter_165 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(matmul_32, full_int_array_2) + del matmul_32 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(matmul_33, full_int_array_2) + del matmul_33 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(matmul_34, full_int_array_2) + del matmul_34 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + parameter_39, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_39 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + parameter_38, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_38 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_16 = paddle._C_ops.strided_slice( + transpose_16, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_17 = paddle._C_ops.strided_slice( + transpose_16, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_16 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_32 = paddle._C_ops.multiply(strided_slice_16, slice_9) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_33 = paddle._C_ops.multiply(strided_slice_17, slice_8) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_8 = paddle._C_ops.subtract(multiply_32, multiply_33) + del multiply_32, multiply_33 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_34 = paddle._C_ops.multiply(strided_slice_16, slice_8) + del strided_slice_16 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_35 = paddle._C_ops.multiply(strided_slice_17, slice_9) + del strided_slice_17 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_21 = paddle._C_ops.add(multiply_34, multiply_35) + del multiply_34, multiply_35 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_8 = [subtract_8, add_21] + del add_21, subtract_8 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_8 = paddle._C_ops.stack(combine_8, -1) + del combine_8 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_8 = paddle._C_ops.flatten(stack_8, 3, 4) + del stack_8 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_18 = paddle._C_ops.strided_slice( + transpose_17, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_19 = paddle._C_ops.strided_slice( + transpose_17, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_17 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_36 = paddle._C_ops.multiply(strided_slice_18, slice_9) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_37 = paddle._C_ops.multiply(strided_slice_19, slice_8) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_9 = paddle._C_ops.subtract(multiply_36, multiply_37) + del multiply_36, multiply_37 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_38 = paddle._C_ops.multiply(strided_slice_18, slice_8) + del slice_8, strided_slice_18 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_39 = paddle._C_ops.multiply(strided_slice_19, slice_9) + del slice_9, strided_slice_19 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_22 = paddle._C_ops.add(multiply_38, multiply_39) + del multiply_38, multiply_39 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_9 = [subtract_9, add_22] + del add_22, subtract_9 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_9 = paddle._C_ops.stack(combine_9, -1) + del combine_9 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_9 = paddle._C_ops.flatten(stack_9, 3, 4) + del stack_9 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_35 = paddle._C_ops.matmul(flatten_8, flatten_9, False, True) + del flatten_8, flatten_9 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_14 = paddle._C_ops.scale(matmul_35, full_4, float("0"), True) + del matmul_35 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_23 = paddle._C_ops.add(scale_14, unsqueeze_0) + del scale_14 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_4 = paddle._C_ops.softmax(add_23, -1) + del add_23 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_36 = paddle._C_ops.matmul(dropout_26, transpose_18, False, False) + del dropout_26, transpose_18 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_36, [0, 2, 1, 3]) + del matmul_36 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_8) + del transpose_19 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_37 = paddle._C_ops.matmul(reshape_19, parameter_164, False, False) + del parameter_164, reshape_19 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_37, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_37 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_24 = paddle._C_ops.add(divide_9, dropout_28) + del divide_9, dropout_28 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_9 = paddle._C_ops.square(add_24) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_9 = paddle._C_ops.mean(square_9, full_int_array_1, True) + del square_9 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_15 = paddle._C_ops.scale(mean_9, full_2, float("1e-12"), True) + del mean_9 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_9 = paddle._C_ops.sqrt(scale_15) + del scale_15 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_10 = paddle._C_ops.divide(add_24, sqrt_9) + del add_24, sqrt_9 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_38 = paddle._C_ops.matmul(divide_10, parameter_163, False, False) + del parameter_163 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_4 = paddle._C_ops.relu(matmul_38) + del matmul_38 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_39 = paddle._C_ops.matmul(relu_4, parameter_162, False, False) + del parameter_162, relu_4 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_39, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_39 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_25 = paddle._C_ops.add(divide_10, dropout_30) + del divide_10, dropout_30 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_10 = paddle._C_ops.square(add_25) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_10 = paddle._C_ops.mean(square_10, full_int_array_1, True) + del square_10 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_16 = paddle._C_ops.scale(mean_10, full_2, float("1e-12"), True) + del mean_10 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_10 = paddle._C_ops.sqrt(scale_16) + del scale_16 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_11 = paddle._C_ops.divide(add_25, sqrt_10) + del add_25, sqrt_10 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_40 = paddle._C_ops.matmul(divide_11, parameter_161, False, False) + del parameter_161 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_41 = paddle._C_ops.matmul(divide_11, parameter_160, False, False) + del parameter_160 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_42 = paddle._C_ops.matmul(divide_11, parameter_159, False, False) + del parameter_159 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(matmul_40, full_int_array_2) + del matmul_40 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(matmul_41, full_int_array_2) + del matmul_41 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(matmul_42, full_int_array_2) + del matmul_42 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + parameter_37, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_37 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + parameter_36, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_36 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_20 = paddle._C_ops.strided_slice( + transpose_20, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_21 = paddle._C_ops.strided_slice( + transpose_20, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_20 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_40 = paddle._C_ops.multiply(strided_slice_20, slice_11) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_41 = paddle._C_ops.multiply(strided_slice_21, slice_10) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_10 = paddle._C_ops.subtract(multiply_40, multiply_41) + del multiply_40, multiply_41 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_42 = paddle._C_ops.multiply(strided_slice_20, slice_10) + del strided_slice_20 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_43 = paddle._C_ops.multiply(strided_slice_21, slice_11) + del strided_slice_21 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_26 = paddle._C_ops.add(multiply_42, multiply_43) + del multiply_42, multiply_43 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_10 = [subtract_10, add_26] + del add_26, subtract_10 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_10 = paddle._C_ops.stack(combine_10, -1) + del combine_10 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_10 = paddle._C_ops.flatten(stack_10, 3, 4) + del stack_10 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_22 = paddle._C_ops.strided_slice( + transpose_21, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_23 = paddle._C_ops.strided_slice( + transpose_21, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_21 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_44 = paddle._C_ops.multiply(strided_slice_22, slice_11) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_45 = paddle._C_ops.multiply(strided_slice_23, slice_10) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_11 = paddle._C_ops.subtract(multiply_44, multiply_45) + del multiply_44, multiply_45 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_46 = paddle._C_ops.multiply(strided_slice_22, slice_10) + del slice_10, strided_slice_22 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_47 = paddle._C_ops.multiply(strided_slice_23, slice_11) + del slice_11, strided_slice_23 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_27 = paddle._C_ops.add(multiply_46, multiply_47) + del multiply_46, multiply_47 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_11 = [subtract_11, add_27] + del add_27, subtract_11 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_11 = paddle._C_ops.stack(combine_11, -1) + del combine_11 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_11 = paddle._C_ops.flatten(stack_11, 3, 4) + del stack_11 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_43 = paddle._C_ops.matmul(flatten_10, flatten_11, False, True) + del flatten_10, flatten_11 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_17 = paddle._C_ops.scale(matmul_43, full_4, float("0"), True) + del matmul_43 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_28 = paddle._C_ops.add(scale_17, unsqueeze_0) + del scale_17 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_5 = paddle._C_ops.softmax(add_28, -1) + del add_28 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_44 = paddle._C_ops.matmul(dropout_32, transpose_22, False, False) + del dropout_32, transpose_22 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_44, [0, 2, 1, 3]) + del matmul_44 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_8) + del transpose_23 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_45 = paddle._C_ops.matmul(reshape_23, parameter_158, False, False) + del parameter_158, reshape_23 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_45, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_45 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_29 = paddle._C_ops.add(divide_11, dropout_34) + del divide_11, dropout_34 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_11 = paddle._C_ops.square(add_29) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_11 = paddle._C_ops.mean(square_11, full_int_array_1, True) + del square_11 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_18 = paddle._C_ops.scale(mean_11, full_2, float("1e-12"), True) + del mean_11 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_11 = paddle._C_ops.sqrt(scale_18) + del scale_18 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_12 = paddle._C_ops.divide(add_29, sqrt_11) + del add_29, sqrt_11 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_46 = paddle._C_ops.matmul(divide_12, parameter_157, False, False) + del parameter_157 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_5 = paddle._C_ops.relu(matmul_46) + del matmul_46 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_47 = paddle._C_ops.matmul(relu_5, parameter_156, False, False) + del parameter_156, relu_5 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_47, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_47 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_30 = paddle._C_ops.add(divide_12, dropout_36) + del divide_12, dropout_36 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_12 = paddle._C_ops.square(add_30) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_12 = paddle._C_ops.mean(square_12, full_int_array_1, True) + del square_12 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_19 = paddle._C_ops.scale(mean_12, full_2, float("1e-12"), True) + del mean_12 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_12 = paddle._C_ops.sqrt(scale_19) + del scale_19 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_13 = paddle._C_ops.divide(add_30, sqrt_12) + del add_30, sqrt_12 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_48 = paddle._C_ops.matmul(divide_13, parameter_155, False, False) + del parameter_155 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_49 = paddle._C_ops.matmul(divide_13, parameter_154, False, False) + del parameter_154 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_50 = paddle._C_ops.matmul(divide_13, parameter_153, False, False) + del parameter_153 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_24 = paddle._C_ops.reshape(matmul_48, full_int_array_2) + del matmul_48 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_24 = paddle._C_ops.transpose(reshape_24, [0, 2, 1, 3]) + del reshape_24 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_25 = paddle._C_ops.reshape(matmul_49, full_int_array_2) + del matmul_49 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_25 = paddle._C_ops.transpose(reshape_25, [0, 2, 1, 3]) + del reshape_25 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_26 = paddle._C_ops.reshape(matmul_50, full_int_array_2) + del matmul_50 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_26 = paddle._C_ops.transpose(reshape_26, [0, 2, 1, 3]) + del reshape_26 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_12 = paddle._C_ops.slice( + parameter_35, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_35 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_13 = paddle._C_ops.slice( + parameter_34, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_34 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_24 = paddle._C_ops.strided_slice( + transpose_24, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_25 = paddle._C_ops.strided_slice( + transpose_24, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_24 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_48 = paddle._C_ops.multiply(strided_slice_24, slice_13) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_49 = paddle._C_ops.multiply(strided_slice_25, slice_12) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_12 = paddle._C_ops.subtract(multiply_48, multiply_49) + del multiply_48, multiply_49 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_50 = paddle._C_ops.multiply(strided_slice_24, slice_12) + del strided_slice_24 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_51 = paddle._C_ops.multiply(strided_slice_25, slice_13) + del strided_slice_25 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_31 = paddle._C_ops.add(multiply_50, multiply_51) + del multiply_50, multiply_51 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_12 = [subtract_12, add_31] + del add_31, subtract_12 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_12 = paddle._C_ops.stack(combine_12, -1) + del combine_12 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_12 = paddle._C_ops.flatten(stack_12, 3, 4) + del stack_12 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_26 = paddle._C_ops.strided_slice( + transpose_25, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_27 = paddle._C_ops.strided_slice( + transpose_25, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_25 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_52 = paddle._C_ops.multiply(strided_slice_26, slice_13) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_53 = paddle._C_ops.multiply(strided_slice_27, slice_12) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_13 = paddle._C_ops.subtract(multiply_52, multiply_53) + del multiply_52, multiply_53 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_54 = paddle._C_ops.multiply(strided_slice_26, slice_12) + del slice_12, strided_slice_26 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_55 = paddle._C_ops.multiply(strided_slice_27, slice_13) + del slice_13, strided_slice_27 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_32 = paddle._C_ops.add(multiply_54, multiply_55) + del multiply_54, multiply_55 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_13 = [subtract_13, add_32] + del add_32, subtract_13 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_13 = paddle._C_ops.stack(combine_13, -1) + del combine_13 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_13 = paddle._C_ops.flatten(stack_13, 3, 4) + del stack_13 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_51 = paddle._C_ops.matmul(flatten_12, flatten_13, False, True) + del flatten_12, flatten_13 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_20 = paddle._C_ops.scale(matmul_51, full_4, float("0"), True) + del matmul_51 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_33 = paddle._C_ops.add(scale_20, unsqueeze_0) + del scale_20 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_6 = paddle._C_ops.softmax(add_33, -1) + del add_33 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_38, dropout_39 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_6, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_6 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_52 = paddle._C_ops.matmul(dropout_38, transpose_26, False, False) + del dropout_38, transpose_26 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_27 = paddle._C_ops.transpose(matmul_52, [0, 2, 1, 3]) + del matmul_52 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_27 = paddle._C_ops.reshape(transpose_27, full_int_array_8) + del transpose_27 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_53 = paddle._C_ops.matmul(reshape_27, parameter_152, False, False) + del parameter_152, reshape_27 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_40, dropout_41 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_53, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_53 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_34 = paddle._C_ops.add(divide_13, dropout_40) + del divide_13, dropout_40 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_13 = paddle._C_ops.square(add_34) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_13 = paddle._C_ops.mean(square_13, full_int_array_1, True) + del square_13 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_21 = paddle._C_ops.scale(mean_13, full_2, float("1e-12"), True) + del mean_13 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_13 = paddle._C_ops.sqrt(scale_21) + del scale_21 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_14 = paddle._C_ops.divide(add_34, sqrt_13) + del add_34, sqrt_13 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_54 = paddle._C_ops.matmul(divide_14, parameter_151, False, False) + del parameter_151 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_6 = paddle._C_ops.relu(matmul_54) + del matmul_54 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_55 = paddle._C_ops.matmul(relu_6, parameter_150, False, False) + del parameter_150, relu_6 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_42, dropout_43 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_55, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_55 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_35 = paddle._C_ops.add(divide_14, dropout_42) + del divide_14, dropout_42 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_14 = paddle._C_ops.square(add_35) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_14 = paddle._C_ops.mean(square_14, full_int_array_1, True) + del square_14 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_22 = paddle._C_ops.scale(mean_14, full_2, float("1e-12"), True) + del mean_14 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_14 = paddle._C_ops.sqrt(scale_22) + del scale_22 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_15 = paddle._C_ops.divide(add_35, sqrt_14) + del add_35, sqrt_14 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_56 = paddle._C_ops.matmul(divide_15, parameter_149, False, False) + del parameter_149 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_57 = paddle._C_ops.matmul(divide_15, parameter_148, False, False) + del parameter_148 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_58 = paddle._C_ops.matmul(divide_15, parameter_147, False, False) + del parameter_147 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_28 = paddle._C_ops.reshape(matmul_56, full_int_array_2) + del matmul_56 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_28 = paddle._C_ops.transpose(reshape_28, [0, 2, 1, 3]) + del reshape_28 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_29 = paddle._C_ops.reshape(matmul_57, full_int_array_2) + del matmul_57 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_29 = paddle._C_ops.transpose(reshape_29, [0, 2, 1, 3]) + del reshape_29 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_30 = paddle._C_ops.reshape(matmul_58, full_int_array_2) + del matmul_58 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_30 = paddle._C_ops.transpose(reshape_30, [0, 2, 1, 3]) + del reshape_30 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_14 = paddle._C_ops.slice( + parameter_33, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_33 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_15 = paddle._C_ops.slice( + parameter_32, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_32 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_28 = paddle._C_ops.strided_slice( + transpose_28, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_29 = paddle._C_ops.strided_slice( + transpose_28, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_28 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_56 = paddle._C_ops.multiply(strided_slice_28, slice_15) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_57 = paddle._C_ops.multiply(strided_slice_29, slice_14) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_14 = paddle._C_ops.subtract(multiply_56, multiply_57) + del multiply_56, multiply_57 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_58 = paddle._C_ops.multiply(strided_slice_28, slice_14) + del strided_slice_28 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_59 = paddle._C_ops.multiply(strided_slice_29, slice_15) + del strided_slice_29 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_36 = paddle._C_ops.add(multiply_58, multiply_59) + del multiply_58, multiply_59 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_14 = [subtract_14, add_36] + del add_36, subtract_14 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_14 = paddle._C_ops.stack(combine_14, -1) + del combine_14 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_14 = paddle._C_ops.flatten(stack_14, 3, 4) + del stack_14 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_30 = paddle._C_ops.strided_slice( + transpose_29, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_31 = paddle._C_ops.strided_slice( + transpose_29, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_29 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_60 = paddle._C_ops.multiply(strided_slice_30, slice_15) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_61 = paddle._C_ops.multiply(strided_slice_31, slice_14) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_15 = paddle._C_ops.subtract(multiply_60, multiply_61) + del multiply_60, multiply_61 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_62 = paddle._C_ops.multiply(strided_slice_30, slice_14) + del slice_14, strided_slice_30 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_63 = paddle._C_ops.multiply(strided_slice_31, slice_15) + del slice_15, strided_slice_31 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_37 = paddle._C_ops.add(multiply_62, multiply_63) + del multiply_62, multiply_63 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_15 = [subtract_15, add_37] + del add_37, subtract_15 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_15 = paddle._C_ops.stack(combine_15, -1) + del combine_15 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_15 = paddle._C_ops.flatten(stack_15, 3, 4) + del stack_15 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_59 = paddle._C_ops.matmul(flatten_14, flatten_15, False, True) + del flatten_14, flatten_15 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_23 = paddle._C_ops.scale(matmul_59, full_4, float("0"), True) + del matmul_59 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_38 = paddle._C_ops.add(scale_23, unsqueeze_0) + del scale_23 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_7 = paddle._C_ops.softmax(add_38, -1) + del add_38 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_44, dropout_45 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_7 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_60 = paddle._C_ops.matmul(dropout_44, transpose_30, False, False) + del dropout_44, transpose_30 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_31 = paddle._C_ops.transpose(matmul_60, [0, 2, 1, 3]) + del matmul_60 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_31 = paddle._C_ops.reshape(transpose_31, full_int_array_8) + del transpose_31 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_61 = paddle._C_ops.matmul(reshape_31, parameter_146, False, False) + del parameter_146, reshape_31 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_46, dropout_47 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_61, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_61 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_39 = paddle._C_ops.add(divide_15, dropout_46) + del divide_15, dropout_46 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_15 = paddle._C_ops.square(add_39) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_15 = paddle._C_ops.mean(square_15, full_int_array_1, True) + del square_15 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_24 = paddle._C_ops.scale(mean_15, full_2, float("1e-12"), True) + del mean_15 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_15 = paddle._C_ops.sqrt(scale_24) + del scale_24 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_16 = paddle._C_ops.divide(add_39, sqrt_15) + del add_39, sqrt_15 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_62 = paddle._C_ops.matmul(divide_16, parameter_145, False, False) + del parameter_145 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_7 = paddle._C_ops.relu(matmul_62) + del matmul_62 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_63 = paddle._C_ops.matmul(relu_7, parameter_144, False, False) + del parameter_144, relu_7 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_48, dropout_49 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_63, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_63 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_40 = paddle._C_ops.add(divide_16, dropout_48) + del divide_16, dropout_48 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_16 = paddle._C_ops.square(add_40) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_16 = paddle._C_ops.mean(square_16, full_int_array_1, True) + del square_16 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_25 = paddle._C_ops.scale(mean_16, full_2, float("1e-12"), True) + del mean_16 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_16 = paddle._C_ops.sqrt(scale_25) + del scale_25 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_17 = paddle._C_ops.divide(add_40, sqrt_16) + del add_40, sqrt_16 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_64 = paddle._C_ops.matmul(divide_17, parameter_143, False, False) + del parameter_143 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_65 = paddle._C_ops.matmul(divide_17, parameter_142, False, False) + del parameter_142 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_66 = paddle._C_ops.matmul(divide_17, parameter_141, False, False) + del parameter_141 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_32 = paddle._C_ops.reshape(matmul_64, full_int_array_2) + del matmul_64 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_32 = paddle._C_ops.transpose(reshape_32, [0, 2, 1, 3]) + del reshape_32 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_33 = paddle._C_ops.reshape(matmul_65, full_int_array_2) + del matmul_65 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_33 = paddle._C_ops.transpose(reshape_33, [0, 2, 1, 3]) + del reshape_33 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_34 = paddle._C_ops.reshape(matmul_66, full_int_array_2) + del matmul_66 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_34 = paddle._C_ops.transpose(reshape_34, [0, 2, 1, 3]) + del reshape_34 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_16 = paddle._C_ops.slice( + parameter_31, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_31 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_17 = paddle._C_ops.slice( + parameter_30, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_30 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_32 = paddle._C_ops.strided_slice( + transpose_32, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_33 = paddle._C_ops.strided_slice( + transpose_32, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_32 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_64 = paddle._C_ops.multiply(strided_slice_32, slice_17) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_65 = paddle._C_ops.multiply(strided_slice_33, slice_16) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_16 = paddle._C_ops.subtract(multiply_64, multiply_65) + del multiply_64, multiply_65 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_66 = paddle._C_ops.multiply(strided_slice_32, slice_16) + del strided_slice_32 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_67 = paddle._C_ops.multiply(strided_slice_33, slice_17) + del strided_slice_33 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_41 = paddle._C_ops.add(multiply_66, multiply_67) + del multiply_66, multiply_67 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_16 = [subtract_16, add_41] + del add_41, subtract_16 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_16 = paddle._C_ops.stack(combine_16, -1) + del combine_16 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_16 = paddle._C_ops.flatten(stack_16, 3, 4) + del stack_16 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_34 = paddle._C_ops.strided_slice( + transpose_33, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_35 = paddle._C_ops.strided_slice( + transpose_33, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_33 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_68 = paddle._C_ops.multiply(strided_slice_34, slice_17) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_69 = paddle._C_ops.multiply(strided_slice_35, slice_16) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_17 = paddle._C_ops.subtract(multiply_68, multiply_69) + del multiply_68, multiply_69 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_70 = paddle._C_ops.multiply(strided_slice_34, slice_16) + del slice_16, strided_slice_34 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_71 = paddle._C_ops.multiply(strided_slice_35, slice_17) + del slice_17, strided_slice_35 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_42 = paddle._C_ops.add(multiply_70, multiply_71) + del multiply_70, multiply_71 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_17 = [subtract_17, add_42] + del add_42, subtract_17 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_17 = paddle._C_ops.stack(combine_17, -1) + del combine_17 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_17 = paddle._C_ops.flatten(stack_17, 3, 4) + del stack_17 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_67 = paddle._C_ops.matmul(flatten_16, flatten_17, False, True) + del flatten_16, flatten_17 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_26 = paddle._C_ops.scale(matmul_67, full_4, float("0"), True) + del matmul_67 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_43 = paddle._C_ops.add(scale_26, unsqueeze_0) + del scale_26 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_8 = paddle._C_ops.softmax(add_43, -1) + del add_43 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_50, dropout_51 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_8, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_8 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_68 = paddle._C_ops.matmul(dropout_50, transpose_34, False, False) + del dropout_50, transpose_34 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_35 = paddle._C_ops.transpose(matmul_68, [0, 2, 1, 3]) + del matmul_68 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_35 = paddle._C_ops.reshape(transpose_35, full_int_array_8) + del transpose_35 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_69 = paddle._C_ops.matmul(reshape_35, parameter_140, False, False) + del parameter_140, reshape_35 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_52, dropout_53 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_69, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_69 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_44 = paddle._C_ops.add(divide_17, dropout_52) + del divide_17, dropout_52 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_17 = paddle._C_ops.square(add_44) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_17 = paddle._C_ops.mean(square_17, full_int_array_1, True) + del square_17 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_27 = paddle._C_ops.scale(mean_17, full_2, float("1e-12"), True) + del mean_17 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_17 = paddle._C_ops.sqrt(scale_27) + del scale_27 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_18 = paddle._C_ops.divide(add_44, sqrt_17) + del add_44, sqrt_17 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_70 = paddle._C_ops.matmul(divide_18, parameter_139, False, False) + del parameter_139 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_8 = paddle._C_ops.relu(matmul_70) + del matmul_70 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_71 = paddle._C_ops.matmul(relu_8, parameter_138, False, False) + del parameter_138, relu_8 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_54, dropout_55 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_71, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_71 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_45 = paddle._C_ops.add(divide_18, dropout_54) + del divide_18, dropout_54 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_18 = paddle._C_ops.square(add_45) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_18 = paddle._C_ops.mean(square_18, full_int_array_1, True) + del square_18 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_28 = paddle._C_ops.scale(mean_18, full_2, float("1e-12"), True) + del mean_18 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_18 = paddle._C_ops.sqrt(scale_28) + del scale_28 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_19 = paddle._C_ops.divide(add_45, sqrt_18) + del add_45, sqrt_18 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_72 = paddle._C_ops.matmul(divide_19, parameter_137, False, False) + del parameter_137 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_73 = paddle._C_ops.matmul(divide_19, parameter_136, False, False) + del parameter_136 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_74 = paddle._C_ops.matmul(divide_19, parameter_135, False, False) + del parameter_135 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_36 = paddle._C_ops.reshape(matmul_72, full_int_array_2) + del matmul_72 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_36 = paddle._C_ops.transpose(reshape_36, [0, 2, 1, 3]) + del reshape_36 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_37 = paddle._C_ops.reshape(matmul_73, full_int_array_2) + del matmul_73 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_37 = paddle._C_ops.transpose(reshape_37, [0, 2, 1, 3]) + del reshape_37 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_38 = paddle._C_ops.reshape(matmul_74, full_int_array_2) + del matmul_74 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_38 = paddle._C_ops.transpose(reshape_38, [0, 2, 1, 3]) + del reshape_38 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_18 = paddle._C_ops.slice( + parameter_29, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_29 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_19 = paddle._C_ops.slice( + parameter_28, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_28 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_36 = paddle._C_ops.strided_slice( + transpose_36, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_37 = paddle._C_ops.strided_slice( + transpose_36, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_36 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_72 = paddle._C_ops.multiply(strided_slice_36, slice_19) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_73 = paddle._C_ops.multiply(strided_slice_37, slice_18) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_18 = paddle._C_ops.subtract(multiply_72, multiply_73) + del multiply_72, multiply_73 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_74 = paddle._C_ops.multiply(strided_slice_36, slice_18) + del strided_slice_36 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_75 = paddle._C_ops.multiply(strided_slice_37, slice_19) + del strided_slice_37 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_46 = paddle._C_ops.add(multiply_74, multiply_75) + del multiply_74, multiply_75 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_18 = [subtract_18, add_46] + del add_46, subtract_18 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_18 = paddle._C_ops.stack(combine_18, -1) + del combine_18 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_18 = paddle._C_ops.flatten(stack_18, 3, 4) + del stack_18 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_38 = paddle._C_ops.strided_slice( + transpose_37, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_39 = paddle._C_ops.strided_slice( + transpose_37, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_37 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_76 = paddle._C_ops.multiply(strided_slice_38, slice_19) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_77 = paddle._C_ops.multiply(strided_slice_39, slice_18) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_19 = paddle._C_ops.subtract(multiply_76, multiply_77) + del multiply_76, multiply_77 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_78 = paddle._C_ops.multiply(strided_slice_38, slice_18) + del slice_18, strided_slice_38 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_79 = paddle._C_ops.multiply(strided_slice_39, slice_19) + del slice_19, strided_slice_39 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_47 = paddle._C_ops.add(multiply_78, multiply_79) + del multiply_78, multiply_79 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_19 = [subtract_19, add_47] + del add_47, subtract_19 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_19 = paddle._C_ops.stack(combine_19, -1) + del combine_19 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_19 = paddle._C_ops.flatten(stack_19, 3, 4) + del stack_19 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_75 = paddle._C_ops.matmul(flatten_18, flatten_19, False, True) + del flatten_18, flatten_19 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_29 = paddle._C_ops.scale(matmul_75, full_4, float("0"), True) + del matmul_75 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_48 = paddle._C_ops.add(scale_29, unsqueeze_0) + del scale_29 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_9 = paddle._C_ops.softmax(add_48, -1) + del add_48 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_56, dropout_57 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_9, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_9 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_76 = paddle._C_ops.matmul(dropout_56, transpose_38, False, False) + del dropout_56, transpose_38 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_39 = paddle._C_ops.transpose(matmul_76, [0, 2, 1, 3]) + del matmul_76 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_39 = paddle._C_ops.reshape(transpose_39, full_int_array_8) + del transpose_39 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_77 = paddle._C_ops.matmul(reshape_39, parameter_134, False, False) + del parameter_134, reshape_39 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_58, dropout_59 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_77, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_77 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_49 = paddle._C_ops.add(divide_19, dropout_58) + del divide_19, dropout_58 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_19 = paddle._C_ops.square(add_49) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_19 = paddle._C_ops.mean(square_19, full_int_array_1, True) + del square_19 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_30 = paddle._C_ops.scale(mean_19, full_2, float("1e-12"), True) + del mean_19 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_19 = paddle._C_ops.sqrt(scale_30) + del scale_30 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_20 = paddle._C_ops.divide(add_49, sqrt_19) + del add_49, sqrt_19 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_78 = paddle._C_ops.matmul(divide_20, parameter_133, False, False) + del parameter_133 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_9 = paddle._C_ops.relu(matmul_78) + del matmul_78 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_79 = paddle._C_ops.matmul(relu_9, parameter_132, False, False) + del parameter_132, relu_9 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_60, dropout_61 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_79, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_79 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_50 = paddle._C_ops.add(divide_20, dropout_60) + del divide_20, dropout_60 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_20 = paddle._C_ops.square(add_50) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_20 = paddle._C_ops.mean(square_20, full_int_array_1, True) + del square_20 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_31 = paddle._C_ops.scale(mean_20, full_2, float("1e-12"), True) + del mean_20 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_20 = paddle._C_ops.sqrt(scale_31) + del scale_31 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_21 = paddle._C_ops.divide(add_50, sqrt_20) + del add_50, sqrt_20 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_80 = paddle._C_ops.matmul(divide_21, parameter_131, False, False) + del parameter_131 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_81 = paddle._C_ops.matmul(divide_21, parameter_130, False, False) + del parameter_130 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_82 = paddle._C_ops.matmul(divide_21, parameter_129, False, False) + del parameter_129 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_40 = paddle._C_ops.reshape(matmul_80, full_int_array_2) + del matmul_80 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_40 = paddle._C_ops.transpose(reshape_40, [0, 2, 1, 3]) + del reshape_40 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_41 = paddle._C_ops.reshape(matmul_81, full_int_array_2) + del matmul_81 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_41 = paddle._C_ops.transpose(reshape_41, [0, 2, 1, 3]) + del reshape_41 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_42 = paddle._C_ops.reshape(matmul_82, full_int_array_2) + del matmul_82 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_42 = paddle._C_ops.transpose(reshape_42, [0, 2, 1, 3]) + del reshape_42 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_20 = paddle._C_ops.slice( + parameter_27, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_27 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_21 = paddle._C_ops.slice( + parameter_26, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_26 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_40 = paddle._C_ops.strided_slice( + transpose_40, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_41 = paddle._C_ops.strided_slice( + transpose_40, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_40 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_80 = paddle._C_ops.multiply(strided_slice_40, slice_21) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_81 = paddle._C_ops.multiply(strided_slice_41, slice_20) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_20 = paddle._C_ops.subtract(multiply_80, multiply_81) + del multiply_80, multiply_81 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_82 = paddle._C_ops.multiply(strided_slice_40, slice_20) + del strided_slice_40 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_83 = paddle._C_ops.multiply(strided_slice_41, slice_21) + del strided_slice_41 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_51 = paddle._C_ops.add(multiply_82, multiply_83) + del multiply_82, multiply_83 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_20 = [subtract_20, add_51] + del add_51, subtract_20 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_20 = paddle._C_ops.stack(combine_20, -1) + del combine_20 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_20 = paddle._C_ops.flatten(stack_20, 3, 4) + del stack_20 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_42 = paddle._C_ops.strided_slice( + transpose_41, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_43 = paddle._C_ops.strided_slice( + transpose_41, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_41 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_84 = paddle._C_ops.multiply(strided_slice_42, slice_21) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_85 = paddle._C_ops.multiply(strided_slice_43, slice_20) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_21 = paddle._C_ops.subtract(multiply_84, multiply_85) + del multiply_84, multiply_85 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_86 = paddle._C_ops.multiply(strided_slice_42, slice_20) + del slice_20, strided_slice_42 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_87 = paddle._C_ops.multiply(strided_slice_43, slice_21) + del slice_21, strided_slice_43 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_52 = paddle._C_ops.add(multiply_86, multiply_87) + del multiply_86, multiply_87 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_21 = [subtract_21, add_52] + del add_52, subtract_21 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_21 = paddle._C_ops.stack(combine_21, -1) + del combine_21 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_21 = paddle._C_ops.flatten(stack_21, 3, 4) + del stack_21 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_83 = paddle._C_ops.matmul(flatten_20, flatten_21, False, True) + del flatten_20, flatten_21 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_32 = paddle._C_ops.scale(matmul_83, full_4, float("0"), True) + del matmul_83 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_53 = paddle._C_ops.add(scale_32, unsqueeze_0) + del scale_32 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_10 = paddle._C_ops.softmax(add_53, -1) + del add_53 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_62, dropout_63 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_10, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_10 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_84 = paddle._C_ops.matmul(dropout_62, transpose_42, False, False) + del dropout_62, transpose_42 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_43 = paddle._C_ops.transpose(matmul_84, [0, 2, 1, 3]) + del matmul_84 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_43 = paddle._C_ops.reshape(transpose_43, full_int_array_8) + del transpose_43 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_85 = paddle._C_ops.matmul(reshape_43, parameter_128, False, False) + del parameter_128, reshape_43 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_64, dropout_65 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_85, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_85 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_54 = paddle._C_ops.add(divide_21, dropout_64) + del divide_21, dropout_64 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_21 = paddle._C_ops.square(add_54) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_21 = paddle._C_ops.mean(square_21, full_int_array_1, True) + del square_21 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_33 = paddle._C_ops.scale(mean_21, full_2, float("1e-12"), True) + del mean_21 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_21 = paddle._C_ops.sqrt(scale_33) + del scale_33 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_22 = paddle._C_ops.divide(add_54, sqrt_21) + del add_54, sqrt_21 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_86 = paddle._C_ops.matmul(divide_22, parameter_127, False, False) + del parameter_127 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_10 = paddle._C_ops.relu(matmul_86) + del matmul_86 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_87 = paddle._C_ops.matmul(relu_10, parameter_126, False, False) + del parameter_126, relu_10 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_66, dropout_67 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_87, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_87 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_55 = paddle._C_ops.add(divide_22, dropout_66) + del divide_22, dropout_66 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_22 = paddle._C_ops.square(add_55) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_22 = paddle._C_ops.mean(square_22, full_int_array_1, True) + del square_22 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_34 = paddle._C_ops.scale(mean_22, full_2, float("1e-12"), True) + del mean_22 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_22 = paddle._C_ops.sqrt(scale_34) + del scale_34 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_23 = paddle._C_ops.divide(add_55, sqrt_22) + del add_55, sqrt_22 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_88 = paddle._C_ops.matmul(divide_23, parameter_125, False, False) + del parameter_125 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_89 = paddle._C_ops.matmul(divide_23, parameter_124, False, False) + del parameter_124 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_90 = paddle._C_ops.matmul(divide_23, parameter_123, False, False) + del parameter_123 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_44 = paddle._C_ops.reshape(matmul_88, full_int_array_2) + del matmul_88 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_44 = paddle._C_ops.transpose(reshape_44, [0, 2, 1, 3]) + del reshape_44 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_45 = paddle._C_ops.reshape(matmul_89, full_int_array_2) + del matmul_89 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_45 = paddle._C_ops.transpose(reshape_45, [0, 2, 1, 3]) + del reshape_45 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_46 = paddle._C_ops.reshape(matmul_90, full_int_array_2) + del matmul_90 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_46 = paddle._C_ops.transpose(reshape_46, [0, 2, 1, 3]) + del reshape_46 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_22 = paddle._C_ops.slice( + parameter_25, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_25 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_23 = paddle._C_ops.slice( + parameter_24, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_24 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_44 = paddle._C_ops.strided_slice( + transpose_44, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_45 = paddle._C_ops.strided_slice( + transpose_44, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_44 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_88 = paddle._C_ops.multiply(strided_slice_44, slice_23) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_89 = paddle._C_ops.multiply(strided_slice_45, slice_22) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_22 = paddle._C_ops.subtract(multiply_88, multiply_89) + del multiply_88, multiply_89 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_90 = paddle._C_ops.multiply(strided_slice_44, slice_22) + del strided_slice_44 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_91 = paddle._C_ops.multiply(strided_slice_45, slice_23) + del strided_slice_45 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_56 = paddle._C_ops.add(multiply_90, multiply_91) + del multiply_90, multiply_91 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_22 = [subtract_22, add_56] + del add_56, subtract_22 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_22 = paddle._C_ops.stack(combine_22, -1) + del combine_22 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_22 = paddle._C_ops.flatten(stack_22, 3, 4) + del stack_22 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_46 = paddle._C_ops.strided_slice( + transpose_45, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_47 = paddle._C_ops.strided_slice( + transpose_45, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_45 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_92 = paddle._C_ops.multiply(strided_slice_46, slice_23) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_93 = paddle._C_ops.multiply(strided_slice_47, slice_22) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_23 = paddle._C_ops.subtract(multiply_92, multiply_93) + del multiply_92, multiply_93 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_94 = paddle._C_ops.multiply(strided_slice_46, slice_22) + del slice_22, strided_slice_46 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_95 = paddle._C_ops.multiply(strided_slice_47, slice_23) + del slice_23, strided_slice_47 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_57 = paddle._C_ops.add(multiply_94, multiply_95) + del multiply_94, multiply_95 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_23 = [subtract_23, add_57] + del add_57, subtract_23 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_23 = paddle._C_ops.stack(combine_23, -1) + del combine_23 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_23 = paddle._C_ops.flatten(stack_23, 3, 4) + del stack_23 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_91 = paddle._C_ops.matmul(flatten_22, flatten_23, False, True) + del flatten_22, flatten_23 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_35 = paddle._C_ops.scale(matmul_91, full_4, float("0"), True) + del matmul_91 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_58 = paddle._C_ops.add(scale_35, unsqueeze_0) + del scale_35 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_11 = paddle._C_ops.softmax(add_58, -1) + del add_58 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_68, dropout_69 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_11, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_11 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_92 = paddle._C_ops.matmul(dropout_68, transpose_46, False, False) + del dropout_68, transpose_46 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_47 = paddle._C_ops.transpose(matmul_92, [0, 2, 1, 3]) + del matmul_92 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_47 = paddle._C_ops.reshape(transpose_47, full_int_array_8) + del transpose_47 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_93 = paddle._C_ops.matmul(reshape_47, parameter_122, False, False) + del parameter_122, reshape_47 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_70, dropout_71 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_93, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_93 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_59 = paddle._C_ops.add(divide_23, dropout_70) + del divide_23, dropout_70 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_23 = paddle._C_ops.square(add_59) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_23 = paddle._C_ops.mean(square_23, full_int_array_1, True) + del square_23 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_36 = paddle._C_ops.scale(mean_23, full_2, float("1e-12"), True) + del mean_23 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_23 = paddle._C_ops.sqrt(scale_36) + del scale_36 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_24 = paddle._C_ops.divide(add_59, sqrt_23) + del add_59, sqrt_23 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_94 = paddle._C_ops.matmul(divide_24, parameter_121, False, False) + del parameter_121 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_11 = paddle._C_ops.relu(matmul_94) + del matmul_94 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_95 = paddle._C_ops.matmul(relu_11, parameter_120, False, False) + del parameter_120, relu_11 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_72, dropout_73 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_95, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_95 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_60 = paddle._C_ops.add(divide_24, dropout_72) + del divide_24, dropout_72 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_24 = paddle._C_ops.square(add_60) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_24 = paddle._C_ops.mean(square_24, full_int_array_1, True) + del square_24 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_37 = paddle._C_ops.scale(mean_24, full_2, float("1e-12"), True) + del mean_24 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_24 = paddle._C_ops.sqrt(scale_37) + del scale_37 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_25 = paddle._C_ops.divide(add_60, sqrt_24) + del add_60, sqrt_24 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_96 = paddle._C_ops.matmul(divide_25, parameter_119, False, False) + del parameter_119 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_97 = paddle._C_ops.matmul(divide_25, parameter_118, False, False) + del parameter_118 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_98 = paddle._C_ops.matmul(divide_25, parameter_117, False, False) + del parameter_117 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_48 = paddle._C_ops.reshape(matmul_96, full_int_array_2) + del matmul_96 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_48 = paddle._C_ops.transpose(reshape_48, [0, 2, 1, 3]) + del reshape_48 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_49 = paddle._C_ops.reshape(matmul_97, full_int_array_2) + del matmul_97 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_49 = paddle._C_ops.transpose(reshape_49, [0, 2, 1, 3]) + del reshape_49 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_50 = paddle._C_ops.reshape(matmul_98, full_int_array_2) + del matmul_98 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_50 = paddle._C_ops.transpose(reshape_50, [0, 2, 1, 3]) + del reshape_50 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_24 = paddle._C_ops.slice( + parameter_23, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_23 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_25 = paddle._C_ops.slice( + parameter_22, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_22 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_48 = paddle._C_ops.strided_slice( + transpose_48, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_49 = paddle._C_ops.strided_slice( + transpose_48, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_48 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_96 = paddle._C_ops.multiply(strided_slice_48, slice_25) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_97 = paddle._C_ops.multiply(strided_slice_49, slice_24) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_24 = paddle._C_ops.subtract(multiply_96, multiply_97) + del multiply_96, multiply_97 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_98 = paddle._C_ops.multiply(strided_slice_48, slice_24) + del strided_slice_48 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_99 = paddle._C_ops.multiply(strided_slice_49, slice_25) + del strided_slice_49 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_61 = paddle._C_ops.add(multiply_98, multiply_99) + del multiply_98, multiply_99 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_24 = [subtract_24, add_61] + del add_61, subtract_24 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_24 = paddle._C_ops.stack(combine_24, -1) + del combine_24 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_24 = paddle._C_ops.flatten(stack_24, 3, 4) + del stack_24 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_50 = paddle._C_ops.strided_slice( + transpose_49, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_51 = paddle._C_ops.strided_slice( + transpose_49, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_49 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_100 = paddle._C_ops.multiply(strided_slice_50, slice_25) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_101 = paddle._C_ops.multiply(strided_slice_51, slice_24) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_25 = paddle._C_ops.subtract(multiply_100, multiply_101) + del multiply_100, multiply_101 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_102 = paddle._C_ops.multiply(strided_slice_50, slice_24) + del slice_24, strided_slice_50 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_103 = paddle._C_ops.multiply(strided_slice_51, slice_25) + del slice_25, strided_slice_51 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_62 = paddle._C_ops.add(multiply_102, multiply_103) + del multiply_102, multiply_103 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_25 = [subtract_25, add_62] + del add_62, subtract_25 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_25 = paddle._C_ops.stack(combine_25, -1) + del combine_25 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_25 = paddle._C_ops.flatten(stack_25, 3, 4) + del stack_25 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_99 = paddle._C_ops.matmul(flatten_24, flatten_25, False, True) + del flatten_24, flatten_25 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_38 = paddle._C_ops.scale(matmul_99, full_4, float("0"), True) + del matmul_99 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_63 = paddle._C_ops.add(scale_38, unsqueeze_0) + del scale_38 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_12 = paddle._C_ops.softmax(add_63, -1) + del add_63 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_74, dropout_75 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_12, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_12 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_100 = paddle._C_ops.matmul(dropout_74, transpose_50, False, False) + del dropout_74, transpose_50 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_51 = paddle._C_ops.transpose(matmul_100, [0, 2, 1, 3]) + del matmul_100 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_51 = paddle._C_ops.reshape(transpose_51, full_int_array_8) + del transpose_51 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_101 = paddle._C_ops.matmul(reshape_51, parameter_116, False, False) + del parameter_116, reshape_51 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_76, dropout_77 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_101, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_101 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_64 = paddle._C_ops.add(divide_25, dropout_76) + del divide_25, dropout_76 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_25 = paddle._C_ops.square(add_64) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_25 = paddle._C_ops.mean(square_25, full_int_array_1, True) + del square_25 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_39 = paddle._C_ops.scale(mean_25, full_2, float("1e-12"), True) + del mean_25 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_25 = paddle._C_ops.sqrt(scale_39) + del scale_39 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_26 = paddle._C_ops.divide(add_64, sqrt_25) + del add_64, sqrt_25 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_102 = paddle._C_ops.matmul(divide_26, parameter_115, False, False) + del parameter_115 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_12 = paddle._C_ops.relu(matmul_102) + del matmul_102 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_103 = paddle._C_ops.matmul(relu_12, parameter_114, False, False) + del parameter_114, relu_12 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_78, dropout_79 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_103, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_103 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_65 = paddle._C_ops.add(divide_26, dropout_78) + del divide_26, dropout_78 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_26 = paddle._C_ops.square(add_65) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_26 = paddle._C_ops.mean(square_26, full_int_array_1, True) + del square_26 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_40 = paddle._C_ops.scale(mean_26, full_2, float("1e-12"), True) + del mean_26 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_26 = paddle._C_ops.sqrt(scale_40) + del scale_40 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_27 = paddle._C_ops.divide(add_65, sqrt_26) + del add_65, sqrt_26 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_104 = paddle._C_ops.matmul(divide_27, parameter_113, False, False) + del parameter_113 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_105 = paddle._C_ops.matmul(divide_27, parameter_112, False, False) + del parameter_112 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_106 = paddle._C_ops.matmul(divide_27, parameter_111, False, False) + del parameter_111 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_52 = paddle._C_ops.reshape(matmul_104, full_int_array_2) + del matmul_104 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_52 = paddle._C_ops.transpose(reshape_52, [0, 2, 1, 3]) + del reshape_52 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_53 = paddle._C_ops.reshape(matmul_105, full_int_array_2) + del matmul_105 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_53 = paddle._C_ops.transpose(reshape_53, [0, 2, 1, 3]) + del reshape_53 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_54 = paddle._C_ops.reshape(matmul_106, full_int_array_2) + del matmul_106 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_54 = paddle._C_ops.transpose(reshape_54, [0, 2, 1, 3]) + del reshape_54 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_26 = paddle._C_ops.slice( + parameter_21, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_21 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_27 = paddle._C_ops.slice( + parameter_20, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_20 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_52 = paddle._C_ops.strided_slice( + transpose_52, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_53 = paddle._C_ops.strided_slice( + transpose_52, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_52 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_104 = paddle._C_ops.multiply(strided_slice_52, slice_27) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_105 = paddle._C_ops.multiply(strided_slice_53, slice_26) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_26 = paddle._C_ops.subtract(multiply_104, multiply_105) + del multiply_104, multiply_105 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_106 = paddle._C_ops.multiply(strided_slice_52, slice_26) + del strided_slice_52 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_107 = paddle._C_ops.multiply(strided_slice_53, slice_27) + del strided_slice_53 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_66 = paddle._C_ops.add(multiply_106, multiply_107) + del multiply_106, multiply_107 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_26 = [subtract_26, add_66] + del add_66, subtract_26 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_26 = paddle._C_ops.stack(combine_26, -1) + del combine_26 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_26 = paddle._C_ops.flatten(stack_26, 3, 4) + del stack_26 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_54 = paddle._C_ops.strided_slice( + transpose_53, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_55 = paddle._C_ops.strided_slice( + transpose_53, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_53 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_108 = paddle._C_ops.multiply(strided_slice_54, slice_27) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_109 = paddle._C_ops.multiply(strided_slice_55, slice_26) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_27 = paddle._C_ops.subtract(multiply_108, multiply_109) + del multiply_108, multiply_109 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_110 = paddle._C_ops.multiply(strided_slice_54, slice_26) + del slice_26, strided_slice_54 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_111 = paddle._C_ops.multiply(strided_slice_55, slice_27) + del slice_27, strided_slice_55 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_67 = paddle._C_ops.add(multiply_110, multiply_111) + del multiply_110, multiply_111 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_27 = [subtract_27, add_67] + del add_67, subtract_27 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_27 = paddle._C_ops.stack(combine_27, -1) + del combine_27 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_27 = paddle._C_ops.flatten(stack_27, 3, 4) + del stack_27 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_107 = paddle._C_ops.matmul(flatten_26, flatten_27, False, True) + del flatten_26, flatten_27 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_41 = paddle._C_ops.scale(matmul_107, full_4, float("0"), True) + del matmul_107 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_68 = paddle._C_ops.add(scale_41, unsqueeze_0) + del scale_41 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_13 = paddle._C_ops.softmax(add_68, -1) + del add_68 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_80, dropout_81 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_13, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_13 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_108 = paddle._C_ops.matmul(dropout_80, transpose_54, False, False) + del dropout_80, transpose_54 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_55 = paddle._C_ops.transpose(matmul_108, [0, 2, 1, 3]) + del matmul_108 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_55 = paddle._C_ops.reshape(transpose_55, full_int_array_8) + del transpose_55 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_109 = paddle._C_ops.matmul(reshape_55, parameter_110, False, False) + del parameter_110, reshape_55 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_82, dropout_83 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_109, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_109 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_69 = paddle._C_ops.add(divide_27, dropout_82) + del divide_27, dropout_82 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_27 = paddle._C_ops.square(add_69) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_27 = paddle._C_ops.mean(square_27, full_int_array_1, True) + del square_27 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_42 = paddle._C_ops.scale(mean_27, full_2, float("1e-12"), True) + del mean_27 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_27 = paddle._C_ops.sqrt(scale_42) + del scale_42 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_28 = paddle._C_ops.divide(add_69, sqrt_27) + del add_69, sqrt_27 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_110 = paddle._C_ops.matmul(divide_28, parameter_109, False, False) + del parameter_109 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_13 = paddle._C_ops.relu(matmul_110) + del matmul_110 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_111 = paddle._C_ops.matmul(relu_13, parameter_108, False, False) + del parameter_108, relu_13 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_84, dropout_85 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_111, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_111 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_70 = paddle._C_ops.add(divide_28, dropout_84) + del divide_28, dropout_84 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_28 = paddle._C_ops.square(add_70) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_28 = paddle._C_ops.mean(square_28, full_int_array_1, True) + del square_28 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_43 = paddle._C_ops.scale(mean_28, full_2, float("1e-12"), True) + del mean_28 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_28 = paddle._C_ops.sqrt(scale_43) + del scale_43 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_29 = paddle._C_ops.divide(add_70, sqrt_28) + del add_70, sqrt_28 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_112 = paddle._C_ops.matmul(divide_29, parameter_107, False, False) + del parameter_107 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_113 = paddle._C_ops.matmul(divide_29, parameter_106, False, False) + del parameter_106 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_114 = paddle._C_ops.matmul(divide_29, parameter_105, False, False) + del parameter_105 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_56 = paddle._C_ops.reshape(matmul_112, full_int_array_2) + del matmul_112 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_56 = paddle._C_ops.transpose(reshape_56, [0, 2, 1, 3]) + del reshape_56 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_57 = paddle._C_ops.reshape(matmul_113, full_int_array_2) + del matmul_113 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_57 = paddle._C_ops.transpose(reshape_57, [0, 2, 1, 3]) + del reshape_57 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_58 = paddle._C_ops.reshape(matmul_114, full_int_array_2) + del matmul_114 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_58 = paddle._C_ops.transpose(reshape_58, [0, 2, 1, 3]) + del reshape_58 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_28 = paddle._C_ops.slice( + parameter_19, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_19 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_29 = paddle._C_ops.slice( + parameter_18, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_18 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_56 = paddle._C_ops.strided_slice( + transpose_56, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_57 = paddle._C_ops.strided_slice( + transpose_56, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_56 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_112 = paddle._C_ops.multiply(strided_slice_56, slice_29) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_113 = paddle._C_ops.multiply(strided_slice_57, slice_28) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_28 = paddle._C_ops.subtract(multiply_112, multiply_113) + del multiply_112, multiply_113 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_114 = paddle._C_ops.multiply(strided_slice_56, slice_28) + del strided_slice_56 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_115 = paddle._C_ops.multiply(strided_slice_57, slice_29) + del strided_slice_57 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_71 = paddle._C_ops.add(multiply_114, multiply_115) + del multiply_114, multiply_115 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_28 = [subtract_28, add_71] + del add_71, subtract_28 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_28 = paddle._C_ops.stack(combine_28, -1) + del combine_28 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_28 = paddle._C_ops.flatten(stack_28, 3, 4) + del stack_28 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_58 = paddle._C_ops.strided_slice( + transpose_57, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_59 = paddle._C_ops.strided_slice( + transpose_57, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_57 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_116 = paddle._C_ops.multiply(strided_slice_58, slice_29) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_117 = paddle._C_ops.multiply(strided_slice_59, slice_28) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_29 = paddle._C_ops.subtract(multiply_116, multiply_117) + del multiply_116, multiply_117 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_118 = paddle._C_ops.multiply(strided_slice_58, slice_28) + del slice_28, strided_slice_58 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_119 = paddle._C_ops.multiply(strided_slice_59, slice_29) + del slice_29, strided_slice_59 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_72 = paddle._C_ops.add(multiply_118, multiply_119) + del multiply_118, multiply_119 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_29 = [subtract_29, add_72] + del add_72, subtract_29 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_29 = paddle._C_ops.stack(combine_29, -1) + del combine_29 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_29 = paddle._C_ops.flatten(stack_29, 3, 4) + del stack_29 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_115 = paddle._C_ops.matmul(flatten_28, flatten_29, False, True) + del flatten_28, flatten_29 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_44 = paddle._C_ops.scale(matmul_115, full_4, float("0"), True) + del matmul_115 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_73 = paddle._C_ops.add(scale_44, unsqueeze_0) + del scale_44 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_14 = paddle._C_ops.softmax(add_73, -1) + del add_73 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_86, dropout_87 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_14, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_14 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_116 = paddle._C_ops.matmul(dropout_86, transpose_58, False, False) + del dropout_86, transpose_58 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_59 = paddle._C_ops.transpose(matmul_116, [0, 2, 1, 3]) + del matmul_116 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_59 = paddle._C_ops.reshape(transpose_59, full_int_array_8) + del transpose_59 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_117 = paddle._C_ops.matmul(reshape_59, parameter_104, False, False) + del parameter_104, reshape_59 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_88, dropout_89 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_117, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_117 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_74 = paddle._C_ops.add(divide_29, dropout_88) + del divide_29, dropout_88 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_29 = paddle._C_ops.square(add_74) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_29 = paddle._C_ops.mean(square_29, full_int_array_1, True) + del square_29 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_45 = paddle._C_ops.scale(mean_29, full_2, float("1e-12"), True) + del mean_29 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_29 = paddle._C_ops.sqrt(scale_45) + del scale_45 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_30 = paddle._C_ops.divide(add_74, sqrt_29) + del add_74, sqrt_29 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_118 = paddle._C_ops.matmul(divide_30, parameter_103, False, False) + del parameter_103 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_14 = paddle._C_ops.relu(matmul_118) + del matmul_118 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_119 = paddle._C_ops.matmul(relu_14, parameter_102, False, False) + del parameter_102, relu_14 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_90, dropout_91 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_119, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_119 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_75 = paddle._C_ops.add(divide_30, dropout_90) + del divide_30, dropout_90 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_30 = paddle._C_ops.square(add_75) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_30 = paddle._C_ops.mean(square_30, full_int_array_1, True) + del square_30 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_46 = paddle._C_ops.scale(mean_30, full_2, float("1e-12"), True) + del mean_30 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_30 = paddle._C_ops.sqrt(scale_46) + del scale_46 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_31 = paddle._C_ops.divide(add_75, sqrt_30) + del add_75, sqrt_30 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_120 = paddle._C_ops.matmul(divide_31, parameter_101, False, False) + del parameter_101 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_121 = paddle._C_ops.matmul(divide_31, parameter_100, False, False) + del parameter_100 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_122 = paddle._C_ops.matmul(divide_31, parameter_99, False, False) + del parameter_99 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_60 = paddle._C_ops.reshape(matmul_120, full_int_array_2) + del matmul_120 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_60 = paddle._C_ops.transpose(reshape_60, [0, 2, 1, 3]) + del reshape_60 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_61 = paddle._C_ops.reshape(matmul_121, full_int_array_2) + del matmul_121 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_61 = paddle._C_ops.transpose(reshape_61, [0, 2, 1, 3]) + del reshape_61 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_62 = paddle._C_ops.reshape(matmul_122, full_int_array_2) + del matmul_122 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_62 = paddle._C_ops.transpose(reshape_62, [0, 2, 1, 3]) + del reshape_62 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_30 = paddle._C_ops.slice( + parameter_17, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_17 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_31 = paddle._C_ops.slice( + parameter_16, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_16 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_60 = paddle._C_ops.strided_slice( + transpose_60, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_61 = paddle._C_ops.strided_slice( + transpose_60, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_60 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_120 = paddle._C_ops.multiply(strided_slice_60, slice_31) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_121 = paddle._C_ops.multiply(strided_slice_61, slice_30) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_30 = paddle._C_ops.subtract(multiply_120, multiply_121) + del multiply_120, multiply_121 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_122 = paddle._C_ops.multiply(strided_slice_60, slice_30) + del strided_slice_60 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_123 = paddle._C_ops.multiply(strided_slice_61, slice_31) + del strided_slice_61 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_76 = paddle._C_ops.add(multiply_122, multiply_123) + del multiply_122, multiply_123 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_30 = [subtract_30, add_76] + del add_76, subtract_30 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_30 = paddle._C_ops.stack(combine_30, -1) + del combine_30 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_30 = paddle._C_ops.flatten(stack_30, 3, 4) + del stack_30 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_62 = paddle._C_ops.strided_slice( + transpose_61, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_63 = paddle._C_ops.strided_slice( + transpose_61, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_61 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_124 = paddle._C_ops.multiply(strided_slice_62, slice_31) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_125 = paddle._C_ops.multiply(strided_slice_63, slice_30) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_31 = paddle._C_ops.subtract(multiply_124, multiply_125) + del multiply_124, multiply_125 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_126 = paddle._C_ops.multiply(strided_slice_62, slice_30) + del slice_30, strided_slice_62 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_127 = paddle._C_ops.multiply(strided_slice_63, slice_31) + del slice_31, strided_slice_63 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_77 = paddle._C_ops.add(multiply_126, multiply_127) + del multiply_126, multiply_127 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_31 = [subtract_31, add_77] + del add_77, subtract_31 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_31 = paddle._C_ops.stack(combine_31, -1) + del combine_31 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_31 = paddle._C_ops.flatten(stack_31, 3, 4) + del stack_31 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_123 = paddle._C_ops.matmul(flatten_30, flatten_31, False, True) + del flatten_30, flatten_31 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_47 = paddle._C_ops.scale(matmul_123, full_4, float("0"), True) + del matmul_123 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_78 = paddle._C_ops.add(scale_47, unsqueeze_0) + del scale_47 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_15 = paddle._C_ops.softmax(add_78, -1) + del add_78 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_92, dropout_93 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_15, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_15 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_124 = paddle._C_ops.matmul(dropout_92, transpose_62, False, False) + del dropout_92, transpose_62 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_63 = paddle._C_ops.transpose(matmul_124, [0, 2, 1, 3]) + del matmul_124 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_63 = paddle._C_ops.reshape(transpose_63, full_int_array_8) + del transpose_63 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_125 = paddle._C_ops.matmul(reshape_63, parameter_98, False, False) + del parameter_98, reshape_63 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_94, dropout_95 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_125, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_125 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_79 = paddle._C_ops.add(divide_31, dropout_94) + del divide_31, dropout_94 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_31 = paddle._C_ops.square(add_79) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_31 = paddle._C_ops.mean(square_31, full_int_array_1, True) + del square_31 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_48 = paddle._C_ops.scale(mean_31, full_2, float("1e-12"), True) + del mean_31 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_31 = paddle._C_ops.sqrt(scale_48) + del scale_48 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_32 = paddle._C_ops.divide(add_79, sqrt_31) + del add_79, sqrt_31 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_126 = paddle._C_ops.matmul(divide_32, parameter_97, False, False) + del parameter_97 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_15 = paddle._C_ops.relu(matmul_126) + del matmul_126 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_127 = paddle._C_ops.matmul(relu_15, parameter_96, False, False) + del parameter_96, relu_15 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_96, dropout_97 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_127, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_127 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_80 = paddle._C_ops.add(divide_32, dropout_96) + del divide_32, dropout_96 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_32 = paddle._C_ops.square(add_80) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_32 = paddle._C_ops.mean(square_32, full_int_array_1, True) + del square_32 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_49 = paddle._C_ops.scale(mean_32, full_2, float("1e-12"), True) + del mean_32 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_32 = paddle._C_ops.sqrt(scale_49) + del scale_49 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_33 = paddle._C_ops.divide(add_80, sqrt_32) + del add_80, sqrt_32 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_128 = paddle._C_ops.matmul(divide_33, parameter_95, False, False) + del parameter_95 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_129 = paddle._C_ops.matmul(divide_33, parameter_94, False, False) + del parameter_94 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_130 = paddle._C_ops.matmul(divide_33, parameter_93, False, False) + del parameter_93 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_64 = paddle._C_ops.reshape(matmul_128, full_int_array_2) + del matmul_128 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_64 = paddle._C_ops.transpose(reshape_64, [0, 2, 1, 3]) + del reshape_64 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_65 = paddle._C_ops.reshape(matmul_129, full_int_array_2) + del matmul_129 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_65 = paddle._C_ops.transpose(reshape_65, [0, 2, 1, 3]) + del reshape_65 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_66 = paddle._C_ops.reshape(matmul_130, full_int_array_2) + del matmul_130 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_66 = paddle._C_ops.transpose(reshape_66, [0, 2, 1, 3]) + del reshape_66 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_32 = paddle._C_ops.slice( + parameter_15, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_15 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_33 = paddle._C_ops.slice( + parameter_14, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_14 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_64 = paddle._C_ops.strided_slice( + transpose_64, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_65 = paddle._C_ops.strided_slice( + transpose_64, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_64 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_128 = paddle._C_ops.multiply(strided_slice_64, slice_33) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_129 = paddle._C_ops.multiply(strided_slice_65, slice_32) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_32 = paddle._C_ops.subtract(multiply_128, multiply_129) + del multiply_128, multiply_129 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_130 = paddle._C_ops.multiply(strided_slice_64, slice_32) + del strided_slice_64 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_131 = paddle._C_ops.multiply(strided_slice_65, slice_33) + del strided_slice_65 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_81 = paddle._C_ops.add(multiply_130, multiply_131) + del multiply_130, multiply_131 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_32 = [subtract_32, add_81] + del add_81, subtract_32 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_32 = paddle._C_ops.stack(combine_32, -1) + del combine_32 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_32 = paddle._C_ops.flatten(stack_32, 3, 4) + del stack_32 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_66 = paddle._C_ops.strided_slice( + transpose_65, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_67 = paddle._C_ops.strided_slice( + transpose_65, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_65 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_132 = paddle._C_ops.multiply(strided_slice_66, slice_33) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_133 = paddle._C_ops.multiply(strided_slice_67, slice_32) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_33 = paddle._C_ops.subtract(multiply_132, multiply_133) + del multiply_132, multiply_133 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_134 = paddle._C_ops.multiply(strided_slice_66, slice_32) + del slice_32, strided_slice_66 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_135 = paddle._C_ops.multiply(strided_slice_67, slice_33) + del slice_33, strided_slice_67 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_82 = paddle._C_ops.add(multiply_134, multiply_135) + del multiply_134, multiply_135 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_33 = [subtract_33, add_82] + del add_82, subtract_33 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_33 = paddle._C_ops.stack(combine_33, -1) + del combine_33 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_33 = paddle._C_ops.flatten(stack_33, 3, 4) + del stack_33 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_131 = paddle._C_ops.matmul(flatten_32, flatten_33, False, True) + del flatten_32, flatten_33 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_50 = paddle._C_ops.scale(matmul_131, full_4, float("0"), True) + del matmul_131 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_83 = paddle._C_ops.add(scale_50, unsqueeze_0) + del scale_50 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_16 = paddle._C_ops.softmax(add_83, -1) + del add_83 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_98, dropout_99 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_16, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_16 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_132 = paddle._C_ops.matmul(dropout_98, transpose_66, False, False) + del dropout_98, transpose_66 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_67 = paddle._C_ops.transpose(matmul_132, [0, 2, 1, 3]) + del matmul_132 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_67 = paddle._C_ops.reshape(transpose_67, full_int_array_8) + del transpose_67 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_133 = paddle._C_ops.matmul(reshape_67, parameter_92, False, False) + del parameter_92, reshape_67 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_100, dropout_101 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_133, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_133 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_84 = paddle._C_ops.add(divide_33, dropout_100) + del divide_33, dropout_100 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_33 = paddle._C_ops.square(add_84) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_33 = paddle._C_ops.mean(square_33, full_int_array_1, True) + del square_33 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_51 = paddle._C_ops.scale(mean_33, full_2, float("1e-12"), True) + del mean_33 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_33 = paddle._C_ops.sqrt(scale_51) + del scale_51 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_34 = paddle._C_ops.divide(add_84, sqrt_33) + del add_84, sqrt_33 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_134 = paddle._C_ops.matmul(divide_34, parameter_91, False, False) + del parameter_91 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_16 = paddle._C_ops.relu(matmul_134) + del matmul_134 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_135 = paddle._C_ops.matmul(relu_16, parameter_90, False, False) + del parameter_90, relu_16 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_102, dropout_103 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_135, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_135 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_85 = paddle._C_ops.add(divide_34, dropout_102) + del divide_34, dropout_102 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_34 = paddle._C_ops.square(add_85) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_34 = paddle._C_ops.mean(square_34, full_int_array_1, True) + del square_34 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_52 = paddle._C_ops.scale(mean_34, full_2, float("1e-12"), True) + del mean_34 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_34 = paddle._C_ops.sqrt(scale_52) + del scale_52 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_35 = paddle._C_ops.divide(add_85, sqrt_34) + del add_85, sqrt_34 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_136 = paddle._C_ops.matmul(divide_35, parameter_89, False, False) + del parameter_89 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_137 = paddle._C_ops.matmul(divide_35, parameter_88, False, False) + del parameter_88 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_138 = paddle._C_ops.matmul(divide_35, parameter_87, False, False) + del parameter_87 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_68 = paddle._C_ops.reshape(matmul_136, full_int_array_2) + del matmul_136 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_68 = paddle._C_ops.transpose(reshape_68, [0, 2, 1, 3]) + del reshape_68 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_69 = paddle._C_ops.reshape(matmul_137, full_int_array_2) + del matmul_137 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_69 = paddle._C_ops.transpose(reshape_69, [0, 2, 1, 3]) + del reshape_69 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_70 = paddle._C_ops.reshape(matmul_138, full_int_array_2) + del matmul_138 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_70 = paddle._C_ops.transpose(reshape_70, [0, 2, 1, 3]) + del reshape_70 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_34 = paddle._C_ops.slice( + parameter_13, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_13 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_35 = paddle._C_ops.slice( + parameter_12, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_12 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_68 = paddle._C_ops.strided_slice( + transpose_68, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_69 = paddle._C_ops.strided_slice( + transpose_68, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_68 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_136 = paddle._C_ops.multiply(strided_slice_68, slice_35) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_137 = paddle._C_ops.multiply(strided_slice_69, slice_34) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_34 = paddle._C_ops.subtract(multiply_136, multiply_137) + del multiply_136, multiply_137 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_138 = paddle._C_ops.multiply(strided_slice_68, slice_34) + del strided_slice_68 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_139 = paddle._C_ops.multiply(strided_slice_69, slice_35) + del strided_slice_69 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_86 = paddle._C_ops.add(multiply_138, multiply_139) + del multiply_138, multiply_139 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_34 = [subtract_34, add_86] + del add_86, subtract_34 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_34 = paddle._C_ops.stack(combine_34, -1) + del combine_34 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_34 = paddle._C_ops.flatten(stack_34, 3, 4) + del stack_34 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_70 = paddle._C_ops.strided_slice( + transpose_69, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_71 = paddle._C_ops.strided_slice( + transpose_69, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_69 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_140 = paddle._C_ops.multiply(strided_slice_70, slice_35) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_141 = paddle._C_ops.multiply(strided_slice_71, slice_34) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_35 = paddle._C_ops.subtract(multiply_140, multiply_141) + del multiply_140, multiply_141 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_142 = paddle._C_ops.multiply(strided_slice_70, slice_34) + del slice_34, strided_slice_70 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_143 = paddle._C_ops.multiply(strided_slice_71, slice_35) + del slice_35, strided_slice_71 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_87 = paddle._C_ops.add(multiply_142, multiply_143) + del multiply_142, multiply_143 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_35 = [subtract_35, add_87] + del add_87, subtract_35 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_35 = paddle._C_ops.stack(combine_35, -1) + del combine_35 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_35 = paddle._C_ops.flatten(stack_35, 3, 4) + del stack_35 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_139 = paddle._C_ops.matmul(flatten_34, flatten_35, False, True) + del flatten_34, flatten_35 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_53 = paddle._C_ops.scale(matmul_139, full_4, float("0"), True) + del matmul_139 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_88 = paddle._C_ops.add(scale_53, unsqueeze_0) + del scale_53 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_17 = paddle._C_ops.softmax(add_88, -1) + del add_88 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_104, dropout_105 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_17, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_17 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_140 = paddle._C_ops.matmul(dropout_104, transpose_70, False, False) + del dropout_104, transpose_70 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_71 = paddle._C_ops.transpose(matmul_140, [0, 2, 1, 3]) + del matmul_140 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_71 = paddle._C_ops.reshape(transpose_71, full_int_array_8) + del transpose_71 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_141 = paddle._C_ops.matmul(reshape_71, parameter_86, False, False) + del parameter_86, reshape_71 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_106, dropout_107 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_141, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_141 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_89 = paddle._C_ops.add(divide_35, dropout_106) + del divide_35, dropout_106 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_35 = paddle._C_ops.square(add_89) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_35 = paddle._C_ops.mean(square_35, full_int_array_1, True) + del square_35 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_54 = paddle._C_ops.scale(mean_35, full_2, float("1e-12"), True) + del mean_35 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_35 = paddle._C_ops.sqrt(scale_54) + del scale_54 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_36 = paddle._C_ops.divide(add_89, sqrt_35) + del add_89, sqrt_35 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_142 = paddle._C_ops.matmul(divide_36, parameter_85, False, False) + del parameter_85 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_17 = paddle._C_ops.relu(matmul_142) + del matmul_142 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_143 = paddle._C_ops.matmul(relu_17, parameter_84, False, False) + del parameter_84, relu_17 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_108, dropout_109 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_143, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_143 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_90 = paddle._C_ops.add(divide_36, dropout_108) + del divide_36, dropout_108 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_36 = paddle._C_ops.square(add_90) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_36 = paddle._C_ops.mean(square_36, full_int_array_1, True) + del square_36 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_55 = paddle._C_ops.scale(mean_36, full_2, float("1e-12"), True) + del mean_36 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_36 = paddle._C_ops.sqrt(scale_55) + del scale_55 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_37 = paddle._C_ops.divide(add_90, sqrt_36) + del add_90, sqrt_36 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_144 = paddle._C_ops.matmul(divide_37, parameter_83, False, False) + del parameter_83 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_145 = paddle._C_ops.matmul(divide_37, parameter_82, False, False) + del parameter_82 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_146 = paddle._C_ops.matmul(divide_37, parameter_81, False, False) + del parameter_81 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_72 = paddle._C_ops.reshape(matmul_144, full_int_array_2) + del matmul_144 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_72 = paddle._C_ops.transpose(reshape_72, [0, 2, 1, 3]) + del reshape_72 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_73 = paddle._C_ops.reshape(matmul_145, full_int_array_2) + del matmul_145 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_73 = paddle._C_ops.transpose(reshape_73, [0, 2, 1, 3]) + del reshape_73 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_74 = paddle._C_ops.reshape(matmul_146, full_int_array_2) + del matmul_146 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_74 = paddle._C_ops.transpose(reshape_74, [0, 2, 1, 3]) + del reshape_74 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_36 = paddle._C_ops.slice( + parameter_11, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_11 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_37 = paddle._C_ops.slice( + parameter_10, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_10 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_72 = paddle._C_ops.strided_slice( + transpose_72, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_73 = paddle._C_ops.strided_slice( + transpose_72, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_72 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_144 = paddle._C_ops.multiply(strided_slice_72, slice_37) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_145 = paddle._C_ops.multiply(strided_slice_73, slice_36) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_36 = paddle._C_ops.subtract(multiply_144, multiply_145) + del multiply_144, multiply_145 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_146 = paddle._C_ops.multiply(strided_slice_72, slice_36) + del strided_slice_72 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_147 = paddle._C_ops.multiply(strided_slice_73, slice_37) + del strided_slice_73 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_91 = paddle._C_ops.add(multiply_146, multiply_147) + del multiply_146, multiply_147 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_36 = [subtract_36, add_91] + del add_91, subtract_36 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_36 = paddle._C_ops.stack(combine_36, -1) + del combine_36 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_36 = paddle._C_ops.flatten(stack_36, 3, 4) + del stack_36 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_74 = paddle._C_ops.strided_slice( + transpose_73, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_75 = paddle._C_ops.strided_slice( + transpose_73, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_73 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_148 = paddle._C_ops.multiply(strided_slice_74, slice_37) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_149 = paddle._C_ops.multiply(strided_slice_75, slice_36) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_37 = paddle._C_ops.subtract(multiply_148, multiply_149) + del multiply_148, multiply_149 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_150 = paddle._C_ops.multiply(strided_slice_74, slice_36) + del slice_36, strided_slice_74 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_151 = paddle._C_ops.multiply(strided_slice_75, slice_37) + del slice_37, strided_slice_75 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_92 = paddle._C_ops.add(multiply_150, multiply_151) + del multiply_150, multiply_151 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_37 = [subtract_37, add_92] + del add_92, subtract_37 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_37 = paddle._C_ops.stack(combine_37, -1) + del combine_37 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_37 = paddle._C_ops.flatten(stack_37, 3, 4) + del stack_37 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_147 = paddle._C_ops.matmul(flatten_36, flatten_37, False, True) + del flatten_36, flatten_37 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_56 = paddle._C_ops.scale(matmul_147, full_4, float("0"), True) + del matmul_147 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_93 = paddle._C_ops.add(scale_56, unsqueeze_0) + del scale_56 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_18 = paddle._C_ops.softmax(add_93, -1) + del add_93 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_110, dropout_111 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_18, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_18 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_148 = paddle._C_ops.matmul(dropout_110, transpose_74, False, False) + del dropout_110, transpose_74 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_75 = paddle._C_ops.transpose(matmul_148, [0, 2, 1, 3]) + del matmul_148 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_75 = paddle._C_ops.reshape(transpose_75, full_int_array_8) + del transpose_75 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_149 = paddle._C_ops.matmul(reshape_75, parameter_80, False, False) + del parameter_80, reshape_75 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_112, dropout_113 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_149, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_149 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_94 = paddle._C_ops.add(divide_37, dropout_112) + del divide_37, dropout_112 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_37 = paddle._C_ops.square(add_94) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_37 = paddle._C_ops.mean(square_37, full_int_array_1, True) + del square_37 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_57 = paddle._C_ops.scale(mean_37, full_2, float("1e-12"), True) + del mean_37 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_37 = paddle._C_ops.sqrt(scale_57) + del scale_57 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_38 = paddle._C_ops.divide(add_94, sqrt_37) + del add_94, sqrt_37 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_150 = paddle._C_ops.matmul(divide_38, parameter_79, False, False) + del parameter_79 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_18 = paddle._C_ops.relu(matmul_150) + del matmul_150 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_151 = paddle._C_ops.matmul(relu_18, parameter_78, False, False) + del parameter_78, relu_18 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_114, dropout_115 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_151, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_151 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_95 = paddle._C_ops.add(divide_38, dropout_114) + del divide_38, dropout_114 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_38 = paddle._C_ops.square(add_95) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_38 = paddle._C_ops.mean(square_38, full_int_array_1, True) + del square_38 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_58 = paddle._C_ops.scale(mean_38, full_2, float("1e-12"), True) + del mean_38 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_38 = paddle._C_ops.sqrt(scale_58) + del scale_58 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_39 = paddle._C_ops.divide(add_95, sqrt_38) + del add_95, sqrt_38 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_152 = paddle._C_ops.matmul(divide_39, parameter_77, False, False) + del parameter_77 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_153 = paddle._C_ops.matmul(divide_39, parameter_76, False, False) + del parameter_76 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_154 = paddle._C_ops.matmul(divide_39, parameter_75, False, False) + del parameter_75 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_76 = paddle._C_ops.reshape(matmul_152, full_int_array_2) + del matmul_152 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_76 = paddle._C_ops.transpose(reshape_76, [0, 2, 1, 3]) + del reshape_76 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_77 = paddle._C_ops.reshape(matmul_153, full_int_array_2) + del matmul_153 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_77 = paddle._C_ops.transpose(reshape_77, [0, 2, 1, 3]) + del reshape_77 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_78 = paddle._C_ops.reshape(matmul_154, full_int_array_2) + del matmul_154 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_78 = paddle._C_ops.transpose(reshape_78, [0, 2, 1, 3]) + del reshape_78 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_38 = paddle._C_ops.slice( + parameter_9, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_9 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_39 = paddle._C_ops.slice( + parameter_8, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_8 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_76 = paddle._C_ops.strided_slice( + transpose_76, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_77 = paddle._C_ops.strided_slice( + transpose_76, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_76 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_152 = paddle._C_ops.multiply(strided_slice_76, slice_39) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_153 = paddle._C_ops.multiply(strided_slice_77, slice_38) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_38 = paddle._C_ops.subtract(multiply_152, multiply_153) + del multiply_152, multiply_153 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_154 = paddle._C_ops.multiply(strided_slice_76, slice_38) + del strided_slice_76 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_155 = paddle._C_ops.multiply(strided_slice_77, slice_39) + del strided_slice_77 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_96 = paddle._C_ops.add(multiply_154, multiply_155) + del multiply_154, multiply_155 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_38 = [subtract_38, add_96] + del add_96, subtract_38 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_38 = paddle._C_ops.stack(combine_38, -1) + del combine_38 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_38 = paddle._C_ops.flatten(stack_38, 3, 4) + del stack_38 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_78 = paddle._C_ops.strided_slice( + transpose_77, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_79 = paddle._C_ops.strided_slice( + transpose_77, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_77 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_156 = paddle._C_ops.multiply(strided_slice_78, slice_39) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_157 = paddle._C_ops.multiply(strided_slice_79, slice_38) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_39 = paddle._C_ops.subtract(multiply_156, multiply_157) + del multiply_156, multiply_157 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_158 = paddle._C_ops.multiply(strided_slice_78, slice_38) + del slice_38, strided_slice_78 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_159 = paddle._C_ops.multiply(strided_slice_79, slice_39) + del slice_39, strided_slice_79 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_97 = paddle._C_ops.add(multiply_158, multiply_159) + del multiply_158, multiply_159 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_39 = [subtract_39, add_97] + del add_97, subtract_39 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_39 = paddle._C_ops.stack(combine_39, -1) + del combine_39 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_39 = paddle._C_ops.flatten(stack_39, 3, 4) + del stack_39 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_155 = paddle._C_ops.matmul(flatten_38, flatten_39, False, True) + del flatten_38, flatten_39 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_59 = paddle._C_ops.scale(matmul_155, full_4, float("0"), True) + del matmul_155 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_98 = paddle._C_ops.add(scale_59, unsqueeze_0) + del scale_59 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_19 = paddle._C_ops.softmax(add_98, -1) + del add_98 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_116, dropout_117 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_19, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_19 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_156 = paddle._C_ops.matmul(dropout_116, transpose_78, False, False) + del dropout_116, transpose_78 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_79 = paddle._C_ops.transpose(matmul_156, [0, 2, 1, 3]) + del matmul_156 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_79 = paddle._C_ops.reshape(transpose_79, full_int_array_8) + del transpose_79 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_157 = paddle._C_ops.matmul(reshape_79, parameter_74, False, False) + del parameter_74, reshape_79 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_118, dropout_119 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_157, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_157 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_99 = paddle._C_ops.add(divide_39, dropout_118) + del divide_39, dropout_118 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_39 = paddle._C_ops.square(add_99) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_39 = paddle._C_ops.mean(square_39, full_int_array_1, True) + del square_39 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_60 = paddle._C_ops.scale(mean_39, full_2, float("1e-12"), True) + del mean_39 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_39 = paddle._C_ops.sqrt(scale_60) + del scale_60 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_40 = paddle._C_ops.divide(add_99, sqrt_39) + del add_99, sqrt_39 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_158 = paddle._C_ops.matmul(divide_40, parameter_73, False, False) + del parameter_73 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_19 = paddle._C_ops.relu(matmul_158) + del matmul_158 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_159 = paddle._C_ops.matmul(relu_19, parameter_72, False, False) + del parameter_72, relu_19 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_120, dropout_121 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_159, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_159 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_100 = paddle._C_ops.add(divide_40, dropout_120) + del divide_40, dropout_120 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_40 = paddle._C_ops.square(add_100) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_40 = paddle._C_ops.mean(square_40, full_int_array_1, True) + del square_40 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_61 = paddle._C_ops.scale(mean_40, full_2, float("1e-12"), True) + del mean_40 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_40 = paddle._C_ops.sqrt(scale_61) + del scale_61 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_41 = paddle._C_ops.divide(add_100, sqrt_40) + del add_100, sqrt_40 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_160 = paddle._C_ops.matmul(divide_41, parameter_71, False, False) + del parameter_71 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_161 = paddle._C_ops.matmul(divide_41, parameter_70, False, False) + del parameter_70 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_162 = paddle._C_ops.matmul(divide_41, parameter_69, False, False) + del parameter_69 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_80 = paddle._C_ops.reshape(matmul_160, full_int_array_2) + del matmul_160 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_80 = paddle._C_ops.transpose(reshape_80, [0, 2, 1, 3]) + del reshape_80 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_81 = paddle._C_ops.reshape(matmul_161, full_int_array_2) + del matmul_161 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_81 = paddle._C_ops.transpose(reshape_81, [0, 2, 1, 3]) + del reshape_81 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_82 = paddle._C_ops.reshape(matmul_162, full_int_array_2) + del matmul_162 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_82 = paddle._C_ops.transpose(reshape_82, [0, 2, 1, 3]) + del reshape_82 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_40 = paddle._C_ops.slice( + parameter_7, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_7 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_41 = paddle._C_ops.slice( + parameter_6, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_6 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_80 = paddle._C_ops.strided_slice( + transpose_80, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_81 = paddle._C_ops.strided_slice( + transpose_80, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_80 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_160 = paddle._C_ops.multiply(strided_slice_80, slice_41) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_161 = paddle._C_ops.multiply(strided_slice_81, slice_40) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_40 = paddle._C_ops.subtract(multiply_160, multiply_161) + del multiply_160, multiply_161 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_162 = paddle._C_ops.multiply(strided_slice_80, slice_40) + del strided_slice_80 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_163 = paddle._C_ops.multiply(strided_slice_81, slice_41) + del strided_slice_81 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_101 = paddle._C_ops.add(multiply_162, multiply_163) + del multiply_162, multiply_163 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_40 = [subtract_40, add_101] + del add_101, subtract_40 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_40 = paddle._C_ops.stack(combine_40, -1) + del combine_40 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_40 = paddle._C_ops.flatten(stack_40, 3, 4) + del stack_40 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_82 = paddle._C_ops.strided_slice( + transpose_81, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_83 = paddle._C_ops.strided_slice( + transpose_81, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_81 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_164 = paddle._C_ops.multiply(strided_slice_82, slice_41) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_165 = paddle._C_ops.multiply(strided_slice_83, slice_40) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_41 = paddle._C_ops.subtract(multiply_164, multiply_165) + del multiply_164, multiply_165 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_166 = paddle._C_ops.multiply(strided_slice_82, slice_40) + del slice_40, strided_slice_82 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_167 = paddle._C_ops.multiply(strided_slice_83, slice_41) + del slice_41, strided_slice_83 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_102 = paddle._C_ops.add(multiply_166, multiply_167) + del multiply_166, multiply_167 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_41 = [subtract_41, add_102] + del add_102, subtract_41 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_41 = paddle._C_ops.stack(combine_41, -1) + del combine_41 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_41 = paddle._C_ops.flatten(stack_41, 3, 4) + del stack_41 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_163 = paddle._C_ops.matmul(flatten_40, flatten_41, False, True) + del flatten_40, flatten_41 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_62 = paddle._C_ops.scale(matmul_163, full_4, float("0"), True) + del matmul_163 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_103 = paddle._C_ops.add(scale_62, unsqueeze_0) + del scale_62 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_20 = paddle._C_ops.softmax(add_103, -1) + del add_103 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_122, dropout_123 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_20, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_20 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_164 = paddle._C_ops.matmul(dropout_122, transpose_82, False, False) + del dropout_122, transpose_82 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_83 = paddle._C_ops.transpose(matmul_164, [0, 2, 1, 3]) + del matmul_164 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_83 = paddle._C_ops.reshape(transpose_83, full_int_array_8) + del transpose_83 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_165 = paddle._C_ops.matmul(reshape_83, parameter_68, False, False) + del parameter_68, reshape_83 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_124, dropout_125 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_165, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_165 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_104 = paddle._C_ops.add(divide_41, dropout_124) + del divide_41, dropout_124 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_41 = paddle._C_ops.square(add_104) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_41 = paddle._C_ops.mean(square_41, full_int_array_1, True) + del square_41 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_63 = paddle._C_ops.scale(mean_41, full_2, float("1e-12"), True) + del mean_41 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_41 = paddle._C_ops.sqrt(scale_63) + del scale_63 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_42 = paddle._C_ops.divide(add_104, sqrt_41) + del add_104, sqrt_41 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_166 = paddle._C_ops.matmul(divide_42, parameter_67, False, False) + del parameter_67 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_20 = paddle._C_ops.relu(matmul_166) + del matmul_166 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_167 = paddle._C_ops.matmul(relu_20, parameter_66, False, False) + del parameter_66, relu_20 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_126, dropout_127 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_167, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_167 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_105 = paddle._C_ops.add(divide_42, dropout_126) + del divide_42, dropout_126 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_42 = paddle._C_ops.square(add_105) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_42 = paddle._C_ops.mean(square_42, full_int_array_1, True) + del square_42 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_64 = paddle._C_ops.scale(mean_42, full_2, float("1e-12"), True) + del mean_42 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_42 = paddle._C_ops.sqrt(scale_64) + del scale_64 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_43 = paddle._C_ops.divide(add_105, sqrt_42) + del add_105, sqrt_42 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_168 = paddle._C_ops.matmul(divide_43, parameter_65, False, False) + del parameter_65 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_169 = paddle._C_ops.matmul(divide_43, parameter_64, False, False) + del parameter_64 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_170 = paddle._C_ops.matmul(divide_43, parameter_63, False, False) + del parameter_63 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_84 = paddle._C_ops.reshape(matmul_168, full_int_array_2) + del matmul_168 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_84 = paddle._C_ops.transpose(reshape_84, [0, 2, 1, 3]) + del reshape_84 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_85 = paddle._C_ops.reshape(matmul_169, full_int_array_2) + del matmul_169 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_85 = paddle._C_ops.transpose(reshape_85, [0, 2, 1, 3]) + del reshape_85 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_86 = paddle._C_ops.reshape(matmul_170, full_int_array_2) + del matmul_170 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_86 = paddle._C_ops.transpose(reshape_86, [0, 2, 1, 3]) + del reshape_86 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_42 = paddle._C_ops.slice( + parameter_5, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_5 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_43 = paddle._C_ops.slice( + parameter_4, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_4 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_84 = paddle._C_ops.strided_slice( + transpose_84, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_85 = paddle._C_ops.strided_slice( + transpose_84, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_84 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_168 = paddle._C_ops.multiply(strided_slice_84, slice_43) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_169 = paddle._C_ops.multiply(strided_slice_85, slice_42) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_42 = paddle._C_ops.subtract(multiply_168, multiply_169) + del multiply_168, multiply_169 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_170 = paddle._C_ops.multiply(strided_slice_84, slice_42) + del strided_slice_84 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_171 = paddle._C_ops.multiply(strided_slice_85, slice_43) + del strided_slice_85 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_106 = paddle._C_ops.add(multiply_170, multiply_171) + del multiply_170, multiply_171 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_42 = [subtract_42, add_106] + del add_106, subtract_42 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_42 = paddle._C_ops.stack(combine_42, -1) + del combine_42 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_42 = paddle._C_ops.flatten(stack_42, 3, 4) + del stack_42 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_86 = paddle._C_ops.strided_slice( + transpose_85, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_87 = paddle._C_ops.strided_slice( + transpose_85, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_85 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_172 = paddle._C_ops.multiply(strided_slice_86, slice_43) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_173 = paddle._C_ops.multiply(strided_slice_87, slice_42) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_43 = paddle._C_ops.subtract(multiply_172, multiply_173) + del multiply_172, multiply_173 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_174 = paddle._C_ops.multiply(strided_slice_86, slice_42) + del slice_42, strided_slice_86 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_175 = paddle._C_ops.multiply(strided_slice_87, slice_43) + del slice_43, strided_slice_87 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_107 = paddle._C_ops.add(multiply_174, multiply_175) + del multiply_174, multiply_175 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_43 = [subtract_43, add_107] + del add_107, subtract_43 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_43 = paddle._C_ops.stack(combine_43, -1) + del combine_43 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_43 = paddle._C_ops.flatten(stack_43, 3, 4) + del stack_43 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_171 = paddle._C_ops.matmul(flatten_42, flatten_43, False, True) + del flatten_42, flatten_43 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_65 = paddle._C_ops.scale(matmul_171, full_4, float("0"), True) + del matmul_171 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_108 = paddle._C_ops.add(scale_65, unsqueeze_0) + del scale_65 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_21 = paddle._C_ops.softmax(add_108, -1) + del add_108 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_128, dropout_129 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_21, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_21 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_172 = paddle._C_ops.matmul(dropout_128, transpose_86, False, False) + del dropout_128, transpose_86 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_87 = paddle._C_ops.transpose(matmul_172, [0, 2, 1, 3]) + del matmul_172 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_87 = paddle._C_ops.reshape(transpose_87, full_int_array_8) + del transpose_87 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_173 = paddle._C_ops.matmul(reshape_87, parameter_62, False, False) + del parameter_62, reshape_87 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_130, dropout_131 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_173, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_173 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_109 = paddle._C_ops.add(divide_43, dropout_130) + del divide_43, dropout_130 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_43 = paddle._C_ops.square(add_109) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_43 = paddle._C_ops.mean(square_43, full_int_array_1, True) + del square_43 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_66 = paddle._C_ops.scale(mean_43, full_2, float("1e-12"), True) + del mean_43 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_43 = paddle._C_ops.sqrt(scale_66) + del scale_66 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_44 = paddle._C_ops.divide(add_109, sqrt_43) + del add_109, sqrt_43 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_174 = paddle._C_ops.matmul(divide_44, parameter_61, False, False) + del parameter_61 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_21 = paddle._C_ops.relu(matmul_174) + del matmul_174 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_175 = paddle._C_ops.matmul(relu_21, parameter_60, False, False) + del parameter_60, relu_21 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_132, dropout_133 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_175, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_175 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_110 = paddle._C_ops.add(divide_44, dropout_132) + del divide_44, dropout_132 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_44 = paddle._C_ops.square(add_110) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_44 = paddle._C_ops.mean(square_44, full_int_array_1, True) + del square_44 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_67 = paddle._C_ops.scale(mean_44, full_2, float("1e-12"), True) + del mean_44 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_44 = paddle._C_ops.sqrt(scale_67) + del scale_67 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_45 = paddle._C_ops.divide(add_110, sqrt_44) + del add_110, sqrt_44 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_176 = paddle._C_ops.matmul(divide_45, parameter_59, False, False) + del parameter_59 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_177 = paddle._C_ops.matmul(divide_45, parameter_58, False, False) + del parameter_58 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_178 = paddle._C_ops.matmul(divide_45, parameter_57, False, False) + del parameter_57 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_88 = paddle._C_ops.reshape(matmul_176, full_int_array_2) + del matmul_176 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_88 = paddle._C_ops.transpose(reshape_88, [0, 2, 1, 3]) + del reshape_88 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_89 = paddle._C_ops.reshape(matmul_177, full_int_array_2) + del matmul_177 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_89 = paddle._C_ops.transpose(reshape_89, [0, 2, 1, 3]) + del reshape_89 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_90 = paddle._C_ops.reshape(matmul_178, full_int_array_2) + del matmul_178 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_90 = paddle._C_ops.transpose(reshape_90, [0, 2, 1, 3]) + del reshape_90 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_44 = paddle._C_ops.slice( + parameter_3, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_3 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_45 = paddle._C_ops.slice( + parameter_2, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_2 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_88 = paddle._C_ops.strided_slice( + transpose_88, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_89 = paddle._C_ops.strided_slice( + transpose_88, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_88 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_176 = paddle._C_ops.multiply(strided_slice_88, slice_45) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_177 = paddle._C_ops.multiply(strided_slice_89, slice_44) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_44 = paddle._C_ops.subtract(multiply_176, multiply_177) + del multiply_176, multiply_177 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_178 = paddle._C_ops.multiply(strided_slice_88, slice_44) + del strided_slice_88 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_179 = paddle._C_ops.multiply(strided_slice_89, slice_45) + del strided_slice_89 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_111 = paddle._C_ops.add(multiply_178, multiply_179) + del multiply_178, multiply_179 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_44 = [subtract_44, add_111] + del add_111, subtract_44 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_44 = paddle._C_ops.stack(combine_44, -1) + del combine_44 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_44 = paddle._C_ops.flatten(stack_44, 3, 4) + del stack_44 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_90 = paddle._C_ops.strided_slice( + transpose_89, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_91 = paddle._C_ops.strided_slice( + transpose_89, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_89 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_180 = paddle._C_ops.multiply(strided_slice_90, slice_45) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_181 = paddle._C_ops.multiply(strided_slice_91, slice_44) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_45 = paddle._C_ops.subtract(multiply_180, multiply_181) + del multiply_180, multiply_181 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_182 = paddle._C_ops.multiply(strided_slice_90, slice_44) + del slice_44, strided_slice_90 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_183 = paddle._C_ops.multiply(strided_slice_91, slice_45) + del slice_45, strided_slice_91 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_112 = paddle._C_ops.add(multiply_182, multiply_183) + del multiply_182, multiply_183 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_45 = [subtract_45, add_112] + del add_112, subtract_45 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_45 = paddle._C_ops.stack(combine_45, -1) + del combine_45 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_45 = paddle._C_ops.flatten(stack_45, 3, 4) + del stack_45 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_179 = paddle._C_ops.matmul(flatten_44, flatten_45, False, True) + del flatten_44, flatten_45 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_68 = paddle._C_ops.scale(matmul_179, full_4, float("0"), True) + del matmul_179 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_113 = paddle._C_ops.add(scale_68, unsqueeze_0) + del scale_68 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_22 = paddle._C_ops.softmax(add_113, -1) + del add_113 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_134, dropout_135 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_22, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_22 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_180 = paddle._C_ops.matmul(dropout_134, transpose_90, False, False) + del dropout_134, transpose_90 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_91 = paddle._C_ops.transpose(matmul_180, [0, 2, 1, 3]) + del matmul_180 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_91 = paddle._C_ops.reshape(transpose_91, full_int_array_8) + del transpose_91 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_181 = paddle._C_ops.matmul(reshape_91, parameter_56, False, False) + del parameter_56, reshape_91 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_136, dropout_137 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_181, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_181 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_114 = paddle._C_ops.add(divide_45, dropout_136) + del divide_45, dropout_136 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_45 = paddle._C_ops.square(add_114) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_45 = paddle._C_ops.mean(square_45, full_int_array_1, True) + del square_45 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_69 = paddle._C_ops.scale(mean_45, full_2, float("1e-12"), True) + del mean_45 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_45 = paddle._C_ops.sqrt(scale_69) + del scale_69 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_46 = paddle._C_ops.divide(add_114, sqrt_45) + del add_114, sqrt_45 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_182 = paddle._C_ops.matmul(divide_46, parameter_55, False, False) + del parameter_55 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_22 = paddle._C_ops.relu(matmul_182) + del matmul_182 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_183 = paddle._C_ops.matmul(relu_22, parameter_54, False, False) + del parameter_54, relu_22 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_138, dropout_139 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_183, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_183 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_115 = paddle._C_ops.add(divide_46, dropout_138) + del divide_46, dropout_138 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_46 = paddle._C_ops.square(add_115) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_46 = paddle._C_ops.mean(square_46, full_int_array_1, True) + del square_46 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_70 = paddle._C_ops.scale(mean_46, full_2, float("1e-12"), True) + del mean_46 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_46 = paddle._C_ops.sqrt(scale_70) + del scale_70 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_47 = paddle._C_ops.divide(add_115, sqrt_46) + del add_115, sqrt_46 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_184 = paddle._C_ops.matmul(divide_47, parameter_53, False, False) + del parameter_53 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_185 = paddle._C_ops.matmul(divide_47, parameter_52, False, False) + del parameter_52 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_186 = paddle._C_ops.matmul(divide_47, parameter_51, False, False) + del parameter_51 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_92 = paddle._C_ops.reshape(matmul_184, full_int_array_2) + del matmul_184 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_92 = paddle._C_ops.transpose(reshape_92, [0, 2, 1, 3]) + del reshape_92 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_93 = paddle._C_ops.reshape(matmul_185, full_int_array_2) + del matmul_185 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_93 = paddle._C_ops.transpose(reshape_93, [0, 2, 1, 3]) + del reshape_93 + + # pd_op.reshape: (1x11x16x64xf32) <- (1x11x1024xf32, 4xi64) + reshape_94 = paddle._C_ops.reshape(matmul_186, full_int_array_2) + del full_int_array_2, matmul_186 + + # pd_op.transpose: (1x16x11x64xf32) <- (1x11x16x64xf32) + transpose_94 = paddle._C_ops.transpose(reshape_94, [0, 2, 1, 3]) + del reshape_94 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_46 = paddle._C_ops.slice( + parameter_1, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_1 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_47 = paddle._C_ops.slice( + parameter_0, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del full_int_array_4, parameter_0 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_92 = paddle._C_ops.strided_slice( + transpose_92, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_93 = paddle._C_ops.strided_slice( + transpose_92, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_92 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_184 = paddle._C_ops.multiply(strided_slice_92, slice_47) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_185 = paddle._C_ops.multiply(strided_slice_93, slice_46) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_46 = paddle._C_ops.subtract(multiply_184, multiply_185) + del multiply_184, multiply_185 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_186 = paddle._C_ops.multiply(strided_slice_92, slice_46) + del strided_slice_92 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_187 = paddle._C_ops.multiply(strided_slice_93, slice_47) + del strided_slice_93 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_116 = paddle._C_ops.add(multiply_186, multiply_187) + del multiply_186, multiply_187 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_46 = [subtract_46, add_116] + del add_116, subtract_46 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_46 = paddle._C_ops.stack(combine_46, -1) + del combine_46 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_46 = paddle._C_ops.flatten(stack_46, 3, 4) + del stack_46 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_94 = paddle._C_ops.strided_slice( + transpose_93, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + del full_int_array_3 + + # pd_op.strided_slice: (1x16x11x32xf32) <- (1x16x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_95 = paddle._C_ops.strided_slice( + transpose_93, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del full_int_array_5, full_int_array_6, full_int_array_7, transpose_93 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_188 = paddle._C_ops.multiply(strided_slice_94, slice_47) + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_189 = paddle._C_ops.multiply(strided_slice_95, slice_46) + + # pd_op.subtract: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + subtract_47 = paddle._C_ops.subtract(multiply_188, multiply_189) + del multiply_188, multiply_189 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_190 = paddle._C_ops.multiply(strided_slice_94, slice_46) + del slice_46, strided_slice_94 + + # pd_op.multiply: (1x16x11x32xf32) <- (1x16x11x32xf32, 11x32xf32) + multiply_191 = paddle._C_ops.multiply(strided_slice_95, slice_47) + del slice_47, strided_slice_95 + + # pd_op.add: (1x16x11x32xf32) <- (1x16x11x32xf32, 1x16x11x32xf32) + add_117 = paddle._C_ops.add(multiply_190, multiply_191) + del multiply_190, multiply_191 + + # builtin.combine: ([1x16x11x32xf32, 1x16x11x32xf32]) <- (1x16x11x32xf32, 1x16x11x32xf32) + combine_47 = [subtract_47, add_117] + del add_117, subtract_47 + + # pd_op.stack: (1x16x11x32x2xf32) <- ([1x16x11x32xf32, 1x16x11x32xf32]) + stack_47 = paddle._C_ops.stack(combine_47, -1) + del combine_47 + + # pd_op.flatten: (1x16x11x64xf32) <- (1x16x11x32x2xf32) + flatten_47 = paddle._C_ops.flatten(stack_47, 3, 4) + del stack_47 + + # pd_op.matmul: (1x16x11x11xf32) <- (1x16x11x64xf32, 1x16x11x64xf32) + matmul_187 = paddle._C_ops.matmul(flatten_46, flatten_47, False, True) + del flatten_46, flatten_47 + + # pd_op.scale: (1x16x11x11xf32) <- (1x16x11x11xf32, 1xf32) + scale_71 = paddle._C_ops.scale(matmul_187, full_4, float("0"), True) + del full_4, matmul_187 + + # pd_op.add: (1x16x11x11xf32) <- (1x16x11x11xf32, 1x1x1x11xf32) + add_118 = paddle._C_ops.add(scale_71, unsqueeze_0) + del scale_71, unsqueeze_0 + + # pd_op.softmax: (1x16x11x11xf32) <- (1x16x11x11xf32) + softmax_23 = paddle._C_ops.softmax(add_118, -1) + del add_118 + + # pd_op.dropout: (1x16x11x11xf32, 1x16x11x11xui8) <- (1x16x11x11xf32, None, 1xf32) + dropout_140, dropout_141 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_23, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_23 + + # pd_op.matmul: (1x16x11x64xf32) <- (1x16x11x11xf32, 1x16x11x64xf32) + matmul_188 = paddle._C_ops.matmul(dropout_140, transpose_94, False, False) + del dropout_140, transpose_94 + + # pd_op.transpose: (1x11x16x64xf32) <- (1x16x11x64xf32) + transpose_95 = paddle._C_ops.transpose(matmul_188, [0, 2, 1, 3]) + del matmul_188 + + # pd_op.reshape: (1x11x1024xf32) <- (1x11x16x64xf32, 3xi64) + reshape_95 = paddle._C_ops.reshape(transpose_95, full_int_array_8) + del full_int_array_8, transpose_95 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x1024xf32, 1024x1024xf32) + matmul_189 = paddle._C_ops.matmul(reshape_95, parameter_50, False, False) + del parameter_50, reshape_95 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_142, dropout_143 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_189, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_189 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_119 = paddle._C_ops.add(divide_47, dropout_142) + del divide_47, dropout_142 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_47 = paddle._C_ops.square(add_119) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_47 = paddle._C_ops.mean(square_47, full_int_array_1, True) + del square_47 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_72 = paddle._C_ops.scale(mean_47, full_2, float("1e-12"), True) + del mean_47 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_47 = paddle._C_ops.sqrt(scale_72) + del scale_72 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_48 = paddle._C_ops.divide(add_119, sqrt_47) + del add_119, sqrt_47 + + # pd_op.matmul: (1x11x4096xf32) <- (1x11x1024xf32, 1024x4096xf32) + matmul_190 = paddle._C_ops.matmul(divide_48, parameter_49, False, False) + del parameter_49 + + # pd_op.relu: (1x11x4096xf32) <- (1x11x4096xf32) + relu_23 = paddle._C_ops.relu(matmul_190) + del matmul_190 + + # pd_op.matmul: (1x11x1024xf32) <- (1x11x4096xf32, 4096x1024xf32) + matmul_191 = paddle._C_ops.matmul(relu_23, parameter_48, False, False) + del parameter_48, relu_23 + + # pd_op.dropout: (1x11x1024xf32, 1x11x1024xui8) <- (1x11x1024xf32, None, 1xf32) + dropout_144, dropout_145 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_191, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del full_3, matmul_191 + + # pd_op.add: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1024xf32) + add_120 = paddle._C_ops.add(divide_48, dropout_144) + del divide_48, dropout_144 + + # pd_op.square: (1x11x1024xf32) <- (1x11x1024xf32) + square_48 = paddle._C_ops.square(add_120) + + # pd_op.mean: (1x11x1xf32) <- (1x11x1024xf32, 1xi64) + mean_48 = paddle._C_ops.mean(square_48, full_int_array_1, True) + del full_int_array_1, square_48 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_73 = paddle._C_ops.scale(mean_48, full_2, float("1e-12"), True) + del full_2, mean_48 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_48 = paddle._C_ops.sqrt(scale_73) + del scale_73 + + # pd_op.divide: (1x11x1024xf32) <- (1x11x1024xf32, 1x11x1xf32) + divide_0 = paddle._C_ops.divide(add_120, sqrt_48) + del add_120, sqrt_48 + + return divide_0 diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/weight_meta.py b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/weight_meta.py new file mode 100644 index 000000000..12ea3d128 --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_large/weight_meta.py @@ -0,0 +1,2132 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("-1.0805e-07") + std = float("0.0197677") + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342327") + mean = float("1.72234e-06") + std = float("0.0197626") + data = None + + +class Program_weight_tensor_parameter_50: + name = "parameter_50" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541264") + mean = float("2.85724e-06") + std = float("0.031239") + data = None + + +class Program_weight_tensor_parameter_51: + name = "parameter_51" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541263") + max_val = float("0.0541264") + mean = float("-3.41611e-06") + std = float("0.0312522") + data = None + + +class Program_weight_tensor_parameter_52: + name = "parameter_52" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("1.6339e-05") + std = float("0.0312434") + data = None + + +class Program_weight_tensor_parameter_53: + name = "parameter_53" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541264") + mean = float("3.73865e-05") + std = float("0.0312584") + data = None + + +class Program_weight_tensor_parameter_54: + name = "parameter_54" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342327") + mean = float("7.97859e-06") + std = float("0.01977") + data = None + + +class Program_weight_tensor_parameter_55: + name = "parameter_55" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("-1.00796e-05") + std = float("0.019761") + data = None + + +class Program_weight_tensor_parameter_56: + name = "parameter_56" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541262") + max_val = float("0.0541266") + mean = float("-5.47142e-05") + std = float("0.0312401") + data = None + + +class Program_weight_tensor_parameter_57: + name = "parameter_57" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541266") + mean = float("-1.72655e-05") + std = float("0.0312418") + data = None + + +class Program_weight_tensor_parameter_58: + name = "parameter_58" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("-1.91202e-05") + std = float("0.0312584") + data = None + + +class Program_weight_tensor_parameter_59: + name = "parameter_59" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541263") + mean = float("-8.2141e-06") + std = float("0.0312544") + data = None + + +class Program_weight_tensor_parameter_60: + name = "parameter_60" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342327") + mean = float("9.60745e-06") + std = float("0.0197609") + data = None + + +class Program_weight_tensor_parameter_61: + name = "parameter_61" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("-9.60847e-06") + std = float("0.0197673") + data = None + + +class Program_weight_tensor_parameter_62: + name = "parameter_62" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541263") + max_val = float("0.0541266") + mean = float("-3.51445e-06") + std = float("0.0312264") + data = None + + +class Program_weight_tensor_parameter_63: + name = "parameter_63" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("-6.96572e-06") + std = float("0.0312393") + data = None + + +class Program_weight_tensor_parameter_64: + name = "parameter_64" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541265") + mean = float("-4.38563e-06") + std = float("0.0312384") + data = None + + +class Program_weight_tensor_parameter_65: + name = "parameter_65" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.054126") + mean = float("-4.39111e-05") + std = float("0.0312609") + data = None + + +class Program_weight_tensor_parameter_66: + name = "parameter_66" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("-7.4757e-06") + std = float("0.0197573") + data = None + + +class Program_weight_tensor_parameter_67: + name = "parameter_67" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342327") + mean = float("3.72266e-06") + std = float("0.0197598") + data = None + + +class Program_weight_tensor_parameter_68: + name = "parameter_68" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("6.51926e-06") + std = float("0.0312677") + data = None + + +class Program_weight_tensor_parameter_69: + name = "parameter_69" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541265") + mean = float("3.41939e-05") + std = float("0.0312393") + data = None + + +class Program_weight_tensor_parameter_70: + name = "parameter_70" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541265") + mean = float("3.95986e-05") + std = float("0.0312597") + data = None + + +class Program_weight_tensor_parameter_71: + name = "parameter_71" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541265") + mean = float("-4.22483e-06") + std = float("0.0312503") + data = None + + +class Program_weight_tensor_parameter_72: + name = "parameter_72" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("1.46357e-05") + std = float("0.0197644") + data = None + + +class Program_weight_tensor_parameter_73: + name = "parameter_73" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342327") + mean = float("-4.75652e-06") + std = float("0.0197707") + data = None + + +class Program_weight_tensor_parameter_74: + name = "parameter_74" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541266") + mean = float("-6.86023e-06") + std = float("0.0312637") + data = None + + +class Program_weight_tensor_parameter_75: + name = "parameter_75" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541263") + max_val = float("0.0541264") + mean = float("-1.1187e-05") + std = float("0.0312445") + data = None + + +class Program_weight_tensor_parameter_76: + name = "parameter_76" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("-6.59103e-06") + std = float("0.0312607") + data = None + + +class Program_weight_tensor_parameter_77: + name = "parameter_77" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541266") + mean = float("-1.08368e-06") + std = float("0.0312597") + data = None + + +class Program_weight_tensor_parameter_78: + name = "parameter_78" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342327") + mean = float("-1.07443e-05") + std = float("0.0197675") + data = None + + +class Program_weight_tensor_parameter_79: + name = "parameter_79" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("-4.34148e-06") + std = float("0.019768") + data = None + + +class Program_weight_tensor_parameter_80: + name = "parameter_80" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541265") + mean = float("-4.65381e-05") + std = float("0.0312593") + data = None + + +class Program_weight_tensor_parameter_81: + name = "parameter_81" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541263") + max_val = float("0.0541263") + mean = float("-1.85356e-06") + std = float("0.0312556") + data = None + + +class Program_weight_tensor_parameter_82: + name = "parameter_82" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541263") + max_val = float("0.0541265") + mean = float("-1.79863e-05") + std = float("0.0312529") + data = None + + +class Program_weight_tensor_parameter_83: + name = "parameter_83" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541263") + mean = float("-1.98837e-05") + std = float("0.0312478") + data = None + + +class Program_weight_tensor_parameter_84: + name = "parameter_84" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342327") + mean = float("-1.44393e-05") + std = float("0.019764") + data = None + + +class Program_weight_tensor_parameter_85: + name = "parameter_85" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("-8.55828e-06") + std = float("0.0197657") + data = None + + +class Program_weight_tensor_parameter_86: + name = "parameter_86" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541263") + max_val = float("0.0541265") + mean = float("6.52808e-05") + std = float("0.0312608") + data = None + + +class Program_weight_tensor_parameter_87: + name = "parameter_87" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541266") + mean = float("-2.56954e-05") + std = float("0.0312411") + data = None + + +class Program_weight_tensor_parameter_88: + name = "parameter_88" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541265") + mean = float("-3.6882e-06") + std = float("0.0312386") + data = None + + +class Program_weight_tensor_parameter_89: + name = "parameter_89" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541266") + mean = float("8.43508e-06") + std = float("0.0312478") + data = None + + +class Program_weight_tensor_parameter_90: + name = "parameter_90" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("-1.11441e-05") + std = float("0.019766") + data = None + + +class Program_weight_tensor_parameter_91: + name = "parameter_91" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("-1.33713e-05") + std = float("0.0197641") + data = None + + +class Program_weight_tensor_parameter_92: + name = "parameter_92" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("-4.93259e-06") + std = float("0.0312243") + data = None + + +class Program_weight_tensor_parameter_93: + name = "parameter_93" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541266") + mean = float("-7.27456e-05") + std = float("0.0312637") + data = None + + +class Program_weight_tensor_parameter_94: + name = "parameter_94" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541265") + mean = float("6.51223e-06") + std = float("0.0312292") + data = None + + +class Program_weight_tensor_parameter_95: + name = "parameter_95" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("3.57338e-05") + std = float("0.0312587") + data = None + + +class Program_weight_tensor_parameter_96: + name = "parameter_96" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("3.12858e-06") + std = float("0.0197628") + data = None + + +class Program_weight_tensor_parameter_97: + name = "parameter_97" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("2.36864e-07") + std = float("0.0197611") + data = None + + +class Program_weight_tensor_parameter_98: + name = "parameter_98" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("-5.36725e-05") + std = float("0.0312354") + data = None + + +class Program_weight_tensor_parameter_99: + name = "parameter_99" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541266") + mean = float("2.30663e-05") + std = float("0.0312492") + data = None + + +class Program_weight_tensor_parameter_100: + name = "parameter_100" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541265") + mean = float("2.02898e-05") + std = float("0.0312397") + data = None + + +class Program_weight_tensor_parameter_101: + name = "parameter_101" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("8.44199e-06") + std = float("0.0312374") + data = None + + +class Program_weight_tensor_parameter_102: + name = "parameter_102" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342327") + mean = float("1.49939e-05") + std = float("0.0197635") + data = None + + +class Program_weight_tensor_parameter_103: + name = "parameter_103" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("-5.47284e-06") + std = float("0.0197652") + data = None + + +class Program_weight_tensor_parameter_104: + name = "parameter_104" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541263") + max_val = float("0.0541265") + mean = float("-3.41451e-05") + std = float("0.0312562") + data = None + + +class Program_weight_tensor_parameter_105: + name = "parameter_105" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("-2.14335e-05") + std = float("0.0312226") + data = None + + +class Program_weight_tensor_parameter_106: + name = "parameter_106" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541264") + mean = float("2.9067e-06") + std = float("0.0312561") + data = None + + +class Program_weight_tensor_parameter_107: + name = "parameter_107" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541262") + mean = float("-6.03459e-06") + std = float("0.0312487") + data = None + + +class Program_weight_tensor_parameter_108: + name = "parameter_108" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("-9.1926e-06") + std = float("0.0197667") + data = None + + +class Program_weight_tensor_parameter_109: + name = "parameter_109" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("-1.09615e-05") + std = float("0.0197577") + data = None + + +class Program_weight_tensor_parameter_110: + name = "parameter_110" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541262") + mean = float("-3.39752e-05") + std = float("0.0312673") + data = None + + +class Program_weight_tensor_parameter_111: + name = "parameter_111" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("-9.31632e-08") + std = float("0.0312667") + data = None + + +class Program_weight_tensor_parameter_112: + name = "parameter_112" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("-5.3033e-05") + std = float("0.0312605") + data = None + + +class Program_weight_tensor_parameter_113: + name = "parameter_113" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541265") + mean = float("-1.6783e-05") + std = float("0.0312366") + data = None + + +class Program_weight_tensor_parameter_114: + name = "parameter_114" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342327") + mean = float("2.91196e-06") + std = float("0.0197582") + data = None + + +class Program_weight_tensor_parameter_115: + name = "parameter_115" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("3.49086e-08") + std = float("0.0197622") + data = None + + +class Program_weight_tensor_parameter_116: + name = "parameter_116" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("7.49642e-06") + std = float("0.0312447") + data = None + + +class Program_weight_tensor_parameter_117: + name = "parameter_117" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541266") + mean = float("2.20859e-05") + std = float("0.0312482") + data = None + + +class Program_weight_tensor_parameter_118: + name = "parameter_118" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541264") + mean = float("1.91369e-05") + std = float("0.0312407") + data = None + + +class Program_weight_tensor_parameter_119: + name = "parameter_119" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541266") + mean = float("-2.81949e-05") + std = float("0.0312574") + data = None + + +class Program_weight_tensor_parameter_120: + name = "parameter_120" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("8.14695e-06") + std = float("0.0197657") + data = None + + +class Program_weight_tensor_parameter_121: + name = "parameter_121" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("2.68647e-06") + std = float("0.0197653") + data = None + + +class Program_weight_tensor_parameter_122: + name = "parameter_122" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541265") + mean = float("1.79758e-05") + std = float("0.0312313") + data = None + + +class Program_weight_tensor_parameter_123: + name = "parameter_123" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541265") + mean = float("-3.62454e-05") + std = float("0.031249") + data = None + + +class Program_weight_tensor_parameter_124: + name = "parameter_124" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541265") + mean = float("-7.64658e-05") + std = float("0.0312551") + data = None + + +class Program_weight_tensor_parameter_125: + name = "parameter_125" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("9.6901e-05") + std = float("0.0312609") + data = None + + +class Program_weight_tensor_parameter_126: + name = "parameter_126" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("-1.6435e-05") + std = float("0.0197631") + data = None + + +class Program_weight_tensor_parameter_127: + name = "parameter_127" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("-1.16081e-05") + std = float("0.019765") + data = None + + +class Program_weight_tensor_parameter_128: + name = "parameter_128" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("1.09839e-05") + std = float("0.0312648") + data = None + + +class Program_weight_tensor_parameter_129: + name = "parameter_129" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("-5.04124e-05") + std = float("0.0312445") + data = None + + +class Program_weight_tensor_parameter_130: + name = "parameter_130" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541264") + mean = float("1.07007e-05") + std = float("0.0312695") + data = None + + +class Program_weight_tensor_parameter_131: + name = "parameter_131" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("1.54728e-05") + std = float("0.031258") + data = None + + +class Program_weight_tensor_parameter_132: + name = "parameter_132" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342327") + mean = float("2.10161e-06") + std = float("0.0197592") + data = None + + +class Program_weight_tensor_parameter_133: + name = "parameter_133" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342327") + mean = float("3.86961e-07") + std = float("0.019763") + data = None + + +class Program_weight_tensor_parameter_134: + name = "parameter_134" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("3.2705e-05") + std = float("0.031256") + data = None + + +class Program_weight_tensor_parameter_135: + name = "parameter_135" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("-2.40264e-05") + std = float("0.0312517") + data = None + + +class Program_weight_tensor_parameter_136: + name = "parameter_136" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("-2.08416e-05") + std = float("0.0312394") + data = None + + +class Program_weight_tensor_parameter_137: + name = "parameter_137" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541265") + mean = float("-2.04073e-05") + std = float("0.0312392") + data = None + + +class Program_weight_tensor_parameter_138: + name = "parameter_138" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342327") + mean = float("-1.75344e-06") + std = float("0.0197625") + data = None + + +class Program_weight_tensor_parameter_139: + name = "parameter_139" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("8.34034e-06") + std = float("0.0197586") + data = None + + +class Program_weight_tensor_parameter_140: + name = "parameter_140" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("-1.93425e-05") + std = float("0.0312565") + data = None + + +class Program_weight_tensor_parameter_141: + name = "parameter_141" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("-4.4161e-05") + std = float("0.0312659") + data = None + + +class Program_weight_tensor_parameter_142: + name = "parameter_142" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541265") + mean = float("-1.18623e-05") + std = float("0.0312669") + data = None + + +class Program_weight_tensor_parameter_143: + name = "parameter_143" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541264") + mean = float("-5.52988e-05") + std = float("0.0312359") + data = None + + +class Program_weight_tensor_parameter_144: + name = "parameter_144" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("-1.59486e-05") + std = float("0.0197627") + data = None + + +class Program_weight_tensor_parameter_145: + name = "parameter_145" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("1.34348e-05") + std = float("0.0197671") + data = None + + +class Program_weight_tensor_parameter_146: + name = "parameter_146" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541266") + mean = float("-1.05507e-05") + std = float("0.0312556") + data = None + + +class Program_weight_tensor_parameter_147: + name = "parameter_147" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541264") + mean = float("-3.77256e-05") + std = float("0.0312451") + data = None + + +class Program_weight_tensor_parameter_148: + name = "parameter_148" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541264") + mean = float("3.59175e-06") + std = float("0.0312624") + data = None + + +class Program_weight_tensor_parameter_149: + name = "parameter_149" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541263") + mean = float("4.58248e-05") + std = float("0.0312459") + data = None + + +class Program_weight_tensor_parameter_150: + name = "parameter_150" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("1.81135e-05") + std = float("0.019758") + data = None + + +class Program_weight_tensor_parameter_151: + name = "parameter_151" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("1.80486e-05") + std = float("0.0197664") + data = None + + +class Program_weight_tensor_parameter_152: + name = "parameter_152" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541266") + mean = float("9.31494e-06") + std = float("0.0312232") + data = None + + +class Program_weight_tensor_parameter_153: + name = "parameter_153" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541264") + mean = float("-8.6498e-06") + std = float("0.0312564") + data = None + + +class Program_weight_tensor_parameter_154: + name = "parameter_154" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("2.44083e-05") + std = float("0.0312309") + data = None + + +class Program_weight_tensor_parameter_155: + name = "parameter_155" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("-3.52661e-05") + std = float("0.0312474") + data = None + + +class Program_weight_tensor_parameter_156: + name = "parameter_156" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342327") + mean = float("1.80633e-06") + std = float("0.019762") + data = None + + +class Program_weight_tensor_parameter_157: + name = "parameter_157" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342326") + mean = float("1.1894e-05") + std = float("0.0197585") + data = None + + +class Program_weight_tensor_parameter_158: + name = "parameter_158" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("-5.65206e-05") + std = float("0.0312595") + data = None + + +class Program_weight_tensor_parameter_159: + name = "parameter_159" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("-5.43298e-06") + std = float("0.0312427") + data = None + + +class Program_weight_tensor_parameter_160: + name = "parameter_160" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("3.51603e-05") + std = float("0.0312202") + data = None + + +class Program_weight_tensor_parameter_161: + name = "parameter_161" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("-1.01187e-05") + std = float("0.0312491") + data = None + + +class Program_weight_tensor_parameter_162: + name = "parameter_162" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342327") + mean = float("-2.90783e-06") + std = float("0.0197622") + data = None + + +class Program_weight_tensor_parameter_163: + name = "parameter_163" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("7.24786e-06") + std = float("0.0197621") + data = None + + +class Program_weight_tensor_parameter_164: + name = "parameter_164" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541262") + max_val = float("0.0541266") + mean = float("5.15321e-05") + std = float("0.0312552") + data = None + + +class Program_weight_tensor_parameter_165: + name = "parameter_165" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541265") + mean = float("-5.50988e-05") + std = float("0.0312501") + data = None + + +class Program_weight_tensor_parameter_166: + name = "parameter_166" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541265") + mean = float("-1.08721e-05") + std = float("0.0312569") + data = None + + +class Program_weight_tensor_parameter_167: + name = "parameter_167" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541265") + mean = float("4.19375e-05") + std = float("0.0312529") + data = None + + +class Program_weight_tensor_parameter_168: + name = "parameter_168" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("-2.07181e-06") + std = float("0.019761") + data = None + + +class Program_weight_tensor_parameter_169: + name = "parameter_169" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("1.00691e-05") + std = float("0.0197637") + data = None + + +class Program_weight_tensor_parameter_170: + name = "parameter_170" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541265") + mean = float("-1.16119e-05") + std = float("0.0312355") + data = None + + +class Program_weight_tensor_parameter_171: + name = "parameter_171" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("-4.17873e-05") + std = float("0.0312422") + data = None + + +class Program_weight_tensor_parameter_172: + name = "parameter_172" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("1.22908e-05") + std = float("0.0312712") + data = None + + +class Program_weight_tensor_parameter_173: + name = "parameter_173" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541264") + mean = float("3.87447e-05") + std = float("0.031266") + data = None + + +class Program_weight_tensor_parameter_174: + name = "parameter_174" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342327") + mean = float("-6.7642e-06") + std = float("0.0197599") + data = None + + +class Program_weight_tensor_parameter_175: + name = "parameter_175" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("1.43618e-05") + std = float("0.0197722") + data = None + + +class Program_weight_tensor_parameter_176: + name = "parameter_176" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("-1.68095e-06") + std = float("0.031245") + data = None + + +class Program_weight_tensor_parameter_177: + name = "parameter_177" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541264") + max_val = float("0.0541266") + mean = float("1.23707e-05") + std = float("0.0312629") + data = None + + +class Program_weight_tensor_parameter_178: + name = "parameter_178" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541264") + mean = float("1.53158e-06") + std = float("0.0312785") + data = None + + +class Program_weight_tensor_parameter_179: + name = "parameter_179" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541265") + mean = float("2.48834e-05") + std = float("0.0312525") + data = None + + +class Program_weight_tensor_parameter_180: + name = "parameter_180" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342326") + max_val = float("0.0342327") + mean = float("3.79776e-06") + std = float("0.0197635") + data = None + + +class Program_weight_tensor_parameter_181: + name = "parameter_181" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("-2.66979e-05") + std = float("0.0197649") + data = None + + +class Program_weight_tensor_parameter_182: + name = "parameter_182" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541265") + mean = float("-1.41748e-05") + std = float("0.0312324") + data = None + + +class Program_weight_tensor_parameter_183: + name = "parameter_183" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541263") + mean = float("3.12531e-05") + std = float("0.0312627") + data = None + + +class Program_weight_tensor_parameter_184: + name = "parameter_184" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541266") + mean = float("1.13084e-06") + std = float("0.0312374") + data = None + + +class Program_weight_tensor_parameter_185: + name = "parameter_185" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("2.14321e-05") + std = float("0.0312647") + data = None + + +class Program_weight_tensor_parameter_186: + name = "parameter_186" + shape = [4096, 1024] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342327") + mean = float("-1.10894e-05") + std = float("0.0197657") + data = None + + +class Program_weight_tensor_parameter_187: + name = "parameter_187" + shape = [1024, 4096] + dtype = "float32" + min_val = float("-0.0342327") + max_val = float("0.0342326") + mean = float("-1.64994e-05") + std = float("0.0197599") + data = None + + +class Program_weight_tensor_parameter_188: + name = "parameter_188" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541265") + mean = float("-3.40531e-05") + std = float("0.0312563") + data = None + + +class Program_weight_tensor_parameter_189: + name = "parameter_189" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541266") + max_val = float("0.0541265") + mean = float("3.88072e-05") + std = float("0.0312462") + data = None + + +class Program_weight_tensor_parameter_190: + name = "parameter_190" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541263") + max_val = float("0.0541265") + mean = float("-3.76561e-06") + std = float("0.031233") + data = None + + +class Program_weight_tensor_parameter_191: + name = "parameter_191" + shape = [1024, 1024] + dtype = "float32" + min_val = float("-0.0541265") + max_val = float("0.0541266") + mean = float("-1.86276e-05") + std = float("0.0312537") + data = None + + +class Program_weight_tensor_parameter_192: + name = "parameter_192" + shape = [2, 1024] + dtype = "float32" + min_val = float("-0.0763707") + max_val = float("0.0764658") + mean = float("0.000264463") + std = float("0.0440942") + data = None + + +class Program_weight_tensor_parameter_193: + name = "parameter_193" + shape = [12000, 1024] + dtype = "float32" + min_val = float("-0.0214636") + max_val = float("0.0214636") + mean = float("-2.14057e-06") + std = float("0.0123936") + data = None diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/graph_hash.txt b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/graph_hash.txt new file mode 100644 index 000000000..304f6f7ea --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/graph_hash.txt @@ -0,0 +1 @@ +c80c111803b86081bd979cc557dae189beefcb35d91c6b7db3f5392a73098cfe \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/graph_net.json b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/graph_net.json new file mode 100644 index 000000000..59bdc1973 --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/graph_net.json @@ -0,0 +1,6 @@ +{ + "framework": "paddle", + "model_name": "roformer_v2_chinese_char_small", + "num_devices_required": 1, + "num_nodes_required": 1 +} \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/input_meta.py b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/input_meta.py new file mode 100644 index 000000000..b6791c33c --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/input_meta.py @@ -0,0 +1,12 @@ +class Program_weight_tensor_data_0: + name = "data_0" + shape = [1, 11] + dtype = "int64" + data = [101, 3223, 6500, 421, 4179, 4331, 2008, 7263, 3055, 106, 102] + + +class Program_weight_tensor_data_1: + name = "data_1" + shape = [1, 11] + dtype = "int64" + data = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/model.py b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/model.py new file mode 100644 index 000000000..0cf240f66 --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/model.py @@ -0,0 +1,1702 @@ +import paddle + + +class GraphModule(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward( + self, + parameter_0, + parameter_1, + parameter_2, + parameter_3, + parameter_4, + parameter_5, + parameter_6, + parameter_7, + parameter_8, + parameter_9, + parameter_10, + parameter_11, + parameter_12, + parameter_13, + parameter_14, + parameter_15, + parameter_16, + parameter_17, + parameter_18, + parameter_19, + parameter_20, + parameter_21, + parameter_22, + parameter_23, + parameter_24, + parameter_25, + parameter_26, + parameter_27, + parameter_28, + parameter_29, + parameter_30, + parameter_31, + parameter_32, + parameter_33, + parameter_34, + parameter_35, + parameter_36, + parameter_37, + parameter_38, + parameter_39, + parameter_40, + parameter_41, + parameter_42, + parameter_43, + parameter_44, + parameter_45, + parameter_46, + parameter_47, + parameter_48, + parameter_49, + data_0, + data_1, + ): + # pd_op.full: (xi64) <- () + full_0 = paddle._C_ops.full( + [], float("0"), paddle.int64, paddle.framework._current_expected_place() + ) + + # pd_op.equal: (1x11xb) <- (1x11xi64, xi64) + equal_0 = paddle._C_ops.equal(data_0, full_0) + del full_0 + + # pd_op.cast: (1x11xf32) <- (1x11xb) + cast_0 = paddle._C_ops.cast(equal_0, paddle.float32) + del equal_0 + + # pd_op.full: (1xf32) <- () + full_1 = paddle._C_ops.full( + [1], float("-10000"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x11xf32) <- (1x11xf32, 1xf32) + scale_0 = paddle._C_ops.scale(cast_0, full_1, float("0"), True) + del cast_0, full_1 + + # pd_op.full_int_array: (2xi64) <- () + full_int_array_0 = [1, 2] + + # pd_op.unsqueeze: (1x1x1x11xf32) <- (1x11xf32, 2xi64) + unsqueeze_0 = paddle._C_ops.unsqueeze(scale_0, full_int_array_0) + del full_int_array_0, scale_0 + + # pd_op.embedding: (1x11x384xf32) <- (1x11xi64, 12000x384xf32) + embedding_0 = paddle._C_ops.embedding(data_0, parameter_49, -1, False) + del data_0, parameter_49 + + # pd_op.embedding: (1x11x384xf32) <- (1x11xi64, 2x384xf32) + embedding_1 = paddle._C_ops.embedding(data_1, parameter_48, -1, False) + del data_1, parameter_48 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_0 = paddle._C_ops.add(embedding_0, embedding_1) + del embedding_0, embedding_1 + + # pd_op.square: (1x11x384xf32) <- (1x11x384xf32) + square_0 = paddle._C_ops.square(add_0) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_1 = [-1] + + # pd_op.mean: (1x11x1xf32) <- (1x11x384xf32, 1xi64) + mean_0 = paddle._C_ops.mean(square_0, full_int_array_1, True) + del square_0 + + # pd_op.full: (1xf32) <- () + full_2 = paddle._C_ops.full( + [1], float("1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_1 = paddle._C_ops.scale(mean_0, full_2, float("1e-12"), True) + del mean_0 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_0 = paddle._C_ops.sqrt(scale_1) + del scale_1 + + # pd_op.divide: (1x11x384xf32) <- (1x11x384xf32, 1x11x1xf32) + divide_1 = paddle._C_ops.divide(add_0, sqrt_0) + del add_0, sqrt_0 + + # pd_op.full: (1xf32) <- () + full_3 = paddle._C_ops.full( + [1], float("0.1"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.dropout: (1x11x384xf32, 1x11x384xui8) <- (1x11x384xf32, None, 1xf32) + dropout_0, dropout_1 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + divide_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del divide_1 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_0 = paddle._C_ops.matmul(dropout_0, parameter_47, False, False) + del parameter_47 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_1 = paddle._C_ops.matmul(dropout_0, parameter_46, False, False) + del parameter_46 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_2 = paddle._C_ops.matmul(dropout_0, parameter_45, False, False) + del parameter_45 + + # pd_op.full_int_array: (4xi64) <- () + full_int_array_2 = [0, 0, 6, 64] + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_0 = paddle._C_ops.reshape(matmul_0, full_int_array_2) + del matmul_0 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_0 = paddle._C_ops.transpose(reshape_0, [0, 2, 1, 3]) + del reshape_0 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_1 = paddle._C_ops.reshape(matmul_1, full_int_array_2) + del matmul_1 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_1 = paddle._C_ops.transpose(reshape_1, [0, 2, 1, 3]) + del reshape_1 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_2 = paddle._C_ops.reshape(matmul_2, full_int_array_2) + del matmul_2 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_2 = paddle._C_ops.transpose(reshape_2, [0, 2, 1, 3]) + del reshape_2 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_3 = [0] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_4 = [11] + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_0 = paddle._C_ops.slice( + parameter_11, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_11 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_1 = paddle._C_ops.slice( + parameter_10, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_10 + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_5 = [2147483647] + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_6 = [2] + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_0 = paddle._C_ops.strided_slice( + transpose_0, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.full_int_array: (1xi64) <- () + full_int_array_7 = [1] + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_1 = paddle._C_ops.strided_slice( + transpose_0, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_0 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_0 = paddle._C_ops.multiply(strided_slice_0, slice_1) + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_1 = paddle._C_ops.multiply(strided_slice_1, slice_0) + + # pd_op.subtract: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + subtract_0 = paddle._C_ops.subtract(multiply_0, multiply_1) + del multiply_0, multiply_1 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_2 = paddle._C_ops.multiply(strided_slice_0, slice_0) + del strided_slice_0 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_3 = paddle._C_ops.multiply(strided_slice_1, slice_1) + del strided_slice_1 + + # pd_op.add: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + add_1 = paddle._C_ops.add(multiply_2, multiply_3) + del multiply_2, multiply_3 + + # builtin.combine: ([1x6x11x32xf32, 1x6x11x32xf32]) <- (1x6x11x32xf32, 1x6x11x32xf32) + combine_0 = [subtract_0, add_1] + del add_1, subtract_0 + + # pd_op.stack: (1x6x11x32x2xf32) <- ([1x6x11x32xf32, 1x6x11x32xf32]) + stack_0 = paddle._C_ops.stack(combine_0, -1) + del combine_0 + + # pd_op.flatten: (1x6x11x64xf32) <- (1x6x11x32x2xf32) + flatten_0 = paddle._C_ops.flatten(stack_0, 3, 4) + del stack_0 + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_2 = paddle._C_ops.strided_slice( + transpose_1, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_3 = paddle._C_ops.strided_slice( + transpose_1, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_1 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_4 = paddle._C_ops.multiply(strided_slice_2, slice_1) + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_5 = paddle._C_ops.multiply(strided_slice_3, slice_0) + + # pd_op.subtract: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + subtract_1 = paddle._C_ops.subtract(multiply_4, multiply_5) + del multiply_4, multiply_5 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_6 = paddle._C_ops.multiply(strided_slice_2, slice_0) + del slice_0, strided_slice_2 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_7 = paddle._C_ops.multiply(strided_slice_3, slice_1) + del slice_1, strided_slice_3 + + # pd_op.add: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + add_2 = paddle._C_ops.add(multiply_6, multiply_7) + del multiply_6, multiply_7 + + # builtin.combine: ([1x6x11x32xf32, 1x6x11x32xf32]) <- (1x6x11x32xf32, 1x6x11x32xf32) + combine_1 = [subtract_1, add_2] + del add_2, subtract_1 + + # pd_op.stack: (1x6x11x32x2xf32) <- ([1x6x11x32xf32, 1x6x11x32xf32]) + stack_1 = paddle._C_ops.stack(combine_1, -1) + del combine_1 + + # pd_op.flatten: (1x6x11x64xf32) <- (1x6x11x32x2xf32) + flatten_1 = paddle._C_ops.flatten(stack_1, 3, 4) + del stack_1 + + # pd_op.matmul: (1x6x11x11xf32) <- (1x6x11x64xf32, 1x6x11x64xf32) + matmul_3 = paddle._C_ops.matmul(flatten_0, flatten_1, False, True) + del flatten_0, flatten_1 + + # pd_op.full: (1xf32) <- () + full_4 = paddle._C_ops.full( + [1], float("0.125"), paddle.float32, paddle.core.CPUPlace() + ) + + # pd_op.scale: (1x6x11x11xf32) <- (1x6x11x11xf32, 1xf32) + scale_2 = paddle._C_ops.scale(matmul_3, full_4, float("0"), True) + del matmul_3 + + # pd_op.add: (1x6x11x11xf32) <- (1x6x11x11xf32, 1x1x1x11xf32) + add_3 = paddle._C_ops.add(scale_2, unsqueeze_0) + del scale_2 + + # pd_op.softmax: (1x6x11x11xf32) <- (1x6x11x11xf32) + softmax_0 = paddle._C_ops.softmax(add_3, -1) + del add_3 + + # pd_op.dropout: (1x6x11x11xf32, 1x6x11x11xui8) <- (1x6x11x11xf32, None, 1xf32) + dropout_2, dropout_3 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_0, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_0 + + # pd_op.matmul: (1x6x11x64xf32) <- (1x6x11x11xf32, 1x6x11x64xf32) + matmul_4 = paddle._C_ops.matmul(dropout_2, transpose_2, False, False) + del dropout_2, transpose_2 + + # pd_op.transpose: (1x11x6x64xf32) <- (1x6x11x64xf32) + transpose_3 = paddle._C_ops.transpose(matmul_4, [0, 2, 1, 3]) + del matmul_4 + + # pd_op.full_int_array: (3xi64) <- () + full_int_array_8 = [0, 0, 384] + + # pd_op.reshape: (1x11x384xf32) <- (1x11x6x64xf32, 3xi64) + reshape_3 = paddle._C_ops.reshape(transpose_3, full_int_array_8) + del transpose_3 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_5 = paddle._C_ops.matmul(reshape_3, parameter_44, False, False) + del parameter_44, reshape_3 + + # pd_op.dropout: (1x11x384xf32, 1x11x384xui8) <- (1x11x384xf32, None, 1xf32) + dropout_4, dropout_5 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_5 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_4 = paddle._C_ops.add(dropout_0, dropout_4) + del dropout_0, dropout_4 + + # pd_op.square: (1x11x384xf32) <- (1x11x384xf32) + square_1 = paddle._C_ops.square(add_4) + + # pd_op.mean: (1x11x1xf32) <- (1x11x384xf32, 1xi64) + mean_1 = paddle._C_ops.mean(square_1, full_int_array_1, True) + del square_1 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_3 = paddle._C_ops.scale(mean_1, full_2, float("1e-12"), True) + del mean_1 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_1 = paddle._C_ops.sqrt(scale_3) + del scale_3 + + # pd_op.divide: (1x11x384xf32) <- (1x11x384xf32, 1x11x1xf32) + divide_2 = paddle._C_ops.divide(add_4, sqrt_1) + del add_4, sqrt_1 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_6 = paddle._C_ops.matmul(divide_2, parameter_43, False, False) + del parameter_43 + + # pd_op.relu: (1x11x1536xf32) <- (1x11x1536xf32) + relu_0 = paddle._C_ops.relu(matmul_6) + del matmul_6 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_7 = paddle._C_ops.matmul(relu_0, parameter_42, False, False) + del parameter_42, relu_0 + + # pd_op.dropout: (1x11x384xf32, 1x11x384xui8) <- (1x11x384xf32, None, 1xf32) + dropout_6, dropout_7 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_7, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_7 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_5 = paddle._C_ops.add(divide_2, dropout_6) + del divide_2, dropout_6 + + # pd_op.square: (1x11x384xf32) <- (1x11x384xf32) + square_2 = paddle._C_ops.square(add_5) + + # pd_op.mean: (1x11x1xf32) <- (1x11x384xf32, 1xi64) + mean_2 = paddle._C_ops.mean(square_2, full_int_array_1, True) + del square_2 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_4 = paddle._C_ops.scale(mean_2, full_2, float("1e-12"), True) + del mean_2 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_2 = paddle._C_ops.sqrt(scale_4) + del scale_4 + + # pd_op.divide: (1x11x384xf32) <- (1x11x384xf32, 1x11x1xf32) + divide_3 = paddle._C_ops.divide(add_5, sqrt_2) + del add_5, sqrt_2 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_8 = paddle._C_ops.matmul(divide_3, parameter_41, False, False) + del parameter_41 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_9 = paddle._C_ops.matmul(divide_3, parameter_40, False, False) + del parameter_40 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_10 = paddle._C_ops.matmul(divide_3, parameter_39, False, False) + del parameter_39 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_4 = paddle._C_ops.reshape(matmul_8, full_int_array_2) + del matmul_8 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_4 = paddle._C_ops.transpose(reshape_4, [0, 2, 1, 3]) + del reshape_4 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_5 = paddle._C_ops.reshape(matmul_9, full_int_array_2) + del matmul_9 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_5 = paddle._C_ops.transpose(reshape_5, [0, 2, 1, 3]) + del reshape_5 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_6 = paddle._C_ops.reshape(matmul_10, full_int_array_2) + del matmul_10 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_6 = paddle._C_ops.transpose(reshape_6, [0, 2, 1, 3]) + del reshape_6 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_2 = paddle._C_ops.slice( + parameter_9, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_9 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_3 = paddle._C_ops.slice( + parameter_8, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_8 + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_4 = paddle._C_ops.strided_slice( + transpose_4, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_5 = paddle._C_ops.strided_slice( + transpose_4, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_4 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_8 = paddle._C_ops.multiply(strided_slice_4, slice_3) + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_9 = paddle._C_ops.multiply(strided_slice_5, slice_2) + + # pd_op.subtract: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + subtract_2 = paddle._C_ops.subtract(multiply_8, multiply_9) + del multiply_8, multiply_9 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_10 = paddle._C_ops.multiply(strided_slice_4, slice_2) + del strided_slice_4 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_11 = paddle._C_ops.multiply(strided_slice_5, slice_3) + del strided_slice_5 + + # pd_op.add: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + add_6 = paddle._C_ops.add(multiply_10, multiply_11) + del multiply_10, multiply_11 + + # builtin.combine: ([1x6x11x32xf32, 1x6x11x32xf32]) <- (1x6x11x32xf32, 1x6x11x32xf32) + combine_2 = [subtract_2, add_6] + del add_6, subtract_2 + + # pd_op.stack: (1x6x11x32x2xf32) <- ([1x6x11x32xf32, 1x6x11x32xf32]) + stack_2 = paddle._C_ops.stack(combine_2, -1) + del combine_2 + + # pd_op.flatten: (1x6x11x64xf32) <- (1x6x11x32x2xf32) + flatten_2 = paddle._C_ops.flatten(stack_2, 3, 4) + del stack_2 + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_6 = paddle._C_ops.strided_slice( + transpose_5, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_7 = paddle._C_ops.strided_slice( + transpose_5, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_5 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_12 = paddle._C_ops.multiply(strided_slice_6, slice_3) + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_13 = paddle._C_ops.multiply(strided_slice_7, slice_2) + + # pd_op.subtract: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + subtract_3 = paddle._C_ops.subtract(multiply_12, multiply_13) + del multiply_12, multiply_13 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_14 = paddle._C_ops.multiply(strided_slice_6, slice_2) + del slice_2, strided_slice_6 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_15 = paddle._C_ops.multiply(strided_slice_7, slice_3) + del slice_3, strided_slice_7 + + # pd_op.add: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + add_7 = paddle._C_ops.add(multiply_14, multiply_15) + del multiply_14, multiply_15 + + # builtin.combine: ([1x6x11x32xf32, 1x6x11x32xf32]) <- (1x6x11x32xf32, 1x6x11x32xf32) + combine_3 = [subtract_3, add_7] + del add_7, subtract_3 + + # pd_op.stack: (1x6x11x32x2xf32) <- ([1x6x11x32xf32, 1x6x11x32xf32]) + stack_3 = paddle._C_ops.stack(combine_3, -1) + del combine_3 + + # pd_op.flatten: (1x6x11x64xf32) <- (1x6x11x32x2xf32) + flatten_3 = paddle._C_ops.flatten(stack_3, 3, 4) + del stack_3 + + # pd_op.matmul: (1x6x11x11xf32) <- (1x6x11x64xf32, 1x6x11x64xf32) + matmul_11 = paddle._C_ops.matmul(flatten_2, flatten_3, False, True) + del flatten_2, flatten_3 + + # pd_op.scale: (1x6x11x11xf32) <- (1x6x11x11xf32, 1xf32) + scale_5 = paddle._C_ops.scale(matmul_11, full_4, float("0"), True) + del matmul_11 + + # pd_op.add: (1x6x11x11xf32) <- (1x6x11x11xf32, 1x1x1x11xf32) + add_8 = paddle._C_ops.add(scale_5, unsqueeze_0) + del scale_5 + + # pd_op.softmax: (1x6x11x11xf32) <- (1x6x11x11xf32) + softmax_1 = paddle._C_ops.softmax(add_8, -1) + del add_8 + + # pd_op.dropout: (1x6x11x11xf32, 1x6x11x11xui8) <- (1x6x11x11xf32, None, 1xf32) + dropout_8, dropout_9 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_1, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_1 + + # pd_op.matmul: (1x6x11x64xf32) <- (1x6x11x11xf32, 1x6x11x64xf32) + matmul_12 = paddle._C_ops.matmul(dropout_8, transpose_6, False, False) + del dropout_8, transpose_6 + + # pd_op.transpose: (1x11x6x64xf32) <- (1x6x11x64xf32) + transpose_7 = paddle._C_ops.transpose(matmul_12, [0, 2, 1, 3]) + del matmul_12 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x6x64xf32, 3xi64) + reshape_7 = paddle._C_ops.reshape(transpose_7, full_int_array_8) + del transpose_7 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_13 = paddle._C_ops.matmul(reshape_7, parameter_38, False, False) + del parameter_38, reshape_7 + + # pd_op.dropout: (1x11x384xf32, 1x11x384xui8) <- (1x11x384xf32, None, 1xf32) + dropout_10, dropout_11 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_13, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_13 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_9 = paddle._C_ops.add(divide_3, dropout_10) + del divide_3, dropout_10 + + # pd_op.square: (1x11x384xf32) <- (1x11x384xf32) + square_3 = paddle._C_ops.square(add_9) + + # pd_op.mean: (1x11x1xf32) <- (1x11x384xf32, 1xi64) + mean_3 = paddle._C_ops.mean(square_3, full_int_array_1, True) + del square_3 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_6 = paddle._C_ops.scale(mean_3, full_2, float("1e-12"), True) + del mean_3 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_3 = paddle._C_ops.sqrt(scale_6) + del scale_6 + + # pd_op.divide: (1x11x384xf32) <- (1x11x384xf32, 1x11x1xf32) + divide_4 = paddle._C_ops.divide(add_9, sqrt_3) + del add_9, sqrt_3 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_14 = paddle._C_ops.matmul(divide_4, parameter_37, False, False) + del parameter_37 + + # pd_op.relu: (1x11x1536xf32) <- (1x11x1536xf32) + relu_1 = paddle._C_ops.relu(matmul_14) + del matmul_14 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_15 = paddle._C_ops.matmul(relu_1, parameter_36, False, False) + del parameter_36, relu_1 + + # pd_op.dropout: (1x11x384xf32, 1x11x384xui8) <- (1x11x384xf32, None, 1xf32) + dropout_12, dropout_13 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_15, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_15 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_10 = paddle._C_ops.add(divide_4, dropout_12) + del divide_4, dropout_12 + + # pd_op.square: (1x11x384xf32) <- (1x11x384xf32) + square_4 = paddle._C_ops.square(add_10) + + # pd_op.mean: (1x11x1xf32) <- (1x11x384xf32, 1xi64) + mean_4 = paddle._C_ops.mean(square_4, full_int_array_1, True) + del square_4 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_7 = paddle._C_ops.scale(mean_4, full_2, float("1e-12"), True) + del mean_4 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_4 = paddle._C_ops.sqrt(scale_7) + del scale_7 + + # pd_op.divide: (1x11x384xf32) <- (1x11x384xf32, 1x11x1xf32) + divide_5 = paddle._C_ops.divide(add_10, sqrt_4) + del add_10, sqrt_4 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_16 = paddle._C_ops.matmul(divide_5, parameter_35, False, False) + del parameter_35 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_17 = paddle._C_ops.matmul(divide_5, parameter_34, False, False) + del parameter_34 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_18 = paddle._C_ops.matmul(divide_5, parameter_33, False, False) + del parameter_33 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_8 = paddle._C_ops.reshape(matmul_16, full_int_array_2) + del matmul_16 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_8 = paddle._C_ops.transpose(reshape_8, [0, 2, 1, 3]) + del reshape_8 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_9 = paddle._C_ops.reshape(matmul_17, full_int_array_2) + del matmul_17 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_9 = paddle._C_ops.transpose(reshape_9, [0, 2, 1, 3]) + del reshape_9 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_10 = paddle._C_ops.reshape(matmul_18, full_int_array_2) + del matmul_18 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_10 = paddle._C_ops.transpose(reshape_10, [0, 2, 1, 3]) + del reshape_10 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_4 = paddle._C_ops.slice( + parameter_7, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_7 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_5 = paddle._C_ops.slice( + parameter_6, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_6 + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_8 = paddle._C_ops.strided_slice( + transpose_8, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_9 = paddle._C_ops.strided_slice( + transpose_8, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_8 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_16 = paddle._C_ops.multiply(strided_slice_8, slice_5) + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_17 = paddle._C_ops.multiply(strided_slice_9, slice_4) + + # pd_op.subtract: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + subtract_4 = paddle._C_ops.subtract(multiply_16, multiply_17) + del multiply_16, multiply_17 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_18 = paddle._C_ops.multiply(strided_slice_8, slice_4) + del strided_slice_8 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_19 = paddle._C_ops.multiply(strided_slice_9, slice_5) + del strided_slice_9 + + # pd_op.add: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + add_11 = paddle._C_ops.add(multiply_18, multiply_19) + del multiply_18, multiply_19 + + # builtin.combine: ([1x6x11x32xf32, 1x6x11x32xf32]) <- (1x6x11x32xf32, 1x6x11x32xf32) + combine_4 = [subtract_4, add_11] + del add_11, subtract_4 + + # pd_op.stack: (1x6x11x32x2xf32) <- ([1x6x11x32xf32, 1x6x11x32xf32]) + stack_4 = paddle._C_ops.stack(combine_4, -1) + del combine_4 + + # pd_op.flatten: (1x6x11x64xf32) <- (1x6x11x32x2xf32) + flatten_4 = paddle._C_ops.flatten(stack_4, 3, 4) + del stack_4 + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_10 = paddle._C_ops.strided_slice( + transpose_9, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_11 = paddle._C_ops.strided_slice( + transpose_9, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_9 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_20 = paddle._C_ops.multiply(strided_slice_10, slice_5) + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_21 = paddle._C_ops.multiply(strided_slice_11, slice_4) + + # pd_op.subtract: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + subtract_5 = paddle._C_ops.subtract(multiply_20, multiply_21) + del multiply_20, multiply_21 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_22 = paddle._C_ops.multiply(strided_slice_10, slice_4) + del slice_4, strided_slice_10 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_23 = paddle._C_ops.multiply(strided_slice_11, slice_5) + del slice_5, strided_slice_11 + + # pd_op.add: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + add_12 = paddle._C_ops.add(multiply_22, multiply_23) + del multiply_22, multiply_23 + + # builtin.combine: ([1x6x11x32xf32, 1x6x11x32xf32]) <- (1x6x11x32xf32, 1x6x11x32xf32) + combine_5 = [subtract_5, add_12] + del add_12, subtract_5 + + # pd_op.stack: (1x6x11x32x2xf32) <- ([1x6x11x32xf32, 1x6x11x32xf32]) + stack_5 = paddle._C_ops.stack(combine_5, -1) + del combine_5 + + # pd_op.flatten: (1x6x11x64xf32) <- (1x6x11x32x2xf32) + flatten_5 = paddle._C_ops.flatten(stack_5, 3, 4) + del stack_5 + + # pd_op.matmul: (1x6x11x11xf32) <- (1x6x11x64xf32, 1x6x11x64xf32) + matmul_19 = paddle._C_ops.matmul(flatten_4, flatten_5, False, True) + del flatten_4, flatten_5 + + # pd_op.scale: (1x6x11x11xf32) <- (1x6x11x11xf32, 1xf32) + scale_8 = paddle._C_ops.scale(matmul_19, full_4, float("0"), True) + del matmul_19 + + # pd_op.add: (1x6x11x11xf32) <- (1x6x11x11xf32, 1x1x1x11xf32) + add_13 = paddle._C_ops.add(scale_8, unsqueeze_0) + del scale_8 + + # pd_op.softmax: (1x6x11x11xf32) <- (1x6x11x11xf32) + softmax_2 = paddle._C_ops.softmax(add_13, -1) + del add_13 + + # pd_op.dropout: (1x6x11x11xf32, 1x6x11x11xui8) <- (1x6x11x11xf32, None, 1xf32) + dropout_14, dropout_15 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_2, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_2 + + # pd_op.matmul: (1x6x11x64xf32) <- (1x6x11x11xf32, 1x6x11x64xf32) + matmul_20 = paddle._C_ops.matmul(dropout_14, transpose_10, False, False) + del dropout_14, transpose_10 + + # pd_op.transpose: (1x11x6x64xf32) <- (1x6x11x64xf32) + transpose_11 = paddle._C_ops.transpose(matmul_20, [0, 2, 1, 3]) + del matmul_20 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x6x64xf32, 3xi64) + reshape_11 = paddle._C_ops.reshape(transpose_11, full_int_array_8) + del transpose_11 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_21 = paddle._C_ops.matmul(reshape_11, parameter_32, False, False) + del parameter_32, reshape_11 + + # pd_op.dropout: (1x11x384xf32, 1x11x384xui8) <- (1x11x384xf32, None, 1xf32) + dropout_16, dropout_17 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_21, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_21 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_14 = paddle._C_ops.add(divide_5, dropout_16) + del divide_5, dropout_16 + + # pd_op.square: (1x11x384xf32) <- (1x11x384xf32) + square_5 = paddle._C_ops.square(add_14) + + # pd_op.mean: (1x11x1xf32) <- (1x11x384xf32, 1xi64) + mean_5 = paddle._C_ops.mean(square_5, full_int_array_1, True) + del square_5 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_9 = paddle._C_ops.scale(mean_5, full_2, float("1e-12"), True) + del mean_5 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_5 = paddle._C_ops.sqrt(scale_9) + del scale_9 + + # pd_op.divide: (1x11x384xf32) <- (1x11x384xf32, 1x11x1xf32) + divide_6 = paddle._C_ops.divide(add_14, sqrt_5) + del add_14, sqrt_5 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_22 = paddle._C_ops.matmul(divide_6, parameter_31, False, False) + del parameter_31 + + # pd_op.relu: (1x11x1536xf32) <- (1x11x1536xf32) + relu_2 = paddle._C_ops.relu(matmul_22) + del matmul_22 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_23 = paddle._C_ops.matmul(relu_2, parameter_30, False, False) + del parameter_30, relu_2 + + # pd_op.dropout: (1x11x384xf32, 1x11x384xui8) <- (1x11x384xf32, None, 1xf32) + dropout_18, dropout_19 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_23, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_23 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_15 = paddle._C_ops.add(divide_6, dropout_18) + del divide_6, dropout_18 + + # pd_op.square: (1x11x384xf32) <- (1x11x384xf32) + square_6 = paddle._C_ops.square(add_15) + + # pd_op.mean: (1x11x1xf32) <- (1x11x384xf32, 1xi64) + mean_6 = paddle._C_ops.mean(square_6, full_int_array_1, True) + del square_6 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_10 = paddle._C_ops.scale(mean_6, full_2, float("1e-12"), True) + del mean_6 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_6 = paddle._C_ops.sqrt(scale_10) + del scale_10 + + # pd_op.divide: (1x11x384xf32) <- (1x11x384xf32, 1x11x1xf32) + divide_7 = paddle._C_ops.divide(add_15, sqrt_6) + del add_15, sqrt_6 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_24 = paddle._C_ops.matmul(divide_7, parameter_29, False, False) + del parameter_29 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_25 = paddle._C_ops.matmul(divide_7, parameter_28, False, False) + del parameter_28 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_26 = paddle._C_ops.matmul(divide_7, parameter_27, False, False) + del parameter_27 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_12 = paddle._C_ops.reshape(matmul_24, full_int_array_2) + del matmul_24 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_12 = paddle._C_ops.transpose(reshape_12, [0, 2, 1, 3]) + del reshape_12 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_13 = paddle._C_ops.reshape(matmul_25, full_int_array_2) + del matmul_25 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_13 = paddle._C_ops.transpose(reshape_13, [0, 2, 1, 3]) + del reshape_13 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_14 = paddle._C_ops.reshape(matmul_26, full_int_array_2) + del matmul_26 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_14 = paddle._C_ops.transpose(reshape_14, [0, 2, 1, 3]) + del reshape_14 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_6 = paddle._C_ops.slice( + parameter_5, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_5 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_7 = paddle._C_ops.slice( + parameter_4, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_4 + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_12 = paddle._C_ops.strided_slice( + transpose_12, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_13 = paddle._C_ops.strided_slice( + transpose_12, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_12 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_24 = paddle._C_ops.multiply(strided_slice_12, slice_7) + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_25 = paddle._C_ops.multiply(strided_slice_13, slice_6) + + # pd_op.subtract: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + subtract_6 = paddle._C_ops.subtract(multiply_24, multiply_25) + del multiply_24, multiply_25 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_26 = paddle._C_ops.multiply(strided_slice_12, slice_6) + del strided_slice_12 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_27 = paddle._C_ops.multiply(strided_slice_13, slice_7) + del strided_slice_13 + + # pd_op.add: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + add_16 = paddle._C_ops.add(multiply_26, multiply_27) + del multiply_26, multiply_27 + + # builtin.combine: ([1x6x11x32xf32, 1x6x11x32xf32]) <- (1x6x11x32xf32, 1x6x11x32xf32) + combine_6 = [subtract_6, add_16] + del add_16, subtract_6 + + # pd_op.stack: (1x6x11x32x2xf32) <- ([1x6x11x32xf32, 1x6x11x32xf32]) + stack_6 = paddle._C_ops.stack(combine_6, -1) + del combine_6 + + # pd_op.flatten: (1x6x11x64xf32) <- (1x6x11x32x2xf32) + flatten_6 = paddle._C_ops.flatten(stack_6, 3, 4) + del stack_6 + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_14 = paddle._C_ops.strided_slice( + transpose_13, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_15 = paddle._C_ops.strided_slice( + transpose_13, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_13 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_28 = paddle._C_ops.multiply(strided_slice_14, slice_7) + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_29 = paddle._C_ops.multiply(strided_slice_15, slice_6) + + # pd_op.subtract: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + subtract_7 = paddle._C_ops.subtract(multiply_28, multiply_29) + del multiply_28, multiply_29 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_30 = paddle._C_ops.multiply(strided_slice_14, slice_6) + del slice_6, strided_slice_14 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_31 = paddle._C_ops.multiply(strided_slice_15, slice_7) + del slice_7, strided_slice_15 + + # pd_op.add: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + add_17 = paddle._C_ops.add(multiply_30, multiply_31) + del multiply_30, multiply_31 + + # builtin.combine: ([1x6x11x32xf32, 1x6x11x32xf32]) <- (1x6x11x32xf32, 1x6x11x32xf32) + combine_7 = [subtract_7, add_17] + del add_17, subtract_7 + + # pd_op.stack: (1x6x11x32x2xf32) <- ([1x6x11x32xf32, 1x6x11x32xf32]) + stack_7 = paddle._C_ops.stack(combine_7, -1) + del combine_7 + + # pd_op.flatten: (1x6x11x64xf32) <- (1x6x11x32x2xf32) + flatten_7 = paddle._C_ops.flatten(stack_7, 3, 4) + del stack_7 + + # pd_op.matmul: (1x6x11x11xf32) <- (1x6x11x64xf32, 1x6x11x64xf32) + matmul_27 = paddle._C_ops.matmul(flatten_6, flatten_7, False, True) + del flatten_6, flatten_7 + + # pd_op.scale: (1x6x11x11xf32) <- (1x6x11x11xf32, 1xf32) + scale_11 = paddle._C_ops.scale(matmul_27, full_4, float("0"), True) + del matmul_27 + + # pd_op.add: (1x6x11x11xf32) <- (1x6x11x11xf32, 1x1x1x11xf32) + add_18 = paddle._C_ops.add(scale_11, unsqueeze_0) + del scale_11 + + # pd_op.softmax: (1x6x11x11xf32) <- (1x6x11x11xf32) + softmax_3 = paddle._C_ops.softmax(add_18, -1) + del add_18 + + # pd_op.dropout: (1x6x11x11xf32, 1x6x11x11xui8) <- (1x6x11x11xf32, None, 1xf32) + dropout_20, dropout_21 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_3, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_3 + + # pd_op.matmul: (1x6x11x64xf32) <- (1x6x11x11xf32, 1x6x11x64xf32) + matmul_28 = paddle._C_ops.matmul(dropout_20, transpose_14, False, False) + del dropout_20, transpose_14 + + # pd_op.transpose: (1x11x6x64xf32) <- (1x6x11x64xf32) + transpose_15 = paddle._C_ops.transpose(matmul_28, [0, 2, 1, 3]) + del matmul_28 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x6x64xf32, 3xi64) + reshape_15 = paddle._C_ops.reshape(transpose_15, full_int_array_8) + del transpose_15 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_29 = paddle._C_ops.matmul(reshape_15, parameter_26, False, False) + del parameter_26, reshape_15 + + # pd_op.dropout: (1x11x384xf32, 1x11x384xui8) <- (1x11x384xf32, None, 1xf32) + dropout_22, dropout_23 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_29, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_29 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_19 = paddle._C_ops.add(divide_7, dropout_22) + del divide_7, dropout_22 + + # pd_op.square: (1x11x384xf32) <- (1x11x384xf32) + square_7 = paddle._C_ops.square(add_19) + + # pd_op.mean: (1x11x1xf32) <- (1x11x384xf32, 1xi64) + mean_7 = paddle._C_ops.mean(square_7, full_int_array_1, True) + del square_7 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_12 = paddle._C_ops.scale(mean_7, full_2, float("1e-12"), True) + del mean_7 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_7 = paddle._C_ops.sqrt(scale_12) + del scale_12 + + # pd_op.divide: (1x11x384xf32) <- (1x11x384xf32, 1x11x1xf32) + divide_8 = paddle._C_ops.divide(add_19, sqrt_7) + del add_19, sqrt_7 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_30 = paddle._C_ops.matmul(divide_8, parameter_25, False, False) + del parameter_25 + + # pd_op.relu: (1x11x1536xf32) <- (1x11x1536xf32) + relu_3 = paddle._C_ops.relu(matmul_30) + del matmul_30 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_31 = paddle._C_ops.matmul(relu_3, parameter_24, False, False) + del parameter_24, relu_3 + + # pd_op.dropout: (1x11x384xf32, 1x11x384xui8) <- (1x11x384xf32, None, 1xf32) + dropout_24, dropout_25 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_31, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_31 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_20 = paddle._C_ops.add(divide_8, dropout_24) + del divide_8, dropout_24 + + # pd_op.square: (1x11x384xf32) <- (1x11x384xf32) + square_8 = paddle._C_ops.square(add_20) + + # pd_op.mean: (1x11x1xf32) <- (1x11x384xf32, 1xi64) + mean_8 = paddle._C_ops.mean(square_8, full_int_array_1, True) + del square_8 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_13 = paddle._C_ops.scale(mean_8, full_2, float("1e-12"), True) + del mean_8 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_8 = paddle._C_ops.sqrt(scale_13) + del scale_13 + + # pd_op.divide: (1x11x384xf32) <- (1x11x384xf32, 1x11x1xf32) + divide_9 = paddle._C_ops.divide(add_20, sqrt_8) + del add_20, sqrt_8 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_32 = paddle._C_ops.matmul(divide_9, parameter_23, False, False) + del parameter_23 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_33 = paddle._C_ops.matmul(divide_9, parameter_22, False, False) + del parameter_22 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_34 = paddle._C_ops.matmul(divide_9, parameter_21, False, False) + del parameter_21 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_16 = paddle._C_ops.reshape(matmul_32, full_int_array_2) + del matmul_32 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_16 = paddle._C_ops.transpose(reshape_16, [0, 2, 1, 3]) + del reshape_16 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_17 = paddle._C_ops.reshape(matmul_33, full_int_array_2) + del matmul_33 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_17 = paddle._C_ops.transpose(reshape_17, [0, 2, 1, 3]) + del reshape_17 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_18 = paddle._C_ops.reshape(matmul_34, full_int_array_2) + del matmul_34 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_18 = paddle._C_ops.transpose(reshape_18, [0, 2, 1, 3]) + del reshape_18 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_8 = paddle._C_ops.slice( + parameter_3, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_3 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_9 = paddle._C_ops.slice( + parameter_2, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_2 + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_16 = paddle._C_ops.strided_slice( + transpose_16, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_17 = paddle._C_ops.strided_slice( + transpose_16, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_16 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_32 = paddle._C_ops.multiply(strided_slice_16, slice_9) + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_33 = paddle._C_ops.multiply(strided_slice_17, slice_8) + + # pd_op.subtract: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + subtract_8 = paddle._C_ops.subtract(multiply_32, multiply_33) + del multiply_32, multiply_33 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_34 = paddle._C_ops.multiply(strided_slice_16, slice_8) + del strided_slice_16 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_35 = paddle._C_ops.multiply(strided_slice_17, slice_9) + del strided_slice_17 + + # pd_op.add: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + add_21 = paddle._C_ops.add(multiply_34, multiply_35) + del multiply_34, multiply_35 + + # builtin.combine: ([1x6x11x32xf32, 1x6x11x32xf32]) <- (1x6x11x32xf32, 1x6x11x32xf32) + combine_8 = [subtract_8, add_21] + del add_21, subtract_8 + + # pd_op.stack: (1x6x11x32x2xf32) <- ([1x6x11x32xf32, 1x6x11x32xf32]) + stack_8 = paddle._C_ops.stack(combine_8, -1) + del combine_8 + + # pd_op.flatten: (1x6x11x64xf32) <- (1x6x11x32x2xf32) + flatten_8 = paddle._C_ops.flatten(stack_8, 3, 4) + del stack_8 + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_18 = paddle._C_ops.strided_slice( + transpose_17, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_19 = paddle._C_ops.strided_slice( + transpose_17, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_17 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_36 = paddle._C_ops.multiply(strided_slice_18, slice_9) + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_37 = paddle._C_ops.multiply(strided_slice_19, slice_8) + + # pd_op.subtract: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + subtract_9 = paddle._C_ops.subtract(multiply_36, multiply_37) + del multiply_36, multiply_37 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_38 = paddle._C_ops.multiply(strided_slice_18, slice_8) + del slice_8, strided_slice_18 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_39 = paddle._C_ops.multiply(strided_slice_19, slice_9) + del slice_9, strided_slice_19 + + # pd_op.add: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + add_22 = paddle._C_ops.add(multiply_38, multiply_39) + del multiply_38, multiply_39 + + # builtin.combine: ([1x6x11x32xf32, 1x6x11x32xf32]) <- (1x6x11x32xf32, 1x6x11x32xf32) + combine_9 = [subtract_9, add_22] + del add_22, subtract_9 + + # pd_op.stack: (1x6x11x32x2xf32) <- ([1x6x11x32xf32, 1x6x11x32xf32]) + stack_9 = paddle._C_ops.stack(combine_9, -1) + del combine_9 + + # pd_op.flatten: (1x6x11x64xf32) <- (1x6x11x32x2xf32) + flatten_9 = paddle._C_ops.flatten(stack_9, 3, 4) + del stack_9 + + # pd_op.matmul: (1x6x11x11xf32) <- (1x6x11x64xf32, 1x6x11x64xf32) + matmul_35 = paddle._C_ops.matmul(flatten_8, flatten_9, False, True) + del flatten_8, flatten_9 + + # pd_op.scale: (1x6x11x11xf32) <- (1x6x11x11xf32, 1xf32) + scale_14 = paddle._C_ops.scale(matmul_35, full_4, float("0"), True) + del matmul_35 + + # pd_op.add: (1x6x11x11xf32) <- (1x6x11x11xf32, 1x1x1x11xf32) + add_23 = paddle._C_ops.add(scale_14, unsqueeze_0) + del scale_14 + + # pd_op.softmax: (1x6x11x11xf32) <- (1x6x11x11xf32) + softmax_4 = paddle._C_ops.softmax(add_23, -1) + del add_23 + + # pd_op.dropout: (1x6x11x11xf32, 1x6x11x11xui8) <- (1x6x11x11xf32, None, 1xf32) + dropout_26, dropout_27 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_4, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_4 + + # pd_op.matmul: (1x6x11x64xf32) <- (1x6x11x11xf32, 1x6x11x64xf32) + matmul_36 = paddle._C_ops.matmul(dropout_26, transpose_18, False, False) + del dropout_26, transpose_18 + + # pd_op.transpose: (1x11x6x64xf32) <- (1x6x11x64xf32) + transpose_19 = paddle._C_ops.transpose(matmul_36, [0, 2, 1, 3]) + del matmul_36 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x6x64xf32, 3xi64) + reshape_19 = paddle._C_ops.reshape(transpose_19, full_int_array_8) + del transpose_19 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_37 = paddle._C_ops.matmul(reshape_19, parameter_20, False, False) + del parameter_20, reshape_19 + + # pd_op.dropout: (1x11x384xf32, 1x11x384xui8) <- (1x11x384xf32, None, 1xf32) + dropout_28, dropout_29 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_37, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_37 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_24 = paddle._C_ops.add(divide_9, dropout_28) + del divide_9, dropout_28 + + # pd_op.square: (1x11x384xf32) <- (1x11x384xf32) + square_9 = paddle._C_ops.square(add_24) + + # pd_op.mean: (1x11x1xf32) <- (1x11x384xf32, 1xi64) + mean_9 = paddle._C_ops.mean(square_9, full_int_array_1, True) + del square_9 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_15 = paddle._C_ops.scale(mean_9, full_2, float("1e-12"), True) + del mean_9 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_9 = paddle._C_ops.sqrt(scale_15) + del scale_15 + + # pd_op.divide: (1x11x384xf32) <- (1x11x384xf32, 1x11x1xf32) + divide_10 = paddle._C_ops.divide(add_24, sqrt_9) + del add_24, sqrt_9 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_38 = paddle._C_ops.matmul(divide_10, parameter_19, False, False) + del parameter_19 + + # pd_op.relu: (1x11x1536xf32) <- (1x11x1536xf32) + relu_4 = paddle._C_ops.relu(matmul_38) + del matmul_38 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_39 = paddle._C_ops.matmul(relu_4, parameter_18, False, False) + del parameter_18, relu_4 + + # pd_op.dropout: (1x11x384xf32, 1x11x384xui8) <- (1x11x384xf32, None, 1xf32) + dropout_30, dropout_31 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_39, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_39 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_25 = paddle._C_ops.add(divide_10, dropout_30) + del divide_10, dropout_30 + + # pd_op.square: (1x11x384xf32) <- (1x11x384xf32) + square_10 = paddle._C_ops.square(add_25) + + # pd_op.mean: (1x11x1xf32) <- (1x11x384xf32, 1xi64) + mean_10 = paddle._C_ops.mean(square_10, full_int_array_1, True) + del square_10 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_16 = paddle._C_ops.scale(mean_10, full_2, float("1e-12"), True) + del mean_10 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_10 = paddle._C_ops.sqrt(scale_16) + del scale_16 + + # pd_op.divide: (1x11x384xf32) <- (1x11x384xf32, 1x11x1xf32) + divide_11 = paddle._C_ops.divide(add_25, sqrt_10) + del add_25, sqrt_10 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_40 = paddle._C_ops.matmul(divide_11, parameter_17, False, False) + del parameter_17 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_41 = paddle._C_ops.matmul(divide_11, parameter_16, False, False) + del parameter_16 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_42 = paddle._C_ops.matmul(divide_11, parameter_15, False, False) + del parameter_15 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_20 = paddle._C_ops.reshape(matmul_40, full_int_array_2) + del matmul_40 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_20 = paddle._C_ops.transpose(reshape_20, [0, 2, 1, 3]) + del reshape_20 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_21 = paddle._C_ops.reshape(matmul_41, full_int_array_2) + del matmul_41 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_21 = paddle._C_ops.transpose(reshape_21, [0, 2, 1, 3]) + del reshape_21 + + # pd_op.reshape: (1x11x6x64xf32) <- (1x11x384xf32, 4xi64) + reshape_22 = paddle._C_ops.reshape(matmul_42, full_int_array_2) + del full_int_array_2, matmul_42 + + # pd_op.transpose: (1x6x11x64xf32) <- (1x11x6x64xf32) + transpose_22 = paddle._C_ops.transpose(reshape_22, [0, 2, 1, 3]) + del reshape_22 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_10 = paddle._C_ops.slice( + parameter_1, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del parameter_1 + + # pd_op.slice: (11x32xf32) <- (512x32xf32, 1xi64, 1xi64) + slice_11 = paddle._C_ops.slice( + parameter_0, [0], full_int_array_3, full_int_array_4, [1], [] + ) + del full_int_array_4, parameter_0 + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_20 = paddle._C_ops.strided_slice( + transpose_20, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_21 = paddle._C_ops.strided_slice( + transpose_20, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del transpose_20 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_40 = paddle._C_ops.multiply(strided_slice_20, slice_11) + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_41 = paddle._C_ops.multiply(strided_slice_21, slice_10) + + # pd_op.subtract: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + subtract_10 = paddle._C_ops.subtract(multiply_40, multiply_41) + del multiply_40, multiply_41 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_42 = paddle._C_ops.multiply(strided_slice_20, slice_10) + del strided_slice_20 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_43 = paddle._C_ops.multiply(strided_slice_21, slice_11) + del strided_slice_21 + + # pd_op.add: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + add_26 = paddle._C_ops.add(multiply_42, multiply_43) + del multiply_42, multiply_43 + + # builtin.combine: ([1x6x11x32xf32, 1x6x11x32xf32]) <- (1x6x11x32xf32, 1x6x11x32xf32) + combine_10 = [subtract_10, add_26] + del add_26, subtract_10 + + # pd_op.stack: (1x6x11x32x2xf32) <- ([1x6x11x32xf32, 1x6x11x32xf32]) + stack_10 = paddle._C_ops.stack(combine_10, -1) + del combine_10 + + # pd_op.flatten: (1x6x11x64xf32) <- (1x6x11x32x2xf32) + flatten_10 = paddle._C_ops.flatten(stack_10, 3, 4) + del stack_10 + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_22 = paddle._C_ops.strided_slice( + transpose_21, [3], full_int_array_3, full_int_array_5, full_int_array_6 + ) + del full_int_array_3 + + # pd_op.strided_slice: (1x6x11x32xf32) <- (1x6x11x64xf32, 1xi64, 1xi64, 1xi64) + strided_slice_23 = paddle._C_ops.strided_slice( + transpose_21, [3], full_int_array_7, full_int_array_5, full_int_array_6 + ) + del full_int_array_5, full_int_array_6, full_int_array_7, transpose_21 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_44 = paddle._C_ops.multiply(strided_slice_22, slice_11) + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_45 = paddle._C_ops.multiply(strided_slice_23, slice_10) + + # pd_op.subtract: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + subtract_11 = paddle._C_ops.subtract(multiply_44, multiply_45) + del multiply_44, multiply_45 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_46 = paddle._C_ops.multiply(strided_slice_22, slice_10) + del slice_10, strided_slice_22 + + # pd_op.multiply: (1x6x11x32xf32) <- (1x6x11x32xf32, 11x32xf32) + multiply_47 = paddle._C_ops.multiply(strided_slice_23, slice_11) + del slice_11, strided_slice_23 + + # pd_op.add: (1x6x11x32xf32) <- (1x6x11x32xf32, 1x6x11x32xf32) + add_27 = paddle._C_ops.add(multiply_46, multiply_47) + del multiply_46, multiply_47 + + # builtin.combine: ([1x6x11x32xf32, 1x6x11x32xf32]) <- (1x6x11x32xf32, 1x6x11x32xf32) + combine_11 = [subtract_11, add_27] + del add_27, subtract_11 + + # pd_op.stack: (1x6x11x32x2xf32) <- ([1x6x11x32xf32, 1x6x11x32xf32]) + stack_11 = paddle._C_ops.stack(combine_11, -1) + del combine_11 + + # pd_op.flatten: (1x6x11x64xf32) <- (1x6x11x32x2xf32) + flatten_11 = paddle._C_ops.flatten(stack_11, 3, 4) + del stack_11 + + # pd_op.matmul: (1x6x11x11xf32) <- (1x6x11x64xf32, 1x6x11x64xf32) + matmul_43 = paddle._C_ops.matmul(flatten_10, flatten_11, False, True) + del flatten_10, flatten_11 + + # pd_op.scale: (1x6x11x11xf32) <- (1x6x11x11xf32, 1xf32) + scale_17 = paddle._C_ops.scale(matmul_43, full_4, float("0"), True) + del full_4, matmul_43 + + # pd_op.add: (1x6x11x11xf32) <- (1x6x11x11xf32, 1x1x1x11xf32) + add_28 = paddle._C_ops.add(scale_17, unsqueeze_0) + del scale_17, unsqueeze_0 + + # pd_op.softmax: (1x6x11x11xf32) <- (1x6x11x11xf32) + softmax_5 = paddle._C_ops.softmax(add_28, -1) + del add_28 + + # pd_op.dropout: (1x6x11x11xf32, 1x6x11x11xui8) <- (1x6x11x11xf32, None, 1xf32) + dropout_32, dropout_33 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + softmax_5, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del softmax_5 + + # pd_op.matmul: (1x6x11x64xf32) <- (1x6x11x11xf32, 1x6x11x64xf32) + matmul_44 = paddle._C_ops.matmul(dropout_32, transpose_22, False, False) + del dropout_32, transpose_22 + + # pd_op.transpose: (1x11x6x64xf32) <- (1x6x11x64xf32) + transpose_23 = paddle._C_ops.transpose(matmul_44, [0, 2, 1, 3]) + del matmul_44 + + # pd_op.reshape: (1x11x384xf32) <- (1x11x6x64xf32, 3xi64) + reshape_23 = paddle._C_ops.reshape(transpose_23, full_int_array_8) + del full_int_array_8, transpose_23 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x384xf32, 384x384xf32) + matmul_45 = paddle._C_ops.matmul(reshape_23, parameter_14, False, False) + del parameter_14, reshape_23 + + # pd_op.dropout: (1x11x384xf32, 1x11x384xui8) <- (1x11x384xf32, None, 1xf32) + dropout_34, dropout_35 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_45, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del matmul_45 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_29 = paddle._C_ops.add(divide_11, dropout_34) + del divide_11, dropout_34 + + # pd_op.square: (1x11x384xf32) <- (1x11x384xf32) + square_11 = paddle._C_ops.square(add_29) + + # pd_op.mean: (1x11x1xf32) <- (1x11x384xf32, 1xi64) + mean_11 = paddle._C_ops.mean(square_11, full_int_array_1, True) + del square_11 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_18 = paddle._C_ops.scale(mean_11, full_2, float("1e-12"), True) + del mean_11 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_11 = paddle._C_ops.sqrt(scale_18) + del scale_18 + + # pd_op.divide: (1x11x384xf32) <- (1x11x384xf32, 1x11x1xf32) + divide_12 = paddle._C_ops.divide(add_29, sqrt_11) + del add_29, sqrt_11 + + # pd_op.matmul: (1x11x1536xf32) <- (1x11x384xf32, 384x1536xf32) + matmul_46 = paddle._C_ops.matmul(divide_12, parameter_13, False, False) + del parameter_13 + + # pd_op.relu: (1x11x1536xf32) <- (1x11x1536xf32) + relu_5 = paddle._C_ops.relu(matmul_46) + del matmul_46 + + # pd_op.matmul: (1x11x384xf32) <- (1x11x1536xf32, 1536x384xf32) + matmul_47 = paddle._C_ops.matmul(relu_5, parameter_12, False, False) + del parameter_12, relu_5 + + # pd_op.dropout: (1x11x384xf32, 1x11x384xui8) <- (1x11x384xf32, None, 1xf32) + dropout_36, dropout_37 = (lambda x, f: f(x))( + paddle._C_ops.dropout( + matmul_47, None, full_3, True, "upscale_in_train", 0, False + ), + lambda out: out if isinstance(out, (list, tuple)) else (out, None), + ) + del full_3, matmul_47 + + # pd_op.add: (1x11x384xf32) <- (1x11x384xf32, 1x11x384xf32) + add_30 = paddle._C_ops.add(divide_12, dropout_36) + del divide_12, dropout_36 + + # pd_op.square: (1x11x384xf32) <- (1x11x384xf32) + square_12 = paddle._C_ops.square(add_30) + + # pd_op.mean: (1x11x1xf32) <- (1x11x384xf32, 1xi64) + mean_12 = paddle._C_ops.mean(square_12, full_int_array_1, True) + del full_int_array_1, square_12 + + # pd_op.scale: (1x11x1xf32) <- (1x11x1xf32, 1xf32) + scale_19 = paddle._C_ops.scale(mean_12, full_2, float("1e-12"), True) + del full_2, mean_12 + + # pd_op.sqrt: (1x11x1xf32) <- (1x11x1xf32) + sqrt_12 = paddle._C_ops.sqrt(scale_19) + del scale_19 + + # pd_op.divide: (1x11x384xf32) <- (1x11x384xf32, 1x11x1xf32) + divide_0 = paddle._C_ops.divide(add_30, sqrt_12) + del add_30, sqrt_12 + + return divide_0 diff --git a/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/weight_meta.py b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/weight_meta.py new file mode 100644 index 000000000..79b843c36 --- /dev/null +++ b/paddle_samples/PaddleNLP/roformer_v2_chinese_char_small/weight_meta.py @@ -0,0 +1,548 @@ +class Program_weight_tensor_parameter_0: + name = "parameter_0" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_1: + name = "parameter_1" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_2: + name = "parameter_2" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_3: + name = "parameter_3" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_4: + name = "parameter_4" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_5: + name = "parameter_5" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_6: + name = "parameter_6" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_7: + name = "parameter_7" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_8: + name = "parameter_8" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_9: + name = "parameter_9" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_10: + name = "parameter_10" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.353456") + std = float("0.717232") + data = None + + +class Program_weight_tensor_parameter_11: + name = "parameter_11" + shape = [512, 32] + dtype = "float32" + min_val = float("-1.0") + max_val = float("1.0") + mean = float("0.16899") + std = float("0.576272") + data = None + + +class Program_weight_tensor_parameter_12: + name = "parameter_12" + shape = [1536, 384] + dtype = "float32" + min_val = float("-0.0559016") + max_val = float("0.0559016") + mean = float("5.79332e-05") + std = float("0.0322536") + data = None + + +class Program_weight_tensor_parameter_13: + name = "parameter_13" + shape = [384, 1536] + dtype = "float32" + min_val = float("-0.0559014") + max_val = float("0.0559012") + mean = float("-5.71911e-05") + std = float("0.0322937") + data = None + + +class Program_weight_tensor_parameter_14: + name = "parameter_14" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883844") + max_val = float("0.0883846") + mean = float("-0.000257144") + std = float("0.0511248") + data = None + + +class Program_weight_tensor_parameter_15: + name = "parameter_15" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883883") + max_val = float("0.088388") + mean = float("-5.70937e-05") + std = float("0.0509988") + data = None + + +class Program_weight_tensor_parameter_16: + name = "parameter_16" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.088387") + max_val = float("0.0883865") + mean = float("1.15914e-05") + std = float("0.0509112") + data = None + + +class Program_weight_tensor_parameter_17: + name = "parameter_17" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883864") + max_val = float("0.0883866") + mean = float("-0.000153516") + std = float("0.0510409") + data = None + + +class Program_weight_tensor_parameter_18: + name = "parameter_18" + shape = [1536, 384] + dtype = "float32" + min_val = float("-0.0559015") + max_val = float("0.0559017") + mean = float("5.84239e-06") + std = float("0.032251") + data = None + + +class Program_weight_tensor_parameter_19: + name = "parameter_19" + shape = [384, 1536] + dtype = "float32" + min_val = float("-0.0559016") + max_val = float("0.0559017") + mean = float("-7.04655e-05") + std = float("0.0322846") + data = None + + +class Program_weight_tensor_parameter_20: + name = "parameter_20" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883882") + max_val = float("0.0883871") + mean = float("-0.000218827") + std = float("0.0508997") + data = None + + +class Program_weight_tensor_parameter_21: + name = "parameter_21" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883877") + max_val = float("0.0883882") + mean = float("0.000270305") + std = float("0.0510042") + data = None + + +class Program_weight_tensor_parameter_22: + name = "parameter_22" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883879") + max_val = float("0.0883842") + mean = float("-0.000113491") + std = float("0.051107") + data = None + + +class Program_weight_tensor_parameter_23: + name = "parameter_23" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883878") + max_val = float("0.0883863") + mean = float("-0.000157764") + std = float("0.0511552") + data = None + + +class Program_weight_tensor_parameter_24: + name = "parameter_24" + shape = [1536, 384] + dtype = "float32" + min_val = float("-0.0559015") + max_val = float("0.0559017") + mean = float("1.95902e-05") + std = float("0.03228") + data = None + + +class Program_weight_tensor_parameter_25: + name = "parameter_25" + shape = [384, 1536] + dtype = "float32" + min_val = float("-0.0559016") + max_val = float("0.0559017") + mean = float("2.69329e-05") + std = float("0.0322613") + data = None + + +class Program_weight_tensor_parameter_26: + name = "parameter_26" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883859") + max_val = float("0.0883867") + mean = float("0.000142317") + std = float("0.0511159") + data = None + + +class Program_weight_tensor_parameter_27: + name = "parameter_27" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883867") + max_val = float("0.0883874") + mean = float("-0.000166283") + std = float("0.0509369") + data = None + + +class Program_weight_tensor_parameter_28: + name = "parameter_28" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883869") + max_val = float("0.0883878") + mean = float("4.11059e-05") + std = float("0.0510251") + data = None + + +class Program_weight_tensor_parameter_29: + name = "parameter_29" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.088387") + max_val = float("0.0883862") + mean = float("8.54805e-05") + std = float("0.0510926") + data = None + + +class Program_weight_tensor_parameter_30: + name = "parameter_30" + shape = [1536, 384] + dtype = "float32" + min_val = float("-0.0559017") + max_val = float("0.0559015") + mean = float("-3.55108e-05") + std = float("0.0322947") + data = None + + +class Program_weight_tensor_parameter_31: + name = "parameter_31" + shape = [384, 1536] + dtype = "float32" + min_val = float("-0.0559017") + max_val = float("0.0559015") + mean = float("2.03021e-06") + std = float("0.0322442") + data = None + + +class Program_weight_tensor_parameter_32: + name = "parameter_32" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.088387") + max_val = float("0.0883865") + mean = float("-0.000121814") + std = float("0.0510185") + data = None + + +class Program_weight_tensor_parameter_33: + name = "parameter_33" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883883") + max_val = float("0.0883881") + mean = float("-7.98854e-05") + std = float("0.0510031") + data = None + + +class Program_weight_tensor_parameter_34: + name = "parameter_34" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883873") + max_val = float("0.0883867") + mean = float("0.00016222") + std = float("0.0510736") + data = None + + +class Program_weight_tensor_parameter_35: + name = "parameter_35" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883876") + max_val = float("0.0883879") + mean = float("0.000130981") + std = float("0.0510446") + data = None + + +class Program_weight_tensor_parameter_36: + name = "parameter_36" + shape = [1536, 384] + dtype = "float32" + min_val = float("-0.0559015") + max_val = float("0.0559017") + mean = float("-7.10495e-05") + std = float("0.0322749") + data = None + + +class Program_weight_tensor_parameter_37: + name = "parameter_37" + shape = [384, 1536] + dtype = "float32" + min_val = float("-0.0559013") + max_val = float("0.0559015") + mean = float("-1.52181e-05") + std = float("0.0322673") + data = None + + +class Program_weight_tensor_parameter_38: + name = "parameter_38" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883881") + max_val = float("0.0883829") + mean = float("-1.35497e-05") + std = float("0.0510275") + data = None + + +class Program_weight_tensor_parameter_39: + name = "parameter_39" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883852") + max_val = float("0.0883882") + mean = float("8.21733e-06") + std = float("0.0510552") + data = None + + +class Program_weight_tensor_parameter_40: + name = "parameter_40" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883879") + max_val = float("0.088388") + mean = float("-8.87152e-05") + std = float("0.0510392") + data = None + + +class Program_weight_tensor_parameter_41: + name = "parameter_41" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883882") + max_val = float("0.0883874") + mean = float("-0.000119293") + std = float("0.0511082") + data = None + + +class Program_weight_tensor_parameter_42: + name = "parameter_42" + shape = [1536, 384] + dtype = "float32" + min_val = float("-0.0559016") + max_val = float("0.0559017") + mean = float("1.8159e-05") + std = float("0.0322383") + data = None + + +class Program_weight_tensor_parameter_43: + name = "parameter_43" + shape = [384, 1536] + dtype = "float32" + min_val = float("-0.0559016") + max_val = float("0.0559016") + mean = float("-1.48293e-05") + std = float("0.0322477") + data = None + + +class Program_weight_tensor_parameter_44: + name = "parameter_44" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883876") + max_val = float("0.0883879") + mean = float("3.05728e-05") + std = float("0.0510392") + data = None + + +class Program_weight_tensor_parameter_45: + name = "parameter_45" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883878") + max_val = float("0.0883857") + mean = float("-0.000111558") + std = float("0.0509584") + data = None + + +class Program_weight_tensor_parameter_46: + name = "parameter_46" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883879") + max_val = float("0.088388") + mean = float("0.000109751") + std = float("0.0511208") + data = None + + +class Program_weight_tensor_parameter_47: + name = "parameter_47" + shape = [384, 384] + dtype = "float32" + min_val = float("-0.0883852") + max_val = float("0.0883875") + mean = float("5.14273e-06") + std = float("0.0510309") + data = None + + +class Program_weight_tensor_parameter_48: + name = "parameter_48" + shape = [2, 384] + dtype = "float32" + min_val = float("-0.124387") + max_val = float("0.124664") + mean = float("-0.00277931") + std = float("0.0731551") + data = None + + +class Program_weight_tensor_parameter_49: + name = "parameter_49" + shape = [12000, 384] + dtype = "float32" + min_val = float("-0.0220113") + max_val = float("0.0220113") + mean = float("-2.98565e-07") + std = float("0.0127091") + data = None diff --git a/paddle_samples/PaddleNLP/xlnet-base-cased/graph_hash.txt b/paddle_samples/PaddleNLP/xlnet-base-cased/graph_hash.txt new file mode 100644 index 000000000..6d6170e69 --- /dev/null +++ b/paddle_samples/PaddleNLP/xlnet-base-cased/graph_hash.txt @@ -0,0 +1 @@ +8db5aa555c925a6ed43b42e22d96476959bdbe685aef0388f63f0a7d6a8fccbb \ No newline at end of file diff --git a/paddle_samples/PaddleNLP/xlnet-large-cased/graph_hash.txt b/paddle_samples/PaddleNLP/xlnet-large-cased/graph_hash.txt new file mode 100644 index 000000000..0900b81f3 --- /dev/null +++ b/paddle_samples/PaddleNLP/xlnet-large-cased/graph_hash.txt @@ -0,0 +1 @@ +0177dca2a28dd7aff031ea9fa6a0a662a60de0a1f313ce7f2256bcf6c80b05ba \ No newline at end of file