## Shardy:MPMD intro for JAX users

Shardy:MPMD is a new MPMD partitioning system, built in MLIR and integrated on top of JAX.

This colab demonstrates how to use MPMD pipelining for JAX users who use `jax.jit`. See our RFC for more details.

**Note**: This colab is purely read-only, and cannot be executed until we
fully open source all the components.

## Overview
This colab:
1. Defines a simplified Transformer (without the encode and decode stages) in SPMD with `jax.jit` and some sharding, and then
2. Demonstrates how to pipeline it with MPMD using different schedules.



### Set up
We connect to the Pathways server, inspect the devices, and load each slice into its own mesh. Mesh names are "m0", "m1", ...

This colab assumes we have 8 devices.

In [None]:
# @title Imports and connect to Pathways server { form-width: "80px" }
pathways_server_xid = 171321046  # @param {type: "number"}

from pprint import pprint

import jax
import jax.numpy as jnp
import numpy as np

import mpmd # Shardy MPMD python lib
import pathways_launch

# Turn on the `jax_use_shardy_partitioner` JAX config flag before any
# compilation happens.
jax.config.update('jax_use_shardy_partitioner', True)

# Mock API to connect to Pathways on Cloud TPUs.
pathways_launch.connect(pathways_server_xid)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
# @title Check devices:
print(f"Total num devices: {len(jax.devices())}")
# Arrange all devices as a 4x2 grid with axes ("stage", "data"). The reshape
# only succeeds when exactly 8 devices are visible.
mesh = jax.sharding.Mesh(np.array(jax.devices()).reshape(4,2), ("stage", "data"))
print("Base mesh: ", mesh)

Total num devices: 8
Base mesh:  Mesh('stage': 4, 'data': 2, axis_types=(Auto, Auto))


In [None]:
# @title Set up basic topology and assignment
# One single-row mesh per row of the base mesh, keyed "m0", "m1", ...
# Each per-stage mesh keeps the ("stage", "data") axis names but has a
# "stage" axis of size 1.
topology = {
    f"m{i}": jax.sharding.Mesh(stage_devices.reshape(1, 2), ("stage", "data"))
    for i, stage_devices in enumerate(mesh.devices)
}


print("MPMD topology: ")
pprint(topology)

MPMD topology: 
{'m0': Mesh(device_ids=array([[0, 1]]), axis_names=('stage', 'data'), axis_types=(Auto, Auto)),
 'm1': Mesh(device_ids=array([[2, 3]]), axis_names=('stage', 'data'), axis_types=(Auto, Auto)),
 'm2': Mesh(device_ids=array([[4, 5]]), axis_names=('stage', 'data'), axis_types=(Auto, Auto)),
 'm3': Mesh(device_ids=array([[6, 7]]), axis_names=('stage', 'data'), axis_types=(Auto, Auto))}


### Define a basic Transformer and util functions

In [None]:
import flax.linen as nn

# Define model parameters
# BATCH_SIZE, SEQ_LEN and D_MODEL together give the shape of the dummy input
# used to initialize and train the model.
BATCH_SIZE = 2
SEQ_LEN = 8 * 1024
D_MODEL = 1024
# Width of the hidden layer of each block's feed-forward network.
MLP_DIM = 4 * D_MODEL
# Number of `Block`s stacked by the Transformer.
NUM_LAYERS = 24


class Block(nn.Module):
  """One Transformer block: self-attention followed by a GELU MLP.

  Each of the two sub-layers is followed by a residual add and a LayerNorm.
  `__call__` is wrapped with Flax's lifted `nn.jit` and `nn.remat` transforms.
  """

  @nn.remat
  @nn.jit
  @nn.compact
  def __call__(self, x):
    # Self-attention sub-layer (8 heads, 16 qkv features in total).
    attended = nn.MultiHeadDotProductAttention(num_heads=8, qkv_features=16)(
        x
    )
    normed = nn.LayerNorm()(x + attended)

    # Feed-forward sub-layer: expand to MLP_DIM, apply GELU, then project
    # back to the input feature size.
    hidden = nn.gelu(nn.Dense(features=MLP_DIM)(normed))
    projected = nn.Dense(features=normed.shape[-1])(hidden)

    return nn.LayerNorm()(normed + projected)


class Transformer(nn.Module):
  """A stack of NUM_LAYERS `Block`s applied one after another.

  The blocks are named "block_0", "block_1", ... in application order.
  """

  @nn.compact
  def __call__(self, x):
    hidden = x
    for i in range(NUM_LAYERS):
      layer = Block(name=f"block_{i}")
      hidden = layer(hidden)
    return hidden


# Initialize the model's parameters
# `dummy_input` is an all-ones array of shape (BATCH_SIZE, SEQ_LEN, D_MODEL);
# it is passed to `init` here and reused later as training data.
dummy_input = jnp.ones((BATCH_SIZE, SEQ_LEN, D_MODEL))
transformer = Transformer()
# Fixed PRNG key, so initialization is the same on every run.
key = jax.random.PRNGKey(0)
params = transformer.init(key, dummy_input)["params"]

print("Model initialized successfully!")

Model initialized successfully!


In [None]:
from flax.training import train_state
import optax
import time

# AdamW optimizer with a fixed learning rate. The train state bundles the
# model's apply function, its parameters and the optimizer.
optimizer = optax.adamw(learning_rate=0.001)
state = train_state.TrainState.create(
    apply_fn=transformer.apply, params=params, tx=optimizer
)


# Define the training step.
def train_step(state, xs, targets):
  """Runs one optimizer step with gradients accumulated over microbatches.

  Args:
    state: Train state exposing `apply_fn`, `params` and `apply_gradients`.
    xs: Iterable of microbatch inputs; every microbatch is scored against the
      same `targets`.
    targets: Regression targets for the mean-squared-error loss.

  Returns:
    A `(new_state, loss)` pair, where `loss` is the sum (not the mean) of the
    per-microbatch losses and `new_state` results from applying the summed
    gradients.
  """

  def loss_fn(params, x, targets):
    residual = state.apply_fn({"params": params}, x) - targets
    return jnp.mean(residual ** 2)

  loss_and_grad_fn = jax.value_and_grad(loss_fn)

  loss_acc = grads_acc = None
  for microbatch in xs:
    mb_loss, mb_grads = loss_and_grad_fn(state.params, microbatch, targets)
    if loss_acc is None:
      # First microbatch seeds both accumulators.
      loss_acc, grads_acc = mb_loss, mb_grads
    else:
      loss_acc = loss_acc + mb_loss
      grads_acc = jax.tree.map(lambda acc, g: acc + g, grads_acc, mb_grads)

  return state.apply_gradients(grads=grads_acc), loss_acc


def train_with_progress(train_step, inputs, num_steps=3):
  """Runs `train_step` repeatedly, printing the loss and the elapsed time.

  One untimed warm-up call is made first and its result is discarded. The
  timed loop then threads the returned state through `num_steps` calls and
  prints the loss after every second step.

  Args:
    train_step: Callable `(state, x, targets) -> (new_state, loss)`.
    inputs: Tuple `(state, x, targets)` passed to `train_step`.
    num_steps: Number of timed steps to run.
  """
  current_state, batch, labels = inputs

  # Warmup
  jax.block_until_ready(train_step(current_state, batch, labels))

  training_loss = None
  start_time = time.perf_counter()
  for i in range(num_steps):
    current_state, training_loss = train_step(current_state, batch, labels)
    if i % 2:
      print(f"Training loss after step {i+1}: {training_loss}")

  # Wait for the last step to finish before stopping the clock.
  jax.block_until_ready(current_state)
  end_time = time.perf_counter()

  print(f"Final training loss: {training_loss}")
  print(f"Training took: {end_time - start_time:.2f} seconds")

print("Model util functions initialized.")

Model util functions initialized.


### Run the Transformer

In [None]:
# Set up inputs.
# We set the number of microbatches to the number of pipeline stages, as
# that's what we'll use for the pipelining.
NUM_PIPELINE_STAGE = len(topology)
NUM_MB = NUM_PIPELINE_STAGE
print("Num microbatches: ", NUM_MB)

# Every microbatch is the same all-ones array shaped like `dummy_input`, and
# `dummy_input` itself is passed as the `targets` argument of `train_step`.
xs = tuple([jnp.ones_like(dummy_input)] * NUM_MB)
inputs = (state, xs, dummy_input)

def get_param_sharding(x):
  """Returns the SPMD sharding for a single leaf of the train state.

  Leaves with a non-empty `shape` are partitioned along their first dimension
  over the combined ("stage", "data") axes of `mesh`. Leaves with an empty
  shape, or with no `shape` attribute at all, get an empty partition spec.
  """
  has_no_dims = len(getattr(x, "shape", [])) == 0
  spec = (
      jax.sharding.PartitionSpec()
      if has_no_dims
      else jax.sharding.PartitionSpec(("stage", "data"))
  )
  return jax.sharding.NamedSharding(mesh, spec)

# Data parallel + ZeRO 3 sharding on stage + data.
# One entry per `train_step` argument, in order: (state, xs, targets).
# Each state leaf gets its sharding from `get_param_sharding`; the microbatches
# and the targets are sharded along their first dimension over "data".
in_shardings = (
    jax.tree.map(get_param_sharding, state),
    jax.sharding.NamedSharding(
        mesh,
        jax.sharding.PartitionSpec("data"),
    ),
    jax.sharding.NamedSharding(
        mesh,
        jax.sharding.PartitionSpec("data"),
    ),
)

Num microbatches:  4


In [None]:
# Simple SPMD training with micro-batching.
jitted_train_step = jax.jit(train_step, in_shardings=in_shardings)
# Lower and compile ahead of time so the compiled executable can be passed
# straight to `train_with_progress`.
compiled = jitted_train_step.lower(*inputs).compile()
# Place the inputs on devices according to the same shardings used to compile.
sharded_inputs = jax.device_put(inputs, in_shardings)

train_with_progress(compiled, sharded_inputs)

Training loss after step 2: 7.991689205169678
Final training loss: 7.980075836181641
Training took: 10.06 seconds


Profile:

![spmd_xprof](https://raw.githubusercontent.com/openxla/shardy/main/rfcs/images/2025-06-18-mpmd-rfc/spmd_xprof.png)

### Pipeline the transformer

In [None]:
# To use MPMD, annotate the transformer: every Block is wrapped in a named
# computation called "block_{i}". The microbatch loop of the train step uses
# mpmd.call.
class AnnotatedTransformer(nn.Module):
  """Same layer stack as `Transformer`, with each Block wrapped by mpmd."""

  @nn.compact
  def __call__(self, x):
    hidden = x
    for i in range(NUM_LAYERS):
      named_block = mpmd.flax.named_computation(Block, name=f"block_{i}")
      hidden = named_block()(hidden)
    return hidden


def mpmd_train_step(state, xs, targets):
  """Trains the model for one step with mpmd microbatching.

  Args:
    state: Train state exposing `apply_fn`, `params` and `apply_gradients`.
    xs: Sequence of microbatch inputs; each is scored against `targets`.
    targets: Regression targets for the mean-squared-error loss.

  Returns:
    A `(new_state, loss)` pair, where `loss` is the sum of the per-microbatch
    losses and `new_state` results from applying the summed gradients.
  """

  def loss_fn(params, x):
    residual = state.apply_fn({"params": params}, x) - targets
    return jnp.mean(residual ** 2)

  # Accumulation is inside the mpmd.call, to ensure that the accumulation
  # is done as we go along. E.g. instead of at the end, which would be bad
  # for memory.
  def microbatch_step(carry, params, x):
    loss_and_grads = jax.value_and_grad(loss_fn)(params, x)
    return jax.tree.map(lambda acc, new: acc + new, carry, loss_and_grads)

  # The carry is (accumulated loss, accumulated grads), both starting at zero.
  carry = (jnp.zeros(()), jax.tree.map(jnp.zeros_like, state.params))

  for i, x in enumerate(xs):
    # Note the mpmd.call here, with call counter, wrapping the accumulation
    # function.
    carry = mpmd.call(microbatch_step, call_counter=i)(carry, state.params, x)

  loss_acc, grads_acc = carry
  return state.apply_gradients(grads=grads_acc), loss_acc


# Build parameters and a train state for the annotated model, reusing the
# same PRNG key, dummy input and optimizer as the SPMD model.
annotated_transformer = AnnotatedTransformer()
annotated_params = annotated_transformer.init(key, dummy_input)["params"]
annotated_state = train_state.TrainState.create(
    apply_fn=annotated_transformer.apply, params=annotated_params, tx=optimizer
)
# (state, xs, targets) arguments for `mpmd_train_step`.
annotated_placeholder_inputs = (annotated_state, xs, dummy_input)

# Assign contiguous runs of blocks to consecutive meshes: the first
# `layers_per_mesh` blocks go to "m0", the next run to "m1", and so on. Any
# remainder blocks are clamped onto the last mesh.
# `layers_per_mesh` is computed once, outside the loop, and is floored at 1 so
# that having fewer layers than meshes does not cause a division by zero.
layers_per_mesh = max(1, NUM_LAYERS // len(topology))
last_mesh_idx = len(topology) - 1
basic_assignment = {
    f"block_{i}": f"m{min(i // layers_per_mesh, last_mesh_idx)}"
    for i in range(NUM_LAYERS)
}


print("Name to mesh assignment:")
pprint(basic_assignment)

# Bundle the per-stage meshes, the block-name -> mesh assignment and the
# pipeline schedule option into a single MPMD config.
mpmd_config = mpmd.make_config(
    topology=topology,
    name_to_mesh_assignment=basic_assignment,
    partitioning_options=mpmd.make_partitioning_options({
        "mpmd_pipeline_schedule": "ONE_FWD_ONE_BWD",
    }),
)

Name to mesh assignment:
{'block_0': 'm0',
 'block_1': 'm0',
 'block_10': 'm1',
 'block_11': 'm1',
 'block_12': 'm2',
 'block_13': 'm2',
 'block_14': 'm2',
 'block_15': 'm2',
 'block_16': 'm2',
 'block_17': 'm2',
 'block_18': 'm3',
 'block_19': 'm3',
 'block_2': 'm0',
 'block_20': 'm3',
 'block_21': 'm3',
 'block_22': 'm3',
 'block_23': 'm3',
 'block_3': 'm0',
 'block_4': 'm0',
 'block_5': 'm0',
 'block_6': 'm1',
 'block_7': 'm1',
 'block_8': 'm1',
 'block_9': 'm1'}


In [None]:
# Similar to before, except the stage axis is now used for pipelining,
# so we don't shard on it.
def get_sharding_for_pipeline_state(x):
  """Returns the sharding for a single leaf of the pipelined train state.

  Leaves with a non-empty `shape` are partitioned along their first dimension
  over the "data" axis of `mpmd_config.sharding_mesh`. Leaves with an empty
  shape, or with no `shape` attribute at all, get an empty partition spec.
  """
  has_no_dims = len(getattr(x, "shape", [])) == 0
  spec = (
      jax.sharding.PartitionSpec()
      if has_no_dims
      else jax.sharding.PartitionSpec("data")
  )
  return jax.sharding.NamedSharding(mpmd_config.sharding_mesh, spec)


# Data parallel.
# One entry per `mpmd_train_step` argument, in order: (state, xs, targets).
# Note: this rebinds `in_shardings`, replacing the SPMD shardings defined
# earlier in the notebook, so the SPMD cells must be run before this one.
in_shardings = (
    jax.tree.map(get_sharding_for_pipeline_state, annotated_state),
    jax.sharding.NamedSharding(
        mpmd_config.sharding_mesh,
        jax.sharding.PartitionSpec("data"),
    ),
    jax.sharding.NamedSharding(
        mpmd_config.sharding_mesh,
        jax.sharding.PartitionSpec("data"),
    ),
)

#### Running the pipelined transformer

Now, we execute the transformer.

Note that we've not had to annotate other parts
of our program, e.g. we've not had to annotate the optimizer, nor the loss.
We've also not had to do anything with the gradient computations. These are
handled by the compiler and merged into an appropriate program.

Furthermore, we've not introduced any cross-mesh transfers explicitly. These
are automatically created on the name-to-name boundaries, e.g. when going from
"block_{i}" to "block_{i+1}", if they are assigned to different meshes, we create
the cross-mesh transfer.

We execute the transformer with various schedules, with the schedule applied at
jit-time. This can also be manually orchestrated with `mpmd.jit`, but we've
found the flexibility to be beneficial.

Note in the profile below that some of the blocks have been compiled to
multiple programs. E.g. the backward computation of blocks 0..5 has programs
p7, p10 and p14. This is because of how we've merged in the unannotated ops.
The first backward computation p7 will have the gradient accumulators
initialized, and the last one will have the param updates, which is why they
are different.

In [None]:
# Note: despite its name, `mpmd_jitted_train` holds the result of
# `.lower(...)`, i.e. the lowered program rather than the jitted function.
# NOTE(review): the same schedule option is already set inside `mpmd_config`;
# confirm whether passing `partitioning_options` again here is required.
mpmd_jitted_train = mpmd.jit(
    mpmd_train_step,
    mpmd_config=mpmd_config,
    in_shardings=in_shardings,
    # Partitioning API is a work-in-progress. For now we've hardcoded the
    # schedule, but in the future we'll expose fine-grained control as in the
    # RFC.
    partitioning_options=mpmd.make_partitioning_options({
        "mpmd_pipeline_schedule": "ONE_FWD_ONE_BWD",
    }),
).lower(*annotated_placeholder_inputs)
mpmd_compiled = mpmd_jitted_train.compile()

# With MPMD, we need to be more careful with state, and make sure it's on the
# right devices.
# The placement comes from the input specs reported by the lowered program.
pipelined_inputs = jax.device_put(
    annotated_placeholder_inputs,
    mpmd_jitted_train.function_named_shardings.input_specs,
)

print("Running program with schedule: ONE_FWD_ONE_BWD")
train_with_progress(mpmd_compiled, pipelined_inputs)

Running program with schedule: ONE_FWD_ONE_BWD
Training loss after step 2: 7.992943286895752
Final training loss: 7.976408004760742
Training took: 11.30 seconds


Profile:

![mpmd_1f1b_xprof](https://raw.githubusercontent.com/openxla/shardy/main/rfcs/images/2025-06-18-mpmd-rfc/mpmd_1f1b_xprof.png)

In [None]:
def run_xprof_with_schedule(schedule, assignment, stage_assignment=None):
  """Compiles `mpmd_train_step` for a pipeline schedule and trains with it.

  Args:
    schedule: Value for the "mpmd_pipeline_schedule" partitioning option.
    assignment: Mapping from computation name to mesh name.
    stage_assignment: Optional mapping from computation name to stage index;
      forwarded as `name_to_stage_assignment`.
  """
  print(f"Running program with schedule: {schedule}")

  config = mpmd.make_config(
      topology=topology,
      name_to_mesh_assignment=assignment,
      name_to_stage_assignment=stage_assignment,
      partitioning_options=mpmd.make_partitioning_options(
          {"mpmd_pipeline_schedule": schedule}
      ),
  )
  lowered = mpmd.jit(
      mpmd_train_step,
      mpmd_config=config,
      in_shardings=in_shardings,
  ).lower(*annotated_placeholder_inputs)
  executable = lowered.compile()

  # Place the inputs according to the input specs reported by the lowered
  # program.
  placed_inputs = jax.device_put(
      annotated_placeholder_inputs,
      lowered.function_named_shardings.input_specs,
  )

  train_with_progress(executable, placed_inputs)


In [None]:
# Re-run with the same contiguous block-to-mesh assignment, but under the
# GPIPE schedule.
run_xprof_with_schedule("GPIPE", basic_assignment)

Running program with schedule: GPIPE
Training loss after step 2: 7.992943286895752
Final training loss: 7.976408004760742
Training took: 11.35 seconds


Profile:

![mpmd_gpipe_xprof](https://raw.githubusercontent.com/openxla/shardy/main/rfcs/images/2025-06-18-mpmd-rfc/mpmd_gpipe_xprof.png)

In [None]:
# Round-robin placement: block i goes to mesh i % len(topology), and each
# consecutive pair of blocks shares the stage index i // 2.
# NOTE(review): with this mapping the two blocks of one stage land on
# different meshes (e.g. block_0 -> m0 and block_1 -> m1 are both stage 0).
# Confirm this is the intended stage/mesh pairing for the CIRCULAR schedule.
circular_assignment = {}
stage_assignment = {}
for i in range(NUM_LAYERS):
  circular_assignment[f"block_{i}"] = f"m{i % len(topology)}"
  stage_assignment[f"block_{i}"] = i // 2

run_xprof_with_schedule("CIRCULAR", circular_assignment, stage_assignment)

Running program with schedule: CIRCULAR
Training loss after step 2: 7.992943286895752
Final training loss: 7.976408004760742
Training took: 7.46 seconds


Profile:

![mpmd_circular_xprof](https://raw.githubusercontent.com/openxla/shardy/main/rfcs/images/2025-06-18-mpmd-rfc/mpmd_circular_xprof.png)

In [None]:
# Print the main func body of the original MPMD program (1F1B)
# `mpmd_jitted_train` here is the notebook-level variable from the 1F1B cell;
# `run_xprof_with_schedule` only rebinds a local of the same name.
mlir_module = mpmd_jitted_train.as_text("mpmd")
# Keep only the text between the first and second "func.func" occurrences,
# then re-add the "func.func" prefix that `split` removed.
truncated_mlir_module = mlir_module.split("func.func")[1]
print("func.func" + truncated_mlir_module)

Note: Printing the output of the above in a text cell, to get nicer formatting.

```
func.func public @main(%arg0: !mpmd.mesh_tensor<"m0", tensor<i32>, sharding=<@mesh, []>>, %arg1: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg2: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg3: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg4: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg5: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg6: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg7: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg8: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg9: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg10: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg11: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg12: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg13: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg14: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg15: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg16: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg17: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg18: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg19: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg20: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg21: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg22: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg23: !mpmd.mesh_tensor<"m0", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg24: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg25: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg26: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg27: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg28: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg29: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg30: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg31: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg32: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg33: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg34: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg35: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg36: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg37: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg38: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg39: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg40: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg41: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg42: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg43: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg44: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg45: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg46: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, %arg47: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg48: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg49: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg50: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg51: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg52: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg53: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg54: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg55: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg56: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg57: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg58: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg59: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg60: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg61: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg62: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg63: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg64: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg65: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg66: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg67: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg68: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg69: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, %arg70: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg71: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg72: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg73: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg74: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg75: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg76: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg77: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg78: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg79: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg80: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg81: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg82: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg83: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg84: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg85: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg86: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg87: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg88: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg89: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg90: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg91: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg92: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg93: 
!mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg94: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg95: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg96: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg97: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg98: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg99: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg100: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg101: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg102: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg103: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg104: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg105: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg106: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg107: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg108: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg109: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg110: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg111: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg112: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg113: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg114: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg115: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg116: 
!mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg117: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg118: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg119: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg120: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg121: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg122: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg123: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg124: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg125: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg126: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg127: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg128: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg129: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg130: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg131: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg132: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg133: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg134: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg135: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg136: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg137: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg138: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg139: 
!mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg140: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg141: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg142: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg143: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg144: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg145: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg146: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg147: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg148: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg149: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg150: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg151: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg152: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg153: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg154: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg155: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg156: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg157: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg158: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg159: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg160: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg161: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, 
%arg162: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg163: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg164: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg165: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg166: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg167: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg168: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg169: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg170: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg171: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg172: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg173: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg174: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg175: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg176: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg177: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg178: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg179: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg180: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg181: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg182: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg183: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg184: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg185: 
!mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg186: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg187: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg188: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg189: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg190: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg191: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg192: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg193: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg194: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg195: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg196: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg197: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg198: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg199: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg200: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg201: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg202: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg203: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg204: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg205: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg206: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg207: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
%arg208: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg209: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg210: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg211: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg212: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg213: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg214: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg215: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg216: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg217: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg218: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg219: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg220: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg221: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg222: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg223: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg224: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg225: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg226: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg227: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg228: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg229: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg230: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg231: 
!mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg232: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg233: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg234: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg235: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg236: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg237: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg238: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg239: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg240: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg241: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg242: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg243: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg244: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg245: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg246: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg247: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg248: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg249: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg250: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg251: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg252: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg253: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg254: 
!mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg255: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg256: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg257: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg258: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg259: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg260: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg261: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg262: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg263: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg264: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg265: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg266: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg267: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg268: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg269: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg270: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg271: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg272: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg273: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg274: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg275: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg276: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
%arg277: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg278: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg279: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg280: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg281: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg282: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg283: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg284: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg285: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg286: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg287: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg288: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg289: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg290: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg291: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg292: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg293: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg294: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg295: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg296: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg297: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg298: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg299: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg300: 
!mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg301: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg302: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg303: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg304: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg305: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg306: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg307: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg308: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg309: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg310: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg311: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg312: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg313: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg314: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg315: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg316: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg317: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg318: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg319: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg320: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg321: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg322: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, 
{}]>>, %arg323: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg324: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg325: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg326: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg327: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg328: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg329: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg330: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg331: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg332: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg333: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg334: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg335: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg336: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg337: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg338: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg339: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg340: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg341: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg342: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg343: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg344: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg345: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg346: 
!mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg347: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg348: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg349: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg350: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg351: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg352: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg353: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg354: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg355: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg356: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg357: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg358: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg359: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg360: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg361: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg362: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg363: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg364: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg365: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg366: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg367: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg368: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, 
{}]>>, %arg369: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg370: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg371: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg372: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg373: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg374: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg375: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg376: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg377: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg378: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg379: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg380: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg381: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg382: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg383: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg384: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg385: !mpmd.mesh_tensor<"m0", tensor<i32>, sharding=<@mesh, []>>, %arg386: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg387: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg388: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg389: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg390: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg391: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg392: 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg393: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg394: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg395: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg396: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg397: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg398: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg399: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg400: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg401: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg402: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg403: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg404: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg405: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg406: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg407: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg408: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg409: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg410: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg411: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg412: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg413: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg414: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg415: 
!mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg416: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg417: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg418: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg419: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg420: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg421: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg422: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg423: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg424: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg425: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg426: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg427: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg428: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg429: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg430: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg431: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg432: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg433: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg434: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg435: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg436: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg437: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
%arg438: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg439: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg440: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg441: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg442: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg443: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg444: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg445: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg446: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg447: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg448: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg449: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg450: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg451: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg452: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg453: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg454: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg455: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg456: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg457: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg458: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg459: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg460: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg461: 
!mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg462: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg463: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg464: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg465: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg466: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg467: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg468: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg469: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg470: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg471: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg472: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg473: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg474: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg475: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg476: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg477: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg478: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg479: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg480: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg481: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg482: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg483: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, 
{}]>>, %arg484: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg485: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg486: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg487: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg488: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg489: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg490: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg491: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg492: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg493: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg494: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg495: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg496: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg497: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg498: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg499: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg500: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg501: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg502: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg503: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg504: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg505: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg506: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg507: 
!mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg508: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg509: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg510: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg511: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg512: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg513: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg514: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg515: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg516: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg517: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg518: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg519: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg520: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg521: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg522: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg523: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg524: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg525: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg526: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg527: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg528: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg529: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, 
{}]>>, %arg530: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg531: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg532: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg533: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg534: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg535: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg536: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg537: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg538: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg539: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg540: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg541: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg542: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg543: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg544: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg545: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg546: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg547: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg548: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg549: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg550: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg551: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg552: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg553: 
!mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg554: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg555: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg556: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg557: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg558: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg559: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg560: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg561: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg562: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg563: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg564: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg565: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg566: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg567: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg568: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg569: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg570: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg571: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg572: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg573: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg574: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg575: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
%arg576: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg577: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg578: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg579: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg580: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg581: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg582: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg583: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg584: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg585: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg586: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg587: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg588: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg589: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg590: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg591: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg592: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg593: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg594: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg595: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg596: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg597: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg598: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
%arg599: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg600: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg601: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg602: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg603: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg604: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg605: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg606: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg607: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg608: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg609: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg610: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg611: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg612: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg613: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg614: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg615: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg616: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg617: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg618: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg619: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg620: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg621: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg622: 
!mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg623: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg624: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg625: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg626: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg627: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg628: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg629: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg630: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg631: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg632: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg633: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg634: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg635: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg636: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg637: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg638: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg639: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg640: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg641: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg642: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg643: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg644: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg645: 
!mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg646: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg647: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg648: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg649: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg650: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg651: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg652: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg653: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg654: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg655: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg656: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg657: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg658: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg659: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg660: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg661: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg662: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg663: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg664: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg665: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg666: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg667: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg668: 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg669: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg670: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg671: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg672: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg673: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg674: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg675: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg676: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg677: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg678: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg679: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg680: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg681: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg682: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg683: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg684: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg685: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg686: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg687: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg688: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg689: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg690: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, 
%arg691: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg692: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg693: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg694: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg695: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg696: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg697: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg698: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg699: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg700: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg701: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg702: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg703: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg704: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg705: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg706: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg707: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg708: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg709: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg710: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg711: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg712: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg713: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg714: 
!mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg715: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg716: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg717: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg718: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg719: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg720: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg721: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg722: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg723: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg724: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg725: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg726: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg727: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg728: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg729: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg730: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg731: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg732: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg733: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg734: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg735: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg736: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
%arg737: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg738: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg739: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg740: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg741: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg742: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg743: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg744: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg745: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg746: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg747: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg748: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg749: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg750: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg751: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg752: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg753: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg754: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg755: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg756: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg757: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg758: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg759: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg760: 
!mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg761: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg762: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg763: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg764: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg765: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg766: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg767: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg768: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg769: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg770: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg771: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg772: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg773: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg774: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg775: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg776: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg777: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg778: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg779: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg780: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg781: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg782: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg783: 
!mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg784: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg785: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg786: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg787: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg788: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg789: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg790: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg791: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg792: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg793: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg794: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg795: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg796: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg797: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg798: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg799: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg800: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg801: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg802: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg803: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg804: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg805: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
%arg806: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg807: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg808: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg809: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg810: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg811: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg812: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg813: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg814: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg815: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg816: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg817: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg818: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg819: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg820: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg821: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg822: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg823: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg824: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg825: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg826: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg827: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg828: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg829: 
!mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg830: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg831: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg832: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg833: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg834: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg835: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg836: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg837: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg838: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg839: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg840: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg841: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg842: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg843: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg844: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg845: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg846: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg847: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg848: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg849: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg850: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg851: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, 
{}]>>, %arg852: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg853: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg854: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg855: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg856: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg857: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg858: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg859: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg860: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg861: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg862: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg863: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg864: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg865: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg866: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg867: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg868: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg869: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg870: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg871: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg872: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg873: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg874: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg875: 
!mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg876: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg877: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg878: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg879: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg880: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg881: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg882: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg883: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg884: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg885: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg886: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg887: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg888: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg889: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg890: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg891: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg892: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg893: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg894: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg895: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg896: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg897: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, 
{}]>>, %arg898: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg899: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg900: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg901: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg902: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg903: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg904: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg905: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg906: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg907: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg908: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg909: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg910: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg911: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg912: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg913: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg914: !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg915: !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg916: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg917: !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg918: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg919: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg920: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg921: 
!mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg922: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg923: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg924: !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg925: !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg926: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg927: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg928: !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg929: !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg930: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg931: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg932: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg933: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg934: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg935: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg936: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg937: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg938: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg939: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg940: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg941: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg942: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg943: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
%arg944: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg945: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg946: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg947: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg948: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg949: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg950: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg951: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg952: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg953: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg954: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg955: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg956: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg957: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg958: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg959: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg960: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg961: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg962: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg963: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg964: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg965: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg966: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
%arg967: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg968: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg969: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg970: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg971: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg972: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg973: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg974: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg975: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg976: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg977: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg978: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg979: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg980: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg981: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg982: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg983: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg984: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg985: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg986: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg987: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg988: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg989: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg990: 
!mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg991: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg992: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg993: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg994: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg995: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg996: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg997: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg998: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg999: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1000: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1001: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1002: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1003: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1004: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1005: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1006: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1007: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1008: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1009: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1010: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg1011: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1012: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, %arg1013: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1014: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1015: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1016: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1017: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1018: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1019: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1020: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1021: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1022: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1023: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1024: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1025: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1026: !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg1027: !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1028: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1029: !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1030: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1031: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1032: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1033: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1034: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1035: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, 
[{"data"}, {}, {}]>>, %arg1036: !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1037: !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1038: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1039: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1040: !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1041: !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1042: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg1043: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1044: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1045: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1046: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1047: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1048: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1049: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1050: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1051: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1052: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1053: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1054: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1055: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1056: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1057: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1058: !mpmd.mesh_tensor<"m0", 
tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg1059: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1060: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1061: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1062: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1063: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1064: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1065: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1066: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1067: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1068: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1069: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1070: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1071: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1072: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1073: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1074: !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg1075: !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1076: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1077: !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1078: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1079: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1080: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1081: 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1082: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1083: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1084: !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1085: !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1086: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1087: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1088: !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1089: !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1090: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg1091: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1092: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1093: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1094: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1095: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1096: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1097: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1098: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1099: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1100: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1101: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1102: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1103: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, 
[{"data"}, {}, {}]>>, %arg1104: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1105: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1106: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg1107: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1108: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1109: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1110: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1111: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1112: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1113: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1114: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1115: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1116: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1117: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1118: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1119: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1120: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1121: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1122: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg1123: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1124: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1125: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1126: !mpmd.mesh_tensor<"m1", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1127: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1128: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1129: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1130: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1131: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1132: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1133: !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1134: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1135: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1136: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1137: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1138: !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, %arg1139: !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1140: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1141: !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1142: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1143: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1144: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1145: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1146: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1147: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1148: !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, %arg1149: !mpmd.mesh_tensor<"m1", 
tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1150: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1151: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1152: !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, %arg1153: !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1154: !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1155: !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1156: !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1157: !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, %arg1158: !mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m0", tensor<i32>, sharding=<@mesh, []>> {jax.result_info = "result[0].step"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_0']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['value']['kernel']"}, 
!mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> 
{jax.result_info = "result[0].params['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> 
{jax.result_info = "result[0].params['_named_computationBlock_10']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_11']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_12']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_13']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_14']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_15']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_16']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_17']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_18']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_19']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_2']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['Dense_0']['bias']"}, 
!mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_22']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_23']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_3']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_4']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['Dense_0']['kernel']"}, 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['Dense_1']['bias']"}, 
!mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['Dense_1']['bias']"}, 
!mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].params['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].params['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<i32>, sharding=<@mesh, []>> {jax.result_info = "result[0].opt_state[0].count"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_0']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> 
{jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_10']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, 
[{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", 
tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_12']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_14']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, 
[{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, 
[{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, 
[{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['Dense_0']['bias']"}, 
!mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['LayerNorm_0']['scale']"}, 
!mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_22']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, 
[{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, 
{}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> 
{jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['Dense_0']['bias']"}, 
!mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['LayerNorm_0']['scale']"}, 
!mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].mu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> 
{jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].mu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_0']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_0']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, 
{}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_1']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> 
{jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_10']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, 
[{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_11']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_12']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_13']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['Dense_0']['bias']"}, 
!mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_14']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_15']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_15']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_16']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_17']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_17']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, 
[{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_18']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, 
[{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_19']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, 
{}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_2']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_20']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_21']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['Dense_0']['bias']"}, 
!mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_22']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_23']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_23']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> 
{jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_3']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_4']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_4']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, 
{}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_5']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> 
{jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_6']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> 
{jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_7']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = 
"result[0].opt_state[0].nu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_8']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['Dense_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['Dense_0']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['Dense_1']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['Dense_1']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['LayerNorm_0']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['LayerNorm_0']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['LayerNorm_1']['bias']"}, !mpmd.mesh_tensor<"m1", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['LayerNorm_1']['scale']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['key']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['key']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['out']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['out']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['query']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['query']['kernel']"}, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['value']['bias']"}, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>> {jax.result_info = "result[0].opt_state[0].nu['_named_computationBlock_9']['MultiHeadDotProductAttention_0']['value']['kernel']"}, !mpmd.mesh_tensor<"m3", tensor<f32>, sharding=<@mesh, []>> {jax.result_info = "result[1]"}) attributes {topology = #mpmd.topology<<"m0" : <"stage"=1, "data"=2>>, <"m1" : <"stage"=1, 
"data"=2>>, <"m2" : <"stage"=1, "data"=2>>, <"m3" : <"stage"=1, "data"=2>>>} {
    %0 = mpmd.transfer %arg385 : (!mpmd.mesh_tensor<"m0", tensor<i32>, sharding=<@mesh, []>>) -> !mpmd.mesh_tensor<"m2", tensor<i32>, sharding=<@mesh, []>>
    %1 = mpmd.transfer %arg385 : (!mpmd.mesh_tensor<"m0", tensor<i32>, sharding=<@mesh, []>>) -> !mpmd.mesh_tensor<"m3", tensor<i32>, sharding=<@mesh, []>>
    %2 = mpmd.transfer %arg385 : (!mpmd.mesh_tensor<"m0", tensor<i32>, sharding=<@mesh, []>>) -> !mpmd.mesh_tensor<"m1", tensor<i32>, sharding=<@mesh, []>>
    %3:6 = mpmd.fragment_call<mesh="m0", origin=["block_0", "block_1", "block_2", "block_3", "block_4", "block_5"]> @"p0_block_0:5_fwd_calls0to3.mpmd_train_step"(%arg1154, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23, %arg24, %arg25, %arg26, %arg27, %arg28, %arg29, %arg30, %arg31, %arg32, %arg193, %arg194, %arg195, %arg196, %arg197, %arg198, %arg199, %arg200, %arg201, %arg202, %arg203, %arg204, %arg205, %arg206, %arg207, %arg208, %arg273, %arg274, %arg275, %arg276, %arg277, %arg278, %arg279, %arg280, %arg281, %arg282, %arg283, %arg284, %arg285, %arg286, %arg287, %arg288, %arg289, %arg290, %arg291, %arg292, %arg293, %arg294, %arg295, %arg296, %arg297, %arg298, %arg299, %arg300, %arg301, %arg302, %arg303, %arg304, %arg305, %arg306, %arg307, %arg308, %arg309, %arg310, %arg311, %arg312, %arg313, %arg314, %arg315, %arg316, %arg317, %arg318, %arg319, %arg320) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %4 = mpmd.transfer %3#5 : (!mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %5:6 = mpmd.fragment_call<mesh="m1", origin=["block_6", "block_7", "block_8", "block_9", "block_10", "block_11"]> @"p1_block_6:11_fwd_calls0to3.mpmd_train_step"(%4, %arg321, %arg322, %arg323, %arg324, %arg325, %arg326, %arg327, %arg328, %arg329, %arg330, %arg331, %arg332, %arg333, %arg334, %arg335, %arg336, %arg337, %arg338, %arg339, %arg340, %arg341, %arg342, %arg343, %arg344, %arg345, %arg346, %arg347, %arg348, %arg349, %arg350, %arg351, %arg352, %arg353, %arg354, %arg355, %arg356, %arg357, %arg358, %arg359, %arg360, %arg361, %arg362, %arg363, %arg364, %arg365, %arg366, %arg367, %arg368, %arg369, %arg370, %arg371, %arg372, %arg373, %arg374, %arg375, %arg376, %arg377, %arg378, %arg379, %arg380, %arg381, %arg382, %arg383, %arg384, %arg33, %arg34, %arg35, %arg36, %arg37, %arg38, %arg39, %arg40, %arg41, %arg42, %arg43, %arg44, %arg45, %arg46, %arg47, %arg48, %arg49, %arg50, %arg51, %arg52, %arg53, %arg54, %arg55, %arg56, %arg57, %arg58, %arg59, %arg60, %arg61, %arg62, %arg63, %arg64) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %6 = mpmd.transfer %5#5 : (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %7:6 = mpmd.fragment_call<mesh="m2", origin=["block_12", "block_13", "block_14", "block_15", "block_16", "block_17"]> @"p2_block_12:17_fwd_calls0to3.mpmd_train_step"(%6, %arg65, %arg66, %arg67, %arg68, %arg69, %arg70, %arg71, %arg72, %arg73, %arg74, %arg75, %arg76, %arg77, %arg78, %arg79, %arg80, %arg81, %arg82, %arg83, %arg84, %arg85, %arg86, %arg87, %arg88, %arg89, %arg90, %arg91, %arg92, %arg93, %arg94, %arg95, %arg96, %arg97, %arg98, %arg99, %arg100, %arg101, %arg102, %arg103, %arg104, %arg105, %arg106, %arg107, %arg108, %arg109, %arg110, %arg111, %arg112, %arg113, %arg114, %arg115, %arg116, %arg117, %arg118, %arg119, %arg120, %arg121, %arg122, %arg123, %arg124, %arg125, %arg126, %arg127, %arg128, %arg129, %arg130, %arg131, %arg132, %arg133, %arg134, %arg135, %arg136, %arg137, %arg138, %arg139, %arg140, %arg141, %arg142, %arg143, %arg144, %arg145, %arg146, %arg147, %arg148, %arg149, %arg150, %arg151, %arg152, %arg153, %arg154, %arg155, %arg156, %arg157, %arg158, %arg159, %arg160) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", 
tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %8 = mpmd.transfer %7#5 : (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %9:98 = mpmd.fragment_call<mesh="m3", origin=["block_18", "block_19", "block_20", "block_21", "block_22", "block_23", "block_23"(1), "block_22"(1), "block_21"(1), "block_20"(1), "block_19"(1), "block_18"(1)]> @"p3_block_18:23_fwd_bwd_call0.mpmd_train_step"(%8, %arg161, %arg162, %arg163, %arg164, %arg165, %arg166, %arg167, %arg168, %arg169, %arg170, %arg171, %arg172, %arg173, %arg174, %arg175, %arg176, %arg177, %arg178, %arg179, %arg180, %arg181, %arg182, %arg183, %arg184, %arg185, %arg186, %arg187, %arg188, %arg189, %arg190, %arg191, %arg192, %arg209, %arg210, %arg211, %arg212, %arg213, %arg214, %arg215, %arg216, %arg217, %arg218, %arg219, %arg220, %arg221, %arg222, %arg223, %arg224, %arg225, %arg226, %arg227, %arg228, %arg229, %arg230, %arg231, %arg232, %arg233, %arg234, %arg235, %arg236, %arg237, %arg238, %arg239, %arg240, %arg241, %arg242, %arg243, %arg244, %arg245, %arg246, %arg247, %arg248, %arg249, %arg250, %arg251, %arg252, %arg253, %arg254, %arg255, %arg256, %arg257, %arg258, %arg259, %arg260, %arg261, %arg262, %arg263, %arg264, %arg265, %arg266, %arg267, %arg268, %arg269, %arg270, %arg271, %arg272, %arg1158) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, 
[{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", 
tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m3", tensor<f32>, sharding=<@mesh, []>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %10 = mpmd.transfer %9#81 : (!mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %11:6 = mpmd.fragment_call<mesh="m0", origin=["block_0", "block_1", "block_2", "block_3", "block_4", "block_5"]> @"p0_block_0:5_fwd_calls0to3.mpmd_train_step"(%arg1155, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23, %arg24, %arg25, %arg26, %arg27, %arg28, %arg29, %arg30, %arg31, %arg32, %arg193, %arg194, %arg195, %arg196, %arg197, %arg198, %arg199, %arg200, %arg201, %arg202, %arg203, %arg204, %arg205, %arg206, %arg207, %arg208, %arg273, %arg274, %arg275, %arg276, %arg277, %arg278, %arg279, %arg280, %arg281, %arg282, %arg283, %arg284, %arg285, %arg286, %arg287, %arg288, %arg289, %arg290, %arg291, %arg292, %arg293, %arg294, %arg295, %arg296, %arg297, %arg298, %arg299, %arg300, %arg301, %arg302, %arg303, %arg304, %arg305, %arg306, %arg307, %arg308, %arg309, %arg310, %arg311, %arg312, %arg313, %arg314, %arg315, %arg316, %arg317, %arg318, %arg319, %arg320) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %12 = mpmd.transfer %11#5 : (!mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %13:6 = mpmd.fragment_call<mesh="m1", origin=["block_6", "block_7", "block_8", "block_9", "block_10", "block_11"]> @"p1_block_6:11_fwd_calls0to3.mpmd_train_step"(%12, %arg321, %arg322, %arg323, %arg324, %arg325, %arg326, %arg327, %arg328, %arg329, %arg330, %arg331, %arg332, %arg333, %arg334, %arg335, %arg336, %arg337, %arg338, %arg339, %arg340, %arg341, %arg342, %arg343, %arg344, %arg345, %arg346, %arg347, %arg348, %arg349, %arg350, %arg351, %arg352, %arg353, %arg354, %arg355, %arg356, %arg357, %arg358, %arg359, %arg360, %arg361, %arg362, %arg363, %arg364, %arg365, %arg366, %arg367, %arg368, %arg369, %arg370, %arg371, %arg372, %arg373, %arg374, %arg375, %arg376, %arg377, %arg378, %arg379, %arg380, %arg381, %arg382, %arg383, %arg384, %arg33, %arg34, %arg35, %arg36, %arg37, %arg38, %arg39, %arg40, %arg41, %arg42, %arg43, %arg44, %arg45, %arg46, %arg47, %arg48, %arg49, %arg50, %arg51, %arg52, %arg53, %arg54, %arg55, %arg56, %arg57, %arg58, %arg59, %arg60, %arg61, %arg62, %arg63, %arg64) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", 
tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %14 = mpmd.transfer %13#5 : (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %15:6 = mpmd.fragment_call<mesh="m2", origin=["block_12", "block_13", "block_14", "block_15", "block_16", "block_17"]> @"p2_block_12:17_fwd_calls0to3.mpmd_train_step"(%14, %arg65, %arg66, %arg67, %arg68, %arg69, %arg70, %arg71, %arg72, %arg73, %arg74, %arg75, %arg76, %arg77, %arg78, %arg79, %arg80, %arg81, %arg82, %arg83, %arg84, %arg85, %arg86, %arg87, %arg88, %arg89, %arg90, %arg91, %arg92, %arg93, %arg94, %arg95, %arg96, %arg97, %arg98, %arg99, %arg100, %arg101, %arg102, %arg103, %arg104, %arg105, %arg106, %arg107, %arg108, %arg109, %arg110, %arg111, %arg112, %arg113, %arg114, %arg115, %arg116, %arg117, %arg118, %arg119, %arg120, %arg121, %arg122, %arg123, %arg124, %arg125, %arg126, %arg127, %arg128, %arg129, %arg130, %arg131, %arg132, %arg133, %arg134, %arg135, %arg136, %arg137, %arg138, %arg139, %arg140, %arg141, %arg142, %arg143, %arg144, %arg145, %arg146, %arg147, %arg148, %arg149, %arg150, %arg151, %arg152, %arg153, %arg154, %arg155, %arg156, %arg157, %arg158, %arg159, %arg160) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", 
tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %16 = mpmd.transfer %15#5 : (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %17:98 = mpmd.fragment_call<mesh="m3", origin=["block_18", "block_19", "block_20", "block_21", "block_22", "block_23", "block_23"(1), "block_22"(1), "block_21"(1), "block_20"(1), "block_19"(1), "block_18"(1)]> @"p4_block_18:23_fwd_bwd_calls1to2.mpmd_train_step"(%16, %arg161, %arg162, %arg163, %arg164, %arg165, %arg166, %arg167, %arg168, %arg169, %arg170, %arg171, %arg172, %arg173, %arg174, %arg175, %arg176, %arg177, %arg178, %arg179, %arg180, %arg181, %arg182, %arg183, %arg184, %arg185, %arg186, %arg187, %arg188, %arg189, %arg190, %arg191, %arg192, %arg209, %arg210, %arg211, %arg212, %arg213, %arg214, %arg215, %arg216, %arg217, %arg218, %arg219, %arg220, %arg221, %arg222, %arg223, %arg224, %arg225, %arg226, %arg227, %arg228, %arg229, %arg230, %arg231, %arg232, %arg233, %arg234, %arg235, %arg236, %arg237, %arg238, %arg239, %arg240, %arg241, %arg242, %arg243, %arg244, %arg245, %arg246, %arg247, %arg248, %arg249, %arg250, %arg251, %arg252, %arg253, %arg254, %arg255, %arg256, %arg257, %arg258, %arg259, %arg260, %arg261, %arg262, %arg263, %arg264, %arg265, %arg266, %arg267, %arg268, %arg269, %arg270, %arg271, %arg272, %arg1158, %9#0, %9#1, %9#2, %9#3, %9#4, %9#5, %9#6, %9#7, %9#8, %9#9, %9#10, %9#11, %9#12, %9#13, %9#14, %9#15, %9#16, %9#17, %9#18, %9#19, %9#20, %9#21, %9#22, %9#23, %9#24, %9#25, %9#26, %9#27, %9#28, %9#29, %9#30, %9#31, %9#32, %9#33, %9#34, %9#35, %9#36, %9#37, %9#38, %9#39, %9#40, %9#41, %9#42, %9#43, %9#44, %9#45, %9#46, %9#47, %9#48, %9#49, %9#50, %9#51, %9#52, %9#53, %9#54, %9#55, %9#56, %9#57, %9#58, %9#59, %9#60, %9#61, %9#62, %9#63, %9#64, %9#65, %9#66, %9#67, %9#68, %9#69, %9#70, %9#71, %9#72, %9#73, %9#74, %9#75, %9#76, %9#77, %9#78, %9#79, %9#80, %9#82, %9#83, %9#84, %9#85, %9#86, %9#87, %9#88, %9#89, %9#90, %9#91, %9#92, %9#93, %9#94, %9#95, %9#96, %9#97) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, 
[{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<f32>, sharding=<@mesh, []>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m3", tensor<f32>, sharding=<@mesh, []>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", 
tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %18 = mpmd.transfer %17#81 : (!mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %19:6 = mpmd.fragment_call<mesh="m0", origin=["block_0", "block_1", "block_2", "block_3", "block_4", "block_5"]> @"p0_block_0:5_fwd_calls0to3.mpmd_train_step"(%arg1156, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23, %arg24, %arg25, %arg26, %arg27, %arg28, %arg29, %arg30, %arg31, %arg32, %arg193, %arg194, %arg195, %arg196, %arg197, %arg198, %arg199, %arg200, %arg201, %arg202, %arg203, %arg204, %arg205, %arg206, %arg207, %arg208, %arg273, %arg274, %arg275, %arg276, %arg277, %arg278, %arg279, %arg280, %arg281, %arg282, %arg283, %arg284, %arg285, %arg286, %arg287, %arg288, %arg289, %arg290, %arg291, %arg292, %arg293, %arg294, %arg295, %arg296, %arg297, %arg298, %arg299, %arg300, %arg301, %arg302, %arg303, %arg304, %arg305, %arg306, %arg307, %arg308, %arg309, %arg310, %arg311, %arg312, %arg313, %arg314, %arg315, %arg316, %arg317, %arg318, %arg319, %arg320) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %20 = mpmd.transfer %19#5 : (!mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %21:6 = mpmd.fragment_call<mesh="m1", origin=["block_6", "block_7", "block_8", "block_9", "block_10", "block_11"]> @"p1_block_6:11_fwd_calls0to3.mpmd_train_step"(%20, %arg321, %arg322, %arg323, %arg324, %arg325, %arg326, %arg327, %arg328, %arg329, %arg330, %arg331, %arg332, %arg333, %arg334, %arg335, %arg336, %arg337, %arg338, %arg339, %arg340, %arg341, %arg342, %arg343, %arg344, %arg345, %arg346, %arg347, %arg348, %arg349, %arg350, %arg351, %arg352, %arg353, %arg354, %arg355, %arg356, %arg357, %arg358, %arg359, %arg360, %arg361, %arg362, %arg363, %arg364, %arg365, %arg366, %arg367, %arg368, %arg369, %arg370, %arg371, %arg372, %arg373, %arg374, %arg375, %arg376, %arg377, %arg378, %arg379, %arg380, %arg381, %arg382, %arg383, %arg384, %arg33, %arg34, %arg35, %arg36, %arg37, %arg38, %arg39, %arg40, %arg41, %arg42, %arg43, %arg44, %arg45, %arg46, %arg47, %arg48, %arg49, %arg50, %arg51, %arg52, %arg53, %arg54, %arg55, %arg56, %arg57, %arg58, %arg59, %arg60, %arg61, %arg62, %arg63, %arg64) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", 
tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %22 = mpmd.transfer %21#5 : (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %23:6 = mpmd.fragment_call<mesh="m0", origin=["block_0", "block_1", "block_2", "block_3", "block_4", "block_5"]> @"p0_block_0:5_fwd_calls0to3.mpmd_train_step"(%arg1157, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg7, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg23, %arg24, %arg25, %arg26, %arg27, %arg28, %arg29, %arg30, %arg31, %arg32, %arg193, %arg194, %arg195, %arg196, %arg197, %arg198, %arg199, %arg200, %arg201, %arg202, %arg203, %arg204, %arg205, %arg206, %arg207, %arg208, %arg273, %arg274, %arg275, %arg276, %arg277, %arg278, %arg279, %arg280, %arg281, %arg282, %arg283, %arg284, %arg285, %arg286, %arg287, %arg288, %arg289, %arg290, %arg291, %arg292, %arg293, %arg294, %arg295, %arg296, %arg297, %arg298, %arg299, %arg300, %arg301, %arg302, %arg303, %arg304, %arg305, %arg306, %arg307, %arg308, %arg309, %arg310, %arg311, %arg312, %arg313, %arg314, %arg315, %arg316, %arg317, %arg318, %arg319, %arg320) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %24 = mpmd.transfer %23#5 : (!mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %25:97 = mpmd.fragment_call<mesh="m2", origin=["block_17"(1), "block_16"(1), "block_15"(1), "block_14"(1), "block_13"(1), "block_12"(1)]> @"p5_block_17:12_bwd_call0.mpmd_train_step"(%arg145, %arg146, %arg147, %arg148, %arg149, %arg150, %arg152, %arg153, %arg154, %arg155, %arg156, %arg157, %arg158, %arg159, %arg160, %7#4, %10, %arg129, %arg130, %arg131, %arg132, %arg133, %arg134, %arg136, %arg137, %arg138, %arg139, %arg140, %arg141, %arg142, %arg143, %arg144, %7#3, %arg113, %arg114, %arg115, %arg116, %arg117, %arg118, %arg120, %arg121, %arg122, %arg123, %arg124, %arg125, %arg126, %arg127, %arg128, %7#2, %arg97, %arg98, %arg99, %arg100, %arg101, %arg102, %arg104, %arg105, %arg106, %arg107, %arg108, %arg109, %arg110, %arg111, %arg112, %7#1, %arg81, %arg82, %arg83, %arg84, %arg85, %arg86, %arg88, %arg89, %arg90, %arg91, %arg92, %arg93, %arg94, %arg95, %arg96, %7#0, %arg65, %arg66, %arg67, %arg68, %arg69, %arg70, %arg72, %arg73, %arg74, %arg75, %arg76, %arg77, %arg78, %arg79, %arg80, %6) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %26 = mpmd.transfer %25#80 : (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %27:97 = mpmd.fragment_call<mesh="m1", origin=["block_11"(1), "block_10"(1), "block_9"(1), "block_8"(1), "block_7"(1), "block_6"(1)]> @"p6_block_11:6_bwd_call0.mpmd_train_step"(%arg49, %arg50, %arg51, %arg52, %arg53, %arg54, %arg56, %arg57, %arg58, %arg59, %arg60, %arg61, %arg62, %arg63, %arg64, %5#4, %26, %arg33, %arg34, %arg35, %arg36, %arg37, %arg38, %arg40, %arg41, %arg42, %arg43, %arg44, %arg45, %arg46, %arg47, %arg48, %5#3, %arg369, %arg370, %arg371, %arg372, %arg373, %arg374, %arg376, %arg377, %arg378, %arg379, %arg380, %arg381, %arg382, %arg383, %arg384, %5#2, %arg353, %arg354, %arg355, %arg356, %arg357, %arg358, %arg360, %arg361, %arg362, %arg363, %arg364, %arg365, %arg366, %arg367, %arg368, %5#1, %arg337, %arg338, %arg339, %arg340, %arg341, %arg342, %arg344, %arg345, %arg346, %arg347, %arg348, %arg349, %arg350, %arg351, %arg352, %5#0, %arg321, %arg322, %arg323, %arg324, %arg325, %arg326, %arg328, %arg329, %arg330, %arg331, %arg332, %arg333, %arg334, %arg335, %arg336, %4) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %28 = mpmd.transfer %27#80 : (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %29:96 = mpmd.fragment_call<mesh="m0", origin=["block_5"(1), "block_4"(1), "block_3"(1), "block_2"(1), "block_1"(1), "block_0"(1)]> @"p7_block_5:0_bwd_call0.mpmd_train_step"(%arg305, %arg306, %arg307, %arg308, %arg309, %arg310, %arg312, %arg313, %arg314, %arg315, %arg316, %arg317, %arg318, %arg319, %arg320, %3#4, %28, %arg289, %arg290, %arg291, %arg292, %arg293, %arg294, %arg296, %arg297, %arg298, %arg299, %arg300, %arg301, %arg302, %arg303, %arg304, %3#3, %arg273, %arg274, %arg275, %arg276, %arg277, %arg278, %arg280, %arg281, %arg282, %arg283, %arg284, %arg285, %arg286, %arg287, %arg288, %3#2, %arg193, %arg194, %arg195, %arg196, %arg197, %arg198, %arg200, %arg201, %arg202, %arg203, %arg204, %arg205, %arg206, %arg207, %arg208, %3#1, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg24, %arg25, %arg26, %arg27, %arg28, %arg29, %arg30, %arg31, %arg32, %3#0, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg1154) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %30:6 = mpmd.fragment_call<mesh="m2", origin=["block_12", "block_13", "block_14", "block_15", "block_16", "block_17"]> @"p2_block_12:17_fwd_calls0to3.mpmd_train_step"(%22, %arg65, %arg66, %arg67, %arg68, %arg69, %arg70, %arg71, %arg72, %arg73, %arg74, %arg75, %arg76, %arg77, %arg78, %arg79, %arg80, %arg81, %arg82, %arg83, %arg84, %arg85, %arg86, %arg87, %arg88, %arg89, %arg90, %arg91, %arg92, %arg93, %arg94, %arg95, %arg96, %arg97, %arg98, %arg99, %arg100, %arg101, %arg102, %arg103, %arg104, %arg105, %arg106, %arg107, %arg108, %arg109, %arg110, %arg111, %arg112, %arg113, %arg114, %arg115, %arg116, %arg117, %arg118, %arg119, %arg120, %arg121, %arg122, %arg123, %arg124, %arg125, %arg126, %arg127, %arg128, %arg129, %arg130, %arg131, %arg132, %arg133, %arg134, %arg135, %arg136, %arg137, %arg138, %arg139, %arg140, %arg141, %arg142, %arg143, %arg144, %arg145, %arg146, %arg147, %arg148, %arg149, %arg150, %arg151, %arg152, %arg153, %arg154, %arg155, %arg156, %arg157, %arg158, %arg159, %arg160) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", 
tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %31 = mpmd.transfer %30#5 : (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %32:98 = mpmd.fragment_call<mesh="m3", origin=["block_18", "block_19", "block_20", "block_21", "block_22", "block_23", "block_23"(1), "block_22"(1), "block_21"(1), "block_20"(1), "block_19"(1), "block_18"(1)]> @"p4_block_18:23_fwd_bwd_calls1to2.mpmd_train_step"(%31, %arg161, %arg162, %arg163, %arg164, %arg165, %arg166, %arg167, %arg168, %arg169, %arg170, %arg171, %arg172, %arg173, %arg174, %arg175, %arg176, %arg177, %arg178, %arg179, %arg180, %arg181, %arg182, %arg183, %arg184, %arg185, %arg186, %arg187, %arg188, %arg189, %arg190, %arg191, %arg192, %arg209, %arg210, %arg211, %arg212, %arg213, %arg214, %arg215, %arg216, %arg217, %arg218, %arg219, %arg220, %arg221, %arg222, %arg223, %arg224, %arg225, %arg226, %arg227, %arg228, %arg229, %arg230, %arg231, %arg232, %arg233, %arg234, %arg235, %arg236, %arg237, %arg238, %arg239, %arg240, %arg241, %arg242, %arg243, %arg244, %arg245, %arg246, %arg247, %arg248, %arg249, %arg250, %arg251, %arg252, %arg253, %arg254, %arg255, %arg256, %arg257, %arg258, %arg259, %arg260, %arg261, %arg262, %arg263, %arg264, %arg265, %arg266, %arg267, %arg268, %arg269, %arg270, %arg271, %arg272, %arg1158, %17#0, %17#1, %17#2, %17#3, %17#4, %17#5, %17#6, %17#7, %17#8, %17#9, %17#10, %17#11, %17#12, %17#13, %17#14, %17#15, %17#16, %17#17, %17#18, %17#19, %17#20, %17#21, %17#22, %17#23, %17#24, %17#25, %17#26, %17#27, %17#28, %17#29, %17#30, %17#31, %17#32, %17#33, %17#34, %17#35, %17#36, %17#37, %17#38, %17#39, %17#40, %17#41, %17#42, %17#43, %17#44, %17#45, %17#46, %17#47, %17#48, %17#49, %17#50, %17#51, %17#52, %17#53, %17#54, %17#55, %17#56, %17#57, %17#58, %17#59, %17#60, %17#61, %17#62, %17#63, %17#64, %17#65, %17#66, %17#67, %17#68, %17#69, %17#70, %17#71, %17#72, %17#73, %17#74, %17#75, %17#76, %17#77, %17#78, %17#79, %17#80, %17#82, %17#83, %17#84, %17#85, %17#86, %17#87, %17#88, %17#89, %17#90, %17#91, %17#92, %17#93, %17#94, %17#95, %17#96, %17#97) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<f32>, sharding=<@mesh, []>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, 
[{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m3", tensor<f32>, sharding=<@mesh, []>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %33 = mpmd.transfer %32#81 : (!mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %34:6 = mpmd.fragment_call<mesh="m1", origin=["block_6", "block_7", "block_8", "block_9", "block_10", "block_11"]> @"p1_block_6:11_fwd_calls0to3.mpmd_train_step"(%24, %arg321, %arg322, %arg323, %arg324, %arg325, %arg326, %arg327, %arg328, %arg329, %arg330, %arg331, %arg332, %arg333, %arg334, %arg335, %arg336, %arg337, %arg338, %arg339, %arg340, %arg341, %arg342, %arg343, %arg344, %arg345, %arg346, %arg347, %arg348, %arg349, %arg350, %arg351, %arg352, %arg353, %arg354, %arg355, %arg356, %arg357, %arg358, %arg359, %arg360, %arg361, %arg362, %arg363, %arg364, %arg365, %arg366, %arg367, %arg368, %arg369, %arg370, %arg371, %arg372, %arg373, %arg374, %arg375, %arg376, %arg377, %arg378, %arg379, %arg380, %arg381, %arg382, %arg383, %arg384, %arg33, %arg34, %arg35, %arg36, %arg37, %arg38, %arg39, %arg40, %arg41, %arg42, %arg43, %arg44, %arg45, %arg46, %arg47, %arg48, %arg49, %arg50, %arg51, %arg52, %arg53, %arg54, %arg55, %arg56, %arg57, %arg58, %arg59, %arg60, %arg61, %arg62, %arg63, %arg64) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", 
tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %35 = mpmd.transfer %34#5 : (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %36:97 = mpmd.fragment_call<mesh="m2", origin=["block_17"(1), "block_16"(1), "block_15"(1), "block_14"(1), "block_13"(1), "block_12"(1)]> @"p8_block_17:12_bwd_calls1to2.mpmd_train_step"(%arg145, %arg146, %arg147, %arg148, %arg149, %arg150, %arg152, %arg153, %arg154, %arg155, %arg156, %arg157, %arg158, %arg159, %arg160, %15#4, %18, %25#0, %25#1, %25#2, %25#3, %25#4, %25#5, %25#6, %25#7, %25#8, %25#9, %25#10, %25#11, %25#12, %25#13, %25#14, %25#15, %arg129, %arg130, %arg131, %arg132, %arg133, %arg134, %arg136, %arg137, %arg138, %arg139, %arg140, %arg141, %arg142, %arg143, %arg144, %15#3, %25#16, %25#17, %25#18, %25#19, %25#20, %25#21, %25#22, %25#23, %25#24, %25#25, %25#26, %25#27, %25#28, %25#29, %25#30, %25#31, %arg113, %arg114, %arg115, %arg116, %arg117, %arg118, %arg120, %arg121, %arg122, %arg123, %arg124, %arg125, %arg126, %arg127, %arg128, %15#2, %25#32, %25#33, %25#34, %25#35, %25#36, %25#37, %25#38, %25#39, %25#40, %25#41, %25#42, %25#43, %25#44, %25#45, %25#46, %25#47, %arg97, %arg98, %arg99, %arg100, %arg101, %arg102, %arg104, %arg105, %arg106, %arg107, %arg108, %arg109, %arg110, %arg111, %arg112, %15#1, %25#48, %25#49, %25#50, %25#51, %25#52, %25#53, %25#54, %25#55, %25#56, %25#57, %25#58, %25#59, %25#60, %25#61, %25#62, %25#63, %arg81, %arg82, %arg83, %arg84, %arg85, %arg86, %arg88, %arg89, %arg90, %arg91, %arg92, %arg93, %arg94, %arg95, %arg96, %15#0, %25#64, %25#65, %25#66, %25#67, %25#68, %25#69, %25#70, %25#71, %25#72, %25#73, %25#74, %25#75, %25#76, %25#77, %25#78, %25#79, %arg65, %arg66, %arg67, %arg68, %arg69, %arg70, %arg72, %arg73, %arg74, %arg75, %arg76, %arg77, %arg78, %arg79, %arg80, %14, %25#81, %25#82, %25#83, %25#84, %25#85, %25#86, %25#87, %25#88, %25#89, %25#90, %25#91, %25#92, %25#93, %25#94, %25#95, %25#96) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %37 = mpmd.transfer %36#80 : (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %38:97 = mpmd.fragment_call<mesh="m1", origin=["block_11"(1), "block_10"(1), "block_9"(1), "block_8"(1), "block_7"(1), "block_6"(1)]> @"p9_block_11:6_bwd_calls1to2.mpmd_train_step"(%arg49, %arg50, %arg51, %arg52, %arg53, %arg54, %arg56, %arg57, %arg58, %arg59, %arg60, %arg61, %arg62, %arg63, %arg64, %13#4, %37, %27#0, %27#1, %27#2, %27#3, %27#4, %27#5, %27#6, %27#7, %27#8, %27#9, %27#10, %27#11, %27#12, %27#13, %27#14, %27#15, %arg33, %arg34, %arg35, %arg36, %arg37, %arg38, %arg40, %arg41, %arg42, %arg43, %arg44, %arg45, %arg46, %arg47, %arg48, %13#3, %27#16, %27#17, %27#18, %27#19, %27#20, %27#21, %27#22, %27#23, %27#24, %27#25, %27#26, %27#27, %27#28, %27#29, %27#30, %27#31, %arg369, %arg370, %arg371, %arg372, %arg373, %arg374, %arg376, %arg377, %arg378, %arg379, %arg380, %arg381, %arg382, %arg383, %arg384, %13#2, %27#32, %27#33, %27#34, %27#35, %27#36, %27#37, %27#38, %27#39, %27#40, %27#41, %27#42, %27#43, %27#44, %27#45, %27#46, %27#47, %arg353, %arg354, %arg355, %arg356, %arg357, %arg358, %arg360, %arg361, %arg362, %arg363, %arg364, %arg365, %arg366, %arg367, %arg368, %13#1, %27#48, %27#49, %27#50, %27#51, %27#52, %27#53, %27#54, %27#55, %27#56, %27#57, %27#58, %27#59, %27#60, %27#61, %27#62, %27#63, %arg337, %arg338, %arg339, %arg340, %arg341, %arg342, %arg344, %arg345, %arg346, %arg347, %arg348, %arg349, %arg350, %arg351, %arg352, %13#0, %27#64, %27#65, %27#66, %27#67, %27#68, %27#69, %27#70, %27#71, %27#72, %27#73, %27#74, %27#75, %27#76, %27#77, %27#78, %27#79, %arg321, %arg322, %arg323, %arg324, %arg325, %arg326, %arg328, %arg329, %arg330, %arg331, %arg332, %arg333, %arg334, %arg335, %arg336, %12, %27#81, %27#82, %27#83, %27#84, %27#85, %27#86, %27#87, %27#88, %27#89, %27#90, %27#91, %27#92, %27#93, %27#94, %27#95, %27#96) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %39 = mpmd.transfer %38#80 : (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %40:96 = mpmd.fragment_call<mesh="m0", origin=["block_5"(1), "block_4"(1), "block_3"(1), "block_2"(1), "block_1"(1), "block_0"(1)]> @"p10_block_5:0_bwd_calls1to2.mpmd_train_step"(%arg305, %arg306, %arg307, %arg308, %arg309, %arg310, %arg312, %arg313, %arg314, %arg315, %arg316, %arg317, %arg318, %arg319, %arg320, %11#4, %39, %29#0, %29#1, %29#2, %29#3, %29#4, %29#5, %29#6, %29#7, %29#8, %29#9, %29#10, %29#11, %29#12, %29#13, %29#14, %29#15, %arg289, %arg290, %arg291, %arg292, %arg293, %arg294, %arg296, %arg297, %arg298, %arg299, %arg300, %arg301, %arg302, %arg303, %arg304, %11#3, %29#16, %29#17, %29#18, %29#19, %29#20, %29#21, %29#22, %29#23, %29#24, %29#25, %29#26, %29#27, %29#28, %29#29, %29#30, %29#31, %arg273, %arg274, %arg275, %arg276, %arg277, %arg278, %arg280, %arg281, %arg282, %arg283, %arg284, %arg285, %arg286, %arg287, %arg288, %11#2, %29#32, %29#33, %29#34, %29#35, %29#36, %29#37, %29#38, %29#39, %29#40, %29#41, %29#42, %29#43, %29#44, %29#45, %29#46, %29#47, %arg193, %arg194, %arg195, %arg196, %arg197, %arg198, %arg200, %arg201, %arg202, %arg203, %arg204, %arg205, %arg206, %arg207, %arg208, %11#1, %29#48, %29#49, %29#50, %29#51, %29#52, %29#53, %29#54, %29#55, %29#56, %29#57, %29#58, %29#59, %29#60, %29#61, %29#62, %29#63, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg24, %arg25, %arg26, %arg27, %arg28, %arg29, %arg30, %arg31, %arg32, %11#0, %29#64, %29#65, %29#66, %29#67, %29#68, %29#69, %29#70, %29#71, %29#72, %29#73, %29#74, %29#75, %29#76, %29#77, %29#78, %29#79, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg1155, %29#80, %29#81, %29#82, %29#83, %29#84, %29#85, %29#86, %29#87, %29#88, %29#89, %29#90, %29#91, %29#92, %29#93, %29#94, %29#95) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %41:6 = mpmd.fragment_call<mesh="m2", origin=["block_12", "block_13", "block_14", "block_15", "block_16", "block_17"]> @"p2_block_12:17_fwd_calls0to3.mpmd_train_step"(%35, %arg65, %arg66, %arg67, %arg68, %arg69, %arg70, %arg71, %arg72, %arg73, %arg74, %arg75, %arg76, %arg77, %arg78, %arg79, %arg80, %arg81, %arg82, %arg83, %arg84, %arg85, %arg86, %arg87, %arg88, %arg89, %arg90, %arg91, %arg92, %arg93, %arg94, %arg95, %arg96, %arg97, %arg98, %arg99, %arg100, %arg101, %arg102, %arg103, %arg104, %arg105, %arg106, %arg107, %arg108, %arg109, %arg110, %arg111, %arg112, %arg113, %arg114, %arg115, %arg116, %arg117, %arg118, %arg119, %arg120, %arg121, %arg122, %arg123, %arg124, %arg125, %arg126, %arg127, %arg128, %arg129, %arg130, %arg131, %arg132, %arg133, %arg134, %arg135, %arg136, %arg137, %arg138, %arg139, %arg140, %arg141, %arg142, %arg143, %arg144, %arg145, %arg146, %arg147, %arg148, %arg149, %arg150, %arg151, %arg152, %arg153, %arg154, %arg155, %arg156, %arg157, %arg158, %arg159, %arg160) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", 
tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %42 = mpmd.transfer %41#5 : (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %43:290 = mpmd.fragment_call<mesh="m3", origin=["block_18", "block_19", "block_20", "block_21", "block_22", "block_23", "block_23"(1), "block_22"(1), "block_21"(1), "block_20"(1), "block_19"(1), "block_18"(1)]> @"p11_block_18:23_fwd_bwd_call3.mpmd_train_step"(%42, %arg161, %arg162, %arg163, %arg164, %arg165, %arg166, %arg167, %arg168, %arg169, %arg170, %arg171, %arg172, %arg173, %arg174, %arg175, %arg176, %arg177, %arg178, %arg179, %arg180, %arg181, %arg182, %arg183, %arg184, %arg185, %arg186, %arg187, %arg188, %arg189, %arg190, %arg191, %arg192, %arg209, %arg210, %arg211, %arg212, %arg213, %arg214, %arg215, %arg216, %arg217, %arg218, %arg219, %arg220, %arg221, %arg222, %arg223, %arg224, %arg225, %arg226, %arg227, %arg228, %arg229, %arg230, %arg231, %arg232, %arg233, %arg234, %arg235, %arg236, %arg237, %arg238, %arg239, %arg240, %arg241, %arg242, %arg243, %arg244, %arg245, %arg246, %arg247, %arg248, %arg249, %arg250, %arg251, %arg252, %arg253, %arg254, %arg255, %arg256, %arg257, %arg258, %arg259, %arg260, %arg261, %arg262, %arg263, %arg264, %arg265, %arg266, %arg267, %arg268, %arg269, %arg270, %arg271, %arg272, %arg1158, %32#0, %32#1, %32#2, %32#3, %32#4, %32#5, %32#6, %32#7, %32#8, %32#9, %32#10, %32#11, %32#12, %32#13, %32#14, %32#15, %32#16, %32#17, %32#18, %32#19, %32#20, %32#21, %32#22, %32#23, %32#24, %32#25, %32#26, %32#27, %32#28, %32#29, %32#30, %32#31, %32#32, %32#33, %32#34, %32#35, %32#36, %32#37, %32#38, %32#39, %32#40, %32#41, %32#42, %32#43, %32#44, %32#45, %32#46, %32#47, %32#48, %32#49, %32#50, %32#51, %32#52, %32#53, %32#54, %32#55, %32#56, %32#57, %32#58, %32#59, %32#60, %32#61, %32#62, %32#63, %32#64, %32#65, %32#66, %32#67, %32#68, %32#69, %32#70, %32#71, %32#72, %32#73, %32#74, %32#75, %32#76, %32#77, %32#78, %32#79, %32#80, %32#82, %32#83, %32#84, %32#85, %32#86, %32#87, %32#88, %32#89, %32#90, %32#91, %32#92, %32#93, %32#94, %32#95, %32#96, %32#97, %arg546, %arg547, %arg548, %arg549, %arg550, %arg551, %arg552, %arg553, %arg554, %arg555, 
%arg556, %arg557, %arg558, %arg559, %arg560, %arg561, %arg562, %arg563, %arg564, %arg565, %arg566, %arg567, %arg568, %arg569, %arg570, %arg571, %arg572, %arg573, %arg574, %arg575, %arg576, %arg577, %arg594, %arg595, %arg596, %arg597, %arg598, %arg599, %arg600, %arg601, %arg602, %arg603, %arg604, %arg605, %arg606, %arg607, %arg608, %arg609, %arg610, %arg611, %arg612, %arg613, %arg614, %arg615, %arg616, %arg617, %arg618, %arg619, %arg620, %arg621, %arg622, %arg623, %arg624, %arg625, %arg626, %arg627, %arg628, %arg629, %arg630, %arg631, %arg632, %arg633, %arg634, %arg635, %arg636, %arg637, %arg638, %arg639, %arg640, %arg641, %arg642, %arg643, %arg644, %arg645, %arg646, %arg647, %arg648, %arg649, %arg650, %arg651, %arg652, %arg653, %arg654, %arg655, %arg656, %arg657, %arg930, %arg931, %arg932, %arg933, %arg934, %arg935, %arg936, %arg937, %arg938, %arg939, %arg940, %arg941, %arg942, %arg943, %arg944, %arg945, %arg946, %arg947, %arg948, %arg949, %arg950, %arg951, %arg952, %arg953, %arg954, %arg955, %arg956, %arg957, %arg958, %arg959, %arg960, %arg961, %arg978, %arg979, %arg980, %arg981, %arg982, %arg983, %arg984, %arg985, %arg986, %arg987, %arg988, %arg989, %arg990, %arg991, %arg992, %arg993, %arg994, %arg995, %arg996, %arg997, %arg998, %arg999, %arg1000, %arg1001, %arg1002, %arg1003, %arg1004, %arg1005, %arg1006, %arg1007, %arg1008, %arg1009, %arg1010, %arg1011, %arg1012, %arg1013, %arg1014, %arg1015, %arg1016, %arg1017, %arg1018, %arg1019, %arg1020, %arg1021, %arg1022, %arg1023, %arg1024, %arg1025, %arg1026, %arg1027, %arg1028, %arg1029, %arg1030, %arg1031, %arg1032, %arg1033, %arg1034, %arg1035, %arg1036, %arg1037, %arg1038, %arg1039, %arg1040, %arg1041, %1) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<f32>, sharding=<@mesh, []>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, 
[{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<i32>, sharding=<@mesh, []>>) -> (!mpmd.mesh_tensor<"m3", tensor<f32>, 
sharding=<@mesh, []>>, !mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, 
{}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, 
{}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %44 = mpmd.transfer %43#1 : (!mpmd.mesh_tensor<"m3", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %45:97 = mpmd.fragment_call<mesh="m2", origin=["block_17"(1), "block_16"(1), "block_15"(1), "block_14"(1), "block_13"(1), "block_12"(1)]> @"p8_block_17:12_bwd_calls1to2.mpmd_train_step"(%arg145, %arg146, %arg147, %arg148, %arg149, %arg150, %arg152, %arg153, %arg154, %arg155, %arg156, %arg157, %arg158, %arg159, %arg160, %30#4, %33, %36#0, %36#1, %36#2, %36#3, %36#4, %36#5, %36#6, %36#7, %36#8, %36#9, %36#10, %36#11, %36#12, %36#13, %36#14, %36#15, %arg129, %arg130, %arg131, %arg132, %arg133, %arg134, %arg136, %arg137, %arg138, %arg139, %arg140, %arg141, %arg142, %arg143, %arg144, %30#3, %36#16, %36#17, %36#18, %36#19, %36#20, %36#21, %36#22, %36#23, %36#24, %36#25, %36#26, %36#27, %36#28, %36#29, %36#30, %36#31, %arg113, %arg114, %arg115, %arg116, %arg117, %arg118, %arg120, %arg121, %arg122, %arg123, %arg124, %arg125, %arg126, %arg127, %arg128, %30#2, %36#32, %36#33, %36#34, %36#35, %36#36, %36#37, %36#38, %36#39, %36#40, %36#41, %36#42, %36#43, %36#44, %36#45, %36#46, %36#47, %arg97, %arg98, %arg99, %arg100, %arg101, %arg102, %arg104, %arg105, %arg106, %arg107, %arg108, %arg109, %arg110, %arg111, %arg112, %30#1, %36#48, %36#49, %36#50, %36#51, %36#52, %36#53, %36#54, %36#55, %36#56, %36#57, %36#58, %36#59, %36#60, %36#61, %36#62, %36#63, %arg81, %arg82, %arg83, %arg84, %arg85, %arg86, %arg88, %arg89, %arg90, %arg91, %arg92, %arg93, %arg94, %arg95, %arg96, %30#0, %36#64, %36#65, %36#66, %36#67, %36#68, %36#69, %36#70, %36#71, %36#72, %36#73, %36#74, %36#75, %36#76, %36#77, %36#78, %36#79, %arg65, %arg66, %arg67, %arg68, %arg69, %arg70, %arg72, %arg73, %arg74, %arg75, %arg76, %arg77, %arg78, %arg79, %arg80, %22, %36#81, %36#82, %36#83, %36#84, %36#85, %36#86, %36#87, %36#88, %36#89, %36#90, %36#91, %36#92, %36#93, %36#94, %36#95, %36#96) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %46 = mpmd.transfer %45#80 : (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %47:97 = mpmd.fragment_call<mesh="m1", origin=["block_11"(1), "block_10"(1), "block_9"(1), "block_8"(1), "block_7"(1), "block_6"(1)]> @"p9_block_11:6_bwd_calls1to2.mpmd_train_step"(%arg49, %arg50, %arg51, %arg52, %arg53, %arg54, %arg56, %arg57, %arg58, %arg59, %arg60, %arg61, %arg62, %arg63, %arg64, %21#4, %46, %38#0, %38#1, %38#2, %38#3, %38#4, %38#5, %38#6, %38#7, %38#8, %38#9, %38#10, %38#11, %38#12, %38#13, %38#14, %38#15, %arg33, %arg34, %arg35, %arg36, %arg37, %arg38, %arg40, %arg41, %arg42, %arg43, %arg44, %arg45, %arg46, %arg47, %arg48, %21#3, %38#16, %38#17, %38#18, %38#19, %38#20, %38#21, %38#22, %38#23, %38#24, %38#25, %38#26, %38#27, %38#28, %38#29, %38#30, %38#31, %arg369, %arg370, %arg371, %arg372, %arg373, %arg374, %arg376, %arg377, %arg378, %arg379, %arg380, %arg381, %arg382, %arg383, %arg384, %21#2, %38#32, %38#33, %38#34, %38#35, %38#36, %38#37, %38#38, %38#39, %38#40, %38#41, %38#42, %38#43, %38#44, %38#45, %38#46, %38#47, %arg353, %arg354, %arg355, %arg356, %arg357, %arg358, %arg360, %arg361, %arg362, %arg363, %arg364, %arg365, %arg366, %arg367, %arg368, %21#1, %38#48, %38#49, %38#50, %38#51, %38#52, %38#53, %38#54, %38#55, %38#56, %38#57, %38#58, %38#59, %38#60, %38#61, %38#62, %38#63, %arg337, %arg338, %arg339, %arg340, %arg341, %arg342, %arg344, %arg345, %arg346, %arg347, %arg348, %arg349, %arg350, %arg351, %arg352, %21#0, %38#64, %38#65, %38#66, %38#67, %38#68, %38#69, %38#70, %38#71, %38#72, %38#73, %38#74, %38#75, %38#76, %38#77, %38#78, %38#79, %arg321, %arg322, %arg323, %arg324, %arg325, %arg326, %arg328, %arg329, %arg330, %arg331, %arg332, %arg333, %arg334, %arg335, %arg336, %20, %38#81, %38#82, %38#83, %38#84, %38#85, %38#86, %38#87, %38#88, %38#89, %38#90, %38#91, %38#92, %38#93, %38#94, %38#95, %38#96) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %48 = mpmd.transfer %47#80 : (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %49:96 = mpmd.fragment_call<mesh="m0", origin=["block_5"(1), "block_4"(1), "block_3"(1), "block_2"(1), "block_1"(1), "block_0"(1)]> @"p10_block_5:0_bwd_calls1to2.mpmd_train_step"(%arg305, %arg306, %arg307, %arg308, %arg309, %arg310, %arg312, %arg313, %arg314, %arg315, %arg316, %arg317, %arg318, %arg319, %arg320, %19#4, %48, %40#0, %40#1, %40#2, %40#3, %40#4, %40#5, %40#6, %40#7, %40#8, %40#9, %40#10, %40#11, %40#12, %40#13, %40#14, %40#15, %arg289, %arg290, %arg291, %arg292, %arg293, %arg294, %arg296, %arg297, %arg298, %arg299, %arg300, %arg301, %arg302, %arg303, %arg304, %19#3, %40#16, %40#17, %40#18, %40#19, %40#20, %40#21, %40#22, %40#23, %40#24, %40#25, %40#26, %40#27, %40#28, %40#29, %40#30, %40#31, %arg273, %arg274, %arg275, %arg276, %arg277, %arg278, %arg280, %arg281, %arg282, %arg283, %arg284, %arg285, %arg286, %arg287, %arg288, %19#2, %40#32, %40#33, %40#34, %40#35, %40#36, %40#37, %40#38, %40#39, %40#40, %40#41, %40#42, %40#43, %40#44, %40#45, %40#46, %40#47, %arg193, %arg194, %arg195, %arg196, %arg197, %arg198, %arg200, %arg201, %arg202, %arg203, %arg204, %arg205, %arg206, %arg207, %arg208, %19#1, %40#48, %40#49, %40#50, %40#51, %40#52, %40#53, %40#54, %40#55, %40#56, %40#57, %40#58, %40#59, %40#60, %40#61, %40#62, %40#63, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg24, %arg25, %arg26, %arg27, %arg28, %arg29, %arg30, %arg31, %arg32, %19#0, %40#64, %40#65, %40#66, %40#67, %40#68, %40#69, %40#70, %40#71, %40#72, %40#73, %40#74, %40#75, %40#76, %40#77, %40#78, %40#79, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg1156, %40#80, %40#81, %40#82, %40#83, %40#84, %40#85, %40#86, %40#87, %40#88, %40#89, %40#90, %40#91, %40#92, %40#93, %40#94, %40#95) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> (!mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %50:289 = mpmd.fragment_call<mesh="m2", origin=["block_17"(1), "block_16"(1), "block_15"(1), "block_14"(1), "block_13"(1), "block_12"(1)]> @"p12_block_17:12_bwd_call3.mpmd_train_step"(%arg145, %arg146, %arg147, %arg148, %arg149, %arg150, %arg152, %arg153, %arg154, %arg155, %arg156, %arg157, %arg158, %arg159, %arg160, %41#4, %44, %45#0, %45#1, %45#2, %45#3, %45#4, %45#5, %45#6, %45#7, %45#8, %45#9, %45#10, %45#11, %45#12, %45#13, %45#14, %45#15, %arg129, %arg130, %arg131, %arg132, %arg133, %arg134, %arg136, %arg137, %arg138, %arg139, %arg140, %arg141, %arg142, %arg143, %arg144, %41#3, %45#16, %45#17, %45#18, %45#19, %45#20, %45#21, %45#22, %45#23, %45#24, %45#25, %45#26, %45#27, %45#28, %45#29, %45#30, %45#31, %arg113, %arg114, %arg115, %arg116, %arg117, %arg118, %arg120, %arg121, %arg122, %arg123, %arg124, %arg125, %arg126, %arg127, %arg128, %41#2, %45#32, %45#33, %45#34, %45#35, %45#36, %45#37, %45#38, %45#39, %45#40, %45#41, %45#42, %45#43, %45#44, %45#45, %45#46, %45#47, %arg97, %arg98, %arg99, %arg100, %arg101, %arg102, %arg104, %arg105, %arg106, %arg107, %arg108, %arg109, %arg110, %arg111, %arg112, %41#1, %45#48, %45#49, %45#50, %45#51, %45#52, %45#53, %45#54, %45#55, %45#56, %45#57, %45#58, %45#59, %45#60, %45#61, %45#62, %45#63, %arg81, %arg82, %arg83, %arg84, %arg85, %arg86, %arg88, %arg89, %arg90, %arg91, %arg92, %arg93, %arg94, %arg95, %arg96, %41#0, %45#64, %45#65, %45#66, %45#67, %45#68, %45#69, %45#70, %45#71, %45#72, %45#73, %45#74, %45#75, %45#76, %45#77, %45#78, %45#79, %arg65, %arg66, %arg67, %arg68, %arg69, %arg70, %arg72, %arg73, %arg74, %arg75, %arg76, %arg77, %arg78, %arg79, %arg80, %35, %45#81, %45#82, %45#83, %45#84, %45#85, %45#86, %45#87, %45#88, %45#89, %45#90, %45#91, %45#92, %45#93, %45#94, %45#95, %45#96, %arg450, %arg451, %arg452, %arg453, %arg454, %arg455, %arg456, %arg457, %arg458, %arg459, %arg460, %arg461, %arg462, %arg463, %arg464, %arg465, %arg466, %arg467, %arg468, %arg469, %arg470, %arg471, %arg472, %arg473, %arg474, 
%arg475, %arg476, %arg477, %arg478, %arg479, %arg480, %arg481, %arg482, %arg483, %arg484, %arg485, %arg486, %arg487, %arg488, %arg489, %arg490, %arg491, %arg492, %arg493, %arg494, %arg495, %arg496, %arg497, %arg498, %arg499, %arg500, %arg501, %arg502, %arg503, %arg504, %arg505, %arg506, %arg507, %arg508, %arg509, %arg510, %arg511, %arg512, %arg513, %arg514, %arg515, %arg516, %arg517, %arg518, %arg519, %arg520, %arg521, %arg522, %arg523, %arg524, %arg525, %arg526, %arg527, %arg528, %arg529, %arg530, %arg531, %arg532, %arg533, %arg534, %arg535, %arg536, %arg537, %arg538, %arg539, %arg540, %arg541, %arg542, %arg543, %arg544, %arg545, %arg834, %arg835, %arg836, %arg837, %arg838, %arg839, %arg840, %arg841, %arg842, %arg843, %arg844, %arg845, %arg846, %arg847, %arg848, %arg849, %arg850, %arg851, %arg852, %arg853, %arg854, %arg855, %arg856, %arg857, %arg858, %arg859, %arg860, %arg861, %arg862, %arg863, %arg864, %arg865, %arg866, %arg867, %arg868, %arg869, %arg870, %arg871, %arg872, %arg873, %arg874, %arg875, %arg876, %arg877, %arg878, %arg879, %arg880, %arg881, %arg882, %arg883, %arg884, %arg885, %arg886, %arg887, %arg888, %arg889, %arg890, %arg891, %arg892, %arg893, %arg894, %arg895, %arg896, %arg897, %arg898, %arg899, %arg900, %arg901, %arg902, %arg903, %arg904, %arg905, %arg906, %arg907, %arg908, %arg909, %arg910, %arg911, %arg912, %arg913, %arg914, %arg915, %arg916, %arg917, %arg918, %arg919, %arg920, %arg921, %arg922, %arg923, %arg924, %arg925, %arg926, %arg927, %arg928, %arg929, %0, %arg71, %arg87, %arg103, %arg119, %arg135, %arg151) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, 
[{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", 
tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, 
{}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, 
[{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", 
tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, 
{}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<i32>, sharding=<@mesh, []>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>) -> (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %51 = mpmd.transfer %50#0 : (!mpmd.mesh_tensor<"m2", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %52:289 = mpmd.fragment_call<mesh="m1", origin=["block_11"(1), "block_10"(1), "block_9"(1), "block_8"(1), "block_7"(1), "block_6"(1)]> @"p13_block_11:6_bwd_call3.mpmd_train_step"(%arg49, %arg50, %arg51, %arg52, %arg53, %arg54, %arg56, %arg57, %arg58, %arg59, %arg60, %arg61, %arg62, %arg63, %arg64, %34#4, %51, %47#0, %47#1, %47#2, %47#3, %47#4, %47#5, %47#6, %47#7, %47#8, %47#9, %47#10, %47#11, %47#12, %47#13, %47#14, %47#15, %arg33, %arg34, %arg35, %arg36, %arg37, %arg38, %arg40, %arg41, %arg42, %arg43, %arg44, %arg45, %arg46, %arg47, %arg48, %34#3, %47#16, %47#17, %47#18, %47#19, %47#20, %47#21, %47#22, %47#23, %47#24, %47#25, %47#26, %47#27, %47#28, %47#29, %47#30, %47#31, %arg369, %arg370, %arg371, %arg372, %arg373, %arg374, %arg376, %arg377, %arg378, %arg379, %arg380, %arg381, %arg382, %arg383, %arg384, %34#2, %47#32, %47#33, %47#34, %47#35, %47#36, %47#37, %47#38, %47#39, %47#40, %47#41, %47#42, %47#43, %47#44, %47#45, %47#46, %47#47, %arg353, %arg354, %arg355, %arg356, %arg357, %arg358, %arg360, %arg361, %arg362, %arg363, %arg364, %arg365, %arg366, %arg367, %arg368, %34#1, %47#48, %47#49, %47#50, %47#51, %47#52, %47#53, %47#54, %47#55, %47#56, %47#57, %47#58, %47#59, %47#60, %47#61, %47#62, %47#63, %arg337, %arg338, %arg339, %arg340, %arg341, %arg342, %arg344, %arg345, %arg346, %arg347, %arg348, %arg349, %arg350, %arg351, %arg352, %34#0, %47#64, %47#65, %47#66, %47#67, %47#68, %47#69, %47#70, %47#71, %47#72, %47#73, %47#74, %47#75, %47#76, %47#77, %47#78, %47#79, %arg321, %arg322, %arg323, %arg324, %arg325, %arg326, %arg328, %arg329, %arg330, %arg331, %arg332, %arg333, %arg334, %arg335, %arg336, %24, %47#81, %47#82, %47#83, %47#84, %47#85, %47#86, %47#87, %47#88, %47#89, %47#90, %47#91, %47#92, %47#93, %47#94, %47#95, %47#96, %arg418, %arg419, %arg420, %arg421, %arg422, %arg423, %arg424, %arg425, %arg426, %arg427, %arg428, %arg429, %arg430, %arg431, %arg432, %arg433, %arg434, %arg435, %arg436, %arg437, %arg438, %arg439, %arg440, %arg441, %arg442, %arg443, 
%arg444, %arg445, %arg446, %arg447, %arg448, %arg449, %arg706, %arg707, %arg708, %arg709, %arg710, %arg711, %arg712, %arg713, %arg714, %arg715, %arg716, %arg717, %arg718, %arg719, %arg720, %arg721, %arg722, %arg723, %arg724, %arg725, %arg726, %arg727, %arg728, %arg729, %arg730, %arg731, %arg732, %arg733, %arg734, %arg735, %arg736, %arg737, %arg738, %arg739, %arg740, %arg741, %arg742, %arg743, %arg744, %arg745, %arg746, %arg747, %arg748, %arg749, %arg750, %arg751, %arg752, %arg753, %arg754, %arg755, %arg756, %arg757, %arg758, %arg759, %arg760, %arg761, %arg762, %arg763, %arg764, %arg765, %arg766, %arg767, %arg768, %arg769, %arg802, %arg803, %arg804, %arg805, %arg806, %arg807, %arg808, %arg809, %arg810, %arg811, %arg812, %arg813, %arg814, %arg815, %arg816, %arg817, %arg818, %arg819, %arg820, %arg821, %arg822, %arg823, %arg824, %arg825, %arg826, %arg827, %arg828, %arg829, %arg830, %arg831, %arg832, %arg833, %arg1090, %arg1091, %arg1092, %arg1093, %arg1094, %arg1095, %arg1096, %arg1097, %arg1098, %arg1099, %arg1100, %arg1101, %arg1102, %arg1103, %arg1104, %arg1105, %arg1106, %arg1107, %arg1108, %arg1109, %arg1110, %arg1111, %arg1112, %arg1113, %arg1114, %arg1115, %arg1116, %arg1117, %arg1118, %arg1119, %arg1120, %arg1121, %arg1122, %arg1123, %arg1124, %arg1125, %arg1126, %arg1127, %arg1128, %arg1129, %arg1130, %arg1131, %arg1132, %arg1133, %arg1134, %arg1135, %arg1136, %arg1137, %arg1138, %arg1139, %arg1140, %arg1141, %arg1142, %arg1143, %arg1144, %arg1145, %arg1146, %arg1147, %arg1148, %arg1149, %arg1150, %arg1151, %arg1152, %arg1153, %2, %arg39, %arg55, %arg327, %arg343, %arg359, %arg375) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, 
[{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<i32>, sharding=<@mesh, []>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>) -> (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>)
    %53 = mpmd.transfer %52#0 : (!mpmd.mesh_tensor<"m1", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>) -> !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>
    %54:290 = mpmd.fragment_call<mesh="m0", origin=["block_5"(1), "block_4"(1), "block_3"(1), "block_2"(1), "block_1"(1), "block_0"(1)]> @"p14_block_5:0_bwd_call3.mpmd_train_step"(%arg305, %arg306, %arg307, %arg308, %arg309, %arg310, %arg312, %arg313, %arg314, %arg315, %arg316, %arg317, %arg318, %arg319, %arg320, %23#4, %53, %49#0, %49#1, %49#2, %49#3, %49#4, %49#5, %49#6, %49#7, %49#8, %49#9, %49#10, %49#11, %49#12, %49#13, %49#14, %49#15, %arg289, %arg290, %arg291, %arg292, %arg293, %arg294, %arg296, %arg297, %arg298, %arg299, %arg300, %arg301, %arg302, %arg303, %arg304, %23#3, %49#16, %49#17, %49#18, %49#19, %49#20, %49#21, %49#22, %49#23, %49#24, %49#25, %49#26, %49#27, %49#28, %49#29, %49#30, %49#31, %arg273, %arg274, %arg275, %arg276, %arg277, %arg278, %arg280, %arg281, %arg282, %arg283, %arg284, %arg285, %arg286, %arg287, %arg288, %23#2, %49#32, %49#33, %49#34, %49#35, %49#36, %49#37, %49#38, %49#39, %49#40, %49#41, %49#42, %49#43, %49#44, %49#45, %49#46, %49#47, %arg193, %arg194, %arg195, %arg196, %arg197, %arg198, %arg200, %arg201, %arg202, %arg203, %arg204, %arg205, %arg206, %arg207, %arg208, %23#1, %49#48, %49#49, %49#50, %49#51, %49#52, %49#53, %49#54, %49#55, %49#56, %49#57, %49#58, %49#59, %49#60, %49#61, %49#62, %49#63, %arg17, %arg18, %arg19, %arg20, %arg21, %arg22, %arg24, %arg25, %arg26, %arg27, %arg28, %arg29, %arg30, %arg31, %arg32, %23#0, %49#64, %49#65, %49#66, %49#67, %49#68, %49#69, %49#70, %49#71, %49#72, %49#73, %49#74, %49#75, %49#76, %49#77, %49#78, %49#79, %arg1, %arg2, %arg3, %arg4, %arg5, %arg6, %arg8, %arg9, %arg10, %arg11, %arg12, %arg13, %arg14, %arg15, %arg16, %arg1157, %49#80, %49#81, %49#82, %49#83, %49#84, %49#85, %49#86, %49#87, %49#88, %49#89, %49#90, %49#91, %49#92, %49#93, %49#94, %49#95, %arg386, %arg387, %arg388, %arg389, %arg390, %arg391, %arg392, %arg393, %arg394, %arg395, %arg396, %arg397, %arg398, %arg399, %arg400, %arg401, %arg402, %arg403, %arg404, %arg405, %arg406, %arg407, %arg408, %arg409, %arg410, %arg411, 
%arg412, %arg413, %arg414, %arg415, %arg416, %arg417, %arg578, %arg579, %arg580, %arg581, %arg582, %arg583, %arg584, %arg585, %arg586, %arg587, %arg588, %arg589, %arg590, %arg591, %arg592, %arg593, %arg658, %arg659, %arg660, %arg661, %arg662, %arg663, %arg664, %arg665, %arg666, %arg667, %arg668, %arg669, %arg670, %arg671, %arg672, %arg673, %arg674, %arg675, %arg676, %arg677, %arg678, %arg679, %arg680, %arg681, %arg682, %arg683, %arg684, %arg685, %arg686, %arg687, %arg688, %arg689, %arg690, %arg691, %arg692, %arg693, %arg694, %arg695, %arg696, %arg697, %arg698, %arg699, %arg700, %arg701, %arg702, %arg703, %arg704, %arg705, %arg770, %arg771, %arg772, %arg773, %arg774, %arg775, %arg776, %arg777, %arg778, %arg779, %arg780, %arg781, %arg782, %arg783, %arg784, %arg785, %arg786, %arg787, %arg788, %arg789, %arg790, %arg791, %arg792, %arg793, %arg794, %arg795, %arg796, %arg797, %arg798, %arg799, %arg800, %arg801, %arg962, %arg963, %arg964, %arg965, %arg966, %arg967, %arg968, %arg969, %arg970, %arg971, %arg972, %arg973, %arg974, %arg975, %arg976, %arg977, %arg1042, %arg1043, %arg1044, %arg1045, %arg1046, %arg1047, %arg1048, %arg1049, %arg1050, %arg1051, %arg1052, %arg1053, %arg1054, %arg1055, %arg1056, %arg1057, %arg1058, %arg1059, %arg1060, %arg1061, %arg1062, %arg1063, %arg1064, %arg1065, %arg1066, %arg1067, %arg1068, %arg1069, %arg1070, %arg1071, %arg1072, %arg1073, %arg1074, %arg1075, %arg1076, %arg1077, %arg1078, %arg1079, %arg1080, %arg1081, %arg1082, %arg1083, %arg1084, %arg1085, %arg1086, %arg1087, %arg1088, %arg1089, %arg385, %arg7, %arg23, %arg199, %arg279, %arg295, %arg311, %arg0) {mpmd.is_sdy_partitioned} : (!mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, 
[{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<2x8192x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, 
[{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<i32>, sharding=<@mesh, []>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<i32>, sharding=<@mesh, []>>) -> (!mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, 
{}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<i32>, sharding=<@mesh, []>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<i32>, sharding=<@mesh, []>>)
    return %54#289, %54#193, %54#199, %54#211, %54#205, %54#212, %54#213, %54#214, %54#215, %54#253, %54#271, %54#216, %54#247, %54#254, %54#272, %54#255, %54#273, %54#194, %54#200, %54#217, %54#206, %54#218, %54#219, %54#220, %54#221, %54#256, %54#274, %54#222, %54#248, %54#257, %54#275, %54#258, %54#276, %52#193, %52#199, %52#211, %52#205, %52#212, %52#213, %52#214, %52#215, %52#253, %52#271, %52#216, %52#247, %52#254, %52#272, %52#255, %52#273, %52#194, %52#200, %52#217, %52#206, %52#218, %52#219, %52#220, %52#221, %52#256, %52#274, %52#222, %52#248, %52#257, %52#275, %52#258, %52#276, %50#193, %50#199, %50#211, %50#205, %50#212, %50#213, %50#214, %50#215, %50#253, %50#271, %50#216, %50#247, %50#254, %50#272, %50#255, %50#273, %50#194, %50#200, %50#217, %50#206, %50#218, %50#219, %50#220, %50#221, %50#256, %50#274, %50#222, %50#248, %50#257, %50#275, %50#258, %50#276, %50#195, %50#201, %50#223, %50#207, %50#224, %50#225, %50#226, %50#227, %50#259, %50#277, %50#228, %50#249, %50#260, %50#278, %50#261, %50#279, %50#196, %50#202, %50#229, %50#208, %50#230, %50#231, %50#232, %50#233, %50#262, %50#280, %50#234, %50#250, %50#263, %50#281, %50#264, %50#282, %50#197, %50#203, %50#235, %50#209, %50#236, %50#237, %50#238, %50#239, %50#265, %50#283, %50#240, %50#251, %50#266, %50#284, %50#267, %50#285, %50#198, %50#204, %50#241, %50#210, %50#242, %50#243, %50#244, %50#245, %50#268, %50#286, %50#246, %50#252, %50#269, %50#287, %50#270, %50#288, %43#194, %43#200, %43#212, %43#206, %43#213, %43#214, %43#215, %43#216, %43#254, %43#272, %43#217, %43#248, %43#255, %43#273, %43#256, %43#274, %43#195, %43#201, %43#218, %43#207, %43#219, %43#220, %43#221, %43#222, %43#257, %43#275, %43#223, %43#249, %43#258, %43#276, %43#259, %43#277, %54#195, %54#201, %54#223, %54#207, %54#224, %54#225, %54#226, %54#227, %54#259, %54#277, %54#228, %54#249, %54#260, %54#278, %54#261, %54#279, %43#196, %43#202, %43#224, %43#208, %43#225, %43#226, %43#227, %43#228, %43#260, %43#278, %43#229, %43#250, 
%43#261, %43#279, %43#262, %43#280, %43#197, %43#203, %43#230, %43#209, %43#231, %43#232, %43#233, %43#234, %43#263, %43#281, %43#235, %43#251, %43#264, %43#282, %43#265, %43#283, %43#198, %43#204, %43#236, %43#210, %43#237, %43#238, %43#239, %43#240, %43#266, %43#284, %43#241, %43#252, %43#267, %43#285, %43#268, %43#286, %43#199, %43#205, %43#242, %43#211, %43#243, %43#244, %43#245, %43#246, %43#269, %43#287, %43#247, %43#253, %43#270, %43#288, %43#271, %43#289, %54#196, %54#202, %54#229, %54#208, %54#230, %54#231, %54#232, %54#233, %54#262, %54#280, %54#234, %54#250, %54#263, %54#281, %54#264, %54#282, %54#197, %54#203, %54#235, %54#209, %54#236, %54#237, %54#238, %54#239, %54#265, %54#283, %54#240, %54#251, %54#266, %54#284, %54#267, %54#285, %54#198, %54#204, %54#241, %54#210, %54#242, %54#243, %54#244, %54#245, %54#268, %54#286, %54#246, %54#252, %54#269, %54#287, %54#270, %54#288, %52#195, %52#201, %52#223, %52#207, %52#224, %52#225, %52#226, %52#227, %52#259, %52#277, %52#228, %52#249, %52#260, %52#278, %52#261, %52#279, %52#196, %52#202, %52#229, %52#208, %52#230, %52#231, %52#232, %52#233, %52#262, %52#280, %52#234, %52#250, %52#263, %52#281, %52#264, %52#282, %52#197, %52#203, %52#235, %52#209, %52#236, %52#237, %52#238, %52#239, %52#265, %52#283, %52#240, %52#251, %52#266, %52#284, %52#267, %52#285, %52#198, %52#204, %52#241, %52#210, %52#242, %52#243, %52#244, %52#245, %52#268, %52#286, %52#246, %52#252, %52#269, %52#287, %52#270, %52#288, %54#192, %54#0, %54#1, %54#2, %54#3, %54#4, %54#5, %54#6, %54#7, %54#8, %54#9, %54#10, %54#11, %54#12, %54#13, %54#14, %54#15, %54#16, %54#17, %54#18, %54#19, %54#20, %54#21, %54#22, %54#23, %54#24, %54#25, %54#26, %54#27, %54#28, %54#29, %54#30, %54#31, %52#1, %52#2, %52#3, %52#4, %52#5, %52#6, %52#7, %52#8, %52#9, %52#10, %52#11, %52#12, %52#13, %52#14, %52#15, %52#16, %52#17, %52#18, %52#19, %52#20, %52#21, %52#22, %52#23, %52#24, %52#25, %52#26, %52#27, %52#28, %52#29, %52#30, %52#31, %52#32, %50#1, %50#2, %50#3, 
%50#4, %50#5, %50#6, %50#7, %50#8, %50#9, %50#10, %50#11, %50#12, %50#13, %50#14, %50#15, %50#16, %50#17, %50#18, %50#19, %50#20, %50#21, %50#22, %50#23, %50#24, %50#25, %50#26, %50#27, %50#28, %50#29, %50#30, %50#31, %50#32, %50#33, %50#34, %50#35, %50#36, %50#37, %50#38, %50#39, %50#40, %50#41, %50#42, %50#43, %50#44, %50#45, %50#46, %50#47, %50#48, %50#49, %50#50, %50#51, %50#52, %50#53, %50#54, %50#55, %50#56, %50#57, %50#58, %50#59, %50#60, %50#61, %50#62, %50#63, %50#64, %50#65, %50#66, %50#67, %50#68, %50#69, %50#70, %50#71, %50#72, %50#73, %50#74, %50#75, %50#76, %50#77, %50#78, %50#79, %50#80, %50#81, %50#82, %50#83, %50#84, %50#85, %50#86, %50#87, %50#88, %50#89, %50#90, %50#91, %50#92, %50#93, %50#94, %50#95, %50#96, %43#2, %43#3, %43#4, %43#5, %43#6, %43#7, %43#8, %43#9, %43#10, %43#11, %43#12, %43#13, %43#14, %43#15, %43#16, %43#17, %43#18, %43#19, %43#20, %43#21, %43#22, %43#23, %43#24, %43#25, %43#26, %43#27, %43#28, %43#29, %43#30, %43#31, %43#32, %43#33, %54#32, %54#33, %54#34, %54#35, %54#36, %54#37, %54#38, %54#39, %54#40, %54#41, %54#42, %54#43, %54#44, %54#45, %54#46, %54#47, %43#34, %43#35, %43#36, %43#37, %43#38, %43#39, %43#40, %43#41, %43#42, %43#43, %43#44, %43#45, %43#46, %43#47, %43#48, %43#49, %43#50, %43#51, %43#52, %43#53, %43#54, %43#55, %43#56, %43#57, %43#58, %43#59, %43#60, %43#61, %43#62, %43#63, %43#64, %43#65, %43#66, %43#67, %43#68, %43#69, %43#70, %43#71, %43#72, %43#73, %43#74, %43#75, %43#76, %43#77, %43#78, %43#79, %43#80, %43#81, %43#82, %43#83, %43#84, %43#85, %43#86, %43#87, %43#88, %43#89, %43#90, %43#91, %43#92, %43#93, %43#94, %43#95, %43#96, %43#97, %54#48, %54#49, %54#50, %54#51, %54#52, %54#53, %54#54, %54#55, %54#56, %54#57, %54#58, %54#59, %54#60, %54#61, %54#62, %54#63, %54#64, %54#65, %54#66, %54#67, %54#68, %54#69, %54#70, %54#71, %54#72, %54#73, %54#74, %54#75, %54#76, %54#77, %54#78, %54#79, %54#80, %54#81, %54#82, %54#83, %54#84, %54#85, %54#86, %54#87, %54#88, %54#89, %54#90, %54#91, %54#92, %54#93, 
%54#94, %54#95, %52#33, %52#34, %52#35, %52#36, %52#37, %52#38, %52#39, %52#40, %52#41, %52#42, %52#43, %52#44, %52#45, %52#46, %52#47, %52#48, %52#49, %52#50, %52#51, %52#52, %52#53, %52#54, %52#55, %52#56, %52#57, %52#58, %52#59, %52#60, %52#61, %52#62, %52#63, %52#64, %52#65, %52#66, %52#67, %52#68, %52#69, %52#70, %52#71, %52#72, %52#73, %52#74, %52#75, %52#76, %52#77, %52#78, %52#79, %52#80, %52#81, %52#82, %52#83, %52#84, %52#85, %52#86, %52#87, %52#88, %52#89, %52#90, %52#91, %52#92, %52#93, %52#94, %52#95, %52#96, %54#96, %54#97, %54#98, %54#99, %54#100, %54#101, %54#102, %54#103, %54#104, %54#105, %54#106, %54#107, %54#108, %54#109, %54#110, %54#111, %54#112, %54#113, %54#114, %54#115, %54#116, %54#117, %54#118, %54#119, %54#120, %54#121, %54#122, %54#123, %54#124, %54#125, %54#126, %54#127, %52#97, %52#98, %52#99, %52#100, %52#101, %52#102, %52#103, %52#104, %52#105, %52#106, %52#107, %52#108, %52#109, %52#110, %52#111, %52#112, %52#113, %52#114, %52#115, %52#116, %52#117, %52#118, %52#119, %52#120, %52#121, %52#122, %52#123, %52#124, %52#125, %52#126, %52#127, %52#128, %50#97, %50#98, %50#99, %50#100, %50#101, %50#102, %50#103, %50#104, %50#105, %50#106, %50#107, %50#108, %50#109, %50#110, %50#111, %50#112, %50#113, %50#114, %50#115, %50#116, %50#117, %50#118, %50#119, %50#120, %50#121, %50#122, %50#123, %50#124, %50#125, %50#126, %50#127, %50#128, %50#129, %50#130, %50#131, %50#132, %50#133, %50#134, %50#135, %50#136, %50#137, %50#138, %50#139, %50#140, %50#141, %50#142, %50#143, %50#144, %50#145, %50#146, %50#147, %50#148, %50#149, %50#150, %50#151, %50#152, %50#153, %50#154, %50#155, %50#156, %50#157, %50#158, %50#159, %50#160, %50#161, %50#162, %50#163, %50#164, %50#165, %50#166, %50#167, %50#168, %50#169, %50#170, %50#171, %50#172, %50#173, %50#174, %50#175, %50#176, %50#177, %50#178, %50#179, %50#180, %50#181, %50#182, %50#183, %50#184, %50#185, %50#186, %50#187, %50#188, %50#189, %50#190, %50#191, %50#192, %43#98, %43#99, %43#100, %43#101, 
%43#102, %43#103, %43#104, %43#105, %43#106, %43#107, %43#108, %43#109, %43#110, %43#111, %43#112, %43#113, %43#114, %43#115, %43#116, %43#117, %43#118, %43#119, %43#120, %43#121, %43#122, %43#123, %43#124, %43#125, %43#126, %43#127, %43#128, %43#129, %54#128, %54#129, %54#130, %54#131, %54#132, %54#133, %54#134, %54#135, %54#136, %54#137, %54#138, %54#139, %54#140, %54#141, %54#142, %54#143, %43#130, %43#131, %43#132, %43#133, %43#134, %43#135, %43#136, %43#137, %43#138, %43#139, %43#140, %43#141, %43#142, %43#143, %43#144, %43#145, %43#146, %43#147, %43#148, %43#149, %43#150, %43#151, %43#152, %43#153, %43#154, %43#155, %43#156, %43#157, %43#158, %43#159, %43#160, %43#161, %43#162, %43#163, %43#164, %43#165, %43#166, %43#167, %43#168, %43#169, %43#170, %43#171, %43#172, %43#173, %43#174, %43#175, %43#176, %43#177, %43#178, %43#179, %43#180, %43#181, %43#182, %43#183, %43#184, %43#185, %43#186, %43#187, %43#188, %43#189, %43#190, %43#191, %43#192, %43#193, %54#144, %54#145, %54#146, %54#147, %54#148, %54#149, %54#150, %54#151, %54#152, %54#153, %54#154, %54#155, %54#156, %54#157, %54#158, %54#159, %54#160, %54#161, %54#162, %54#163, %54#164, %54#165, %54#166, %54#167, %54#168, %54#169, %54#170, %54#171, %54#172, %54#173, %54#174, %54#175, %54#176, %54#177, %54#178, %54#179, %54#180, %54#181, %54#182, %54#183, %54#184, %54#185, %54#186, %54#187, %54#188, %54#189, %54#190, %54#191, %52#129, %52#130, %52#131, %52#132, %52#133, %52#134, %52#135, %52#136, %52#137, %52#138, %52#139, %52#140, %52#141, %52#142, %52#143, %52#144, %52#145, %52#146, %52#147, %52#148, %52#149, %52#150, %52#151, %52#152, %52#153, %52#154, %52#155, %52#156, %52#157, %52#158, %52#159, %52#160, %52#161, %52#162, %52#163, %52#164, %52#165, %52#166, %52#167, %52#168, %52#169, %52#170, %52#171, %52#172, %52#173, %52#174, %52#175, %52#176, %52#177, %52#178, %52#179, %52#180, %52#181, %52#182, %52#183, %52#184, %52#185, %52#186, %52#187, %52#188, %52#189, %52#190, %52#191, %52#192, %43#0 : 
!mpmd.mesh_tensor<"m0", tensor<i32>, sharding=<@mesh, []>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<i32>, sharding=<@mesh, []>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", 
tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, 
sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, 
!mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, 
[{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, 
sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, 
{}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, 
{}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m2", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, 
!mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", 
tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m0", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, 
!mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, 
sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<4096xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x4096xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<4096x1024xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024xf32>, sharding=<@mesh, [{"data"}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2x1024xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m1", tensor<8x2xf32>, sharding=<@mesh, [{"data"}, {}]>>, !mpmd.mesh_tensor<"m1", 
tensor<1024x8x2xf32>, sharding=<@mesh, [{"data"}, {}, {}]>>, !mpmd.mesh_tensor<"m3", tensor<f32>, sharding=<@mesh, []>>
  }
 ```