In [1]:
import tensorflow as tf
from tensorflow import keras

import numpy as np

import h5py
import os

import rb_equivariant_cnn as conv
import rb_equivariant_gcnn as gconv
import rb_equivariant_se2ncnn as dn_conv

# Ask TensorFlow to grow its GPU memory allocation on demand instead of
# reserving the device memory up front.
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'

2024-08-13 20:59:02.536214: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-08-13 20:59:02.566170: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Setup

In [2]:
# Size of the last (channel) axis of every snapshot.
RB_CHANNELS = 4
# Size of each of the two horizontal axes (width and depth) of a snapshot.
HORIZONTAL_SIZE = 48
# Size of the vertical (height) axis of a snapshot.
HEIGHT = 32

# Number of snapshots per batch; also passed as the fixed batch size of every InputLayer.
BATCH_SIZE = 1

# Base name (without the '.h5' extension) of the simulation file under 'data'.
# NOTE(review): the name starts with 96_96_64, which looks like a grid size and
# differs from HORIZONTAL_SIZE/HEIGHT above - confirm the file matches these constants.
SIMULATION_NAME = '96_96_64_10000.0_0.71_0.03_0.3_1000.2'

# Data

In [3]:
# Path of the HDF5 file for the simulation selected by SIMULATION_NAME.
sim_file = os.path.join('data', f'{SIMULATION_NAME}.h5')

class generator:
    """Callable that streams the entries of the 'data' item of an HDF5 file.

    Every entry is yielded twice, as an ``(input, target)`` pair made of the
    same object. The file is opened anew on each call and stays open only
    while the returned iterator is being consumed.
    """

    def __init__(self, filename):
        # Only the path is stored; the file itself is opened lazily in __call__.
        self.filename = filename

    def __call__(self):
        with h5py.File(self.filename, 'r') as h5_file:
            snapshots = h5_file['data']
            for snapshot in snapshots:
                yield snapshot, snapshot

# Input and target are the same snapshot, so both use one tensor spec.
snapshot_spec = tf.TensorSpec(
    shape=(HORIZONTAL_SIZE, HORIZONTAL_SIZE, HEIGHT, RB_CHANNELS),
    dtype=tf.float64,
)

dataset = tf.data.Dataset.from_generator(
    generator(sim_file),
    output_signature=(snapshot_spec, snapshot_spec),
)

# dataset = dataset.shuffle(10, reshuffle_each_iteration=True)
# A trailing partial batch is kept (drop_remainder=False).
dataset = dataset.batch(BATCH_SIZE, drop_remainder=False)

2024-08-13 20:37:44.148520: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
2024-08-13 20:37:44.150009: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1928] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 76949 MB memory:  -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:01:00.0, compute capability: 8.0


# Data Augmentation

In [4]:

#TODO  Rotate and Flip Vectors
class RandomRot(keras.layers.RandomRotation):
    """Random rotation for inputs that carry two trailing axes instead of one.

    The last two axes of the input (height and channels in this notebook) are
    merged into a single axis before the parent ``RandomRotation`` layer is
    called, and the result is reshaped back to the original shape.
    """

    def call(self, inputs: tf.Tensor, *args, **kwargs) -> tf.Tensor:
        """Apply the parent rotation to `inputs` with its last two axes merged.

        Args:
            inputs: Tensor of rank >= 3.
            *args, **kwargs: Forwarded unchanged to the parent ``call``.

        Returns:
            Tensor with the same shape as `inputs`.
        """
        # Prefer static dimensions so the static shape survives the reshapes;
        # fall back to the dynamic shape only for dimensions that are unknown.
        dynamic_shape = tf.shape(inputs)
        full_shape = [dynamic_shape[axis] if dim is None else dim
                      for axis, dim in enumerate(inputs.shape)]
        # Build the target shape by list concatenation. Adding a list to a shape
        # *tensor* would be a broadcast addition, not a concatenation.
        merged_shape = full_shape[:-2] + [full_shape[-2] * full_shape[-1]]

        merged_inputs = tf.reshape(inputs, merged_shape)
        outputs = super().call(merged_inputs, *args, **kwargs)
        return tf.reshape(outputs, full_shape)
    
class RandomFlip(keras.layers.RandomFlip):
    """Random flip for inputs that carry two trailing axes instead of one.

    The last two axes of the input (height and channels in this notebook) are
    merged into a single axis before the parent ``RandomFlip`` layer is called,
    and the result is reshaped back to the original shape.
    """

    def call(self, inputs: tf.Tensor, *args, **kwargs) -> tf.Tensor:
        """Apply the parent flip to `inputs` with its last two axes merged.

        Args:
            inputs: Tensor of rank >= 3.
            *args, **kwargs: Forwarded unchanged to the parent ``call``.

        Returns:
            Tensor with the same shape as `inputs`.
        """
        # Prefer static dimensions so the static shape survives the reshapes;
        # fall back to the dynamic shape only for dimensions that are unknown.
        dynamic_shape = tf.shape(inputs)
        full_shape = [dynamic_shape[axis] if dim is None else dim
                      for axis, dim in enumerate(inputs.shape)]
        # Build the target shape by list concatenation. Adding a list to a shape
        # *tensor* would be a broadcast addition, not a concatenation.
        merged_shape = full_shape[:-2] + [full_shape[-2] * full_shape[-1]]

        merged_inputs = tf.reshape(inputs, merged_shape)
        outputs = super().call(merged_inputs, *args, **kwargs)
        return tf.reshape(outputs, full_shape)

# 3D Rayleigh-Bénard Convolution
- Equivariant to horizontal translations
- __No vertical parameter sharing__
- Height-dependent bias
- Supports horizontal wrap and same padding
    - Wrap makes sense when using periodic boundary conditions for Rayleigh-Bénard
    - Attention: This may destroy exact rotation equivariance in our experiments (nevertheless WRAP will be preferable in practice)
- Also supports vertical same padding
- Supports stride (including vertical stride)
- Uses 2D convolutions under the hood

In [6]:
# Arguments shared by the four strided convolutions of this network.
_cnn_conv_kwargs = dict(h_ksize=3, v_ksize=5, channels=RB_CHANNELS,
                        h_padding='WRAP', v_padding='SAME', strides=(2,2,2))


def _cnn_stage(index):
    """Return the convolution, batch-norm and ReLU layers of stage `index`."""
    return [
        conv.RB3D_Conv(**_cnn_conv_kwargs, name=f'Conv{index}'),
        conv.BatchNorm(name=f'BatchNorm{index}'),
        keras.layers.Activation('relu', name=f'NonLinearity{index}'),
    ]


model = keras.Sequential([
    keras.layers.InputLayer(shape=(HORIZONTAL_SIZE, HORIZONTAL_SIZE, HEIGHT, RB_CHANNELS),
                            batch_size=BATCH_SIZE),

    # Data Augmentation
    RandomRot(factor=1, fill_mode='wrap', value_range=(0,1)),
    RandomFlip(mode='horizontal_and_vertical'),

    *_cnn_stage(1),

    keras.layers.Dropout(rate=0.2),
    *_cnn_stage(2),

    conv.SpatialPooling(ksize=(2,2,2), strides=(2,2,2), pooling_type='MAX'),

    keras.layers.Dropout(rate=0.2),
    *_cnn_stage(3),

    keras.layers.Dropout(rate=0.2),
    *_cnn_stage(4),
])

# output shape: batch_size, width, depth, height, channels
model.summary()

# 3D Rayleigh-Bénard $D_4$ Group Equivariant Convolution
- Equivariant to all symmetries of 3D Rayleigh-Bénard:
    - __90° rotations around a vertical axis__
    - __reflections through a vertical plane__
    - __horizontal translations__

In [7]:
G = 'D4' # 'C4' for rotations or 'D4' for rotations and reflections

# Arguments shared by all four group convolutions.
_gcnn_conv_kwargs = dict(h_ksize=3, v_ksize=5, channels=RB_CHANNELS,
                         h_padding='WRAP', v_padding='SAME', strides=(2, 2, 2))

_gcnn_layers = [
    keras.layers.InputLayer(shape=(HORIZONTAL_SIZE, HORIZONTAL_SIZE, HEIGHT, RB_CHANNELS),
                            batch_size=BATCH_SIZE),
    # add transformation dimension
    keras.layers.Reshape((HORIZONTAL_SIZE, HORIZONTAL_SIZE, 1, HEIGHT, RB_CHANNELS)),
    # The first convolution is built with 'Z2' as its first argument ...
    gconv.RB3D_G_Conv('Z2', G, **_gcnn_conv_kwargs, name=f'Lift_{G}_Conv1'),
]
# ... the remaining three with G for both positional arguments.
_gcnn_layers.extend(
    gconv.RB3D_G_Conv(G, G, **_gcnn_conv_kwargs, name=f'{G}_Conv{index}')
    for index in range(2, 5)
)

model = keras.Sequential(_gcnn_layers)

# output shape: batch_size, width, depth, transformations, height, channels
model.summary()

# 3D Rayleigh-Bénard $D_N$ Group Equivariant Convolution
- Equivariant to all symmetries of 3D Rayleigh-Bénard:
    - __arbitrary discrete__ rotations around a vertical axis
    - reflections through a vertical plane
    - horizontal translations

In [8]:
ORIENTATIONS = 8

# Arguments shared by the lifting layer and the three D_N convolutions.
_dn_conv_kwargs = dict(h_ksize=5, v_ksize=5, channels=RB_CHANNELS,
                       h_padding='WRAP', v_padding='SAME', strides=(2, 2, 2))

_dn_layers = [
    keras.layers.InputLayer(shape=(HORIZONTAL_SIZE, HORIZONTAL_SIZE, HEIGHT, RB_CHANNELS),
                            batch_size=BATCH_SIZE),
    # Only the lifting layer receives the number of orientations.
    dn_conv.RB3D_LiftDN_Conv(orientations=ORIENTATIONS, **_dn_conv_kwargs, name='Lift_DN_Conv1'),
]
_dn_layers += [
    dn_conv.RB3D_DN_Conv(**_dn_conv_kwargs, name=f'DN_Conv{index}')
    for index in range(2, 5)
]

model = keras.Sequential(_dn_layers)

# output shape: batch_size, width, depth, transformations, height, channels
model.summary()

# Autoencoder

### Convolutional Autoencoder

In [5]:
# Arguments shared by every convolution / pooling layer of the autoencoder.
_ae_conv_kwargs = dict(h_ksize=3, v_ksize=5, channels=RB_CHANNELS,
                       h_padding='WRAP', v_padding='SAME', strides=(1,1,1))
_ae_pool_kwargs = dict(ksize=(2,2,2), pooling_type='MAX', strides=(2,2,2), padding='VALID')

_ae_layers = [
    keras.layers.InputLayer(shape=(HORIZONTAL_SIZE, HORIZONTAL_SIZE, HEIGHT, RB_CHANNELS),
                            batch_size=BATCH_SIZE),
]

###############
#   Encoder   #
###############
# Four times: convolution followed by 2x2x2 max pooling.
for stage in range(1, 5):
    _ae_layers.append(conv.RB3D_Conv(**_ae_conv_kwargs, name=f'En_Conv{stage}'))
    _ae_layers.append(conv.SpatialPooling(**_ae_pool_kwargs, name=f'Pool{stage}'))

###############
#   Decoder   #
###############
# Four times: 2x2x2 upsampling followed by a convolution.
for stage in range(1, 5):
    _ae_layers.append(conv.UpSampling(size=(2,2,2), name=f'UpSampling{stage}'))
    _ae_layers.append(conv.RB3D_Conv(**_ae_conv_kwargs, name=f'De_Conv{stage}'))

model = keras.Sequential(_ae_layers)

# output shape: batch_size, width, depth, height, channels
model.summary()

In [6]:
# Train the model to reproduce its input: the dataset yields each snapshot as
# both input and target, and the loss is the mean squared error between them.
# The loss is passed as an instance (not the class), as Model.compile documents.
# NOTE(review): in the recorded run the reported loss rose from 0.61 to 126.56
# within 29 steps - learning_rate=0.01 may be too high; confirm before a long run.
model.compile(
    loss=keras.losses.MeanSquaredError(),
    optimizer=keras.optimizers.Adam(learning_rate=0.01),
    metrics=["mse"]
)

hist = model.fit(dataset, epochs=100)

Epoch 1/100


I0000 00:00:1723572595.409978 3706001 service.cc:145] XLA service 0x7f9a2c808170 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1723572595.410061 3706001 service.cc:153]   StreamExecutor device (0): NVIDIA A100 80GB PCIe, Compute Capability 8.0
2024-08-13 20:09:55.570028: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-08-13 20:09:55.904096: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907
2024-08-13 20:09:57.521247: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng0{} for conv (f32[8,256,96,96]{3,2,1,0}, u8[0]{0}) custom-call(f32[8,272,98,98]{3,2,1,0}, f32[256,272,3,3]{3,2,1,0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv_b

      1/Unknown [1m25s[0m 25s/step - loss: 0.6133 - mse: 0.6133

I0000 00:00:1723572606.242888 3706001 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


     29/Unknown [1m313s[0m 10s/step - loss: 126.5626 - mse: 126.5626

KeyboardInterrupt: 

### $D_4$ Group Equivariant Convolutional Autoencoder

In [4]:
G = 'D4' # 'C4' for rotations or 'D4' for rotations and reflections

# Arguments shared by every group convolution / pooling layer of the autoencoder.
_g_ae_conv_kwargs = dict(h_ksize=3, v_ksize=5, channels=RB_CHANNELS,
                         h_padding='WRAP', v_padding='SAME', strides=(1,1,1))
_g_ae_pool_kwargs = dict(ksize=(2,2,2), pooling_type='MAX', strides=(2,2,2), padding='VALID')

# The names are listed explicitly because their separators are not uniform ('_' vs '-').
_g_encoder_names = [f'En_Lift_{G}_Conv1', f'En_{G}-Conv2', f'En_{G}-Conv3', f'En_{G}-Conv4']
_g_decoder_names = [f'De_{G}_Conv1', f'De_{G}-Conv2', f'De_{G}-Conv3', f'De_{G}-Conv4']

_g_ae_layers = [
    keras.layers.InputLayer(shape=(HORIZONTAL_SIZE, HORIZONTAL_SIZE, HEIGHT, RB_CHANNELS),
                            batch_size=BATCH_SIZE),

    # add transformation dimension
    keras.layers.Reshape((HORIZONTAL_SIZE, HORIZONTAL_SIZE, 1, HEIGHT, RB_CHANNELS)),
]

###############
#   Encoder   #
###############
for stage, conv_name in enumerate(_g_encoder_names, start=1):
    # Only the first convolution is built with 'Z2' as its first argument.
    first_arg = 'Z2' if stage == 1 else G
    _g_ae_layers.append(gconv.RB3D_G_Conv(first_arg, G, **_g_ae_conv_kwargs, name=conv_name))
    _g_ae_layers.append(gconv.SpatialPooling(**_g_ae_pool_kwargs, name=f'SpatialPool{stage}'))

###############
#   Decoder   #
###############
for stage, conv_name in enumerate(_g_decoder_names, start=1):
    _g_ae_layers.append(gconv.UpSampling(size=(2,2,2), name=f'UpSampling{stage}'))
    _g_ae_layers.append(gconv.RB3D_G_Conv(G, G, **_g_ae_conv_kwargs, name=conv_name))

# Reduce over the transformation axis with a mean, without keeping that axis.
_g_ae_layers.append(gconv.TransformationPooling(tf.reduce_mean, keepdims=False))

model = keras.Sequential(_g_ae_layers)

# output shape: batch_size, width, depth, height, channels
model.summary()

In [6]:
# Train the model to reproduce its input: the dataset yields each snapshot as
# both input and target, and the loss is the mean squared error between them.
# The loss is passed as an instance (not the class), as Model.compile documents.
# NOTE(review): the recorded run reports a loss of about 1.8e14 after 32 steps -
# training appears to diverge with learning_rate=0.01; confirm before a long run.
model.compile(
    loss=keras.losses.MeanSquaredError(),
    optimizer=keras.optimizers.Adam(learning_rate=0.01),
    metrics=["mse"]
)

hist = model.fit(dataset, epochs=100)

Epoch 1/100


I0000 00:00:1723574285.984347 3716555 service.cc:145] XLA service 0x7ff3e4106c20 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1723574285.984419 3716555 service.cc:153]   StreamExecutor device (0): NVIDIA A100 80GB PCIe, Compute Capability 8.0
2024-08-13 20:38:06.183460: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-08-13 20:38:06.569298: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907
2024-08-13 20:38:09.318688: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng3{k11=0} for conv (f32[1,2048,96,96]{3,2,1,0}, u8[0]{0}) custom-call(f32[1,272,98,98]{3,2,1,0}, f32[2048,272,3,3]{3,2,1,0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn

     32/Unknown [1m92s[0m 1s/step - loss: 177750895755264.0000 - mse: 177750895755264.0000

### $D_N$ Group Equivariant Convolutional Autoencoder

In [3]:
ORIENTATIONS = 8

# Arguments shared by every D_N convolution / pooling layer of the autoencoder.
_dn_ae_conv_kwargs = dict(h_ksize=3, v_ksize=5, channels=RB_CHANNELS,
                          h_padding='WRAP', v_padding='SAME', strides=(1,1,1))
_dn_ae_pool_kwargs = dict(ksize=(2,2,2), pooling_type='MAX', strides=(2,2,2), padding='VALID')

# The names are listed explicitly because their separators are not uniform ('_' vs '-').
_dn_encoder_names = ['En_Lift_DN_Conv1', 'En_DN-Conv2', 'En_DN-Conv3', 'En_DN-Conv4']
_dn_decoder_names = ['De_DN_Conv1', 'De_DN-Conv2', 'De_DN-Conv3', 'De_DN-Conv4']

_dn_ae_layers = [
    keras.layers.InputLayer(shape=(HORIZONTAL_SIZE, HORIZONTAL_SIZE, HEIGHT, RB_CHANNELS),
                            batch_size=BATCH_SIZE),
]

###############
#   Encoder   #
###############
for stage, conv_name in enumerate(_dn_encoder_names, start=1):
    if stage == 1:
        # Only the lifting layer receives the number of orientations.
        encoder_conv = dn_conv.RB3D_LiftDN_Conv(orientations=ORIENTATIONS, **_dn_ae_conv_kwargs,
                                                name=conv_name)
    else:
        encoder_conv = dn_conv.RB3D_DN_Conv(**_dn_ae_conv_kwargs, name=conv_name)
    _dn_ae_layers.append(encoder_conv)
    _dn_ae_layers.append(dn_conv.SpatialPooling(**_dn_ae_pool_kwargs, name=f'SpatialPool{stage}'))

###############
#   Decoder   #
###############
for stage, conv_name in enumerate(_dn_decoder_names, start=1):
    _dn_ae_layers.append(dn_conv.UpSampling(size=(2,2,2), name=f'UpSampling{stage}'))
    _dn_ae_layers.append(dn_conv.RB3D_DN_Conv(**_dn_ae_conv_kwargs, name=conv_name))

# Reduce over the transformation axis with a mean, without keeping that axis.
_dn_ae_layers.append(dn_conv.TransformationPooling(tf.reduce_mean, keepdims=False))

model = keras.Sequential(_dn_ae_layers)

# output shape: batch_size, width, depth, height, channels
# (the final TransformationPooling is called with keepdims=False)
model.summary()

2024-08-13 20:59:13.948168: W tensorflow/core/common_runtime/gpu/gpu_bfc_allocator.cc:47] Overriding orig_value setting because the TF_FORCE_GPU_ALLOW_GROWTH environment variable is set. Original config value was 0.
2024-08-13 20:59:13.949784: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1928] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 76434 MB memory:  -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:01:00.0, compute capability: 8.0


In [6]:
# Train the model to reproduce its input: the dataset yields each snapshot as
# both input and target, and the loss is the mean squared error between them.
# The loss is passed as an instance (not the class), as Model.compile documents.
model.compile(
    loss=keras.losses.MeanSquaredError(),
    optimizer=keras.optimizers.Adam(learning_rate=0.01),
    metrics=["mse"]
)

hist = model.fit(dataset, epochs=100)

Epoch 1/100


I0000 00:00:1723573649.336577 3713413 service.cc:145] XLA service 0x7f42f480fb80 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1723573649.336648 3713413 service.cc:153]   StreamExecutor device (0): NVIDIA A100 80GB PCIe, Compute Capability 8.0
2024-08-13 20:27:29.615343: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-08-13 20:27:30.066592: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:465] Loaded cuDNN version 8907
2024-08-13 20:27:38.522024: E external/local_xla/xla/service/slow_operation_alarm.cc:65] Trying algorithm eng0{} for conv (f32[8,4096,96,96]{3,2,1,0}, u8[0]{0}) custom-call(f32[8,272,98,98]{3,2,1,0}, f32[4096,272,3,3]{3,2,1,0}), window={size=3x3}, dim_labels=bf01_oi01->bf01, custom_call_target="__cudnn$convForward", backend_config={"operation_queue_id":"0","wait_on_operation_queues":[],"cudnn_conv