# Understanding the Cylinder Flow Dataset

In [32]:
import os
import json
import tensorflow as tf
import functools

In [33]:
# Base directory for all dataset paths: the kernel's current working directory.
root_dir = os.getcwd()

In [34]:
# Folder (relative to root_dir) that holds meta.json and the <split>.tfrecord
# files; keep the trailing slash.
dataset_folder = 'Cylinder_flow/'

In [35]:
# Functions adapted from DeepMind to load the dataset
def _parse(proto, meta):
  """Decode a serialized tf.Example into a dict of trajectory tensors.

  Args:
    proto: Serialized example, as yielded by a TFRecord dataset.
    meta: Dataset metadata. Reads `field_names` (keys stored in the record),
      `features` (per-key `dtype`, `shape` and `type`) and
      `trajectory_length`.

  Returns:
    Dict mapping each key of `meta['features']` to its decoded tensor.

  Raises:
    ValueError: If a feature's `type` is not 'static', 'dynamic' or
      'dynamic_varlen'.
  """
  # Every stored field is read as a variable-length list of byte strings.
  spec = {}
  for name in meta['field_names']:
    spec[name] = tf.io.VarLenFeature(tf.string)
  parsed = tf.io.parse_single_example(proto, spec)

  trajectory = {}
  for key, field in meta['features'].items():
    raw_bytes = parsed[key].values
    tensor = tf.reshape(
        tf.io.decode_raw(raw_bytes, getattr(tf, field['dtype'])),
        field['shape'])
    kind = field['type']
    if kind == 'static':
      # Repeat the tensor trajectory_length times along the first axis.
      tensor = tf.tile(tensor, [meta['trajectory_length'], 1, 1])
    elif kind == 'dynamic_varlen':
      # Row lengths are stored as int32 under the key 'length_<key>'.
      row_lengths = tf.reshape(
          tf.io.decode_raw(parsed['length_'+key].values, tf.int32), [-1])
      tensor = tf.RaggedTensor.from_row_lengths(tensor, row_lengths=row_lengths)
    elif kind != 'dynamic':
      raise ValueError('invalid data format')
    trajectory[key] = tensor
  return trajectory


def load_dataset(split):
  """Load one split of the dataset as a dataset of parsed trajectories.

  Reads `meta.json` from the dataset folder (`root_dir`/`dataset_folder`) and
  uses it to parse every record of `<split>.tfrecord` with `_parse`.

  Args:
    split: Name of the split; records are read from `<split>.tfrecord` inside
      the dataset folder (e.g. 'test').

  Returns:
    A `tf.data.Dataset` whose elements are the dicts produced by `_parse`.
  """
  # Join every component with os.path.join so the paths are correct whether
  # or not `dataset_folder` ends with a path separator.
  data_dir = os.path.join(root_dir, dataset_folder)
  with open(os.path.join(data_dir, 'meta.json'), 'r') as fp:
    meta = json.load(fp)
  ds = tf.data.TFRecordDataset(os.path.join(data_dir, split + '.tfrecord'))
  ds = ds.map(functools.partial(_parse, meta=meta), num_parallel_calls=8)
  ds = ds.prefetch(1)
  return ds

## Let's look at the test dataset

In [36]:
# Load the 'test' split; each element is one parsed trajectory.
ds = load_dataset('test')

In [37]:
# Slice every trajectory along its first axis so that each dataset element is
# a single timestep. NOTE: this rebinds `ds`, so re-running this cell would
# slice the already-flattened elements again; re-run the load cell first.
ds = ds.flat_map(tf.data.Dataset.from_tensor_slices)

In [43]:
# Let's materialise the whole dataset into an in-memory Python list `l`.
# This iterates over every timestep of the split, so it can take a long time.
l = list(ds.prefetch(0))


KeyboardInterrupt



We can see that the size of the test dataset is 60,000. Each item in the dataset holds the data of one timestep. Each item is a dictionary containing 5 different types of data, stored under the following keys:

* cells: Shape is N1*3, where N1 is the number of triangular cells and 3 specifies the ids of the three nodes that each cell connects.
* mesh_pos: Shape is N*2, where N is the number of nodes, and 2 specifies the 2-D position coordinates of each node.
* node_type: Shape is N*1, specifying the type of each node.
* velocity: Shape is N*2, where N is the number of nodes, and 2 specifies the 2-D velocity of the flow at each node.
* pressure: Shape is N*1, where N is the number of nodes, and 1 specifies the scalar pressure of the flow at each node.

In [45]:
# Number of items (timesteps) in the test dataset
print("Size of dataset:",len(l))

Size of dataset: 60000


In [48]:
# Inspect the first item; pay attention to the shape of each field
l[0]

{'cells': <tf.Tensor: shape=(3612, 3), dtype=int32, numpy=
 array([[   0,    1,    2],
        [   3,    4,    5],
        [   6,    7,    0],
        ...,
        [1917, 1915, 1516],
        [1522, 1919, 1520],
        [1919, 1917, 1520]])>,
 'mesh_pos': <tf.Tensor: shape=(1923, 2), dtype=float32, numpy=
 array([[0.        , 0.39398578],
        [0.01234996, 0.39554158],
        [0.        , 0.40217   ],
        ...,
        [1.5816092 , 0.41      ],
        [1.6       , 0.40646887],
        [1.6       , 0.41      ]], dtype=float32)>,
 'node_type': <tf.Tensor: shape=(1923, 1), dtype=int32, numpy=
 array([[4],
        [0],
        [4],
        ...,
        [6],
        [5],
        [6]])>,
 'velocity': <tf.Tensor: shape=(1923, 2), dtype=float32, numpy=
 array([[0.33411723, 0.        ],
        [0.33384922, 0.23244236],
        [0.16675676, 0.        ],
        ...,
        [0.        , 0.        ],
        [1.1650559 , 0.00757535],
        [0.        , 0.        ]], dtype=float32)>,
 '

In [49]:
# Now let's look at the data of two consecutive datasets (items 0 and 600)

In [56]:
# Cell connectivity of item 0 and item 600, shown side by side
l[0]['cells'] , l[600]['cells']

(<tf.Tensor: shape=(3612, 3), dtype=int32, numpy=
 array([[   0,    1,    2],
        [   3,    4,    5],
        [   6,    7,    0],
        ...,
        [1917, 1915, 1516],
        [1522, 1919, 1520],
        [1919, 1917, 1520]])>,
 <tf.Tensor: shape=(3276, 3), dtype=int32, numpy=
 array([[   0,    1,    2],
        [   3,    4,    5],
        [   6,    0,    7],
        ...,
        [1751, 1749, 1342],
        [1348, 1753, 1346],
        [1753, 1751, 1346]])>)

In [57]:
# Compare the node positions of two consecutive timesteps (items 0 and 1);
# the rows displayed in the output are identical for both.
l[0]['mesh_pos'] , l[1]['mesh_pos']

(<tf.Tensor: shape=(1923, 2), dtype=float32, numpy=
 array([[0.        , 0.39398578],
        [0.01234996, 0.39554158],
        [0.        , 0.40217   ],
        ...,
        [1.5816092 , 0.41      ],
        [1.6       , 0.40646887],
        [1.6       , 0.41      ]], dtype=float32)>,
 <tf.Tensor: shape=(1923, 2), dtype=float32, numpy=
 array([[0.        , 0.39398578],
        [0.01234996, 0.39554158],
        [0.        , 0.40217   ],
        ...,
        [1.5816092 , 0.41      ],
        [1.6       , 0.40646887],
        [1.6       , 0.41      ]], dtype=float32)>)