# 1. Visualize the dataset

In [2]:
import numpy as np
data = np.load('/dli/task/bootcamp/data/kf/KFvorticity_Re100_N50_T500.npy')

%matplotlib inline
import matplotlib.pyplot as plt

FileNotFoundError: [Errno 2] No such file or directory: '/dli/task/bootcamp/data/kf/KFvorticity_Re100_N50_T500.npy'

In [None]:
# Dataset layout: 50 trajectories, each trajectory has 501 time frames,
# and each time frame is represented as a 64 × 64 image.
data.shape

In [None]:
# Shape of the first trajectory on its own (index 0 along the leading axis):
# the trajectory axis is dropped, leaving the time-frame and image axes.
data[0].shape

In [None]:
# Show the first four time frames of the first trajectory in a 2 x 2 grid.
fig, axes = plt.subplots(2, 2, figsize=(7, 7))
ordinals = ('1st', '2nd', '3rd', '4th')

# axes.flat walks the grid row by row, matching subplot positions 1..4.
for frame_idx, (ax, ordinal) in enumerate(zip(axes.flat, ordinals)):
    ax.imshow(data[0][frame_idx])
    ax.set_title(f'1st trajectory: {ordinal} time frame')

plt.tight_layout()
# plt.show() rather than fig.show(): with the inline (non-GUI) backend selected by
# `%matplotlib inline`, Figure.show() only emits a "cannot show the figure" warning.
plt.show()

# 2. Prepare the input and output dataset

In [None]:
import numpy as np
import torch
from neuralop.utils import UnitGaussianNormalizer
from neuralop.datasets.tensor_dataset import TensorDataset
from neuralop.datasets.transforms import Normalizer, PositionalEmbedding, MGPTensorDataset

# NOTE(review): `config` is not created in this cell; it must already exist in the
# kernel (loaded by a cell that is not visible here) before this cell is run.

data = np.load('/dli/task/bootcamp/data/kf/KFvorticity_Re100_N50_T500.npy')

# One-step-ahead pairs: frame t is the input and frame t+1 is the target.
ns_input = data[:, 0:500, :, :]   # define the input as [50, 0:500, 64, 64]
ns_output = data[:, 1:501, :, :]  # define the output one-frame off [50, 1:501, 64, 64]

grid_boundaries = [[0, 1], [0, 1]]
batch_size = config.data.batch_size

# Split the configured test batch sizes on a *copy*. Popping from
# config.data.test_batch_sizes directly would mutate the shared config, so every
# re-run of this cell would pick a different batch size and eventually hit an
# IndexError. The resulting values are the same as on a first run of the old code.
_configured_test_batch_sizes = list(config.data.test_batch_sizes)
test_batch_size = _configured_test_batch_sizes[0]
test_batch_sizes = _configured_test_batch_sizes[1:]

num_workers = 2
pin_memory = True
persistent_workers = True
train_resolution = config.data.train_resolution
test_resolutions = config.data.test_resolutions
n_tests = config.data.n_tests

n_train = config.data.n_train
positional_encoding = config.data.positional_encoding

# Trajectory split: the first 40 trajectories train the model, trajectories 40..49 test it.
n_train_trajectories = 40
n_total_trajectories = 50


def _frames_to_samples(trajectories):
    """Flatten trajectories into independent single-channel image samples.

    Mixes the trajectory index and the time index so that the time series becomes
    an image-to-image mapping: sample ``n_frames * i + j`` is frame ``j`` of
    trajectory ``i``.

    Args:
        trajectories: array of shape (n_trajectories, n_frames, height, width).

    Returns:
        A new float32 tensor of shape (n_trajectories * n_frames, 1, height, width)
        that owns its memory (torch.tensor always copies).
    """
    frames = torch.tensor(trajectories, dtype=torch.float32)
    return frames.reshape(-1, 1, *frames.shape[-2:])


x_train = _frames_to_samples(ns_input[:n_train_trajectories])    # training input
y_train = _frames_to_samples(ns_output[:n_train_trajectories])   # training output
x_test = _frames_to_samples(ns_input[n_train_trajectories:n_total_trajectories])    # testing input
y_test = _frames_to_samples(ns_output[n_train_trajectories:n_total_trajectories])   # testing output

# Original data shape is (50, 501, 64, 64)
# ns_input = ns_output shape is (50, 500, 64, 64),
# then convert the time series into an image-to-image mapping of (50*500, 64, 64)
# here, x_train.shape = y_train.shape = torch.Size([20000, 1, 64, 64]),
# x_test.shape = y_test.shape = torch.Size([5000, 1, 64, 64])

# Normalize inputs with statistics reduced over every dimension of the training
# inputs; the same encoder is then applied to the test inputs.
reduce_dims = list(range(x_train.ndim))
input_encoder = UnitGaussianNormalizer(x_train, reduce_dim=reduce_dims)
x_train = input_encoder.encode(x_train)
x_test = input_encoder.encode(x_test.contiguous())

# Only the training targets are normalized. y_test is deliberately left as loaded,
# so model outputs must be passed through output_encoder.decode before being
# compared with (or plotted next to) y_test.
reduce_dims = list(range(y_train.ndim))
output_encoder = UnitGaussianNormalizer(y_train, reduce_dim=reduce_dims)
y_train = output_encoder.encode(y_train)

train_db = TensorDataset(x_train, y_train, transform_x=PositionalEmbedding(grid_boundaries, 0))
train_loader = torch.utils.data.DataLoader(train_db,
                                           batch_size=batch_size, shuffle=True,
                                           num_workers=num_workers, pin_memory=pin_memory, persistent_workers=persistent_workers)

test_db = TensorDataset(x_test, y_test, transform_x=PositionalEmbedding(grid_boundaries, 0))
test_loader = torch.utils.data.DataLoader(test_db,
                                          batch_size=test_batch_size, shuffle=False,
                                          num_workers=num_workers, pin_memory=pin_memory, persistent_workers=persistent_workers)

test_loaders = {train_resolution: test_loader}

# After the DataLoader, the inputs arrive as batches of torch.Size([64, 3, 64, 64])
# per the original author's note (presumably batch size 64 and 1 field channel plus
# 2 positional-embedding channels — TODO confirm against the config).
# Encoded tensors are normalized, so model outputs must be decoded before plotting.
# (These notes are `#` comments on purpose: a bare string as the last expression of
# a cell is echoed as the cell's output.)

# 4. Visualize the output

## Model prediction vs. ground truth

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
for sample in test_loader:
    x, y = sample['x'], sample['y']
    x_recurrent = x[:,-3:]
    for i in range(4):
        out = model(x_recurrent[:,-3:].to(device))
        out = output_encoder.decode(out)
        out = out.cpu()
        y = output_encoder.decode(y.to(device))
        y = y.cpu()
        fig = plt.figure(figsize=(4, 4))
        ax = fig.add_subplot(1, 2, 1)
        ax.imshow(out[0][0].detach().numpy())
        ax.set_title('Model predict')
        ax = fig.add_subplot(1, 2, 2)
        ax.imshow(y[0+i][0].detach().numpy())
        ax.set_title('Ground truth')
        x_recurrent = torch.cat((x_recurrent, out), 1)
    break

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
for sample in test_loader:
    x, y = sample['x'], sample['y']
    out = model(x.to(device))
    out = output_encoder.decode(out)
    out = out.cpu()
    y = output_encoder.decode(y.to(device))
    y = y.cpu()
    for i in range(4):
        fig = plt.figure(figsize=(4, 4))
        ax = fig.add_subplot(1, 2, 1)
        ax.imshow(out[0+i][0].detach().numpy())
        ax.set_title('Model predict')
        ax = fig.add_subplot(1, 2, 2)
        ax.imshow(y[0+i][0].detach().numpy())
        ax.set_title('Ground truth')
    break