In [1]:
import numpy as np
import os
from scipy.signal import convolve
import burgers_1d_batch
from phi.tf.flow import *

import tensorflow as tf
from tensorflow.keras import layers, models
import concurrent.futures

import math
import random
import pickle

import json

class DataLoader:
    def __init__(self, simulation_path, k):
        self.simulation_path = simulation_path
        self.k = k
        self.num_simulations, self.num_timesteps = self._count_simulations_and_timesteps()
        self.data = self._load_velocity_data()
        self.batched_data = None
        self.batched_std = None
        

    def _count_simulations_and_timesteps(self):
        # Count the number of simulation directories
        simulation_dirs = [d for d in os.listdir(self.simulation_path) if os.path.isdir(os.path.join(self.simulation_path, d))]
        num_simulations = len(simulation_dirs)

        # Count the number of velocity files in the first simulation directory
        first_sim_dir = os.path.join(self.simulation_path, simulation_dirs[0])
        velocity_files = [f for f in os.listdir(first_sim_dir) if f.startswith('velocity') and f.endswith('.npz')]
        num_timesteps = len(velocity_files)
        print(f"Number of simulations: {num_simulations}, Number of timesteps: {num_timesteps}")
        return num_simulations, num_timesteps
    

    def _load_velocity_data(self):
            def load_data_with_progress(sim):
                result = self._load_simulation_data(sim)
                progress = (sim + 1) / self.num_simulations * 100  # Calculate progress
                print(f"Progress: {progress:.2f}%")
                return result

            with concurrent.futures.ThreadPoolExecutor() as executor:
                data = list(executor.map(load_data_with_progress, range(self.num_simulations)))
            # Filter out empty arrays from each array in data before flattening
            return [element for array in data if array for element in array]                

    def _load_simulation_data(self, sim):
        sim_dir = os.path.join(self.simulation_path, f'sim_{sim:06d}')
        # Load params.json in the simulation directory
        params_file = os.path.join(sim_dir, 'params.json')
        if not os.path.exists(params_file):
            print(f"Params file {params_file} not found, skipping to the next simulation.")
            return []
        
        params_file = json.load(open(params_file))

        A_values = np.array(params_file["A_values"])
        ω_values = np.array(params_file["ω_values"])
        φ_values = np.array(params_file["φ_values"])
        l_values = np.array(params_file["l_values"])
        combined_array = np.column_stack((A_values, ω_values, φ_values, l_values))


        if not os.path.exists(sim_dir):
            print(f"Simulation directory {sim_dir} not found, skipping to the next simulation.")
            return []  # Return empty list for missing simulations
        sim_data = []
        for t in range(self.num_timesteps - self.k-1):
            try:
                velocity_data = np.stack([np.load(os.path.join(sim_dir, f'velocity_{(t + i):06d}.npz'))['data'] for i in range(0, self.k + 1)], axis=0)
                advection_diffusion_data = np.stack([np.load(os.path.join(sim_dir, f'advection_diffusion_{(t + i):06d}.npz'))['data'] for i in range(0, self.k+1)], axis=0)


                sim_data.append((sim, velocity_data, advection_diffusion_data, t, combined_array))

            except FileNotFoundError:
                print(f"File not found for simulation {sim}, timestep {t}, skipping to the next timestep.")


        print(f"Loaded simulation {sim + 1}/{self.num_simulations}")
        return sim_data
    
    def _compute_batch_std(self, batch_data):

        velocity_data_list = []
        advection_diffusion_data_list = []

        for sim, velocity_data, advection_diffusion_data, _ , _ in batch_data:
            velocity_data_list.append(velocity_data)
            advection_diffusion_data_list.append(advection_diffusion_data)

        # Convert lists to numpy arrays
        velocity_data_array = np.array(velocity_data_list)
        advection_diffusion_data_array = np.array(advection_diffusion_data_list)


        velocity_std = np.std(velocity_data_array.flatten())
        advection_diffusion_std = np.std(advection_diffusion_data_array.flatten())

        return velocity_std, advection_diffusion_std
    def prepare_batches(self, batch_size):
        self.batched_data = []
        self.batched_std = []

        random.shuffle(self.data)  # Shuffle the data before creating batches
        total_batches = int(math.ceil(len(self.data) / batch_size))
        
        for batch_idx in range(total_batches):
            start_idx = batch_idx * batch_size
            end_idx = start_idx + batch_size
            batch_data = self.data[start_idx:end_idx]

            
            self.batched_data.append(batch_data)
            self.batched_std.append(self._compute_batch_std(batch_data))

        return self.batched_data
    
    def save_data_as_pickle(self, data, filename):
        with open(filename, 'wb') as file:
            pickle.dump(data, file)

    def load_from_pickle(self, filename):
        with open(filename, 'rb') as file:
            self.data = pickle.load(file)
        return self.data


2024-07-19 16:19:40.575245: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-19 16:19:40.618629: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-19 16:19:40.637587: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-19 16:19:40.637624: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-19 16:19:40.650650: I tensorflow/core/platform/cpu_feature_gua

In [2]:

dataloader = DataLoader('./output', 1)

Number of simulations: 124, Number of timesteps: 2000
File not found for simulation 5, timestep 125, skipping to the next timestep.
File not found for simulation 5, timestep 126, skipping to the next timestep.
File not found for simulation 5, timestep 127, skipping to the next timestep.
File not found for simulation 5, timestep 128, skipping to the next timestep.
File not found for simulation 5, timestep 129, skipping to the next timestep.
File not found for simulation 5, timestep 130, skipping to the next timestep.
File not found for simulation 5, timestep 131, skipping to the next timestep.
File not found for simulation 5, timestep 132, skipping to the next timestep.
File not found for simulation 5, timestep 133, skipping to the next timestep.
File not found for simulation 5, timestep 134, skipping to the next timestep.
File not found for simulation 5, timestep 135, skipping to the next timestep.
File not found for simulation 5, timestep 136, skipping to the next timestep.
File not f

In [3]:
def save_data_as_pickle(data, filename):
    with open(filename, 'wb') as file:
        pickle.dump(data, file)
save_data_as_pickle(dataloader.data, 'data_3.pkl')

In [1]:
import pickle
# Open the pickle file
def load_from_pickle(filename):
    with open(filename, 'rb') as file:
        data = pickle.load(file)
    return data

data = load_from_pickle('data.pkl')


In [2]:
from burgers_1d_batch import Burgers_1d

burgers = Burgers_1d(resolution = 128)

2024-07-19 18:11:30.504278: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-07-19 18:11:30.552388: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:479] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-07-19 18:11:30.571459: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:10575] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-07-19 18:11:30.571482: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1442] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-07-19 18:11:30.584047: I tensorflow/core/platform/cpu_feature_gua

In [11]:

from phi.tf.flow import *

test = [item[1] for item in data[0:10]]
forcing = np.stack([item[4] for item in data[0:10]], axis=0)

print(forcing.shape)

test = test[0]

def to_phiflow_format(data):
    return CenteredGrid(math.tensor(data, spatial('x')), extrapolation.PERIODIC, x=int(128), bounds=Box['x', slice(0, 2 * np.pi)])

# Assuming test[2][1] is a list of numpy arrays or a similar iterable of data that can be converted
centered_grids = [to_phiflow_format(a) for a in test]

print(type(centered_grids))

burgers.generate_forcing_with_params(T = 0, params = forcing)

# Assuming centered_grids is a list of CenteredGrid objects
# Assuming centered_grids is a list of CenteredGrid objects
centered_grid_batch = stack(centered_grids, batch('batch'))

import time 

start = time.time()
burgers.equation_batch(centered_grid_batch, [1 for i in range(6)])
end = time.time()

print(end - start)

(10, 20, 4)
<class 'list'>


2024-07-19 18:21:08.898670: W tensorflow/core/framework/op_kernel.cc:1827] INVALID_ARGUMENT: required broadcastable shapes
2024-07-19 18:21:08.898706: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: INVALID_ARGUMENT: required broadcastable shapes


InvalidArgumentError: {{function_node __wrapped__Mul_device_/job:localhost/replica:0/task:0/device:GPU:0}} required broadcastable shapes [Op:Mul] name: 

In [23]:
print(centered_grid_batch)

CenteredGrid[(batchᵇ=6, xˢ=128), size=[94m(x=6.283)[0m, extrapolation=periodic]


CenteredGrid[(batchᵇ=6, xˢ=128), size=[94m(x=6.283)[0m, extrapolation=periodic]
