In [1]:
from copy import deepcopy
import yaml
from pathlib import Path
import torch
import shutil
import matplotlib.pyplot as plt

%reload_ext autoreload
%autoreload 2

# Cut boxes


In [5]:
# --- Configuration -----------------------------------------------------------
# Folder name of the prepared dataset that the boxes are cut from.
dataset_name = "plume_extension_training_vary_perm"
# True: store every assembled second-level sample. False: store a sample only
# if its label channel 0 at position [len_box, 32] exceeds 0.15.
use_all_boxes = True
# Length of one box (in cells) along dimension 1 of the stored tensors.
len_box = 128
# The cutting loops stop once this many datapoints have been processed.
number_datapoints = 10000

# After this statement `paths` is the parsed content of paths.yaml (a mapping
# that provides at least the key "datasets_prepared_dir").
with open("paths.yaml", "r") as paths_file:
    paths = yaml.safe_load(paths_file)

# Source dataset: expected to contain info.yaml plus "Inputs" and "Labels".
prepared1_dir = Path(paths["datasets_prepared_dir"]) / dataset_name

## Cut each datapoint into `number_boxes` boxes of length `len_box`

In [6]:
# Scratch directory: every datapoint is cut along dimension 1 into boxes of
# `len_box` cells; box i is stored under "Inputs Box i" / "Label Box i".
prepared_pieces_dir = Path(paths["datasets_prepared_dir"]) / f"{dataset_name}_separate_boxes_len_{len_box}"
prepared_pieces_dir.mkdir(parents=True, exist_ok=True)
shutil.copy(prepared1_dir / "info.yaml", prepared_pieces_dir / "info.yaml")
with open(prepared_pieces_dir / "info.yaml", "r") as info_file:
    info = yaml.safe_load(info_file)
orig_len = info["CellsNumber"][0]
number_boxes = orig_len // len_box
for box in range(number_boxes):
    (prepared_pieces_dir / f"Inputs Box {box}").mkdir(parents=True, exist_ok=True)
    (prepared_pieces_dir / f"Label Box {box}").mkdir(parents=True, exist_ok=True)

# Path.iterdir() yields entries in arbitrary order, so two independent listings
# are not guaranteed to line up. Sorting both makes the input/label pairing
# deterministic (files are matched by their position in the sorted listings).
input_files = sorted((prepared1_dir / "Inputs").iterdir())
label_files = sorted((prepared1_dir / "Labels").iterdir())

for j, (input_file, label_file) in enumerate(zip(input_files, label_files), start=1):
    input_data = torch.load(input_file)
    label = torch.load(label_file)
    name = input_file.stem

    # Crop dimension 2 to 64 cells: the central part of a 128-wide field, or
    # all of a 64-wide field. Any other width is rejected explicitly instead
    # of silently reusing the crop window of the previous datapoint.
    width = label.shape[2]
    if width == 128:
        lower, upper = 32, 96
    elif width == 64:
        lower, upper = 0, 64
    else:
        raise ValueError(f"unsupported label width {width} in {label_file}; expected 64 or 128")

    # `number_boxes` is deliberately re-assigned here: the following cells read
    # it after this loop has finished.
    number_boxes = label.shape[1] // len_box
    for i in range(number_boxes):
        rows = slice(i * len_box, (i + 1) * len_box)
        # .clone() detaches the slice from the full tensor's storage;
        # torch.save on a view would write the whole underlying storage.
        torch.save(input_data[:, rows, lower:upper].clone(), prepared_pieces_dir / f"Inputs Box {i}" / f"{name}.pt")
        torch.save(label[:, rows, lower:upper].clone(), prepared_pieces_dir / f"Label Box {i}" / f"{name}.pt")

    if j == number_datapoints:
        break

## Store boxes for 2 levels in 2 datasets

In [7]:
# prepare 2nd level
# Builds the second-level dataset from the equally sized boxes: for each box b
# the input is (label channel 0 of box b, input channel 0 of box b, input
# channel 1 of box b) and the label is the labels of box b and box b+1
# concatenated along dimension 0 of each channel.
prepared_dir_2ndlevel = Path(paths["datasets_prepared_dir"]) / f"{dataset_name}_len_{len_box}"
prepared_dir_2ndlevel.mkdir(parents=True, exist_ok=True)
(prepared_dir_2ndlevel / "Inputs").mkdir(parents=True, exist_ok=True)
(prepared_dir_2ndlevel / "Labels").mkdir(parents=True, exist_ok=True)

with open(prepared_pieces_dir / "info.yaml", "r") as info_file:
    info = yaml.safe_load(info_file)
info_k    = deepcopy(info["Inputs"]["Permeability X [m^2]"])
info_T    = deepcopy(info["Labels"]["Temperature [C]"])
# Temperature becomes an additional input channel of the second-level dataset.
info["Inputs"]["Temperature [C]"] = info_T
info["Inputs"]["Permeability X [m^2]"] = info_k
info["Inputs"]["Pressure Gradient [-]"]["index"] = 1
info["Inputs"]["Permeability X [m^2]"]["index"] = 2
info["Inputs"]["Temperature [C]"]["index"] = 0
# The three input channels must occupy three distinct indices.
idx_k = info["Inputs"]["Permeability X [m^2]"]["index"]
idx_t = info["Inputs"]["Temperature [C]"]["index"]
idx_g = info["Inputs"]["Pressure Gradient [-]"]["index"]
assert len({idx_k, idx_t, idx_g}) == 3, "indices of inputs double"
info["Inputs"].pop("Material ID")
info["Inputs"].pop("SDF")
# Dimension 2 of every box was cropped to 64 cells when the boxes were cut.
info["CellsNumber"][1] = 64
print(info["Inputs"])

with open(prepared_dir_2ndlevel / "info.yaml", "w") as info_file:
    yaml.safe_dump(info, info_file)

start_at_box = 2 if len_box == 64 else 1
box_range = range(start_at_box, number_boxes - 1)

# Output ids are `file_id + box * id_stride`. The stride must be larger than the
# highest run id, otherwise samples from different boxes map to the same file
# name and overwrite each other. 1000 is kept as the minimum so that datasets
# with fewer than 1000 runs get the same names as before.
all_file_ids = [
    int(piece.stem.split("_")[1])
    for box in box_range
    for piece in (prepared_pieces_dir / f"Label Box {box}").iterdir()
]
id_stride = max(1000, max(all_file_ids, default=0) + 1)

for box in box_range:
    for file_in_temp in (prepared_pieces_dir / f"Label Box {box}").iterdir():
        file_id = int(file_in_temp.stem.split("_")[1])
        new_id = file_id + box * id_stride
        file_inputs = prepared_pieces_dir / f"Inputs Box {box}" / f"RUN_{file_id}.pt"
        file_label = prepared_pieces_dir / f"Label Box {box+1}" / f"RUN_{file_id}.pt"

        # Load every file once and split it into its channels.
        label_in = torch.load(file_in_temp)
        temp_in, press_in = label_in[0], label_in[1]
        box_inputs = torch.load(file_inputs)
        g_in, k_in = box_inputs[0], box_inputs[1]
        label_out = torch.load(file_label)
        temp_out, press_out = label_out[0], label_out[1]

        inputs = torch.zeros([3, *press_in.shape])
        inputs[idx_k] = k_in
        inputs[idx_g] = g_in
        inputs[idx_t] = temp_in

        # Label covers the current box followed by the next one.
        outputs = torch.zeros([2, press_in.shape[0]*2, press_in.shape[1]])
        outputs[0] = torch.cat((temp_in, temp_out), dim=0)
        outputs[1] = torch.cat((press_in, press_out), dim=0)

        # With use_all_boxes disabled, keep a sample only if channel 0 at the
        # first row of the second box, column 32, exceeds 0.15.
        if use_all_boxes or outputs[0, len_box, 32] > 0.15:
            torch.save(inputs, prepared_dir_2ndlevel / "Inputs" / f"RUN_{new_id}.pt")
            torch.save(outputs, prepared_dir_2ndlevel / "Labels" / f"RUN_{new_id}.pt")

# The per-box scratch files are no longer needed.
shutil.rmtree(prepared_pieces_dir)

{'Permeability X [m^2]': {'index': 2, 'max': 3.9777367510929196e-10, 'mean': 1.7458856582663884e-10, 'min': 6.226533177944304e-11, 'norm': 'Rescale', 'std': 8.052190858531816e-11}, 'Pressure Gradient [-]': {'index': 1, 'max': -0.0015206856187433004, 'mean': -0.002568797441199422, 'min': -0.0034874114207923412, 'norm': 'Rescale', 'std': 0.0005486220470629632}, 'Temperature [C]': {'index': 0, 'max': 15.589948654174805, 'mean': 10.87946891784668, 'min': 10.600004196166992, 'norm': 'Rescale', 'std': 0.20266199111938477}}


In [8]:
# Scratch directory for the "ox" variant: every datapoint is cut along
# dimension 1 into exactly three pieces,
#   box 0: everything before `start_at_box * len_box`,
#   box 1: the next `len_box` cells,
#   box 2: everything after that.
prepared_pieces_dir = Path(paths["datasets_prepared_dir"]) / f"{dataset_name}_separate_boxes_ox_len_{len_box}"
prepared_pieces_dir.mkdir(parents=True, exist_ok=True)
for box in range(3):
    (prepared_pieces_dir / f"Inputs Box {box}").mkdir(parents=True, exist_ok=True)
    (prepared_pieces_dir / f"Label Box {box}").mkdir(parents=True, exist_ok=True)
shutil.copy(prepared1_dir / "info.yaml", prepared_pieces_dir / "info.yaml")

# The cut positions do not depend on the datapoint, so build them once.
box_slices = (
    slice(0, start_at_box * len_box),
    slice(start_at_box * len_box, (start_at_box + 1) * len_box),
    slice((start_at_box + 1) * len_box, None),
)

# Path.iterdir() yields entries in arbitrary order, so two independent listings
# are not guaranteed to line up. Sorting both makes the input/label pairing
# deterministic (files are matched by their position in the sorted listings).
input_files = sorted((prepared1_dir / "Inputs").iterdir())
label_files = sorted((prepared1_dir / "Labels").iterdir())

for j, (input_file, label_file) in enumerate(zip(input_files, label_files), start=1):
    input_data = torch.load(input_file)
    label = torch.load(label_file)
    name = input_file.stem

    # Crop dimension 2 to 64 cells: the central part of a 128-wide field, or
    # all of a 64-wide field. Any other width is rejected explicitly instead
    # of silently reusing the crop window of the previous datapoint.
    width = label.shape[2]
    if width == 128:
        lower, upper = 32, 96
    elif width == 64:
        lower, upper = 0, 64
    else:
        raise ValueError(f"unsupported label width {width} in {label_file}; expected 64 or 128")

    for box, rows in enumerate(box_slices):
        # .clone() detaches the slice from the full tensor's storage;
        # torch.save on a view would write the whole underlying storage.
        torch.save(input_data[:, rows, lower:upper].clone(), prepared_pieces_dir / f"Inputs Box {box}" / f"{name}.pt")
        torch.save(label[:, rows, lower:upper].clone(), prepared_pieces_dir / f"Label Box {box}" / f"{name}.pt")

    if j == number_datapoints:
        break

In [9]:
# prepare 2nd level
# Builds the "ox" second-level dataset from the three-piece cut: the input is
# taken from box 1 (label channel 0 plus both input channels) and the label is
# the labels of box 1 and box 2 concatenated along dimension 0 of each channel.
prepared_dir_2ndlevel = Path(paths["datasets_prepared_dir"]) / f"{dataset_name}_ox_len_{len_box}"
prepared_dir_2ndlevel.mkdir(parents=True, exist_ok=True)
(prepared_dir_2ndlevel / "Inputs").mkdir(parents=True, exist_ok=True)
(prepared_dir_2ndlevel / "Labels").mkdir(parents=True, exist_ok=True)

with open(prepared_pieces_dir / "info.yaml", "r") as info_file:
    info = yaml.safe_load(info_file)
info_k    = deepcopy(info["Inputs"]["Permeability X [m^2]"])
info_T    = deepcopy(info["Labels"]["Temperature [C]"])
# Temperature becomes an additional input channel of the second-level dataset.
info["Inputs"]["Temperature [C]"] = info_T
info["Inputs"]["Permeability X [m^2]"] = info_k
info["Inputs"]["Pressure Gradient [-]"]["index"] = 1
info["Inputs"]["Permeability X [m^2]"]["index"] = 2
info["Inputs"]["Temperature [C]"]["index"] = 0
# The three input channels must occupy three distinct indices.
idx_k = info["Inputs"]["Permeability X [m^2]"]["index"]
idx_t = info["Inputs"]["Temperature [C]"]["index"]
idx_g = info["Inputs"]["Pressure Gradient [-]"]["index"]
assert len({idx_k, idx_t, idx_g}) == 3, "indices of inputs double"
info["Inputs"].pop("Material ID")
info["Inputs"].pop("SDF")
# NOTE(review): the non-"ox" second-level cell writes 64 here, and the boxes
# are cropped to 64 cells in dimension 2 when they are cut, whereas this cell
# writes len_box - confirm which value the consumer of info.yaml expects.
info["CellsNumber"][1] = len_box
print(info["Inputs"])

with open(prepared_dir_2ndlevel / "info.yaml", "w") as info_file:
    yaml.safe_dump(info, info_file)


for file_in_temp in (prepared_pieces_dir / "Label Box 1").iterdir():
    file_id = int(file_in_temp.stem.split("_")[1])
    new_id = file_id
    file_inputs = prepared_pieces_dir / "Inputs Box 1" / f"RUN_{file_id}.pt"
    file_label = prepared_pieces_dir / "Label Box 2" / f"RUN_{file_id}.pt"

    # Load every file once and split it into its channels.
    label_in = torch.load(file_in_temp)
    temp_in, press_in = label_in[0], label_in[1]
    box_inputs = torch.load(file_inputs)
    g_in, k_in = box_inputs[0], box_inputs[1]
    label_out = torch.load(file_label)
    temp_out, press_out = label_out[0], label_out[1]

    inputs = torch.zeros([3, *press_in.shape])
    inputs[idx_k] = k_in
    inputs[idx_g] = g_in
    inputs[idx_t] = temp_in

    # Size the label buffer from the tensors that are actually concatenated.
    # This equals len_box * (number_boxes - start_at_box) whenever the original
    # length is a multiple of len_box, and also works when it is not.
    outputs = torch.zeros([2, temp_in.shape[0] + temp_out.shape[0], press_in.shape[1]])
    outputs[0] = torch.cat((temp_in, temp_out), dim=0)
    outputs[1] = torch.cat((press_in, press_out), dim=0)

    # With use_all_boxes disabled, keep a sample only if channel 0 at the first
    # row after box 1, column 32, exceeds 0.15.
    if use_all_boxes or outputs[0, len_box, 32] > 0.15:
        torch.save(inputs, prepared_dir_2ndlevel / "Inputs" / f"RUN_{new_id}.pt")
        torch.save(outputs, prepared_dir_2ndlevel / "Labels" / f"RUN_{new_id}.pt")

# The per-box scratch files are no longer needed.
shutil.rmtree(prepared_pieces_dir)

{'Permeability X [m^2]': {'index': 2, 'max': 3.9777367510929196e-10, 'mean': 1.7458856582663884e-10, 'min': 6.226533177944304e-11, 'norm': 'Rescale', 'std': 8.052190858531816e-11}, 'Pressure Gradient [-]': {'index': 1, 'max': -0.0015206856187433004, 'mean': -0.002568797441199422, 'min': -0.0034874114207923412, 'norm': 'Rescale', 'std': 0.0005486220470629632}, 'Temperature [C]': {'index': 0, 'max': 15.589948654174805, 'mean': 10.87946891784668, 'min': 10.600004196166992, 'norm': 'Rescale', 'std': 0.20266199111938477}}
