In [6]:
from copy import deepcopy
import yaml
from pathlib import Path
import torch
import shutil
import matplotlib.pyplot as plt

%reload_ext autoreload
%autoreload 2

# Cut boxes


In [7]:
# Name of the prepared dataset that the following cells cut into boxes.
dataset_name = "dataset_medium_k_3e-10_1000dp inputs_gksi"

# paths.yaml holds the directory configuration; only "datasets_prepared_dir"
# is read from it in this cell.
with open("paths.yaml", "r") as paths_file:
    paths = yaml.safe_load(paths_file)

# Directory of the uncut, prepared source dataset.
prepared1_dir = Path(paths["datasets_prepared_dir"]) / dataset_name

## Cut each datapoint into `number_boxes` boxes (along dim 1)

In [8]:
# Split every datapoint into `number_boxes` equally long boxes along dim 1 and
# store box i of each datapoint under "Inputs Box i" / "Label Box i".
number_boxes = 16
number_datapoints = 10000  # upper bound on the number of datapoints that are cut
prepared_pieces_dir = Path(paths["datasets_prepared_dir"]) / f"{dataset_name} cut_{number_boxes}pieces separate_boxes"
prepared_pieces_dir.mkdir(parents=True, exist_ok=True)
for box in range(number_boxes):
    (prepared_pieces_dir / f"Inputs Box {box}").mkdir(parents=True, exist_ok=True)
    (prepared_pieces_dir / f"Label Box {box}").mkdir(parents=True, exist_ok=True)
shutil.copy(prepared1_dir / "info.yaml", prepared_pieces_dir / "info.yaml")

# Path.iterdir() yields entries in arbitrary order, so zipping two unsorted
# listings can pair an input with the label of a different run. Sorting both
# listings makes the pairing (and the subset kept by the cap) deterministic.
# NOTE(review): assumes Inputs/ and Labels/ hold matching file names - confirm.
input_files = sorted((prepared1_dir / "Inputs").iterdir())[:number_datapoints]
label_files = sorted((prepared1_dir / "Labels").iterdir())[:number_datapoints]

for input_file, label_file in zip(input_files, label_files):
    input_tensor = torch.load(input_file)
    label_tensor = torch.load(label_file)
    name = input_file.stem

    # Box length along dim 1; trailing rows that do not fill a whole box are dropped.
    len_box = input_tensor.shape[1] // number_boxes
    for i in range(number_boxes):
        rows = slice(i * len_box, (i + 1) * len_box)
        # clone(): torch.save serialises the whole storage behind a view, so
        # saving the bare slice would write the full datapoint into every box file.
        torch.save(input_tensor[:, rows, :].clone(), prepared_pieces_dir / f"Inputs Box {i}" / f"{name}.pt")
        torch.save(label_tensor[:, rows, :].clone(), prepared_pieces_dir / f"Label Box {i}" / f"{name}.pt")

## Store boxes for 2 levels in 2 datasets

In [9]:
# prepare 1st level
# The 1st-level dataset consists of box 0 of every datapoint plus the unchanged info.yaml.
prepared_dir_1stlevel = Path(paths["datasets_prepared_dir"]) / f"{dataset_name} cut_{number_boxes}pieces separate_boxes 1st level"
prepared_dir_1stlevel.mkdir(parents=True, exist_ok=True)

shutil.copy(prepared_pieces_dir / "info.yaml", prepared_dir_1stlevel / "info.yaml")
# dirs_exist_ok=True (Python >= 3.8) keeps this cell re-runnable: without it
# copytree raises FileExistsError as soon as "Inputs"/"Labels" already exist.
shutil.copytree(prepared_pieces_dir / "Inputs Box 0", prepared_dir_1stlevel / "Inputs", dirs_exist_ok=True)
shutil.copytree(prepared_pieces_dir / "Label Box 0", prepared_dir_1stlevel / "Labels", dirs_exist_ok=True)

PosixPath('/scratch/sc/pillerls/datasets_prepared/dataset_medium_k_3e-10_1000dp inputs_gksi cut_16pieces separate_boxes 1st level/Labels')

In [10]:
# prepare 2nd level
# Each 2nd-level sample is built from one box of one run:
#   inputs  - 3 channels: label channel 0 of the box (stored as temperature),
#             input channel 0 (stored as pressure gradient) and input channel 1
#             (stored as permeability), placed at the indices written to info.yaml
#   outputs - 1 channel: label channel 0 of the box concatenated along dim 0
#             with label channel 0 of the following box
prepared_dir_2ndlevel = Path(paths["datasets_prepared_dir"]) / f"{dataset_name} cut_{number_boxes}pieces separate_boxes 2nd level gkt"
prepared_dir_2ndlevel.mkdir(parents=True, exist_ok=True)
(prepared_dir_2ndlevel / "Inputs").mkdir(parents=True, exist_ok=True)
(prepared_dir_2ndlevel / "Labels").mkdir(parents=True, exist_ok=True)

# Derive the 2nd-level info.yaml from the 1st-level one.
with open(prepared_dir_1stlevel / "info.yaml", "r") as info_file:
    info = yaml.safe_load(info_file)
info_k    = deepcopy(info["Inputs"]["Permeability X [m^2]"])
info_T    = deepcopy(info["Labels"]["Temperature [C]"])
# Temperature becomes an additional input; its statistics are copied from the labels.
info["Inputs"]["Temperature [C]"] = info_T
info["Inputs"]["Permeability X [m^2]"] = info_k
info["Inputs"]["Pressure Gradient [-]"]["index"] = 1
info["Inputs"]["Permeability X [m^2]"]["index"] = 2
info["Inputs"]["Temperature [C]"]["index"] = 0
# every input must have its own channel index (all three are compared, not only k and T)
idx_k = info["Inputs"]["Permeability X [m^2]"]["index"]
idx_t = info["Inputs"]["Temperature [C]"]["index"]
idx_g = info["Inputs"]["Pressure Gradient [-]"]["index"]
assert len({idx_k, idx_t, idx_g}) == 3, "indices of inputs double"
# Properties that are not part of the 2nd-level dataset.
info["Inputs"].pop("Material ID")
info["Inputs"].pop("SDF")
info["Labels"].pop("Liquid Pressure [Pa]")
print(info["Inputs"])

# Context manager so the file is flushed and closed before the data is written.
with open(prepared_dir_2ndlevel / "info.yaml", "w") as info_file:
    yaml.safe_dump(info, info_file)

# The upper bound leaves out the last box because every sample needs "Label Box {box+1}".
# NOTE(review): why boxes 0 and 1 are skipped is not visible here - confirm.
for box in range(2, number_boxes-1):
    for file_in_temp in (prepared_pieces_dir / f"Label Box {box}").iterdir():
        # File stems are parsed as "<prefix>_<id>"; the loads below expect "RUN_<id>.pt".
        file_id = int(file_in_temp.stem.split("_")[1])
        # NOTE(review): ids of different boxes only stay distinct while file_id < 1000.
        new_id = file_id + (box) * 1000
        file_inputs = prepared_pieces_dir / f"Inputs Box {box}" / f"RUN_{file_id}.pt"
        file_label = prepared_pieces_dir / f"Label Box {box+1}" / f"RUN_{file_id}.pt"

        # Load every file once instead of once per channel.
        box_inputs = torch.load(file_inputs)
        temp_in = torch.load(file_in_temp)[0]
        temp_out = torch.load(file_label)[0]
        g_in = box_inputs[0]
        k_in = box_inputs[1]

        inputs = torch.zeros([3, *temp_in.shape])
        inputs[idx_k] = k_in
        inputs[idx_g] = g_in
        inputs[idx_t] = temp_in

        outputs = torch.zeros([1, temp_in.shape[0]*2, temp_in.shape[1]])
        outputs[0] = torch.cat((temp_in, temp_out), dim=0)

        # Keep the sample only if the label value in the last row, column 31 exceeds 0.05.
        # NOTE(review): presumably drops samples without signal in the next box - confirm.
        if outputs[0, -1, 31] > 0.05:
            torch.save(inputs, prepared_dir_2ndlevel / "Inputs" / f"RUN_{new_id}.pt")
            torch.save(outputs, prepared_dir_2ndlevel / "Labels" / f"RUN_{new_id}.pt")

{'Permeability X [m^2]': {'index': 2, 'max': 2.9672317380935453e-10, 'mean': 8.87306061958526e-11, 'min': 1.0086227859862351e-11, 'norm': 'Rescale', 'std': 7.734657359037556e-11}, 'Pressure Gradient [-]': {'index': 1, 'max': -0.0015021893195807934, 'mean': -0.002550203585997224, 'min': -0.0034940664190799, 'norm': 'Rescale', 'std': 0.0005860578967258334}, 'Temperature [C]': {'index': 0, 'max': 15.601107597351074, 'mean': 10.706022262573242, 'min': 10.600000381469727, 'norm': 'Rescale', 'std': 0.33781468868255615}}
