In [1]:
# =============================
# library
# =============================
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import polars as pl
from tqdm import tqdm
import cupy as cp
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# =============================
# constant
# =============================
# Every location used by this notebook hangs off one storage root, so the
# individual paths are derived from it instead of being spelled out in full.
_STORAGE_ROOT = Path("../storage/leap")
DATA_DIR = _STORAGE_ROOT / "data"
OUTPUT_DIR = _STORAGE_ROOT / "output"
TEST_PATH = DATA_DIR / "test.csv"

In [3]:
# =============================
# settings
# =============================
# `fe` is the id of the feature set written by this notebook; everything it
# produces goes under OUTPUT_DIR/fe/fe<id>, with a "save" sub-folder beside it.
fe = "163"
fe_dir = OUTPUT_DIR / "fe" / f"fe{fe}"
fe_save_dir = fe_dir / "save"

# exist_ok=True makes re-running this cell harmless when the folders exist.
for out_dir in (fe_dir, fe_save_dir):
    out_dir.mkdir(parents=True, exist_ok=True)

In [4]:
# =====================
# Data
# =====================
# Ids of the three upstream feature sets this notebook reads from
# (main, "diff" and "add", going by the variable names).
data_fe, data_diff_fe, data_add_fe = "160", "161", "162"

# Each feature set lives in its own OUTPUT_DIR/fe/fe<id> folder.
data_dir = OUTPUT_DIR / "fe" / f"fe{data_fe}"
data_diff_dir = OUTPUT_DIR / "fe" / f"fe{data_diff_fe}"
data_add_dir = OUTPUT_DIR / "fe" / f"fe{data_add_fe}"


In [5]:
# feature
# The same nine variable names are used for the plain arrays in `data_dir` and
# for their "_diff" counterparts in `data_diff_dir`, so both path lists are
# generated from a single list of names.
seq_var_names = [
    "state_t",
    "state_q0001",
    "state_q0002",
    "state_q0003",
    "state_u",
    "state_v",
    "pbuf_ozone",
    "pbuf_CH4",
    "pbuf_N2O",
]

seq_list = [data_dir / f"fe{data_fe}_{name}.npy" for name in seq_var_names]

seq_diff_list = [
    data_diff_dir / f"fe{data_diff_fe}_{name}_diff.npy" for name in seq_var_names
]

# The plain paths come first, followed by the diff paths (18 paths in total).
seq_list.extend(seq_diff_list)

other_path = data_dir / f"fe{data_fe}_other.npy"

add_path = data_add_dir / f"fe{data_add_fe}_state_sum.npy"

# target
target_var_names = [
    "ptend_t",
    "ptend_q0001",
    "ptend_q0002",
    "ptend_q0003",
    "ptend_u",
    "ptend_v",
]
target_list = [
    data_dir / f"fe{data_fe}_{name}_target.npy" for name in target_var_names
]
other_target_path = data_dir / f"fe{data_fe}_other_target.npy"

In [6]:
# =====================
# Main
# =====================

# seq
# Every file in `seq_list` is reshaped to (-1, 60, 1) and the pieces are joined
# on the last axis, so the result has one slot on axis 2 per input file.
# Building the list inside the call means the per-file arrays are released as
# soon as the concatenated array exists.
seq_feature = np.concatenate(
    [np.load(path).reshape(-1, 60, 1) for path in tqdm(seq_list)],
    axis=2,
)

# The array at `add_path` is flattened to a single column and appended to the
# right of the array loaded from `other_path`.
other_feature = np.concatenate(
    (np.load(other_path), np.load(add_path).reshape(-1, 1)),
    axis=1,
)

# Targets get the same reshape/join treatment, but are cast to float32 first.
target_seq = np.concatenate(
    [
        np.load(path).astype(np.float32).reshape(-1, 60, 1)
        for path in tqdm(target_list)
    ],
    axis=2,
)
other_target = np.load(other_target_path).astype(np.float32)

100%|██████████| 18/18 [02:22<00:00,  7.91s/it]
100%|██████████| 6/6 [00:17<00:00,  2.87s/it]


In [7]:
# Number of complete 384-row chunks in `seq_feature`. Floor division keeps the
# computation in integer arithmetic (no round trip through float).
# NOTE: rows left over after the last full chunk are not counted, so the save
# loop that iterates over range(batch) does not write them.
batch = len(seq_feature) // 384

In [8]:
# Write each 384-row chunk of the four arrays to its own set of .npy files and
# remember the shared path prefix ("<fe_dir>/fe<fe>_<i>") of every chunk.
data_list = []
for i in tqdm(range(batch)):
    # Rows belonging to chunk i; the same window is applied to all four arrays.
    rows = slice(i * 384, (i + 1) * 384)

    np.save(fe_dir / f"fe{fe}_{i}_seq_feature.npy", seq_feature[rows])
    np.save(fe_dir / f"fe{fe}_{i}_other_feature.npy", other_feature[rows])
    np.save(fe_dir / f"fe{fe}_{i}_seq_target.npy", target_seq[rows])
    np.save(fe_dir / f"fe{fe}_{i}_other_target.npy", other_target[rows])

    data_list.append(str(fe_dir / f"fe{fe}_{i}"))

100%|██████████| 26280/26280 [12:15<00:00, 35.72it/s]


In [9]:
# One row per saved chunk; "data_path" holds the chunk's path prefix.
data = pd.DataFrame({"data_path": data_list})

In [10]:
# Persist the table of chunk path prefixes alongside the chunk files.
data_list_path = fe_dir / f"fe{fe}_data_list.parquet"
data.to_parquet(data_list_path)

In [11]:
# Show the table of saved chunk path prefixes as this cell's output.
data

Unnamed: 0,data_path
0,../storage/leap/output/fe/fe163/fe163_0
1,../storage/leap/output/fe/fe163/fe163_1
2,../storage/leap/output/fe/fe163/fe163_2
3,../storage/leap/output/fe/fe163/fe163_3
4,../storage/leap/output/fe/fe163/fe163_4
...,...
26275,../storage/leap/output/fe/fe163/fe163_26275
26276,../storage/leap/output/fe/fe163/fe163_26276
26277,../storage/leap/output/fe/fe163/fe163_26277
26278,../storage/leap/output/fe/fe163/fe163_26278
