In [1]:
# =============================
# library
# =============================
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import polars as pl
from tqdm import tqdm
import pickle

In [2]:
# =============================
# constant
# =============================
# Directory holding the raw CSV inputs; the train/test file paths are derived
# from it so the location is spelled out only once.
DATA_DIR = Path("../storage/leap/data")
TRAIN_PATH = DATA_DIR / "train.csv"
TEST_PATH = DATA_DIR / "test.csv"
# Sibling directory of DATA_DIR that receives every generated artifact.
OUTPUT_DIR = DATA_DIR.parent / "output"

In [3]:
# =============================
# settings
# =============================
# Identifier of this run; it names the output folder and prefixes every file
# saved by the later cells.
fe = "134"
fe_dir = OUTPUT_DIR / "fe" / f"fe{fe}"
fe_save_dir = fe_dir / "save"
# Creating the leaf with parents=True also creates fe_dir itself.
fe_save_dir.mkdir(parents=True, exist_ok=True)

# Input table produced under the fe100 output folder.
TEST_PATH1 = OUTPUT_DIR / "fe" / "fe100" / "fe100_test.parquet"

# Pickled scaling statistics stored under the fe101 output folder
# (presumably mean/std pairs, going by the file names -- confirm).
train_sc_path = OUTPUT_DIR.joinpath("fe", "fe101", "fe101_train_mean_std.pkl")
train_target_path = OUTPUT_DIR.joinpath("fe", "fe101", "fe101_target_mean_std.pkl")

In [4]:
# =============================
# columns
# =============================
# Each of these groups is a list of 60 column names of the form
# "<prefix>_<index>" with index running from 0 to 59.
(
    state_t,
    state_q0001,
    state_q0002,
    state_q0003,
    state_u,
    state_v,
    pbuf_ozone,
    pbuf_CH4,
    pbuf_N2O,
) = (
    [f"{prefix}_{idx}" for idx in range(60)]
    for prefix in (
        "state_t",
        "state_q0001",
        "state_q0002",
        "state_q0003",
        "state_u",
        "state_v",
        "pbuf_ozone",
        "pbuf_CH4",
        "pbuf_N2O",
    )
)

# Remaining input columns that are not part of a 60-column group.
other = [
    'state_ps',
    'pbuf_SOLIN',
    'pbuf_LHFLX',
    'pbuf_SHFLX',
    'pbuf_TAUX',
    'pbuf_TAUY',
    'pbuf_COSZRS',
    'cam_in_ALDIF',
    'cam_in_ALDIR',
    'cam_in_ASDIF',
    'cam_in_ASDIR',
    'cam_in_LWUP',
    'cam_in_ICEFRAC',
    'cam_in_LANDFRAC',
    'cam_in_OCNFRAC',
    'cam_in_SNOWHLAND',
]

# Processing order of the feature groups; `other` comes last.
cols_list = [
    state_t,
    state_q0001,
    state_q0002,
    state_q0003,
    state_u,
    state_v,
    pbuf_ozone,
    pbuf_CH4,
    pbuf_N2O,
    other,
]

In [5]:
# =============================
# target
# =============================
# Each of these groups is a list of 60 column names of the form
# "<prefix>_<index>" with index running from 0 to 59.
ptend_t, ptend_q0001, ptend_q0002, ptend_q0003, ptend_u, ptend_v = (
    [f"{prefix}_{idx}" for idx in range(60)]
    for prefix in (
        "ptend_t",
        "ptend_q0001",
        "ptend_q0002",
        "ptend_q0003",
        "ptend_u",
        "ptend_v",
    )
)

# Remaining target columns that are not part of a 60-column group.
other_target = [
    'cam_out_NETSW',
    'cam_out_FLWDS',
    'cam_out_PRECSC',
    'cam_out_PRECC',
    'cam_out_SOLS',
    'cam_out_SOLL',
    'cam_out_SOLSD',
    'cam_out_SOLLD',
]

# All target groups in order; `other_target` comes last.
target_list = [
    ptend_t,
    ptend_q0001,
    ptend_q0002,
    ptend_q0003,
    ptend_u,
    ptend_v,
    other_target,
]

In [6]:
# =============================
# main
# =============================
# Read the parquet file at TEST_PATH1 into a polars DataFrame; the cells below
# pull the feature columns out of this frame group by group.
df = pl.read_parquet(TEST_PATH1)

In [7]:
# Load the two pickled statistics dictionaries referenced in the settings cell.
# NOTE: pickle.load can execute arbitrary code, so only point these paths at
# files produced by this project.
with train_sc_path.open("rb") as stats_file:
    train_sc_dict = pickle.load(stats_file)
with train_target_path.open("rb") as stats_file:
    target_sc_dict = pickle.load(stats_file)

In [8]:
# Standardize every feature group with the statistics in train_sc_dict and
# save each group as a float32 .npy file inside fe_dir.
#
# train_sc_dict[column][0] is subtracted and train_sc_dict[column][1] is the
# divisor (presumably mean and std, going by the pickle file name -- confirm).
# NOTE(review): a zero divisor would yield inf/nan here; verify the statistics
# cannot contain zeros.
for group_cols in tqdm(cols_list):
    values = df[group_cols].to_numpy()

    if len(group_cols) == 60:
        # 60-column groups are named after their shared prefix,
        # e.g. "state_q0001_0" -> "state_q0001".
        prefix = "_".join(group_cols[0].split("_")[:2])
        out_name = f"fe{fe}_{prefix}.npy"
        if prefix in ("state_q0002", "state_q0003"):
            # These two groups are additionally kept unscaled.
            np.save(fe_dir / f"fe{fe}_{prefix}_raw.npy", values)
    else:
        # The only non-60-column group is the list of remaining columns.
        out_name = f"fe{fe}_other.npy"

    # Write into a fresh float32 buffer instead of assigning back into
    # `values`: polars only guarantees a writable array when asked for one,
    # and an integer-typed array would silently truncate the scaled floats.
    scaled = np.empty(values.shape, dtype=np.float32)
    for col_idx, col_name in enumerate(group_cols):
        # The statistics are keyed by column name for every group.
        offset, scale = train_sc_dict[col_name][0], train_sc_dict[col_name][1]
        scaled[:, col_idx] = (values[:, col_idx] - offset) / scale

    np.save(fe_dir / out_name, scaled)

100%|██████████| 10/10 [00:01<00:00,  6.06it/s]
