In [12]:
# =============================
# library
# =============================
import pickle
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import polars as pl
from tqdm import tqdm

In [13]:
# =============================
# constant
# =============================
# All locations are relative paths, so they resolve against the directory
# the notebook kernel is started from.

# Directories
DATA_DIR = Path("../storage/leap/data")
OUTPUT_DIR = Path("../storage/leap/output")

# Raw competition CSV files
TRAIN_PATH = Path("../storage/leap/data/train.csv")
TEST_PATH = Path("../storage/leap/data/test.csv")

In [14]:
# =============================
# settings
# =============================
# Identifier of this feature-engineering step; it names the output folder
# and is embedded in the file names written by this notebook.
fe = "101"

# Output locations for this step: <OUTPUT_DIR>/fe/fe<fe>/ and its "save" subfolder.
fe_dir = OUTPUT_DIR / "fe" / f"fe{fe}"
fe_save_dir = fe_dir / "save"

# Create both folders up front; exist_ok makes re-running the cell harmless.
fe_dir.mkdir(parents=True, exist_ok=True)
fe_save_dir.mkdir(parents=True, exist_ok=True)

# Input for this step: the training parquet written by the fe100 step.
TRAIN_PATH1 = Path("../storage/leap/output/fe/fe100/fe100_train.parquet")

In [15]:
# =============================
# columns
# =============================
# Indexed input groups: each variable contributes 60 columns named
# "<variable>_<k>" with k = 0..59.
state_t = [f"state_t_{k}" for k in range(60)]
state_q0001 = [f"state_q0001_{k}" for k in range(60)]
state_q0002 = [f"state_q0002_{k}" for k in range(60)]
state_q0003 = [f"state_q0003_{k}" for k in range(60)]
state_u = [f"state_u_{k}" for k in range(60)]
state_v = [f"state_v_{k}" for k in range(60)]
pbuf_ozone = [f"pbuf_ozone_{k}" for k in range(60)]
pbuf_CH4 = [f"pbuf_CH4_{k}" for k in range(60)]
pbuf_N2O = [f"pbuf_N2O_{k}" for k in range(60)]

# Input columns that are not part of an indexed group (one column each).
other = [
    "state_ps",
    "pbuf_SOLIN",
    "pbuf_LHFLX",
    "pbuf_SHFLX",
    "pbuf_TAUX",
    "pbuf_TAUY",
    "pbuf_COSZRS",
    "cam_in_ALDIF",
    "cam_in_ALDIR",
    "cam_in_ASDIF",
    "cam_in_ASDIR",
    "cam_in_LWUP",
    "cam_in_ICEFRAC",
    "cam_in_LANDFRAC",
    "cam_in_OCNFRAC",
    "cam_in_SNOWHLAND",
]

# All input groups, in the order in which their statistics are computed.
cols_list = [
    state_t,
    state_q0001,
    state_q0002,
    state_q0003,
    state_u,
    state_v,
    pbuf_ozone,
    pbuf_CH4,
    pbuf_N2O,
    other,
]

In [16]:
# =============================
# target
# =============================
# Indexed target groups: each variable contributes 60 columns named
# "<variable>_<k>" with k = 0..59.
ptend_t = [f"ptend_t_{k}" for k in range(60)]
ptend_q0001 = [f"ptend_q0001_{k}" for k in range(60)]
ptend_q0002 = [f"ptend_q0002_{k}" for k in range(60)]
ptend_q0003 = [f"ptend_q0003_{k}" for k in range(60)]
ptend_u = [f"ptend_u_{k}" for k in range(60)]
ptend_v = [f"ptend_v_{k}" for k in range(60)]

# Target columns that are not part of an indexed group (one column each).
other_target = [
    "cam_out_NETSW",
    "cam_out_FLWDS",
    "cam_out_PRECSC",
    "cam_out_PRECC",
    "cam_out_SOLS",
    "cam_out_SOLL",
    "cam_out_SOLSD",
    "cam_out_SOLLD",
]

# All target groups, in the order in which their statistics are computed.
target_list = [
    ptend_t,
    ptend_q0001,
    ptend_q0002,
    ptend_q0003,
    ptend_u,
    ptend_v,
    other_target,
]

In [17]:
# =============================
# main
# =============================
# Read the fe100 training parquet (TRAIN_PATH1) into a polars DataFrame.
# `df` is the only data source for the mean/std statistics computed below.
df = pl.read_parquet(TRAIN_PATH1)

In [18]:
# Per-column statistics of the input features.
# sc_dict maps each input column name to [mean, std] (plain Python floats),
# computed over every row of `df`.
sc_dict = {}
for group in tqdm(cols_list):
    # Each entry is keyed by the actual column name. Rebuilding the key from
    # the first two "_"-separated tokens plus an index in range(60) only works
    # while every indexed group has exactly 60 columns and a two-token base
    # name; iterating over the names themselves handles any group.
    for name in group:
        values = df[name].to_numpy()
        mean = np.mean(values).item()
        # Floor the std at 1e-8 so (near-)constant columns never yield a zero
        # divisor (presumably for later standardisation; that step is not in
        # this notebook).
        std = max(np.std(values).item(), 1e-8)
        sc_dict[name] = [mean, std]

100%|██████████| 10/10 [00:09<00:00,  1.03it/s]


In [19]:
# Per-column statistics of the targets.
# target_dict maps each target column name to [mean, std] (plain Python
# floats), computed over every row of `df`.
target_dict = {}
for group in tqdm(target_list):
    # Each entry is keyed by the actual column name. Rebuilding the key from
    # the first two "_"-separated tokens plus an index in range(60) only works
    # while every indexed group has exactly 60 columns and a two-token base
    # name; iterating over the names themselves handles any group.
    for name in group:
        values = df[name].to_numpy()
        mean = np.mean(values).item()
        std = np.std(values).item()
        # Only an exactly-zero std is replaced (by 1e-15); small non-zero
        # values are kept as they are, i.e. no 1e-8 floor is applied to the
        # targets.
        if std == 0:
            std = 1e-15
        target_dict[name] = [mean, std]

100%|██████████| 7/7 [00:06<00:00,  1.07it/s]


In [20]:
# Persist both statistics dictionaries as pickle files inside fe_dir.
# `pickle` comes from the notebook's import cell, so it is no longer
# re-imported (twice) in the middle of the notebook.

# Input-feature statistics: column name -> [mean, std]
with open(fe_dir / f"fe{fe}_train_mean_std.pkl", "wb") as f:
    pickle.dump(sc_dict, f)

# Target statistics: column name -> [mean, std]
with open(fe_dir / f"fe{fe}_target_mean_std.pkl", "wb") as f:
    pickle.dump(target_dict, f)

In [21]:
# Show the input-feature statistics (column name -> [mean, std]) for a visual check.
sc_dict

{'state_t_0': [216.08305367231628, 6.668770916514148],
 'state_t_1': [228.27779044489336, 8.641281088612388],
 'state_t_2': [237.58638295110504, 8.200393277536383],
 'state_t_3': [248.14284725316176, 6.745308323529102],
 'state_t_4': [256.4679771208904, 6.214607671485672],
 'state_t_5': [259.6864816751539, 8.173630895909007],
 'state_t_6': [255.50887743810492, 10.026824235547805],
 'state_t_7': [246.9504397993467, 9.96064008944706],
 'state_t_8': [237.24749846506413, 9.055613769962317],
 'state_t_9': [230.52146287400132, 8.643965367544808],
 'state_t_10': [225.26538135136255, 7.9837417312021195],
 'state_t_11': [221.0922454402649, 7.303135854791256],
 'state_t_12': [217.23256627666643, 6.63213076265934],
 'state_t_13': [213.98563212965843, 6.272363870046845],
 'state_t_14': [210.58793096461932, 6.525889344210344],
 'state_t_15': [207.05370116476143, 7.688341888431221],
 'state_t_16': [202.81917502332212, 10.07782148778124],
 'state_t_17': [200.01434575925083, 11.889721060082127],
 'sta

In [22]:
# Show the target statistics (column name -> [mean, std]) for a visual check.
target_dict

{'ptend_t_0': [9.950876150603026e-06, 3.2786786324293e-05],
 'ptend_t_1': [-7.326890445470611e-06, 4.4540696322423004e-05],
 'ptend_t_2': [-3.201913682546687e-06, 5.2653680222895425e-05],
 'ptend_t_3': [-3.091983244695095e-06, 6.872012662622123e-05],
 'ptend_t_4': [-3.5792479605723227e-06, 9.13480650734226e-05],
 'ptend_t_5': [-2.181845797757316e-06, 0.0001104068646511213],
 'ptend_t_6': [-3.00623663042169e-07, 0.00010360256301697678],
 'ptend_t_7': [4.0230481274751026e-07, 7.891979327013092e-05],
 'ptend_t_8': [1.7797154277411512e-07, 5.0346770087067765e-05],
 'ptend_t_9': [-1.9556279340029954e-08, 3.875975316779509e-05],
 'ptend_t_10': [1.377147071253002e-07, 2.9541879778071687e-05],
 'ptend_t_11': [1.8774186447102192e-07, 2.2692078235391624e-05],
 'ptend_t_12': [2.977559926081653e-07, 1.6743932451346525e-05],
 'ptend_t_13': [3.8057558610221577e-07, 1.2606616241294976e-05],
 'ptend_t_14': [5.785970069667658e-07, 9.327015829005675e-06],
 'ptend_t_15': [9.491955239493268e-07, 7.3797832