In [3]:
# =============================
# library
# =============================
import pandas as pd
import numpy as np
from pathlib import Path
import matplotlib.pyplot as plt
import polars as pl
from sklearn.preprocessing import StandardScaler

In [4]:
# =============================
# constant
# =============================
# Common root shared by the data and output directories. Every path below is
# derived from it, so relocating the storage tree is a one-line change.
LEAP_DIR = Path("../storage/leap")
DATA_DIR = LEAP_DIR / "data"
OUTPUT_DIR = LEAP_DIR / "output"
TEST_PATH = DATA_DIR / "test.csv"

In [5]:
# =============================
# settings
# =============================
# Input: the training parquet file used by this notebook.
TRAIN_PATH = Path("../storage/leap/data/train_0008/train_0008_2_concat.parquet")

# Experiment id; it names this run's output folder and output files.
fe = "104"
fe_dir = OUTPUT_DIR / "fe" / f"fe{fe}"
fe_save_dir = fe_dir / "save"

# Create the output folders up front; existing folders are left untouched.
fe_dir.mkdir(parents=True, exist_ok=True)
fe_save_dir.mkdir(parents=True, exist_ok=True)

In [6]:
# =============================
# columns
# =============================
# Indexed input features: for every prefix below, build the 60 column names
# "<prefix>_0" ... "<prefix>_59". The unpacking targets are listed in the same
# order as the prefixes.
(
    state_t,
    state_q0001,
    state_q0002,
    state_q0003,
    state_u,
    state_v,
    pbuf_ozone,
    pbuf_CH4,
    pbuf_N2O,
) = (
    [f"{prefix}_{idx}" for idx in range(60)]
    for prefix in (
        "state_t",
        "state_q0001",
        "state_q0002",
        "state_q0003",
        "state_u",
        "state_v",
        "pbuf_ozone",
        "pbuf_CH4",
        "pbuf_N2O",
    )
)

# Input features that are a single column each (no index suffix).
other = [
    "state_ps",
    "pbuf_SOLIN",
    "pbuf_LHFLX",
    "pbuf_SHFLX",
    "pbuf_TAUX",
    "pbuf_TAUY",
    "pbuf_COSZRS",
    "cam_in_ALDIF",
    "cam_in_ALDIR",
    "cam_in_ASDIF",
    "cam_in_ASDIR",
    "cam_in_LWUP",
    "cam_in_ICEFRAC",
    "cam_in_LANDFRAC",
    "cam_in_OCNFRAC",
    "cam_in_SNOWHLAND",
]

# All input feature groups, one list of column names per group.
cols_list = [
    state_t,
    state_q0001,
    state_q0002,
    state_q0003,
    state_u,
    state_v,
    pbuf_ozone,
    pbuf_CH4,
    pbuf_N2O,
    other,
]

In [7]:
# =============================
# target
# =============================
# Indexed targets: for every prefix below, build the 60 column names
# "<prefix>_0" ... "<prefix>_59". The unpacking targets are listed in the same
# order as the prefixes.
(
    ptend_t,
    ptend_q0001,
    ptend_q0002,
    ptend_q0003,
    ptend_u,
    ptend_v,
) = (
    [f"{prefix}_{idx}" for idx in range(60)]
    for prefix in (
        "ptend_t",
        "ptend_q0001",
        "ptend_q0002",
        "ptend_q0003",
        "ptend_u",
        "ptend_v",
    )
)

# Targets that are a single column each (no index suffix).
other_target = [
    "cam_out_NETSW",
    "cam_out_FLWDS",
    "cam_out_PRECSC",
    "cam_out_PRECC",
    "cam_out_SOLS",
    "cam_out_SOLL",
    "cam_out_SOLSD",
    "cam_out_SOLLD",
]

# All target groups, one list of column names per group.
target_list = [
    ptend_t,
    ptend_q0001,
    ptend_q0002,
    ptend_q0003,
    ptend_u,
    ptend_v,
    other_target,
]

In [8]:
# =============================
# main
# =============================
# Load the training parquet file at TRAIN_PATH into a polars DataFrame.
df = pl.read_parquet(TRAIN_PATH)

In [9]:
# Scale every target column of `df` by the weight found in the first row of the
# sample submission file.
#
# Only that first row is used, so only one row is parsed (nrows=1) instead of
# loading the whole CSV and slicing it afterwards. Column dtypes are therefore
# inferred from that row alone.
sample = pd.read_csv(DATA_DIR / "sample_submission.csv", nrows=1)

# Row 0 without its first column (the id column): a Series mapping each target
# column name to its weight.
target_weights = sample.iloc[0, 1:]

# Build all the multiplications up front and apply them in one with_columns
# call rather than rebuilding the frame once per column. Each weight is passed
# as a plain scalar, not a length-1 array that relies on broadcasting.
#
# NOTE: `df` is rebound to the scaled frame, so running this cell a second time
# applies the weights again. Re-run the cell that loads `df` first.
df = df.with_columns(
    [pl.col(name) * float(weight) for name, weight in target_weights.items()]
)

In [10]:
# Write the frame into this experiment's output folder. `fe_dir` is the same
# OUTPUT_DIR / "fe" / f"fe{fe}" path that the settings cell builds and creates,
# so it is reused here instead of being spelled out a second time.
df.write_parquet(fe_dir / f"fe{fe}_train.parquet")

In [11]:
# Display the frame after scaling (rendered as a truncated preview of the first and last rows).
df

sample_id,state_t_0,state_t_1,state_t_2,state_t_3,state_t_4,state_t_5,state_t_6,state_t_7,state_t_8,state_t_9,state_t_10,state_t_11,state_t_12,state_t_13,state_t_14,state_t_15,state_t_16,state_t_17,state_t_18,state_t_19,state_t_20,state_t_21,state_t_22,state_t_23,state_t_24,state_t_25,state_t_26,state_t_27,state_t_28,state_t_29,state_t_30,state_t_31,state_t_32,state_t_33,state_t_34,state_t_35,…,ptend_v_31,ptend_v_32,ptend_v_33,ptend_v_34,ptend_v_35,ptend_v_36,ptend_v_37,ptend_v_38,ptend_v_39,ptend_v_40,ptend_v_41,ptend_v_42,ptend_v_43,ptend_v_44,ptend_v_45,ptend_v_46,ptend_v_47,ptend_v_48,ptend_v_49,ptend_v_50,ptend_v_51,ptend_v_52,ptend_v_53,ptend_v_54,ptend_v_55,ptend_v_56,ptend_v_57,ptend_v_58,ptend_v_59,cam_out_NETSW,cam_out_FLWDS,cam_out_PRECSC,cam_out_PRECC,cam_out_SOLS,cam_out_SOLL,cam_out_SOLSD,cam_out_SOLLD
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""train_0""",213.015399,222.982504,229.060155,244.718725,256.363167,264.674508,263.137177,253.361152,242.91291,235.829238,229.338916,224.513291,220.461853,216.633896,211.724516,206.362492,201.731377,197.998571,195.274716,197.218602,199.60527,204.183593,209.368983,214.901717,220.040567,224.758955,229.117382,233.126458,236.934911,240.630485,244.383587,248.090172,251.790612,255.330826,258.59554,261.492111,…,3.6847e-9,3.9531e-9,4.2460e-10,-1.7552e-10,-6.7376e-10,-1.7862e-9,-1.7459e-10,1.6805e-9,-1.0731e-9,-6.2402e-10,3.7018e-10,1.0093e-9,5.6612e-10,-5.6707e-10,-9.0547e-10,-1.7536e-10,6.5313e-10,5.8093e-10,-1.1141e-8,6.7917e-8,-5.0707e-8,-2.0211e-10,-2.1007e-9,2.9645e-8,-1.1891e-7,7.9435e-8,-2.8188e-8,-0.000001,0.000001,0.0,374.326122,0.0,0.0,0.0,0.0,0.0,0.0
"""train_1""",212.928805,219.215263,226.877896,245.088931,257.724639,266.303036,265.12261,255.492992,243.913295,236.504822,230.408508,225.792889,222.184908,218.837126,215.133234,211.383674,207.935616,204.266539,198.797919,199.98863,200.957086,204.487211,208.055326,212.181648,216.142603,220.222281,224.329648,228.490616,232.692928,236.763945,240.887229,244.892055,248.500952,252.02758,255.302474,258.18134,…,-0.000001,-8.7064e-7,5.0300e-7,-1.1449e-7,-1.5131e-7,-4.4983e-7,7.0812e-8,3.8577e-7,-2.3506e-7,-4.0316e-7,1.8155e-7,-4.0392e-7,1.0877e-7,-0.000001,-5.8285e-7,0.000001,-6.8549e-8,-0.000003,-9.0245e-7,0.000003,0.000002,1.0867e-7,-2.6583e-7,-8.2204e-7,-0.000003,-0.000006,-0.000012,0.000023,0.000001,0.0,368.961544,0.0,2.4889e-9,0.0,0.0,0.0,0.0
"""train_2""",213.879147,230.127471,232.440488,243.798987,253.231005,259.56906,257.504798,249.007208,238.955028,232.94693,227.048228,222.125972,216.73301,212.257303,207.047447,199.347922,191.294047,188.459846,190.43572,192.595264,196.177299,202.616964,209.761787,216.48727,222.673318,228.315575,233.576515,238.438382,242.860587,246.962451,250.877544,254.541771,258.037442,261.36587,264.301403,267.013562,…,0.000002,0.000003,8.7341e-7,0.000002,1.9450e-7,1.0955e-7,-1.1678e-7,1.7391e-7,3.2977e-7,-3.4538e-7,-8.4793e-8,0.000003,4.5718e-7,-0.000001,-0.000001,0.000001,0.000002,-5.5208e-7,4.2301e-7,0.000004,0.000005,0.000005,0.000003,0.000001,0.000001,-0.000005,-0.000016,-0.000012,0.000005,0.0,413.329358,0.0,0.0,0.0,0.0,0.0,0.0
"""train_3""",212.822607,221.729062,227.381841,242.259876,253.111267,261.355935,259.292877,250.935204,240.321327,233.932932,227.653495,222.602555,217.496219,212.638499,208.435859,203.10365,196.378121,192.376704,191.04675,193.304753,196.953294,203.10148,208.852518,214.498308,219.959933,225.77976,231.449655,236.458998,240.945168,245.181245,249.125938,252.819,256.293398,259.48904,262.374707,264.938798,…,-0.000003,0.000006,0.000009,0.000002,-3.1147e-7,-2.2540e-7,-0.000002,-0.000019,-0.000027,-0.000028,-0.000015,-0.000005,0.000005,0.000006,0.000004,0.000006,0.00001,0.000011,0.000005,0.000005,0.000014,0.000027,0.000037,0.000049,0.000052,0.00004,-0.000007,-0.000038,-0.000041,0.0,417.334622,0.0,1.3297e-7,0.0,0.0,0.0,0.0
"""train_4""",214.12286,219.787635,227.753731,244.986192,258.572526,266.770542,265.346858,256.232703,244.570049,236.831076,230.766214,226.369996,223.045796,220.400407,217.21911,214.564411,212.113943,210.28956,204.195758,203.362237,202.547046,204.763257,207.16086,210.213263,213.414852,216.892985,220.564116,224.648944,228.957761,233.086338,237.255039,241.247852,244.960457,248.539418,251.86142,255.032982,…,-6.0350e-7,0.000001,6.6993e-7,7.2094e-7,-5.6767e-8,3.5311e-7,-1.4315e-8,-4.7537e-7,3.4649e-8,-4.2537e-7,-2.2724e-7,-9.6077e-7,-2.5037e-7,0.000001,-0.000002,-0.000003,7.1734e-7,0.000001,0.000002,0.000001,-4.5389e-8,7.1304e-7,7.3742e-7,-4.4891e-7,-6.9074e-7,-6.3748e-7,6.2717e-7,0.000022,-0.000018,0.0,365.823816,0.0,0.0,0.0,0.0,0.0,0.0
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
"""train_856699""",221.187103,226.352975,236.535549,244.926666,248.61201,246.259331,238.50307,232.078002,226.674986,223.381948,221.424677,220.095198,219.245587,218.970121,218.770346,218.502665,217.947594,217.98634,217.170769,216.088385,214.805092,214.35364,214.851799,216.221468,217.880209,219.516177,221.325253,223.446962,225.77996,228.201563,230.711562,233.245946,235.733657,238.206758,240.716762,243.199707,…,-1.3820e-9,2.0563e-8,-7.7391e-8,4.6357e-9,-3.4233e-9,-7.1200e-8,-1.5269e-7,-2.0135e-7,-8.7707e-8,1.6523e-7,2.0816e-7,1.0222e-7,-1.7261e-7,-1.9469e-7,1.0566e-8,-8.1143e-9,-2.7226e-7,-6.9284e-7,-0.000002,-0.000001,-6.1374e-7,3.5630e-7,0.000001,0.000001,0.000002,0.000001,-0.000002,-0.000008,0.000011,37.786375,321.328592,1.7822e-8,3.4158e-8,1.1861e-7,6.7697e-7,23.788084,16.911529
"""train_856700""",208.503157,228.572727,232.604764,240.169234,244.444724,245.95778,238.617967,232.626109,227.378977,225.347062,224.967762,224.720095,224.263587,223.491187,222.285655,221.029766,219.896398,218.973177,217.949389,215.119061,212.160979,210.252554,209.134708,208.898253,209.08148,209.621639,210.397508,211.50501,212.892911,214.439522,216.120503,218.076249,220.354989,222.84311,225.412847,227.959097,…,6.9390e-12,-3.8016e-11,-6.1241e-11,4.1925e-11,6.7408e-11,5.7122e-11,8.9830e-11,6.0674e-11,1.7459e-11,-8.2869e-12,-1.1217e-11,5.3426e-12,2.4992e-11,1.6549e-11,-1.4798e-11,3.6880e-11,1.9742e-12,1.7872e-11,-3.7419e-11,-1.7283e-10,-3.0020e-10,-1.8665e-10,3.1033e-10,-1.7309e-11,-8.5407e-8,5.5765e-7,-0.000001,0.000001,-5.3174e-7,15.257785,180.084729,1.0191e-8,1.0191e-8,0.521506,2.195795,15.543283,14.388648
"""train_856701""",211.498267,233.867707,238.70206,240.93841,244.79861,247.414092,240.465712,234.161907,228.329206,225.924888,225.307517,224.813847,224.147727,223.277559,221.896788,220.214229,218.804129,217.810821,216.776231,214.107523,211.610735,210.081165,209.350833,209.482645,209.781555,210.323062,211.093403,212.120203,213.370362,214.774195,216.286575,218.029288,220.259625,222.843051,225.495474,228.178514,…,-2.3874e-9,-2.8286e-9,-1.1032e-10,6.7215e-9,1.7949e-9,-1.4457e-8,8.9773e-8,-6.6244e-8,3.7578e-9,1.0247e-8,-9.8870e-9,-4.2378e-9,-1.2549e-9,1.1238e-8,1.1266e-8,-6.6895e-10,-1.4186e-9,-2.0885e-8,1.7886e-7,-3.3243e-7,2.7584e-8,2.3146e-7,-7.7786e-8,-1.5670e-8,3.5209e-7,-5.9616e-7,5.4882e-7,0.000002,-0.000003,9.54512,203.905765,6.5152e-9,6.5152e-9,0.057483,0.359297,12.281326,11.617389
"""train_856702""",213.687595,224.331743,231.563532,241.531658,247.5907,247.361025,239.942349,233.966628,228.329086,224.989066,222.49216,220.73631,219.810218,219.42788,219.168738,219.119234,218.727942,218.911339,218.431555,217.346961,215.912699,215.294392,215.491015,216.63515,218.193029,219.711129,221.161468,222.747355,224.518831,226.37291,228.344998,230.425875,232.561586,234.568737,236.473331,238.311079,…,-4.8036e-7,6.4283e-7,-7.1518e-7,-9.1792e-7,1.7715e-7,-6.8069e-7,-0.000001,-2.5668e-7,-0.000001,-0.000003,6.2680e-7,0.000003,3.5347e-7,-0.000003,-0.00001,-0.000013,-0.000011,-0.000011,-0.000013,-0.00001,-0.000004,-0.000002,0.00001,0.000053,0.000046,0.000024,0.000012,0.000013,-0.000042,115.202187,268.92534,1.2209e-8,1.5585e-8,18.908758,55.097867,37.744758,21.810664


In [12]:
# Display the single sample-submission row whose values were used as per-column weights.
sample

Unnamed: 0,sample_id,ptend_t_0,ptend_t_1,ptend_t_2,ptend_t_3,ptend_t_4,ptend_t_5,ptend_t_6,ptend_t_7,ptend_t_8,...,ptend_v_58,ptend_v_59,cam_out_NETSW,cam_out_FLWDS,cam_out_PRECSC,cam_out_PRECC,cam_out_SOLS,cam_out_SOLL,cam_out_SOLSD,cam_out_SOLLD
0,test_0,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
