In [26]:
import sys

sys.path.append("../")

import pickle

from fft import *
from data_loading import *
from maths import linear_interpolate, rmse


In [4]:
# Number of random (time, lev, lat, lon) sample points drawn for the subset.
n = 10_000_000

# Selects which set of stored models is read: it is a path component of
# ../models/3D-dft/<variable>/<quantile>/<lev>.bin further down.
quantile = 0.9935

# Level indices (out of 0..35) for which a model file is loaded directly.
# Every other level is filled in by interpolating between its neighbours here.
variable_levels = {
    "U": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 16, 19, 23, 28, 32, 34, 35],
    "V": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 17, 20, 23, 28, 32, 34, 35],
}

# Variables handled by this notebook, in the insertion order of the mapping above.
variables = list(variable_levels)


In [5]:
# Draw the sample coordinates from a seeded generator so that the subset (and
# the file written at the end of the notebook) is identical on every
# Restart & Run All. Upper bounds are exclusive.
SEED = 0
rng = np.random.default_rng(SEED)

longitudes = rng.integers(0, 576, size=n, dtype=np.uint16)
latitudes = rng.integers(0, 361, size=n, dtype=np.uint16)
times = rng.integers(0, 365 * 8, size=n, dtype=np.uint16)
levels = rng.integers(0, 36, size=n, dtype=np.uint8)

# Placeholder for the value columns; they are overwritten in later cells.
zeros = np.zeros(shape=n, dtype="float16")

data = pd.DataFrame({"time": times, "lev": levels, "lat": latitudes, "lon": longitudes,
                     "U_est": zeros, "V_est": zeros, "U": zeros, "V": zeros})

# Sort by level and renumber the rows 0..n-1.
data = data.sort_values(by="lev", ignore_index=True)
data.head()

Unnamed: 0,time,lev,lat,lon,U_est,V_est,U,V
0,2452,0,41,223,0.0,0.0,0.0,0.0
1,766,0,174,458,0.0,0.0,0.0,0.0
2,869,0,297,468,0.0,0.0,0.0,0.0
3,2272,0,26,264,0.0,0.0,0.0,0.0
4,2787,0,266,269,0.0,0.0,0.0,0.0


In [6]:
# In-memory footprint of the frame (index included), in decimal megabytes.
size_mb = data.memory_usage().sum() / (1000 ** 2)
print(f"Size: {size_mb} MB")

# Rows whose (lat, lon, lev, time) point repeats an earlier row. `duplicated`
# only looks at the `subset` columns, so no reset_index() copy of the whole
# frame is needed to count them.
duplicate_rows = data.duplicated(subset=["lat", "lon", "lev", "time"]).sum()
print(f"Duplicates: {100 * duplicate_rows / n:.3f}%")


Size: 150.000128 MB
Duplicates: 0.023%


In [7]:
# Sample coordinates as a (4, n) array with rows ordered (time, lev, lat, lon).
# Computed after the frame was sorted, so column k of `indices` belongs to row k
# of `data`. Reused further down to sample the estimates at the same points.
indices = data[["time", "lev", "lat", "lon"]].to_numpy().T

for variable in variables:
    # Load the whole field, keep only the values at the sampled points, then
    # drop the field again before the next variable is loaded.
    # NOTE(review): assumes load_variable returns an array indexable as
    # [time, lev, lat, lon] - confirm against data_loading.
    targets = load_variable("MERRA2.tavg3_3d_asm_Nv.YAVG{:0>2}{:0>2}.nc4", variable, cache=False)
    data[variable] = targets[tuple(indices)]
    del targets

  0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/12 [00:00<?, ?it/s]

In [14]:
variable = "U"

# Buffer for the estimate at every grid point; levels without a model file
# stay zero in this cell.
estimate = np.zeros((8 * 365, 36, 361, 576), dtype="float16")

for lev in tqdm(variable_levels[variable]):
    model_path = f"../models/3D-dft/{variable}/{quantile}/{lev}.bin"

    # NOTE: pickle can execute arbitrary code while loading - only point this
    # at model files produced by this project.
    with open(model_path, "rb") as handle:
        fft_model = pickle.load(handle)

    # The unpickled object is unpacked as the positional arguments of
    # idft3_at_level; only the first element of its result is kept.
    estimate[:, lev] = idft3_at_level(*fft_model)[0].astype("float16")

  0%|          | 0/20 [00:00<?, ?it/s]

In [46]:
i = -1
for lev in tqdm(range(36)):
    if lev in variable_levels[variable]:
        i += 1
        continue

    lower = variable_levels[variable][i]
    upper = variable_levels[variable][i + 1]
    t = (lev - lower) / (upper - lower)
    estimate[:, lev] = linear_interpolate((estimate[:, lower], estimate[:, upper]), 0, t)

  0%|          | 0/36 [00:00<?, ?it/s]

In [49]:
for variable in ["V"]:
    estimate = np.zeros((8 * 365, 36, 361, 576), dtype="float16")

    for lev in tqdm(variable_levels[variable]):
        with open(f"../models/3D-dft/{variable}/{quantile}/{lev}.bin", "rb") as file:
            fft = pickle.load(file)

        estimate[:, lev] = idft3_at_level(*fft)[0].astype("float16")

    i = -1
    for lev in tqdm(range(36)):
        if lev in variable_levels[variable]:
            i += 1
            continue

        lower = variable_levels[variable][i]
        upper = variable_levels[variable][i + 1]
        t = (lev - lower) / (upper - lower)
        estimate[:, lev] = linear_interpolate((estimate[:, lower], estimate[:, upper]), 0, t)

    data[f"{variable}_est"] = estimate[*indices]


  0%|          | 0/20 [00:00<?, ?it/s]

  0%|          | 0/36 [00:00<?, ?it/s]

In [52]:
# File name encodes the variables, the model quantile and the sample count.
subset_path = f"subset/{''.join(variables)}-{quantile}-{n}.ft"
data.to_feather(subset_path)
