In [1]:
import os
from pathlib import Path
import torch
from torch.utils.data import DataLoader

from hydra import compose, initialize
from omegaconf import OmegaConf

from dataloaders.aurora_dataload import LargeClimateDataset, aurora_batch_collate
from utils import seed_everything
from bfm_finetune.paths import REPO_FOLDER, STORAGE_DIR

from aurora import Aurora, Batch, Metadata, rollout

seed_everything(42)

  from .autonotebook import tqdm as notebook_tqdm
STORAGE_DIR: /projects/prjs1134/data/projects/biodt/storage


In [2]:
# Batch path (overriden) # bfm_cfg.evaluation.test_data
test_data_path = str(STORAGE_DIR / "monthly_batches" / "batches") # COMPLETE BATCHES
stats_path = str(
    STORAGE_DIR
    / "monthly_batches"
    / "statistics"
    / "monthly_batches_stats_splitted_channels.json"
)

bfm_config_path = REPO_FOLDER / "bfm-model/bfm_model/bfm/configs"
cwd = Path(os.getcwd())
bfm_config_path = str(bfm_config_path.relative_to(cwd))
bfm_config_path = f"../bfm-model/bfm_model/bfm/configs"
print(bfm_config_path)
with initialize(version_base=None, config_path=bfm_config_path, job_name="test_app"):
    bfm_cfg = compose(config_name="train_config.yaml")


test_dataset = LargeClimateDataset(
    data_dir=test_data_path,
    scaling_settings=bfm_cfg.data.scaling,
    num_species=28,
    atmos_levels=bfm_cfg.data.atmos_levels,
    model_patch_size=bfm_cfg.model.patch_size,
    max_files=1, # TODO Change this to iterate over the whole dataset!
    mode="finetune"
)
# print(test_dataset[0])
print("Reading test data from :", test_data_path)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=1,
    num_workers=0,
    collate_fn = aurora_batch_collate,
    drop_last=True,
    shuffle=False,
)

../bfm-model/bfm_model/bfm/configs
We scale the dataset True with normalize
Reading test data from : /projects/prjs1134/data/projects/biodt/storage/monthly_batches/batches


In [3]:
print(f"Total length of samples {len(test_dataset)}")
batch = next(iter(test_dataloader))
for name, t in batch.atmos_vars.items():
    print(name, t.shape)


Total length of samples 1
z torch.Size([1, 2, 13, 160, 280])
t torch.Size([1, 2, 13, 160, 280])
u torch.Size([1, 2, 13, 160, 280])
v torch.Size([1, 2, 13, 160, 280])
q torch.Size([1, 2, 13, 160, 280])


In [4]:
model = Aurora(use_lora=False)  # The pretrained version does not use LoRA.
model.load_checkpoint("microsoft/aurora", "aurora-0.25-pretrained.ckpt")

model.eval()
model = model.to("cuda")
model

Aurora(
  (encoder): Perceiver3DEncoder(
    (surf_mlp): MLP(
      (net): Sequential(
        (0): Linear(in_features=512, out_features=2048, bias=True)
        (1): GELU(approximate='none')
        (2): Linear(in_features=2048, out_features=512, bias=True)
        (3): Dropout(p=0.0, inplace=False)
      )
    )
    (surf_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
    (pos_embed): Linear(in_features=512, out_features=512, bias=True)
    (scale_embed): Linear(in_features=512, out_features=512, bias=True)
    (lead_time_embed): Linear(in_features=512, out_features=512, bias=True)
    (absolute_time_embed): Linear(in_features=512, out_features=512, bias=True)
    (atmos_levels_embed): Linear(in_features=512, out_features=512, bias=True)
    (surf_token_embeds): LevelPatchEmbed(
      (weights): ParameterDict(
          (10u): Parameter containing: [torch.cuda.FloatTensor of size 512x1x2x4x4 (cuda:0)]
          (10v): Parameter containing: [torch.cuda.FloatTensor of siz

In [5]:
with torch.inference_mode():
    for batch in test_dataloader:
        print("Input timestamp", batch.metadata.time)
        output = model(batch)
        print("Prediction timestamp", output.metadata.time)
        print(output)       

Input timestamp (datetime.datetime(2000, 1, 1, 0, 0), datetime.datetime(2000, 2, 1, 0, 0))
Prediction timestamp (datetime.datetime(2000, 1, 1, 6, 0), datetime.datetime(2000, 2, 1, 6, 0))
Batch(surf_vars={'2t': tensor([[[[23.9078, 33.9475, 27.5874,  ..., 35.4730, 28.8002, 25.5126],
          [ 3.1884, 13.2598,  9.4724,  ..., 14.6177, 10.9763, 28.4597],
          [10.7639, 15.9552, 23.3073,  ..., 17.5337, 25.3703, 20.0836],
          ...,
          [ 1.5668, 10.1005,  5.5000,  ..., 11.6880,  6.9773, 24.3605],
          [ 8.4951, 11.6789, 19.7208,  ..., 13.8539, 22.2126, 15.2720],
          [17.0880,  6.6258, 14.1729,  ...,  8.5999, 16.5563, 18.1276]]],


        [[[23.9355, 33.9960, 27.6492,  ..., 35.5579, 28.8837, 25.5606],
          [ 3.2196, 13.2924,  9.5364,  ..., 14.6789, 11.0515, 28.5593],
          [10.7540, 16.0114, 23.3586,  ..., 17.6239, 25.4440, 20.2195],
          ...,
          [ 1.5541, 10.0740,  5.4956,  ..., 11.6820,  7.0019, 24.4286],
          [ 8.4499, 11.6882, 19.7144

In [6]:
output.surf_vars["2t"].shape # t2m # THIS ONE WE NEED ! The temperature

torch.Size([2, 1, 160, 280])

In [7]:
with torch.inference_mode():
    for batch in test_dataloader:
        print("Input timestamp", batch.metadata.time)
        preds = [pred.to("cpu") for pred in rollout(model, batch, steps=2)]


print("Prediction timestamp", output.metadata.time)
print(output)

Input timestamp (datetime.datetime(2000, 1, 1, 0, 0), datetime.datetime(2000, 2, 1, 0, 0))


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 1 but got size 2 for tensor number 1 in the list.