In [3]:
from ioncast import *
from torch.utils.data import DataLoader

In [2]:
# Create sequence dataset
gim_dir = "/mnt/ionosphere-data/jpld/webdataset"

gim_dataset = JPLDGIMDataset(
    data_dir=gim_dir,
    date_start=None,
    date_end=None,
)

sequence_dataset = Sequences([gim_dataset], delta_minutes=30, sequence_length=10)


JPLD
Directory  : /mnt/ionosphere-data/jpld/webdataset
Loading tar files index from cache: /mnt/ionosphere-data/jpld/webdataset/tar_files_index
Start date : 2010-05-13 00:00:00
End date   : 2024-07-31 23:45:00
Delta      : 15 minutes
Loading dates from cache: /mnt/ionosphere-data/jpld/webdataset/dates_index_2010-05-13T00:00:00_2024-07-31T23:45:00
TEC maps total    : 498,623
TEC maps available: 498,481
TEC maps dropped  : 142

Sequences
Start date              : 2010-05-13 00:00:00
End date                : 2024-07-31 23:45:00
Delta                   : 30 minutes
Sequence length         : 10
Sequence duration       : 300 minutes
Number of sequences     : 249,232
First sequence          : ['2010-05-13T00:00:00', '2010-05-13T00:30:00', '2010-05-13T01:00:00', '2010-05-13T01:30:00', '2010-05-13T02:00:00', '2010-05-13T02:30:00', '2010-05-13T03:00:00', '2010-05-13T03:30:00', '2010-05-13T04:00:00', '2010-05-13T04:30:00']
Last sequence           : ['2024-07-31T19:00:00', '2024-07-31T19:30:00',

In [4]:
# Batch into 5 using a DataLoader
dataloader = DataLoader(
    sequence_dataset,
    batch_size=5,
)

In [7]:
# Iterate through the DataLoader
for sequence_batch in dataloader:
    n_img_datasets = 1
    image_datasets = sequence_batch[:n_img_datasets]  # torch.Size(torch.Size([B, T, C_i, H, W])
    global_param_datasets = sequence_batch[n_img_datasets:-1]  # torch.Size(torch.Size([B, T, F_i])
    timestamps = sequence_batch[-1]  # List[Tuple] -> [T, B]
    
    # Process the datasets as needed
    print(f"Image datasets shape: {image_datasets}")
    print(f"Timestamps: {timestamps}")

    break

Image datasets shape: [tensor([[[[[-0.1833, -0.1833, -0.1833,  ..., -0.1833, -0.1833, -0.1833],
           [-0.1593, -0.1593, -0.1593,  ..., -0.1593, -0.1593, -0.1593],
           [-0.1475, -0.1475, -0.1475,  ..., -0.1475, -0.1475, -0.1475],
           ...,
           [-1.6913, -1.6913, -1.6913,  ..., -1.6913, -1.6913, -1.6913],
           [-1.7863, -1.7863, -1.7863,  ..., -1.7863, -1.7863, -1.7863],
           [-1.8369, -1.8369, -1.8369,  ..., -1.8369, -1.8369, -1.8369]]],


         [[[-0.2202, -0.2202, -0.2202,  ..., -0.2202, -0.2202, -0.2202],
           [-0.2078, -0.2078, -0.2078,  ..., -0.2078, -0.2078, -0.2078],
           [-0.1955, -0.1955, -0.1955,  ..., -0.1955, -0.1955, -0.1955],
           ...,
           [-2.0654, -2.0654, -2.0654,  ..., -2.0654, -2.0654, -2.0654],
           [-2.1995, -2.1995, -2.1995,  ..., -2.1995, -2.1995, -2.1995],
           [-2.3515, -2.3515, -2.3515,  ..., -2.3515, -2.3515, -2.3515]]],


         [[[-0.2202, -0.2202, -0.2202,  ..., -0.2202, -0.2202

In [None]:
print(len(timestamps))
print(len(timestamps[0]))
print(timestamps[0][0])  # Example of accessing the first timestamp
print(type(timestamps[0][0]))  # Should be a datetime object

10
5
2010-05-13T00:00:00
<class 'str'>


In [24]:
# Constants
H, W = 32, 32  # Arbitrary height/width

# Day-in-month for non-leap year
day_in_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]

def parse_fast(ts_str):
    year = int(ts_str[0:4])
    month = int(ts_str[5:7])
    day = int(ts_str[8:10])
    hour = int(ts_str[11:13])
    minute = int(ts_str[14:16])
    tod = (hour * 60 + minute) / (60 * 24)
    doy = (sum(day_in_month[:month - 1]) + day + hour / 24) / 365
    return tod, doy

In [29]:
# Step 1: Transpose to shape (B, T)
timestamps_TB = list(zip(*timestamps))  # Now shape (B, T)

# Step 2: Apply parse_fast and flatten to shape (B, 2*T)
features = []
for ts_list in timestamps_TB:  # For each sample (length T)
    sample_features = []
    for ts in ts_list:
        tod, doy = parse_fast(ts)

        print(f"Parsed time: {ts} -> TOD: {tod}, DOY: {doy}")

        sin_tod = np.sin(2 * np.pi * tod)
        cos_tod = np.cos(2 * np.pi * tod)
        sin_doy = np.sin(2 * np.pi * doy)
        cos_doy = np.cos(2 * np.pi * doy)

        sample_features.extend([sin_tod, cos_tod, sin_doy, cos_doy])  # Append all

        print(f"Features for {ts}: {sample_features}")
    features.append(sample_features)  # Each is (4*T,)

# Step 3: Convert to array: (B, 4*T)
features_array = np.array(features)  # shape (B, 4*T)

# Step 4: Expand to (B, 4*T, H, W)
# First add singleton dims, then broadcast
features_array = features_array[:, :, None, None]  # (B, 4*T, 1, 1)
features_broadcasted = np.broadcast_to(features_array, (features_array.shape[0], features_array.shape[1], H, W))

Parsed time: 2010-05-13T00:00:00 -> TOD: 0.0, DOY: 0.3643835616438356
Features for 2010-05-13T00:00:00: [np.float64(0.0), np.float64(1.0), np.float64(0.7526668275320084), np.float64(-0.6584015846980488)]
Parsed time: 2010-05-13T00:30:00 -> TOD: 0.020833333333333332, DOY: 0.3643835616438356
Features for 2010-05-13T00:30:00: [np.float64(0.0), np.float64(1.0), np.float64(0.7526668275320084), np.float64(-0.6584015846980488), np.float64(0.13052619222005157), np.float64(0.9914448613738104), np.float64(0.7526668275320084), np.float64(-0.6584015846980488)]
Parsed time: 2010-05-13T01:00:00 -> TOD: 0.041666666666666664, DOY: 0.3644977168949771
Features for 2010-05-13T01:00:00: [np.float64(0.0), np.float64(1.0), np.float64(0.7526668275320084), np.float64(-0.6584015846980488), np.float64(0.13052619222005157), np.float64(0.9914448613738104), np.float64(0.7526668275320084), np.float64(-0.6584015846980488), np.float64(0.25881904510252074), np.float64(0.9659258262890683), np.float64(0.7521943897673482

In [32]:
print(features_broadcasted.shape)  # (B, 4*T, H, W)

print(features_broadcasted[:, :, 0:3, 0:3])  # Example of accessing the first feature

(5, 40, 32, 32)
[[[[ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
   [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]
   [ 0.00000000e+00  0.00000000e+00  0.00000000e+00]]

  [[ 1.00000000e+00  1.00000000e+00  1.00000000e+00]
   [ 1.00000000e+00  1.00000000e+00  1.00000000e+00]
   [ 1.00000000e+00  1.00000000e+00  1.00000000e+00]]

  [[ 7.52666828e-01  7.52666828e-01  7.52666828e-01]
   [ 7.52666828e-01  7.52666828e-01  7.52666828e-01]
   [ 7.52666828e-01  7.52666828e-01  7.52666828e-01]]

  ...

  [[ 3.82683432e-01  3.82683432e-01  3.82683432e-01]
   [ 3.82683432e-01  3.82683432e-01  3.82683432e-01]
   [ 3.82683432e-01  3.82683432e-01  3.82683432e-01]]

  [[ 7.50774756e-01  7.50774756e-01  7.50774756e-01]
   [ 7.50774756e-01  7.50774756e-01  7.50774756e-01]
   [ 7.50774756e-01  7.50774756e-01  7.50774756e-01]]

  [[-6.60558299e-01 -6.60558299e-01 -6.60558299e-01]
   [-6.60558299e-01 -6.60558299e-01 -6.60558299e-01]
   [-6.60558299e-01 -6.60558299e-01 -6.60558299e-01]]]


 [[[ 1.30