In [1]:
from ioncast import *
from torch.utils.data import DataLoader
import datetime

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Build one dataset per data source over a shared date window, then combine
# them into a single sequence dataset.

# Input locations for each data source.
gim_dir = "/mnt/ionosphere-data/jpld/webdataset"
omni_dir = "/mnt/ionosphere-data/omniweb/cleaned"
celestrak_file = "/mnt/ionosphere-data/celestrak/kp_ap_processed_timeseries.csv"

# The same start/end pair is handed to every dataset constructor below.
date_start = datetime.datetime(year=2022, month=1, day=1)
date_end = datetime.datetime(year=2022, month=7, day=31)

# Construction order is kept as GIM -> OMNI -> Celestrak because each
# constructor prints its own summary when it is created.
gim_dataset = JPLDGIMDataset(data_dir=gim_dir, date_start=date_start, date_end=date_end)
omni_dataset = OMNIDataset(file_dir=omni_dir, date_start=date_start, date_end=date_end)
celestrak_dataset = CelestrakDataset(file_name=celestrak_file, date_start=date_start, date_end=date_end)

# Combine the three sources into sequences of 10 steps at a 15-minute delta.
sequence_dataset = Sequences(
    [gim_dataset, omni_dataset, celestrak_dataset],
    delta_minutes=15,
    sequence_length=10,
)


JPLD
Directory  : /mnt/ionosphere-data/jpld/webdataset
Loading tar files index from cache: /mnt/ionosphere-data/jpld/webdataset/tar_files_index
Start date : 2022-01-01 00:00:00
End date   : 2022-07-31 00:00:00
Delta      : 15 minutes
Loading dates from cache: /mnt/ionosphere-data/jpld/webdataset/dates_index_2022-01-01T00:00:00_2022-07-31T00:00:00
TEC maps total    : 20,256
TEC maps available: 20,256
TEC maps dropped  : 0

OMNIWeb dataset
File                 : /mnt/ionosphere-data/omniweb/cleaned/omni_5min_full_cleaned.csv
Rows                 : 2,103,840
Delta minutes        : 15
Normalize            : True
Rewind minutes       : 50
column:['B_mag', 'Bx_GSE', 'By_GSM', 'Bz_GSM', 'RMS_B_scalar', 'RMS_B_vector', 'V_flow', 'Vx', 'Vy', 'Vz', 'Density', 'Temp', 'SYM_D', 'SYM_H', 'ASY_D', 'ASY_H']
2103840


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.data['Datetime'] = pd.to_datetime(self.data['Datetime']) # this line wasnt present previously but is necessary if the col contains strings instead of pandas timestamps for the date.to_pydatetime() to run as expected


Start date           : 2022-01-01 00:00:00
End date             : 2022-07-31 00:00:00
Rows after processing: 57,684

Celestrak dataset
File                 : /mnt/ionosphere-data/celestrak/kp_ap_processed_timeseries.csv
Rows                 : 198,320
Delta minutes        : 15
Normalize            : True
Rewind minutes       : 180
column:['Kp', 'Ap']
2379829
Start date           : 2022-01-01 00:00:00
End date             : 2022-07-31 00:00:00
Rows after processing: 20,257

Sequences
Start date              : 2022-01-01 00:00:00
End date                : 2022-07-31 00:00:00
Delta                   : 15 minutes
Sequence length         : 10
Sequence duration       : 150 minutes
Number of sequences     : 19,196
First sequence          : ['2022-01-01T00:00:00', '2022-01-01T00:15:00', '2022-01-01T00:30:00', '2022-01-01T00:45:00', '2022-01-01T01:00:00', '2022-01-01T01:15:00', '2022-01-01T01:30:00', '2022-01-01T01:45:00', '2022-01-01T02:00:00', '2022-01-01T02:15:00']
Last sequence           : [

In [3]:
# Wrap the sequence dataset in a DataLoader that groups 5 items per batch
dataloader = DataLoader(dataset=sequence_dataset, batch_size=5)

In [4]:
from ioncast import stack_features

# Fetch only the first batch: one batch is enough to inspect the stacked
# features, so there is no need to write a loop that immediately breaks.
# The `None` default keeps this cell a silent no-op (as the original loop was)
# if the DataLoader yields nothing.
batch = next(iter(dataloader), None)

if batch is not None:
    # stack_features is provided by ioncast; the recorded output of this cell
    # shows the printed result is a tensor.
    stacked_batch = stack_features(batch)
    print(stacked_batch)  # Print the stacked features

tensor([[[[-2.2730, -2.2730, -2.2730,  ..., -2.2730, -2.2730, -2.2730],
          [-2.1995, -2.1995, -2.1995,  ..., -2.1995, -2.1995, -2.1995],
          [-2.1995, -2.1995, -2.1995,  ..., -2.1995, -2.1995, -2.1995],
          ...,
          [ 0.3258,  0.3258,  0.3258,  ...,  0.3258,  0.3258,  0.3258],
          [ 0.2944,  0.2944,  0.2944,  ...,  0.2944,  0.2944,  0.2944],
          [ 0.2621,  0.2621,  0.2621,  ...,  0.2621,  0.2621,  0.2621]],

         [[ 0.5056,  0.5056,  0.5056,  ...,  0.5056,  0.5056,  0.5056],
          [ 0.5056,  0.5056,  0.5056,  ...,  0.5056,  0.5056,  0.5056],
          [ 0.5056,  0.5056,  0.5056,  ...,  0.5056,  0.5056,  0.5056],
          ...,
          [ 0.5056,  0.5056,  0.5056,  ...,  0.5056,  0.5056,  0.5056],
          [ 0.5056,  0.5056,  0.5056,  ...,  0.5056,  0.5056,  0.5056],
          [ 0.5056,  0.5056,  0.5056,  ...,  0.5056,  0.5056,  0.5056]],

         [[-1.3153, -1.3153, -1.3153,  ..., -1.3153, -1.3153, -1.3153],
          [-1.3153, -1.3153, -