### Libraries

In [None]:
import torch
from torch.utils.data import DataLoader, random_split

In [2]:
from data.preprocessing import *
from data.data_utils import *
from models.macro_architectures import *
from models.micro_architectures import *
from utils import *


# DATA

In [2]:
# Date window for the data pull, written as date strings (presumably 'YYYYMMDD').
start_time = '20230101'
end_time = '20230830'
# Expand the window into the collection of dates to fetch. Later cells iterate over it
# and slice each entry with [:6], so entries are presumably 'YYYYMMDD' strings — TODO confirm.
scrap_date = interval_time(start_time, end_time)

In [3]:
# Fetch the training inputs for every date in scrap_date.
# NOTE(review): the saved output shows DSCOVR fc1/mg1 archives being downloaded into
# data/compressed, but for September 2023 dates that fall outside the
# 20230101-20230830 window configured in this notebook — the output looks stale; re-run to confirm.
import_train(scrap_date)

Downloading https://www.ngdc.noaa.gov/dscovr/data/2023/09/oe_fc1_dscovr_s20230921000000_e20230921235959_p20230922035719_pub.nc.gz to data/compressed\fc1_20230921.nc.gz


100%|██████████| 377922/377922 [00:00<00:00, 483025.67it/s]


Downloading https://www.ngdc.noaa.gov/dscovr/data/2023/09/oe_mg1_dscovr_s20230921000000_e20230921235959_p20230922030934_pub.nc.gz to data/compressed\mg1_20230921.nc.gz


100%|██████████| 183610480/183610480 [03:16<00:00, 935089.18it/s] 


Downloading https://www.ngdc.noaa.gov/dscovr/data/2023/09/oe_fc1_dscovr_s20230922000000_e20230922235959_p20230924014806_pub.nc.gz to data/compressed\fc1_20230922.nc.gz


100%|██████████| 422672/422672 [00:01<00:00, 417331.95it/s]


Downloading https://www.ngdc.noaa.gov/dscovr/data/2023/09/oe_mg1_dscovr_s20230922000000_e20230922235959_p20230924010103_pub.nc.gz to data/compressed\mg1_20230922.nc.gz


100%|██████████| 183590176/183590176 [03:03<00:00, 999370.92it/s] 


In [None]:
# Collapse the per-day stamps to their unique 6-character prefixes (presumably 'YYYYMM'
# month keys, assuming entries look like start_time/end_time — TODO confirm).
# sorted() is used instead of list(set(...)) because set iteration order for strings is
# arbitrary and varies between kernel sessions; sorting makes the month order
# deterministic (and chronological for 'YYYYMM' keys).
months = sorted({day[:6] for day in scrap_date})
import_Dst(months)

In [3]:
# preprocessing() yields the two sample tables used in the rest of the notebook:
# l1_sample (L1, raw) and l2_sample (L2, cleaned).
l1_sample, l2_sample = preprocessing()
# Load the three target series (Dst, Kp, ap) for the same scrap_date collection.
dst, kp, ap = import_targets(scrap_date)

array([[-446.075     ,   47.2625    ,    8.3375    , ...,    0.73001426,
          17.871565  ,    9.731378  ],
       [-446.1       ,   46.433334  ,    8.433333  , ...,    0.6353522 ,
          15.668235  ,    6.660771  ],
       [-446.11252   ,   46.675003  ,    8.675     , ...,    0.5244114 ,
          12.89051   ,    7.266075  ],
       ...,
       [-449.5       ,    9.971429  ,  -15.785714  , ...,    8.531774  ,
          46.694794  ,  271.5274    ],
       [-450.91428   ,    9.928572  ,  -18.942858  , ...,    6.4501734 ,
          39.44411   ,  267.90884   ],
       [-453.02222   ,   11.711111  ,  -18.533333  , ...,    6.371899  ,
          38.000023  ,  267.42593   ]], dtype=float32)

# L1 (raw) data

## Minute based

In [None]:
# Show the L1 sample exactly as returned by preprocessing() (not resampled).
l1_sample

## Hour based

In [None]:
# Down-sample the L1 data to hourly resolution by averaging each 60-minute bin.
l1_sample_hour = l1_sample.resample('60min').mean()
l1_sample_hour

# L2 (cleaned) data

## Minute based

In [None]:
# Show the L2 sample exactly as returned by preprocessing() (not resampled).
l2_sample

## Hour based

In [None]:
# Down-sample the L2 data to hourly resolution by averaging each 60-minute bin.
l2_sample_hour = l2_sample.resample('60min').mean()
l2_sample_hour

# Dst data

In [None]:
# Show the Dst target loaded by import_targets(scrap_date).
dst

# Kp data

In [None]:
# Show the Kp target loaded by import_targets(scrap_date).
kp

# ap data

In [None]:
# Show the ap target loaded by import_targets(scrap_date).
ap

# Datasets
### Descriptions:
**hn_dl**: hour normal dataloader

**mn_dl**: minute normal dataloader

**hr_dl**: hour refined dataloader

**mr_dl**: minute refined dataloader



In [None]:
# Device handed to every DeviceDataLoader wrapper in this notebook
# (presumably GPU when available, otherwise CPU — confirm in get_default_device).
device = get_default_device()

In [None]:
# Input window lengths, expressed in samples of each resolution. Both describe the same
# 10-hour span (10 hourly samples = 600 one-minute samples), so keep them in sync.
sequence_length_hour = 10  # hours (used with the hourly-resampled data)
sequence_length_minute = 600 # minutes (used with the un-resampled data)
# Forecast horizon passed to every dataset, regardless of input resolution.
pred_length = 6 # hours

In [None]:
# --- Normal datasets: built from the L1 samples plus the Dst/Kp/ap targets ---
hour_Normal_dataset = NormalTrainingDataset(
    l1_sample_hour, dst, kp, ap,
    sequence_length_hour, pred_length,
    hour=True,
)
minute_Normal_dataset = NormalTrainingDataset(
    l1_sample, dst, kp, ap,
    sequence_length_minute, pred_length,
    hour=False,
)

# --- Refined datasets (new method): take both the L1 and the L2 samples ---
hour_Refined_dataset = RefinedTrainingDataset(
    l1_sample_hour, l2_sample_hour, dst, kp, ap,
    sequence_length_hour, pred_length,
    hour=True,
)
minute_Refined_dataset = RefinedTrainingDataset(
    l1_sample, l2_sample, dst, kp, ap,
    sequence_length_minute, pred_length,
    hour=False,
)

In [None]:
# Hour / normal dataset: hold out 15% for testing, train on the remaining 85%.
test_size = round(0.15*len(hour_Normal_dataset))

# Seed the split explicitly: without a generator, random_split draws from torch's global
# RNG, so train/test membership would change on every kernel restart.
# NOTE(review): if dataset items are overlapping sliding windows, a random split can put
# near-duplicate windows on both sides — confirm this is intended vs. a chronological split.
split_generator = torch.Generator().manual_seed(42)
train_hn_ds, test_hn_ds = random_split(
    hour_Normal_dataset,
    [len(hour_Normal_dataset) - test_size, test_size],
    generator=split_generator,
)

batch_size = 32  # Change based on GPU capacity

# Training loader reshuffles every epoch; DeviceDataLoader then wraps it with the chosen
# device (presumably moving each batch onto it — confirm in its definition).
train_hn_dl = DataLoader(train_hn_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
train_hn_dl = DeviceDataLoader(train_hn_dl, device)
# Test loader keeps dataset order and uses a doubled batch size.
test_hn_dl = DataLoader(test_hn_ds, batch_size*2, num_workers=4, pin_memory=True)
test_hn_dl = DeviceDataLoader(test_hn_dl, device)

In [None]:
# Minute / normal dataset: hold out 15% for testing, train on the remaining 85%.
test_size = round(0.15*len(minute_Normal_dataset))

# Seed the split explicitly: without a generator, random_split draws from torch's global
# RNG, so train/test membership would change on every kernel restart.
# NOTE(review): if dataset items are overlapping sliding windows, a random split can put
# near-duplicate windows on both sides — confirm this is intended vs. a chronological split.
split_generator = torch.Generator().manual_seed(42)
train_mn_ds, test_mn_ds = random_split(
    minute_Normal_dataset,
    [len(minute_Normal_dataset) - test_size, test_size],
    generator=split_generator,
)

batch_size = 32  # Change based on GPU capacity

# Training loader reshuffles every epoch; DeviceDataLoader then wraps it with the chosen
# device (presumably moving each batch onto it — confirm in its definition).
train_mn_dl = DataLoader(train_mn_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
train_mn_dl = DeviceDataLoader(train_mn_dl, device)
# Test loader keeps dataset order and uses a doubled batch size.
test_mn_dl = DataLoader(test_mn_ds, batch_size*2, num_workers=4, pin_memory=True)
test_mn_dl = DeviceDataLoader(test_mn_dl, device)


In [None]:
# Hour / refined dataset: hold out 15% for testing, train on the remaining 85%.
test_size = round(0.15*len(hour_Refined_dataset))

# Seed the split explicitly: without a generator, random_split draws from torch's global
# RNG, so train/test membership would change on every kernel restart.
# NOTE(review): if dataset items are overlapping sliding windows, a random split can put
# near-duplicate windows on both sides — confirm this is intended vs. a chronological split.
split_generator = torch.Generator().manual_seed(42)
train_hr_ds, test_hr_ds = random_split(
    hour_Refined_dataset,
    [len(hour_Refined_dataset) - test_size, test_size],
    generator=split_generator,
)

batch_size = 32  # Change based on GPU capacity

# Training loader reshuffles every epoch; DeviceDataLoader then wraps it with the chosen
# device (presumably moving each batch onto it — confirm in its definition).
train_hr_dl = DataLoader(train_hr_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
train_hr_dl = DeviceDataLoader(train_hr_dl, device)
# Test loader keeps dataset order and uses a doubled batch size.
test_hr_dl = DataLoader(test_hr_ds, batch_size*2, num_workers=4, pin_memory=True)
test_hr_dl = DeviceDataLoader(test_hr_dl, device)

In [None]:
# Minute / refined dataset: hold out 15% for testing, train on the remaining 85%.
test_size = round(0.15*len(minute_Refined_dataset))

# Seed the split explicitly: without a generator, random_split draws from torch's global
# RNG, so train/test membership would change on every kernel restart.
# NOTE(review): if dataset items are overlapping sliding windows, a random split can put
# near-duplicate windows on both sides — confirm this is intended vs. a chronological split.
split_generator = torch.Generator().manual_seed(42)
train_mr_ds, test_mr_ds = random_split(
    minute_Refined_dataset,
    [len(minute_Refined_dataset) - test_size, test_size],
    generator=split_generator,
)

batch_size = 32  # Change based on GPU capacity

# Training loader reshuffles every epoch; DeviceDataLoader then wraps it with the chosen
# device (presumably moving each batch onto it — confirm in its definition).
train_mr_dl = DataLoader(train_mr_ds, batch_size, shuffle=True, num_workers=4, pin_memory=True)
train_mr_dl = DeviceDataLoader(train_mr_dl, device)
# Test loader keeps dataset order and uses a doubled batch size.
test_mr_dl = DataLoader(test_mr_ds, batch_size*2, num_workers=4, pin_memory=True)
test_mr_dl = DeviceDataLoader(test_mr_dl, device)

# Models

## Refined models

## Normal models