In [1]:
import sys

sys.path.insert(0, '..')

In [9]:
import os
import numpy as np
import random
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import TimeSeriesTransformerConfig, TimeSeriesTransformerForPrediction, TimeSeriesTransformerModel
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import ReduceLROnPlateau
from scipy import stats

from core.dataset import MachoDataset
from core.trainer import PredictionTrainer, ClassificationTrainer
from core.model import ClassificationModel

In [3]:
random_seed = 42
torch.manual_seed(random_seed)
np.random.seed(random_seed)
random.seed(random_seed)
torch.backends.cudnn.deterministic = True

In [4]:
config = {
    'random_seed': random_seed,
    'data_root': '/home/mrizhko/AML/contra_periodic/data/macho/',
    'balanced_data_root': '/home/mrizhko/AML/AstroML/data/macho-balanced/',
    'weights_path': '/home/mrizhko/AML/AstroML/weights/',

    # Time Series Transformer
    'lags': None,  # ?
    'distribution_output': 'normal',
    'num_static_real_features': 0,  # if 0 we don't use real features
    'num_time_features': 1,
    'd_model': 256,
    'decoder_layers': 4,
    'encoder_layers': 4,
    'dropout': 0,
    'encoder_layerdrop': 0,
    'decoder_layerdrop': 0,
    'attention_dropout': 0,
    'activation_dropout': 0,

    # Data
    'window_length': 200,
    'prediction_length': 10,  # 1 5 10 25 50

    # Training
    'batch_size': 512,
    'lr': 0.0001,
    'weight_decay': 0,
    'epochs_pre_training': 1000,
    'epochs_fine_tuning': 100,
    
    # Learning Rate Scheduler
    'factor': 0.3,
    'patience': 10,

    'mode': 'pre-training',  # 'pre-training' 'fine-tuning' 'both'
    'save_weights': False,
    'config_from_run': None,  # 'MeriDK/AstroML/qtun67bq'
}

In [5]:
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
print('Using', device)

Using cuda:1


In [6]:
train_dataset = MachoDataset(config['balanced_data_root'], config['prediction_length'], mode='train')
train_dataloader = DataLoader(train_dataset, batch_size=config['batch_size'], shuffle=True)

In [7]:
transformer_config = TimeSeriesTransformerConfig(
    prediction_length=config['prediction_length'],
    context_length=config['window_length'] - config['prediction_length'] - 7,  # 7 is max(lags) for default lags
    num_time_features=config['num_time_features'],
    num_static_real_features=config['num_static_real_features'],
    encoder_layers=config['encoder_layers'],
    decoder_layers=config['decoder_layers'],
    d_model=config['d_model'],
    distribution_output='normal',
    scaling=None,
    dropout=config['dropout'],
    encoder_layerdrop=config['encoder_layerdrop'],
    decoder_layerdrop=config['decoder_layerdrop'],
    attention_dropout=config['attention_dropout'],
    activation_dropout=config['activation_dropout']
)

In [12]:
model = TimeSeriesTransformerForPrediction(transformer_config)
model = model.to(device)

optimizer = AdamW(model.parameters(), lr=config['lr'], weight_decay=config['weight_decay'])
scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=config['factor'], patience=config['patience'], verbose=True)

trainer = PredictionTrainer(model=model, optimizer=optimizer, scheduler=scheduler, device=device)

In [13]:
trainer.train(train_dataloader, train_dataloader, epochs=1000)

Epoch 0: Train Loss 18.0493 Val Loss 6.7345
Epoch 1: Train Loss 5.6621 Val Loss 4.7266
Epoch 2: Train Loss 4.8067 Val Loss 4.1574
Epoch 3: Train Loss 4.1338 Val Loss 3.8935
Epoch 4: Train Loss 3.7012 Val Loss 3.6397
Epoch 5: Train Loss 3.6048 Val Loss 3.5619
Epoch 6: Train Loss 3.5383 Val Loss 3.5117
Epoch 7: Train Loss 3.4992 Val Loss 3.474
Epoch 8: Train Loss 3.4729 Val Loss 3.4424
Epoch 9: Train Loss 3.4378 Val Loss 3.4245
Epoch 10: Train Loss 3.4192 Val Loss 3.4066
Epoch 11: Train Loss 3.4022 Val Loss 3.3945
Epoch 12: Train Loss 3.3885 Val Loss 3.3818
Epoch 13: Train Loss 3.4435 Val Loss 3.3707
Epoch 14: Train Loss 3.367 Val Loss 3.3612
Epoch 15: Train Loss 3.3595 Val Loss 3.3524
Epoch 16: Train Loss 3.3534 Val Loss 3.4153
Epoch 17: Train Loss 3.4032 Val Loss 3.3373
Epoch 18: Train Loss 3.5644 Val Loss 3.3321
Epoch 19: Train Loss 3.336 Val Loss 3.3362
Epoch 20: Train Loss 3.326 Val Loss 3.3205
Epoch 21: Train Loss 3.3207 Val Loss 3.3167
Epoch 22: Train Loss 3.3731 Val Loss 3.3126
E

In [17]:
val_dataset = MachoDataset(config['balanced_data_root'], config['prediction_length'], mode='val')
val_dataloader = DataLoader(val_dataset, batch_size=4, shuffle=False)

In [18]:
trainer.evaluate(val_dataloader, val_dataset)

100%|█████████████████████████████████████████| 412/412 [01:01<00:00,  6.70it/s]
100%|███████████████████████████████████████| 1648/1648 [00:40<00:00, 40.57it/s]

MASE: 0.8488476200236806 sMAPE: 0.8234540886405802





In [19]:
trainer.val_epoch(val_dataloader)

25.950208413088863

In [21]:
trainer.val_epoch(train_dataloader)

1.258371901512146