In [1]:
import sys

# Put the parent directory first on the import path so that the local `core`
# package imported further down can be resolved (presumably it lives one level
# above this notebook -- TODO confirm).
# The membership check keeps the cell idempotent: re-running it no longer
# stacks duplicate '..' entries onto sys.path.
if '..' not in sys.path:
    sys.path.insert(0, '..')

In [2]:
import os
import numpy as np
import random
import torch
import torch.nn as nn
from torch.optim import AdamW
from torch.utils.data import DataLoader
from transformers import TimeSeriesTransformerConfig, TimeSeriesTransformerForPrediction, TimeSeriesTransformerModel
import matplotlib.pyplot as plt
from torch.optim.lr_scheduler import ReduceLROnPlateau
from scipy import stats

from core.dataset import MachoDataset
from core.trainer import PredictionTrainer, ClassificationTrainer
from core.model import ClassificationModel

In [4]:
# Seed every random number generator this notebook touches (PyTorch, NumPy and
# the stdlib `random` module) from a single value so runs are repeatable.
random_seed = 42
torch.manual_seed(random_seed)
np.random.seed(random_seed)
random.seed(random_seed)

# Ask cuDNN for deterministic algorithms and keep its benchmark autotuner off:
# with benchmarking enabled cuDNN may select a different kernel from run to
# run, which would undo the determinism requested on the line above.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [5]:
# Central run configuration.
# The data roots and 'prediction_length' are read by the dataset-loading cells
# below; the remaining entries are presumably consumed by model / trainer code
# outside this section -- TODO confirm.
#
# The three filesystem locations default to the original author's machine but
# can be overridden through environment variables, so the notebook can run on
# another machine without editing this cell. The defaults end with a path
# separator; keep one on any override in case a consumer concatenates strings
# instead of joining paths -- TODO confirm.
config = {
    'random_seed': random_seed,
    'data_root': os.environ.get(
        'MACHO_DATA_ROOT', '/home/mrizhko/AML/contra_periodic/data/macho/'),
    'balanced_data_root': os.environ.get(
        'MACHO_BALANCED_DATA_ROOT', '/home/mrizhko/AML/AstroML/data/macho-balanced/'),
    'weights_path': os.environ.get(
        'ASTROML_WEIGHTS_PATH', '/home/mrizhko/AML/AstroML/weights/'),

    # Time Series Transformer
    'lags': None,  # NOTE(review): the original author marked this entry with '?'
    'distribution_output': 'normal',
    'num_static_real_features': 0,  # if 0 we don't use real features
    'num_time_features': 1,
    'd_model': 256,
    'decoder_layers': 4,
    'encoder_layers': 4,
    'dropout': 0,
    'encoder_layerdrop': 0,
    'decoder_layerdrop': 0,
    'attention_dropout': 0,
    'activation_dropout': 0,

    # Data
    'window_length': 200,
    'prediction_length': 10,  # values listed by the author: 1 5 10 25 50

    # Training
    'batch_size': 512,
    'lr': 0.0001,
    'weight_decay': 0,
    'epochs_pre_training': 1000,
    'epochs_fine_tuning': 100,

    # Learning Rate Scheduler
    'factor': 0.3,
    'patience': 10,

    'mode': 'fine-tuning',  # one of: 'pre-training' 'fine-tuning' 'both'
    'save_weights': False,
    'config_from_run': None,  # example value: 'MeriDK/AstroML/qtun67bq'
}

In [6]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print('Using', device)

In [7]:
# Build the train / val / test splits from the directory configured under
# 'balanced_data_root', constructing them in that order.
# Note: the same three names are rebound to the 'data_root' datasets by a
# later cell, so these objects are only available until that cell runs.
train_dataset, val_dataset, test_dataset = (
    MachoDataset(config['balanced_data_root'], config['prediction_length'], mode=split)
    for split in ('train', 'val', 'test')
)

In [8]:
# Sample count of each split, in the order (train, val, test).
tuple(map(len, (train_dataset, val_dataset, test_dataset)))

In [10]:
# Each split size integer-divided by 8, in the order (train, val, test).
# NOTE(review): 8 is presumably the number of classes, which would make these
# the per-class sizes of the balanced splits -- confirm.
tuple(len(split) // 8 for split in (train_dataset, val_dataset, test_dataset))

In [11]:
# Build the train / val / test splits from the directory configured under
# 'data_root', constructing them in that order.
# This rebinds train_dataset / val_dataset / test_dataset, replacing the
# datasets that were loaded from 'balanced_data_root' earlier in the notebook.
train_dataset, val_dataset, test_dataset = (
    MachoDataset(config['data_root'], config['prediction_length'], mode=split)
    for split in ('train', 'val', 'test')
)

In [12]:
# Sample count of each split, in the order (train, val, test).
tuple(map(len, (train_dataset, val_dataset, test_dataset)))

In [13]:
# Total number of samples across the train, val and test splits.
sum(map(len, (train_dataset, val_dataset, test_dataset)))

In [18]:
# Tally how many samples of the train split fall under each label.
# The label is read from the last field of every sample and converted to a
# plain Python scalar with .item() so it can be used as a dictionary key.
classes = {}

for sample in train_dataset:
    label = sample[-1].item()
    if label not in classes:
        classes[label] = 0
    classes[label] += 1

print(classes)

In [19]:
# Tally how many samples of the val split fall under each label.
# The label is read from the last field of every sample and converted to a
# plain Python scalar with .item() so it can be used as a dictionary key.
classes = {}

for sample in val_dataset:
    label = sample[-1].item()
    if label not in classes:
        classes[label] = 0
    classes[label] += 1

print(classes)

In [20]:
# Tally how many samples of the test split fall under each label.
# The label is read from the last field of every sample and converted to a
# plain Python scalar with .item() so it can be used as a dictionary key.
classes = {}

for sample in test_dataset:
    label = sample[-1].item()
    if label not in classes:
        classes[label] = 0
    classes[label] += 1

print(classes)