In [1]:
import torch
from datasets import load_dataset
from informer_models import InformerConfig, InformerForSequenceClassification
from connect_later.dataset_preprocess_raw import create_train_dataloader_raw, create_test_dataloader_raw



In [2]:
# Load the "BrachioLab/supernova-timeseries" dataset. The bare expression on
# the last line makes the notebook render the returned object, listing each
# split with its features and row count.
dataset = load_dataset(path="BrachioLab/supernova-timeseries")
dataset

DatasetDict({
    train: Dataset({
        features: ['objid', 'times_wv', 'target', 'label', 'redshift'],
        num_rows: 6274
    })
    validation: Dataset({
        features: ['objid', 'times_wv', 'target', 'label', 'redshift'],
        num_rows: 782
    })
    test: Dataset({
        features: ['objid', 'times_wv', 'target', 'label', 'redshift'],
        num_rows: 792
    })
})

In [3]:
# Pull the three splits out of the loaded dataset, in train / validation / test order.
train_dataset, validation_dataset, test_dataset = (
    dataset[split_name] for split_name in ("train", "validation", "test")
)

In [4]:
# Instantiate the sequence classifier from the
# "BrachioLab/supernova-classification" pretrained checkpoint.
model = InformerForSequenceClassification.from_pretrained(
    "BrachioLab/supernova-classification"
)

num labels: 14
Using Fourier PE
classifier dropout: 0.2


In [5]:
# Load the configuration published under the same checkpoint name as the
# classifier, then build a dataloader over the test split with it.
config = InformerConfig.from_pretrained("BrachioLab/supernova-classification")
test_dataloader = create_test_dataloader_raw(
    dataset=test_dataset,
    config=config,
    # presumably makes each batch carry the labels used for scoring — TODO confirm
    compute_loss=True,
    batch_size=256,
)

original dataset size: 792
remove nans dataset size: 792


In [6]:
# Evaluate the classifier on the test dataloader and print top-1 accuracy.
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model.to(device)
model.eval()  # evaluation mode for inference
y_true = []
y_pred = []
for i, batch in enumerate(test_dataloader):
    print(f"processing batch {i}")
    # "objid" is excluded from the model inputs; every other entry is moved
    # to the same device as the model.
    batch = {k: v.to(device) for k, v in batch.items() if k != "objid"}
    with torch.no_grad():  # no gradients are needed for scoring
        outputs = model(**batch)
    y_true.extend(batch['labels'].cpu().numpy())
    # argmax over dim=2 picks the highest-scoring class index.
    # assumes logits are (batch, 1, num_labels) — TODO confirm.
    # reshape(-1) is used instead of a bare squeeze(): squeeze() with no dim
    # also drops the batch axis when a batch holds a single sample, which
    # yields a 0-d array that list.extend cannot iterate over.
    batch_pred = torch.argmax(outputs.logits, dim=2).reshape(-1)
    y_pred.extend(batch_pred.cpu().numpy())
print(f"accuracy: {sum(1 for t, p in zip(y_true, y_pred) if t == p) / len(y_true)}")

processing batch 0
processing batch 1
processing batch 2
processing batch 3
accuracy: 0.8017676767676768
