In [24]:
import pandas as pd
import numpy as np
from datasets import load_dataset
import openai
from openai import OpenAI
import time
from tqdm import tqdm

import importlib
import sys; sys.path.append("../src")
import supernova
importlib.reload(supernova)
from supernova import SupernovaExample, get_llm_generated_answer, isolate_individual_features, distill_relevant_features, calculate_expert_alignment_score, query_openai

In [2]:
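# Class index -> class name (abbreviation).
# NOTE(review): presumably the integer encoding of the dataset's labels - confirm.
# 0 microlens-single (mu-Lens-Single)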
# 1 tidal disruption event (TDE)
# 2 eclipsing binary (EB)
# 3 type II supernova (SNII)
# 4 peculiar type Ia supernova (SNIax)
# 5 Mira variable
# 6 type Ibc supernova (SNIbc)
# 7 kilonova (KN)
# 8 M-dwarf
# 9 peculiar type Ia supernova (SNIa-91bg)
# 10 active galactic nuclei (AGN)
# 11 type Ia supernova (SNIa)
# 12 RR-Lyrae (RRL)
# 13 superluminous supernova (SLSN-I)

### Load Supernova Data

In [25]:
# Size of the evaluation subset and the seed that makes the draw repeatable.
# NOTE(review): the recorded output of the next cell shows 107 processed rows, so it
# was produced with a different subset size; re-run the notebook to refresh outputs.
N_SAMPLES = 3
SAMPLE_SEED = 11

# Load the test split and draw the subset in a single chain, so `supernova_data`
# is only ever bound to a DataFrame (never to the intermediate dataset object).
supernova_data = (
    load_dataset("BrachioLab/supernova")["test"]
    .to_pandas()
    .sample(N_SAMPLES, random_state=SAMPLE_SEED)
    .reset_index(drop=True)
)

Using the latest cached version of the dataset since BrachioLab/supernova couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at /home/runai-home/.cache/huggingface/datasets/BrachioLab___supernova/default/0.0.0/ef38ac4811eae862b65367d7f431e7c65a6e03d6 (last modified on Thu May  8 05:19:06 2025).


In [14]:
# Build one SupernovaExample per sampled row from the label/explanation pair returned
# by get_llm_generated_answer. Rows whose returned label is None are left out.
supernova_examples = []
row_iter = tqdm(supernova_data.iterrows(), total=len(supernova_data), desc="Processing Rows")
for _, row in row_iter:
    series = row["data"]
    predicted_label, rationale = get_llm_generated_answer(series)
    if predicted_label is not None:
        supernova_examples.append(
            SupernovaExample(
                file=row["filename"],
                time_series_data=series,
                ground_truth=row["label"],
                llm_label=predicted_label,
                llm_explanation=rationale,
            )
        )

Processing Rows: 100% 107/107 [02:47<00:00,  1.56s/it]


In [15]:
# Display the explanation string stored on the first example (set from the
# second return value of get_llm_generated_answer).
supernova_examples[0].llm_explanation

'The observed pattern shows periodic variations in flux across multiple wavelengths, characteristic of an eclipsing binary system. The repeated dips in flux suggest the presence of two stars passing in front of each other, causing dimming and brightening at regular intervals. The relatively stable flux levels between eclipses support this classification. The multi-wavelength coverage further indicates variable light output consistent with this kind of binary system.'

### Stage 1: Atomic claim extraction

In [16]:
# Stage 1: split each explanation into individual claims and store them,
# whitespace-trimmed, on the example.
# NOTE(review): when isolate_individual_features returns None the example's
# `claims` attribute is not touched, so it keeps whatever state it had before.
for example in supernova_examples:
    extracted = isolate_individual_features(example.llm_explanation)
    if extracted is not None:
        example.claims = [text.strip() for text in extracted]

In [18]:
# Display the list of stripped claims stored on the first example.
supernova_examples[0].claims

['The observed pattern shows periodic variations in flux.',
 'The periodic variations occur across multiple wavelengths.',
 'Periodic variations in flux are characteristic of an eclipsing binary system.',
 'The repeated dips in flux suggest two stars passing in front of each other.',
 'The passing of stars causes dimming and brightening at regular intervals.',
 'Relatively stable flux levels occur between eclipses.',
 'Stable flux levels between eclipses support the classification as an eclipsing binary.',
 'Multi-wavelength coverage indicates variable light output.',
 'Variable light output is consistent with an eclipsing binary system.']

### Stage 2: Distill relevant claims

In [19]:
# Stage 2: store on each example whatever distill_relevant_features returns
# for its time series, its LLM label and its extracted claims.
for example in supernova_examples:
    example.relevant_claims = distill_relevant_features(
        example.time_series_data,
        example.llm_label,
        example.claims,
    )

In [20]:
# Display the relevant claims stored on the first example by the distillation loop.
supernova_examples[0].relevant_claims

['The observed pattern shows periodic variations in flux.',
 'The periodic variations occur across multiple wavelengths.',
 'Periodic variations in flux are characteristic of an eclipsing binary system.',
 'The repeated dips in flux suggest two stars passing in front of each other.',
 'The passing of stars causes dimming and brightening at regular intervals.',
 'Relatively stable flux levels occur between eclipses.',
 'Stable flux levels between eclipses support the classification as an eclipsing binary.',
 'Variable light output is consistent with an eclipsing binary system.']

### Stage 3: Calculate alignment scores

In [21]:
# Stage 3: score every relevant claim of every example, then aggregate the
# per-claim scores into a single `final_alignment` value per example.
for example in supernova_examples:
    alignment_scores = []
    alignment_categories = []
    for claim in tqdm(example.relevant_claims):
        # The third return value (reasoning text) is returned but not stored.
        category, alignment_score, reasoning = calculate_expert_alignment_score(claim)
        if category is None:
            # Claims whose category comes back as None are left out of the scores.
            continue
        alignment_scores.append(alignment_score)
        alignment_categories.append(category)
    example.alignment_scores = alignment_scores
    example.alignment_categories = alignment_categories
    # np.mean([]) emits a RuntimeWarning and yields nan; make the "no claim was
    # scored" case explicit so it produces the same nan without the warning.
    example.final_alignment = np.mean(alignment_scores) if alignment_scores else np.nan

100% 8/8 [00:49<00:00,  6.24s/it]
100% 8/8 [00:31<00:00,  3.90s/it]
100% 8/8 [00:50<00:00,  6.31s/it]
100% 7/7 [00:37<00:00,  5.37s/it]
100% 6/6 [00:27<00:00,  4.55s/it]
100% 7/7 [00:33<00:00,  4.73s/it]
100% 6/6 [00:35<00:00,  6.00s/it]
100% 6/6 [00:30<00:00,  5.01s/it]
100% 7/7 [00:35<00:00,  5.04s/it]
100% 3/3 [00:20<00:00,  6.80s/it]
100% 6/6 [00:27<00:00,  4.55s/it]
100% 5/5 [00:19<00:00,  3.94s/it]
100% 8/8 [00:44<00:00,  5.61s/it]
100% 8/8 [00:28<00:00,  3.57s/it]
100% 7/7 [00:25<00:00,  3.63s/it]
100% 5/5 [00:20<00:00,  4.02s/it]
100% 5/5 [00:27<00:00,  5.44s/it]
100% 6/6 [00:24<00:00,  4.04s/it]
100% 4/4 [00:10<00:00,  2.72s/it]
100% 7/7 [00:32<00:00,  4.65s/it]
100% 6/6 [00:29<00:00,  4.88s/it]
100% 7/7 [00:34<00:00,  4.99s/it]
100% 6/6 [00:23<00:00,  3.91s/it]
100% 8/8 [00:36<00:00,  4.61s/it]
100% 6/6 [00:25<00:00,  4.18s/it]
100% 5/5 [00:09<00:00,  1.91s/it]
100% 7/7 [00:30<00:00,  4.36s/it]
100% 9/9 [00:54<00:00,  6.08s/it]
100% 5/5 [00:23<00:00,  4.68s/it]
100% 5/5 [00:1

In [22]:
# Display the per-claim alignment scores stored on the first example.
supernova_examples[0].alignment_scores

[0.9, 0.8, 1.0, 1.0, 1.0, 0.9, 0.9, 1.0]