In [1]:
import pandas as pd
import numpy as np
from datasets import load_dataset
import openai
from openai import OpenAI
import time
from tqdm import tqdm

import importlib
# Add ../src to the module search path so `import supernova` below can resolve
# (presumably that is where the project's supernova module lives -- confirm).
# The path is relative, so it is interpreted against the kernel's working directory.
import sys; sys.path.append("../src")
import supernova
# Re-execute the module so that edits made to its source since the first import
# are picked up without restarting the kernel.
importlib.reload(supernova)
# Import the names only after the reload so they bind to the reloaded module's objects.
from supernova import SupernovaExample, get_llm_generated_answer, isolate_individual_features, distill_relevant_features, calculate_expert_alignment_score, parse_measurement_string, format_time_series_for_prompt, query_openai

### Load Supernova Data

In [2]:
# Fetch the "test" split of BrachioLab/supernova as a DataFrame and keep a
# fixed 3-row sample (random_state=11), re-indexed from 0.
supernova_data = (
    load_dataset("BrachioLab/supernova")["test"]
    .to_pandas()
    .sample(3, random_state=11)
    .reset_index(drop=True)
)

In [3]:
# Build one SupernovaExample per sampled row: parse the serialized time series,
# ask the LLM for a label plus explanation, and keep only the rows for which a
# label came back (rows whose label is None are dropped).
supernova_examples = []
for row_index, record in tqdm(
    supernova_data.iterrows(),
    total=supernova_data.shape[0],
    desc="Processing Rows",
):
    raw_series = record["data"]
    parsed_series = parse_measurement_string(raw_series)
    llm_label, explanation = get_llm_generated_answer(parsed_series)
    if llm_label is not None:
        supernova_examples.append(
            SupernovaExample(
                time_series_text=raw_series,
                time_series_data=parsed_series,
                ground_truth=record["label"],
                llm_label=llm_label,
                llm_explanation=explanation,
            )
        )

Processing Rows: 100% 3/3 [00:09<00:00,  3.01s/it]


In [4]:
# Show the explanation text the LLM returned for the first collected example.
supernova_examples[0].llm_explanation

'The time series data with recordings over a broad interval from 8590.9 to 59726.4248 suggests variability over significant timescales, characteristic of AGN which often show such long-term variability due to their dynamic central regions. Additionally, the fluctuation between positive and negative values is indicative of the complex processes in AGN rather than more transient or singular events like supernovae.'

### Stage 1: Atomic claim extraction

In [5]:
# Stage 1: break each LLM explanation into individual claims and store them,
# whitespace-stripped, on the example. Examples for which the extractor
# returns None are left untouched.
for example in supernova_examples:
    extracted = isolate_individual_features(example.llm_explanation)
    if extracted is not None:
        example.claims = [text.strip() for text in extracted]

In [6]:
# Show the stripped claims that Stage 1 stored on the first example.
supernova_examples[0].claims

['The time series data includes recordings over a broad interval from 8590.9 to 59726.4248.',
 'The data suggests variability over significant timescales.',
 'Long-term variability is characteristic of AGN.',
 'AGN often show long-term variability due to their dynamic central regions.',
 'The fluctuation between positive and negative values is observed in the data.',
 'Fluctuation between positive and negative values is indicative of complex processes in AGN.',
 'Complex processes in AGN differ from more transient or singular events like supernovae.']

### Stage 2: Distill relevant claims

In [7]:
# Stage 2: store on each example whatever distill_relevant_features returns for it.
for example in supernova_examples:
    example.relevant_claims = distill_relevant_features(example)

100% 7/7 [00:26<00:00,  3.73s/it]
100% 7/7 [00:20<00:00,  2.99s/it]
100% 5/5 [00:17<00:00,  3.59s/it]


In [8]:
# Show the claims Stage 2 stored as relevant for the first example.
supernova_examples[0].relevant_claims

['The data suggests variability over significant timescales.',
 'Long-term variability is characteristic of AGN.',
 'AGN often show long-term variability due to their dynamic central regions.',
 'The fluctuation between positive and negative values is observed in the data.',
 'Fluctuation between positive and negative values is indicative of complex processes in AGN.']

### Stage 3: Calculate alignment scores

In [9]:
# Stage 3: score each relevant claim with calculate_expert_alignment_score and
# aggregate the per-claim scores into a single mean per example.
#
# Results written to every example:
#   alignment_scores      -- score of each claim that was successfully scored
#   alignment_categories  -- category returned for each of those claims (same order)
#   final_alignment       -- mean of alignment_scores, or nan if nothing was scored
#
# NOTE: claims whose category comes back as None are skipped, so the two lists
# can be shorter than example.relevant_claims and are then no longer
# position-aligned with it.
for example in supernova_examples:
    alignment_scores = []
    alignment_categories = []
    reasonings = []
    # `or []` keeps the loop from crashing if relevant_claims is None for an
    # example (whether Stage 2 can produce None is not visible here).
    for claim in tqdm(example.relevant_claims or []):
        category, alignment_score, reasoning = calculate_expert_alignment_score(claim)
        if category is None:
            # Presumably the scorer could not produce a result for this claim; skip it.
            continue
        alignment_scores.append(alignment_score)
        alignment_categories.append(category)
        # Previously this list was created but never filled, so the reasoning
        # text was silently discarded; keep it in step with the scores.
        reasonings.append(reasoning)
    example.alignment_scores = alignment_scores
    example.alignment_categories = alignment_categories
    # np.mean([]) yields nan *and* emits a "Mean of empty slice" RuntimeWarning;
    # return nan explicitly when no claim was scored.
    example.final_alignment = np.mean(alignment_scores) if alignment_scores else np.nan

100% 5/5 [00:19<00:00,  3.88s/it]
100% 1/1 [00:03<00:00,  3.68s/it]
100% 5/5 [00:16<00:00,  3.37s/it]


In [10]:
# Show the per-claim alignment scores for the first example
# (one entry per claim that Stage 3 did not skip).
supernova_examples[0].alignment_scores

[0.7, 1.0, 0.9, 0.5, 0.5]

In [11]:
# Show the category returned for each scored claim of the first example;
# entries are appended together with the scores, so both lists share one order.
supernova_examples[0].alignment_categories

['Total event duration, measured from first detection to return to baseline, distinguishes short‑lived kilonovae and superluminous SNe from longer plateau or AGN variability phases.',
 'Total event duration, measured from first detection to return to baseline, distinguishes short‑lived kilonovae and superluminous SNe from longer plateau or AGN variability phases.',
 'Total event duration, measured from first detection to return to baseline, distinguishes short‑lived kilonovae and superluminous SNe from longer plateau or AGN variability phases.',
 'Periodic light curves with stable periods and distinctive Fourier amplitude‑ and phase‑ratios (e.g., φ21, φ31) flag pulsators and eclipsing binaries rather than one‑off transients.',
 'Periodic light curves with stable periods and distinctive Fourier amplitude‑ and phase‑ratios (e.g., φ21, φ31) flag pulsators and eclipsing binaries rather than one‑off transients.']

In [15]:
# Show the first example's relevant claims again, after scoring.
# NOTE: Stage 3 skips claims whose category is None, so this list lines up
# position-by-position with alignment_scores only when no claim was skipped.
supernova_examples[0].relevant_claims

['The data suggests variability over significant timescales.',
 'Long-term variability is characteristic of AGN.',
 'AGN often show long-term variability due to their dynamic central regions.',
 'The fluctuation between positive and negative values is observed in the data.',
 'Fluctuation between positive and negative values is indicative of complex processes in AGN.']