In [1]:
import requests                         # for HTTP requests to fetch data from IRSA API
import pandas as pd                     # for handling tabular data
from astropy.coordinates import SkyCoord  # for converting RA/Dec into degrees
import astropy.units as u               # for specifying units
from io import StringIO                 # to treat text response as a file (for pandas)

# ==========================================================
# Function to query ZTF (Zwicky Transient Facility) lightcurves
# ==========================================================
def get_ztf_lightcurve(ra_deg, dec_deg, radius_arcsec=1.2, band="r", fmt="csv", timeout=120):
    """
    Query the ZTF IRSA API for lightcurves around a given RA, Dec position.

    Parameters:
        ra_deg (float)  → Right Ascension in degrees
        dec_deg (float) → Declination in degrees
        radius_arcsec   → Search radius around target (arcseconds)
        band (str)      → Photometric band to fetch ('g', 'r', 'i')
        fmt (str)       → Response format requested from the service (default: CSV).
                          NOTE: the response body is always parsed with
                          pandas.read_csv, so only a CSV response is decoded
                          meaningfully.
        timeout         → Seconds to wait for the server before giving up
                          (passed straight to requests.get). Pass None to wait
                          indefinitely, which was the previous behaviour.

    Returns:
        pandas.DataFrame parsed from the response text; lines starting with
        '#' are skipped as comments.

    Raises:
        requests.HTTPError           → the server answered with an error status
        requests.exceptions.Timeout  → no answer within `timeout` seconds
    """
    base_url = "https://irsa.ipac.caltech.edu/cgi-bin/ZTF/nph_light_curves"

    # Convert radius from arcsec → degrees (the POS string below carries degrees)
    radius_deg = radius_arcsec / 3600.0

    # API query parameters
    params = {
        "POS": f"CIRCLE {ra_deg} {dec_deg} {radius_deg}",
        "BANDNAME": band,
        "FORMAT": fmt.lower()
    }

    # Fetch data. Without a timeout a single stalled connection would block
    # the caller forever, so always bound the wait.
    response = requests.get(base_url, params=params, timeout=timeout)
    response.raise_for_status()

    # Return as pandas DataFrame
    return pd.read_csv(StringIO(response.text), comment="#")

# ==========================================================
# Step 1: Load catalogue of objects
# ==========================================================
# Parsed with pandas' default settings (comma-separated, first line is the header).
# NOTE(review): the fetch loop reads the columns "Name", "RA(J2000)" and
# "DEC(J2000)" — confirm the file header matches these names exactly.
catalogue = pd.read_csv(filepath_or_buffer="magnetic_cataclysmic_variables.txt")

# One DataFrame per successfully queried object gets appended here.
all_lightcurves = list()

# ==========================================================
# Step 2: Loop through catalogue and fetch lightcurves
# ==========================================================
for _, row in catalogue.iterrows():   # visits every catalogue entry
    name = row["Name"]
    ra_str = row["RA(J2000)"]
    dec_str = row["DEC(J2000)"]

    try:
        # RA is interpreted as an hour angle, Dec as degrees; both end up in decimal degrees
        coord = SkyCoord(ra_str, dec_str, unit=(u.hourangle, u.deg))
        ra_deg = coord.ra.deg
        dec_deg = coord.dec.deg

        print(f"Querying ZTF for {name} at RA={ra_deg:.4f}, Dec={dec_deg:.4f}")

        # r-band lightcurve within 1.2 arcsec of the target
        df = get_ztf_lightcurve(ra_deg, dec_deg, radius_arcsec=1.2, band="r")

        # Nothing came back: report it and move on to the next object
        if df.empty:
            print(f"  ⚠ No data returned for {name}")
            continue

        # Tag every row with the catalogue name so results can be grouped later
        df["object"] = name
        all_lightcurves.append(df)
        print(f"  ✅ Got {len(df)} points for {name}")

    except Exception as exc:
        # Best effort: one bad coordinate or failed request must not stop the run
        print(f"  ❌ Failed for {name}: {exc}")

# ==========================================================
# Step 3: Save raw merged results
# ==========================================================
if all_lightcurves:
    # Stack the per-object frames into one long table and persist it
    merged = pd.concat(all_lightcurves, ignore_index=True)
    merged.to_csv("lightcurves_fetched_from_CV_catalogue.csv", index=False)

    # Report the number of objects that actually contributed data; the
    # catalogue also contains objects that returned nothing or failed.
    print(f"\n✅ Saved r-band lightcurves for {len(all_lightcurves)} of "
          f"{len(catalogue)} stars ({len(merged)} total points)")

    # ==========================================================
    # Step 4: Group by object into compact form (CSV-friendly)
    # ==========================================================
    # The table is read back from the file just written rather than reusing
    # `merged` (presumably so values carry the dtypes a CSV reader infers —
    # TODO confirm this round-trip is intentional).
    data = pd.read_csv("lightcurves_fetched_from_CV_catalogue.csv")

    # One row per object, every other column collected into a list;
    # sort=False keeps objects in order of first appearance.
    grouped = data.groupby("object", sort=False, as_index=False).agg(list)

    # Convert lists → single-line strings (joined by ;)
    for col in grouped.columns:
        if col != "object":
            grouped[col] = grouped[col].apply(lambda x: ";".join(map(str, x)))

    grouped.to_csv("lightcurves_grouped_r_clean.csv", index=False)
    print("✅ Grouped output saved to 'lightcurves_grouped_r_clean.csv'")

else:
    print("\n❌ No lightcurves fetched.")



Querying ZTF for V479 And at RA=4.7371, Dec=34.9122
  ✅ Got 2112 points for V479 And
Querying ZTF for V1309 Ori at RA=78.9225, Dec=1.0778
  ✅ Got 852 points for V1309 Ori
Querying ZTF for AI Tri at RA=30.9525, Dec=29.9906
  ✅ Got 1009 points for AI Tri
Querying ZTF for J0649-0737 at RA=102.4617, Dec=-7.6281
  ⚠ No data returned for J0649-0737
Querying ZTF for MQ Dra at RA=238.3796, Dec=55.2708
  ✅ Got 1387 points for MQ Dra
Querying ZTF for J2048+0050 at RA=312.1162, Dec=0.8358
  ✅ Got 476 points for J2048+0050
Querying ZTF for 0922+1333 at RA=141.2333, Dec=13.3481
  ⚠ No data returned for 0922+1333
Querying ZTF for VY For at RA=53.0192, Dec=-25.9489
  ✅ Got 175 points for VY For
Querying ZTF for J0227+1306 at RA=36.8871, Dec=13.1047
  ✅ Got 708 points for J0227+1306
Querying ZTF for J1740-2418 at RA=265.2037, Dec=-24.3094
  ⚠ No data returned for J1740-2418
Querying ZTF for J1424-0227 at RA=216.1625, Dec=-2.4608
  ⚠ No data returned for J1424-0227
Querying ZTF for QQ Vul at RA=301.424



  ✅ Got 504 points for J1446+0253
Querying ZTF for V2467 Cyg at RA=307.0521, Dec=41.8100
  ✅ Got 1120 points for V2467 Cyg
Querying ZTF for J1654-1916 at RA=253.6821, Dec=-19.2753
  ✅ Got 744 points for J1654-1916
Querying ZTF for V373 Sct at RA=283.8612, Dec=-7.7183
  ⚠ No data returned for V373 Sct
Querying ZTF for LD 317 at RA=356.2392, Dec=43.5228
  ⚠ No data returned for LD 317
Querying ZTF for AO Psc at RA=343.8250, Dec=-3.1778
  ✅ Got 576 points for AO Psc
Querying ZTF for VZ Sex at RA=146.1321, Dec=3.9681
  ✅ Got 699 points for VZ Sex
Querying ZTF for V647 Aur at RA=99.1358, Dec=35.5953
  ✅ Got 1254 points for V647 Aur
Querying ZTF for J1926+1322 at RA=291.6125, Dec=13.3681
  ✅ Got 1396 points for J1926+1322
Querying ZTF for J2014+1529 at RA=303.6037, Dec=15.4917
  ✅ Got 1419 points for J2014+1529
Querying ZTF for LS Cam at RA=89.3500, Dec=72.6978
  ✅ Got 1061 points for LS Cam
Querying ZTF for V2400 Oph at RA=258.1517, Dec=-24.2458
  ⚠ No data returned for V2400 Oph
Querying Z

In [2]:
#for 26 feature extractions 
import pandas as pd
import light_curve as lc
import numpy as np

# Read the per-object grouped table; every column except "object" holds
# semicolon-joined strings as written by the grouping step.
df = pd.read_csv(filepath_or_buffer="lightcurves_grouped_r_clean.csv")

# Quick sanity check: dimensions plus the first five rows
print("✅ DataFrame loaded with shape:", df.shape)
print(df.head(n=5))

✅ DataFrame loaded with shape: (140, 25)
       object                                                oid  \
0    V479 And  648209100001736;648209100001736;64820910000173...   
1   V1309 Ori  457201400006865;457201400006865;45720140000686...   
2      AI Tri  652203400004823;652203400004823;65220340000482...   
3      MQ Dra  794210400014259;794210400014259;79421040001425...   
4  J2048+0050  440215100004767;440215100004767;44021510000476...   

                                               expid  \
0  51444796;51744365;51943052;52046682;52047856;5...   
1  58448195;59047807;59648598;59944752;60244373;6...   
2  52848159;53446642;54048247;54342804;54642347;5...   
3  44436270;44438748;44440531;44442988;44447700;4...   
4  49347447;50046093;50341848;50346002;52046075;5...   

                                                 hjd  \
0  2458268.9448206285;2458271.9407102177;2458273....   
1  2458338.979216509;2458344.9758162564;2458350.9...   
2  2458282.9776469944;2458288.9628736307;2458

In [3]:
# Function to parse semicolon-separated arrays
def parse_array(x):
    """
    Turn a semicolon-separated value into a NumPy array of floats.

    `x` is first converted with str(), then split on ';'. Empty tokens
    (e.g. from ';;' or a trailing ';') are skipped; every remaining token is
    converted with float(), so a token such as 'nan' becomes NaN and a
    non-numeric token raises ValueError.
    """
    values = []
    for token in str(x).split(";"):
        if token == "":
            continue
        values.append(float(token))
    return np.array(values)


In [4]:
# Feature evaluators, listed in the order their outputs appear in the result.
# NOTE(review): presumably this yields 26 values in total (the output file is
# named "..._26"; LinearTrend and LinearFit each contribute several values) —
# confirm against extractor.names.
_feature_evaluators = [
    # location and spread
    lc.Mean(),
    lc.WeightedMean(),
    lc.StandardDeviation(),
    lc.Median(),
    lc.Amplitude(),
    lc.BeyondNStd(nstd=1),
    lc.Cusum(),
    lc.InterPercentileRange(0.10),
    lc.Kurtosis(),
    # trends
    lc.LinearTrend(),
    lc.LinearFit(),   # slope, slope_sigma, reduced_chi2
    # percentile-based ratios
    lc.MagnitudePercentageRatio(0.4, .05),
    lc.MagnitudePercentageRatio(0.2, 0.1),
    lc.MaximumSlope(),
    lc.MedianAbsoluteDeviation(),
    lc.MedianBufferRangePercentage(0.10),
    lc.PercentAmplitude(),
    lc.MeanVariance(),
    # distribution shape and variability indices
    lc.AndersonDarlingNormal(),
    lc.ReducedChi2(),
    lc.Skew(),
    lc.StetsonK(),
]

# Single combined extractor built from all evaluators above
extractor = lc.Extractor(*_feature_evaluators)


In [8]:
# Collect results: one row of features per object in the grouped table
results = []
for _, row in df.iterrows():
    try:
        t = parse_array(row["hjd"])
        m = parse_array(row["mag"])
        err = parse_array(row["magerr"])

        # The extractor is called with check=False, so it will not reject NaNs
        # itself; drop any sample with a non-finite time, magnitude or error.
        # (Arrays of different length raise here and the row is skipped below.)
        finite = np.isfinite(t) & np.isfinite(m) & np.isfinite(err)
        t, m, err = t[finite], m[finite], err[finite]

        # The grouped arrays keep the order in which rows were fetched, which
        # is not guaranteed to be chronological (e.g. several oids concatenated
        # for one object). sorted=True is only truthful after an explicit sort.
        order = np.argsort(t, kind="stable")
        t, m, err = t[order], m[order], err[order]

        feats = extractor(t, m, err, sorted=True, check=False)
        # NOTE(review): row["oid"] is the full semicolon-joined string of
        # per-epoch ids, not a single identifier — confirm this is the intended
        # key (row["object"] holds the catalogue name).
        results.append([row["oid"]] + list(feats))
    except Exception as e:
        # Best effort: report the problem and continue with the next object
        print(f"⚠️ Skipping {row['oid']} due to error: {e}")

# Create DataFrame with proper headers
features_df = pd.DataFrame(results, columns=["oid"] + extractor.names)

# Save to CSV (to_excel / to_parquet can be used the same way if needed)
features_df.to_csv("lightcurve_features_26.csv", index=False)
print("✅ Feature extraction complete! Saved to:")
print("   - lightcurve_features_26.csv")


⚠️ Skipping 440202300015150;440202300015150;440202300015150 due to error: time-series' length 3 is smaller than the minimum required length 4
✅ Feature extraction complete! Saved to:
   - lightcurve_features_26.csv
