## SoLEXS Light Curve Processing Pipeline

Author - Prakhar Singh

Affiliation: Aryabhatta Research Institute of Observational Sciences (ARIES), Nainital 

Email: prakhar@aries.res.in 

---
##### Purpose - This notebook provides a fully automated pipeline to process SoLEXS light curve data from multiple datasets.

**Overview:**  
- All SoLEXS datasets (ZIP files) are stored in a single folder, with each ZIP file corresponding to one day's observations.
- Extracts ZIPs if needed  
- Finds `.lc.gz` files  
- Plots each light curve (counts on a log scale against UTC time)  
- Saves all plots in `SoLEXS_Lightcurve_plots` as `DD-MM-YYYY_SDDx_SoLEXS.png`  


In [None]:
import os
import glob
import gzip
import zipfile
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from datetime import datetime, timezone
from astropy.io import fits

# --- Base directory where all SoLEXS data ZIPs are stored ---
# Edit this path to point at the folder holding the downloaded ZIP archives;
# it is passed to process_solexs_data() at the end of this cell.
base_dir = "/Users/prakhar/HelioWork/solexs_pipleine/"

# --- Folder to save all plots ---
# Created inside base_dir when this cell runs (no error if it already exists)
# and used as the default output folder of plot_lightcurve().
all_plots_dir = os.path.join(base_dir, "SoLEXS_Lightcurve_plots")
os.makedirs(all_plots_dir, exist_ok=True)


def unzip_file(zip_path):
    """Extract a ZIP archive into a folder named after the archive.

    The target folder is ``zip_path`` with its trailing ``.zip`` extension
    removed. Extraction is skipped when that folder already exists, so
    re-running the pipeline does not unpack the same archive twice.

    Args:
        zip_path: Path to the ZIP archive.

    Returns:
        Path of the folder that holds (or already held) the extracted files.
        If ``zip_path`` has no ``.zip`` extension it is returned unchanged
        and nothing is extracted.
    """
    # Strip only the final extension. str.replace(".zip", "") would also
    # remove ".zip" from parent folder names (e.g. ".../raw.zip.d/day.zip")
    # and would miss an upper-case ".ZIP" extension altogether.
    root, ext = os.path.splitext(zip_path)
    extract_dir = root if ext.lower() == ".zip" else zip_path

    if not os.path.exists(extract_dir):
        with zipfile.ZipFile(zip_path, 'r') as zf:
            zf.extractall(extract_dir)
        print(f"✔ Extracted: {os.path.basename(zip_path)}")
    else:
        print(f"→ Already extracted: {os.path.basename(zip_path)}")
    return extract_dir


def plot_lightcurve(lc_filename, save_dir=all_plots_dir):
    """Plot one gzipped SoLEXS light-curve FITS file and save it as a PNG.

    Reads the ``TIME`` and ``COUNTS`` columns and the ``DATE-OBS`` header
    keyword from HDU 1, plots counts on a logarithmic axis against UTC time,
    and writes ``<DD-MM-YYYY>_SDD<id>_SoLEXS.png`` into ``save_dir``.

    Args:
        lc_filename: Path to a ``.lc.gz`` file. The detector id is taken
            from the text between ``_SDD`` and the next ``_`` in the file's
            base name.
        save_dir: Folder the PNG is written to. Defaults to the notebook's
            shared plot folder.

    Raises:
        IndexError: If the base name of ``lc_filename`` contains no
            ``_SDD`` marker.
    """
    with gzip.open(lc_filename, 'rb') as f:
        with fits.open(f) as lc_hdul:
            table = lc_hdul[1]
            times_unix = table.data['TIME']
            counts = table.data['COUNTS']
            obs_date = datetime.fromisoformat(table.header['DATE-OBS'].strip())

            # Resolve the output name before any figure exists, so that a
            # malformed file name fails fast without leaving a figure open.
            sdd = os.path.basename(lc_filename).split("_SDD")[1].split("_")[0]
            date_str = obs_date.strftime('%d-%m-%Y')
            outname = os.path.join(save_dir, f"{date_str}_SDD{sdd}_SoLEXS.png")

            # TIME values are interpreted as UNIX epoch seconds.
            times_utc = [datetime.fromtimestamp(t, tz=timezone.utc) for t in times_unix]

            fig, ax = plt.subplots(figsize=(12, 6))
            try:
                ax.plot(times_utc, counts, label="SoLEXS Light Curve")
                ax.set_yscale('log')
                ax.set_xlabel('Time [UTC]', fontsize=12)
                ax.set_ylabel('Counts', fontsize=12)
                ax.tick_params(axis="both", which="major", labelsize=10)
                ax.set_title(f"{obs_date.strftime('%d %B %Y')}", fontsize=12, weight="bold")
                ax.legend(frameon=False, fontsize=10)
                ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M', tz=timezone.utc))

                # Act on this figure explicitly rather than on pyplot's
                # implicit "current figure".
                fig.tight_layout()
                fig.savefig(outname, dpi=300)
            finally:
                # Always release the figure, even if plotting or saving
                # fails; otherwise figures pile up across a batch run.
                plt.close(fig)
            

def process_solexs_data(base_dir):
    """Extract every ZIP archive in ``base_dir`` and plot its light curves.

    Each ``*.zip`` file directly inside ``base_dir`` is extracted (if not
    already), searched recursively for ``*.lc.gz`` files, and every match is
    passed to ``plot_lightcurve``. Archives and light-curve files are
    handled in sorted order so repeated runs behave identically.

    Args:
        base_dir: Folder containing the SoLEXS ZIP archives.

    Returns:
        None. Progress is reported on standard output.
    """
    # glob.escape keeps characters such as "[" or "*" in the directory name
    # from being interpreted as wildcard syntax.
    zip_files = sorted(glob.glob(os.path.join(glob.escape(base_dir), "*.zip")))
    if not zip_files:
        print("⚠ No ZIP files found. Exiting.")
        return

    print(f"Processing {len(zip_files)} ZIP file(s)...")
    for zf in zip_files:
        extracted_dir = unzip_file(zf)
        pattern = os.path.join(glob.escape(extracted_dir), "**/*.lc.gz")
        lc_files = sorted(glob.glob(pattern, recursive=True))
        if not lc_files:
            # Make an archive without light curves visible instead of
            # skipping it silently.
            print(f"⚠ No .lc.gz files found in: {os.path.basename(zf)}")
        for lc_file in lc_files:
            plot_lightcurve(lc_file)

    print(" All datasets processed successfully!")

# Run the pipeline on every ZIP archive found in base_dir.
process_solexs_data(base_dir)


Processing 3 ZIP file(s)...
→ Already extracted: AL1_SLX_L1_20240930_v1.1.zip
→ Already extracted: AL1_SLX_L1_20240912_v1.1.zip
→ Already extracted: AL1_SLX_L1_20241001_v1.0.zip
 All datasets processed successfully!
