In [None]:
import pandas as pd
import lightkurve as lk
import os
import warnings
from concurrent.futures import ThreadPoolExecutor, as_completed

# Suppress noisy warnings (blanket filter: this hides *all* Python warnings
# for the rest of the session, not only the ones raised during downloads).
warnings.filterwarnings("ignore")

# ================================
# Config
# ================================
INPUT_FILE = "tess_toi_full.csv"      # CSV that must contain a "tid" column
OUTPUT_DIR = "tess_lightcurves"       # one CSV per saved light curve goes here
os.makedirs(OUTPUT_DIR, exist_ok=True)

MAX_WORKERS = 4   # number of parallel downloads
LIMIT = 20        # for testing (increase later)

# Authors to try, in order of preference
AUTHORS = ["SPOC", "QLP", "TASOC"]

# ================================
# Load TIC IDs
# ================================
df = pd.read_csv(INPUT_FILE)

# Convert through int64 before stringifying. If pandas parsed "tid" as float
# (which happens as soon as one row is blank), a plain astype(str) would give
# "106997505.0" and the search target "TIC 106997505.0" would be invalid.
# Values that are not numeric at all are coerced to NaN and dropped.
tic_ids = (
    pd.to_numeric(df["tid"], errors="coerce")
    .dropna()
    .astype("int64")
    .astype(str)
    .unique()
)
tic_ids = tic_ids[:LIMIT]
print(f"Downloading {len(tic_ids)} TIC IDs (LIMIT={LIMIT}).")

# ================================
# Download Function
# ================================
def download_tic(tic_id: str) -> str:
    """Download, normalize and save the light curves of one TIC target.

    The authors in ``AUTHORS`` are queried in order; the first one whose
    search returns any result is downloaded in full. Each light curve has
    its NaNs removed, is normalized, and is written to ``OUTPUT_DIR`` as a
    CSV with the columns ``time``, ``flux`` and (when present) ``flux_err``.

    Args:
        tic_id: TIC identifier without the ``"TIC "`` prefix.

    Returns:
        A one-line status message starting with ``"TIC <id>: "``. The
        function never raises: any exception is caught and reported as
        ``FAILED (...)`` so that one bad target cannot abort the batch.
    """
    target = f"TIC {tic_id}"
    try:
        lc_collection = None
        used_author = None

        # Try each author until one returns results
        for author in AUTHORS:
            search_result = lk.search_lightcurve(target, mission="TESS", author=author)
            if len(search_result) > 0:
                lc_collection = search_result.download_all()
                used_author = author
                break

        if lc_collection is None or len(lc_collection) == 0:
            return f"{target}: No light curves found (all authors tried)."

        # Paths written during this call, used to avoid overwriting a file when
        # the collection holds several light curves for the same sector.
        written = set()

        # Save all available sectors
        for lc in lc_collection:
            lc = lc.remove_nans().normalize()
            df_lc = lc.to_pandas()

            # to_pandas() carries the time values in the index rather than in
            # a column, so selecting "time" below would raise
            # KeyError("['time'] not in index"). Promote the index to a
            # "time" column unless one already exists.
            if "time" not in df_lc.columns:
                df_lc = df_lc.rename_axis("time").reset_index()

            if "flux" not in df_lc.columns:
                continue

            cols = ["time", "flux"]
            if "flux_err" in df_lc.columns:
                cols.append("flux_err")

            df_lc = df_lc[cols]
            if df_lc.empty:
                continue

            sector = getattr(lc, "sector", "unknown")
            stem = f"TIC{tic_id}_sector{sector}_{used_author}"
            csv_path = os.path.join(OUTPUT_DIR, f"{stem}.csv")

            # The first file for a sector keeps the plain name; later ones
            # for the same sector get a numeric suffix instead of silently
            # replacing it.
            duplicate = 1
            while csv_path in written:
                duplicate += 1
                csv_path = os.path.join(OUTPUT_DIR, f"{stem}_{duplicate}.csv")

            df_lc.to_csv(csv_path, index=False)
            written.add(csv_path)

        if not written:
            return f"{target}: No usable flux data (author={used_author})"

        # Report what was actually written, not what was downloaded.
        return f"{target}: OK ({len(written)} sectors, author={used_author})"

    except Exception as e:
        # Boundary of the worker thread: turn every failure (network errors,
        # unreadable cached FITS files, ...) into a log line.
        return f"{target}: FAILED ({e})"

# ================================
# Parallel Execution
# ================================
# Status messages, collected in completion order (not input order).
results = []
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
    futures = [executor.submit(download_tic, tic) for tic in tic_ids]
    total = len(futures)
    for i, future in enumerate(as_completed(futures), 1):
        result = future.result()
        print(f"[{i}/{total}] {result}")
        results.append(result)

# ================================
# Save Log
# ================================
log_file = os.path.join(OUTPUT_DIR, "download_log.txt")
# Explicit UTF-8: the messages embed arbitrary exception text, and with the
# platform default encoding a character outside the locale code page would
# make the write fail after all downloads have finished.
with open(log_file, "w", encoding="utf-8") as log:
    log.writelines(f"{line}\n" for line in results)

print(f"✅ Finished. Log saved to {log_file}")
    



Downloading 20 TIC IDs (LIMIT=20).
[1/20] TIC 106997505: FAILED ("['time'] not in index")
[2/20] TIC 124709665: FAILED ("['time'] not in index")
[3/20] TIC 88863718: FAILED ("['time'] not in index")
[6/20] TIC 169904935: FAILED ("['time'] not in index")
[9/20] TIC 65212867: FAILED (Error in reading Data product C:\Users\Tilak Devi\.lightkurve\cache\mastDownload\TESS\tess2021014023720-s0034-0000000065212867-0204-s\tess2021014023720-s0034-0000000065212867-0204-s_lc.fits of type TessLightCurve .
This file may be corrupt due to an interrupted download. Please remove it from your disk and try again.)
