In [4]:
# Install required packages for the project
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
%pip install scikit-learn pandas numpy matplotlib seaborn umap-learn
%pip install captum plotly astropy tqdm
%pip install --upgrade --force-reinstall numpy pandas

# Optional: Install ztfquery for real ZTF data (requires IRSA account)
# pip install ztfquery

print("✅ All packages installed successfully!")

Looking in indexes: https://download.pytorch.org/whl/cpu
Collecting numpy
  Downloading numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.1/62.1 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pandas
  Downloading pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting python-dateutil>=2.8.2 (from pandas)
  Downloading python_dateutil-2.9.0.post0-py2.py3-none-any.whl.metadata (8.4 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting six>=1.5 (from python-dateutil>=2.8.2->pandas)
  Downloading six-1.17.0-py2.py3-none-any.whl.metadat

✅ All packages installed successfully!


In [None]:
import os
import requests
import pandas as pd
from io import StringIO
from dotenv import load_dotenv

# Load variables from a .env file (if one is found) into the process environment
load_dotenv()

# IRSA credentials; each is None when its environment variable is not set
USER = os.environ.get("IRSA_USER")
PASS = os.environ.get("IRSA_PASS")


# Step 1: Search for available ZTF images
def search_ztf_images(field=None, ra=None, dec=None, timeout=60):
    """Search the IRSA IBE service for ZTF science-image metadata.

    Args:
        field: Value for the ``field`` column filter; ``None`` disables it.
        ra: Right ascension of a positional search, in degrees (as the IBE
            ``POS`` parameter expects). Only used when ``dec`` is also given.
        dec: Declination of a positional search, in degrees. Only used when
            ``ra`` is also given.
        timeout: Seconds passed to ``requests.get`` as its timeout so a
            stalled connection raises instead of hanging the notebook.

    Returns:
        A ``pandas.DataFrame`` parsed from the CSV response on HTTP 200;
        otherwise ``None`` after printing the status code and response body.
    """
    search_url = "https://irsa.ipac.caltech.edu/ibe/search/ztf/products/sci"

    params = {'ct': 'csv'}

    # WHERE carries only SQL-style column constraints.
    # Compare against None so that a legitimate value of 0 is not dropped.
    where_clauses = []
    if field is not None:
        where_clauses.append(f"field={field}")

    if where_clauses:
        params['WHERE'] = " AND ".join(where_clauses)

    # The position is its own query parameter, not part of the WHERE clause.
    # ra=0.0 / dec=0.0 are valid coordinates, hence the explicit None checks.
    if ra is not None and dec is not None:
        params['POS'] = f"{ra},{dec}"

    # Only send credentials when both are configured; otherwise query anonymously
    # instead of transmitting the literal pair (None, None).
    auth = (USER, PASS) if USER and PASS else None

    response = requests.get(search_url, params=params, auth=auth, timeout=timeout)

    if response.status_code != 200:
        print(f"Search failed: {response.status_code}")
        print(response.text)
        return None

    # Parse CSV response
    return pd.read_csv(StringIO(response.text))

# Step 2: Download a specific file
def download_ztf_file(file_path, output_filename, timeout=60):
    """Download one ZTF science product to a local file.

    Args:
        file_path: Path of the product relative to the IBE ``sci`` data root,
            built from the metadata returned by the search. A leading ``/``
            is ignored.
        output_filename: Local path the downloaded bytes are written to.
        timeout: Seconds passed to ``requests.get`` as its timeout so a
            stalled connection raises instead of hanging the notebook.

    Returns:
        ``True`` when the server answered HTTP 200 and the body was written to
        ``output_filename``; ``False`` otherwise (the status code and the first
        200 characters of the response body are printed, and no file is
        created).
    """
    base_url = "https://irsa.ipac.caltech.edu/ibe/data/ztf/products/sci"
    # Strip a leading slash so the joined URL never contains "sci//".
    full_url = f"{base_url}/{str(file_path).lstrip('/')}"

    print(f"Downloading: {full_url}")

    # Only send credentials when both are configured.
    auth = (USER, PASS) if USER and PASS else None

    # Stream the body so a large image is never held fully in memory, and use
    # the response as a context manager so the connection is always released.
    with requests.get(full_url, auth=auth, stream=True, timeout=timeout) as response:
        if response.status_code != 200:
            print(f"Download failed: {response.status_code}")
            print(response.text[:200])
            return False

        with open(output_filename, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:  # skip empty chunks
                    f.write(chunk)

    print(f"Successfully downloaded: {output_filename}")
    return True

# Example usage: query one field and preview what came back
print("Searching for ZTF images...")
results = search_ztf_images(field=570)  # Search for field 570

# Handle the "nothing usable" case first (failed search or empty table)
if results is None or len(results) == 0:
    print("No results found")
else:
    print(f"Found {len(results)} images")
    print("\nFirst few results:")
    print(results.head())

    # Download the first image
    if 'filefracday' in results.columns:
        # The download path still has to be assembled from the metadata
        # columns; for now the first row is only shown for inspection.
        first_file = results.iloc[0]
        print("\nAttempting to download first image...")
        print(first_file)


ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject