In [1]:
# Install astroquery if needed
!pip install astroquery astropy --quiet

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.1/11.1 MB[0m [31m67.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m44.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [6]:
from astroquery.mast import Observations
from astropy.io import fits
import pandas as pd
import numpy as np
import os

In [7]:
# Step 1: Search for a JWST exoplanet target
print("\n🔍 Searching JWST observations for JWST...")
obs = Observations.query_criteria(
    obs_collection="JWST",
    dataproduct_type="spectrum"
)
print(f"Found {len(obs)} observations")


🔍 Searching JWST observations for JWST...
Found 347237 observations


In [8]:
# Step 2: Choose the first observation and list products
if len(obs) > 0:
    print("\n📦 Fetching products for first observation...")
    products = Observations.get_product_list(obs[0])
    print(products[['productFilename', 'productType', 'size']][:5])
else:
    print("\n⚠️ No observations found to fetch products.")


📦 Fetching products for first observation...
                 productFilename                   productType   size  
-------------------------------------------------- ----------- --------
           jw01571081001_02201_00004_nis_uncal.jpg     PREVIEW   857777
          jw01571081001_02201_00004_nis_uncal.fits     SCIENCE 58766400
    jw01571081001_02201_00004_nis_trapsfilled.fits   AUXILIARY 50362560
jw01571-c1001_20250605t213559_spec2_00021_asn.json        INFO     1972
                  jw01571_20250605t213559_pool.csv        INFO   498392


In [23]:
from astroquery.mast import Observations

# Filter for valid FITS products only
fits_products = Observations.filter_products(products, productSubGroupDescription="UNCAL", extension='fits')

# Sanity check
print(f"🔍 Found {len(fits_products)} FITS files")

# ✅ Ensure all required fields are present
required_columns = ['obs_id', 'dataURI']
if all(col in fits_products.colnames for col in required_columns):
    try:
        # Safely download only the first file
        manifest = Observations.download_products(fits_products[:1], mrp_only=False)
    except Exception as e:
        print(f"❌ Download failed: {e}")
else:
    print("❌ Required fields not present in the product list.")


🔍 Found 12 FITS files
Downloading URL https://mast.stsci.edu/api/v0.1/Download/file?uri=mast:JWST/product/jw01571081001_02201_00004_nis_uncal.fits to ./mastDownload/JWST/jw01571081001_02201_00004_nis/jw01571081001_02201_00004_nis_uncal.fits ... [Done]


In [24]:
# Step 3: Download one spectrum product (usually a FITS file)
print("\n⬇️ Downloading product...")
# Convert products to a pandas DataFrame for easier filtering
products_df = products.to_pandas()

# Find the first FITS product and download it
fits_products = products_df[products_df['productFilename'].str.lower().str.endswith('.fits')]

# Display fits_products and its data types for debugging
print("\n🔍 Inspecting fits_products DataFrame:")
display(fits_products)
print("\n📊 Data types of fits_products DataFrame:")
display(fits_products.dtypes)

if len(fits_products) > 0:
    manifest = Observations.download_products(fits_products[:1], mrp_only=False)
else:
    print("\n⚠️ No .fits products found to download.")
    manifest = None # Set manifest to None if no fits files are found


⬇️ Downloading product...

🔍 Inspecting fits_products DataFrame:


Unnamed: 0,obsID,obs_collection,dataproduct_type,obs_id,description,type,dataURI,productType,productGroupDescription,productSubGroupDescription,productDocumentationURL,project,prvversion,proposal_id,productFilename,size,parent_obsid,dataRights,calib_level,filters
1,107910709,JWST,image,jw01571081001_02201_00004_nis,exposure (L1b): Uncalibrated 4D exposure data,S,mast:JWST/product/jw01571081001_02201_00004_ni...,SCIENCE,,UNCAL,,CALJWST,,1571,jw01571081001_02201_00004_nis_uncal.fits,58766400,248672982,PUBLIC,1,GR150R;F150W
2,107910709,JWST,spectrum,jw01571081001_02201_00004_nis,exposure (L2a): charge trap product for persis...,S,mast:JWST/product/jw01571081001_02201_00004_ni...,AUXILIARY,,TRAPSFILLED,,CALJWST,1.18.0,1571,jw01571081001_02201_00004_nis_trapsfilled.fits,50362560,248672982,PUBLIC,2,GR150R;F150W
10,107910709,JWST,spectrum,jw01571081001_02201_00004_nis,exposure (L2b): 2D calibrated exposure average...,S,mast:JWST/product/jw01571081001_02201_00004_ni...,SCIENCE,,CAL,,CALJWST,1.18.0,1571,jw01571081001_02201_00004_nis_cal.fits,17297280,248672982,PUBLIC,2,GR150R;F150W
11,107910709,JWST,spectrum,jw01571081001_02201_00004_nis,exposure (L2a): 2D count rate averaged over in...,S,mast:JWST/product/jw01571081001_02201_00004_ni...,SCIENCE,,RATE,,CALJWST,1.18.0,1571,jw01571081001_02201_00004_nis_rate.fits,83960640,248672982,PUBLIC,2,GR150R;F150W
12,107910709,JWST,spectrum,jw01571081001_02201_00004_nis,exposure (L2a): 3D countrate per integration,S,mast:JWST/product/jw01571081001_02201_00004_ni...,SCIENCE,,RATEINTS,,CALJWST,1.18.0,1571,jw01571081001_02201_00004_nis_rateints.fits,83952000,248672982,PUBLIC,2,GR150R;F150W
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164,107910963,JWST,spectrum,jw01571081001_02201_00005_nis,exposure (L2b): 2D calibrated exposure average...,S,mast:JWST/product/jw01571081001_02201_00005_ni...,SCIENCE,,CAL,,CALJWST,1.18.0,1571,jw01571081001_02201_00005_nis_cal.fits,17173440,248672982,PUBLIC,2,GR150R;F150W
165,107910963,JWST,spectrum,jw01571081001_02201_00005_nis,exposure (L2a): 2D count rate averaged over in...,S,mast:JWST/product/jw01571081001_02201_00005_ni...,SCIENCE,,RATE,,CALJWST,1.18.0,1571,jw01571081001_02201_00005_nis_rate.fits,83960640,248672982,PUBLIC,2,GR150R;F150W
166,107910963,JWST,spectrum,jw01571081001_02201_00005_nis,exposure (L2a): 3D countrate per integration,S,mast:JWST/product/jw01571081001_02201_00005_ni...,SCIENCE,,RATEINTS,,CALJWST,1.18.0,1571,jw01571081001_02201_00005_nis_rateints.fits,83952000,248672982,PUBLIC,2,GR150R;F150W
167,107910963,JWST,spectrum,jw01571081001_02201_00005_nis,exposure/target (L2b/L3): 1D extracted spectrum,S,mast:JWST/product/jw01571081001_02201_00005_ni...,SCIENCE,,X1D,,CALJWST,1.18.0,1571,jw01571081001_02201_00005_nis_x1d.fits,3781440,248672982,PUBLIC,2,GR150R;F150W



📊 Data types of fits_products DataFrame:


Unnamed: 0,0
obsID,object
obs_collection,object
dataproduct_type,object
obs_id,object
description,object
type,object
dataURI,object
productType,object
productGroupDescription,object
productSubGroupDescription,object


RemoteServiceError: Error converting data type varchar to bigint.

In [35]:
# Step 4: Parse the FITS file
print("\n📖 Reading FITS file...")
file_path = manifest['Local Path'][0]
hdul = fits.open(file_path)
hdul.info()

# Try to find a valid HDU with table data
data = None
for i, hdu in enumerate(hdul):
    if isinstance(hdu, fits.BinTableHDU) or isinstance(hdu, fits.TableHDU):
        data = hdu.data
        if data is not None and hasattr(data, 'columns'):
            print(f"✅ Found tabular data in HDU[{i}] with columns: {data.columns.names}")
            break

# Validate extracted data
if data is None or not hasattr(data, 'columns'):
    print("❌ No tabular data found in any HDU.")
    hdul.close()
    raise ValueError("FITS file does not contain usable table data.")

column_names = data.columns.names

# Try flexible column detection
try:
    if 'WAVELENGTH' in column_names:
        wavelength = data['WAVELENGTH']
    elif 'lambda' in column_names:
        wavelength = data['lambda']
    else:
        raise KeyError("No 'WAVELENGTH' or 'lambda' column found.")

    if 'FLUX' in column_names:
        flux = data['FLUX']
    elif 'flux' in column_names:
        flux = data['flux']
    else:
        raise KeyError("No 'FLUX' or 'flux' column found.")
except Exception as e:
    print("❌ Could not auto-detect wavelength/flux columns:", e)
    hdul.close()
    raise



📖 Reading FITS file...
Filename: ./mastDownload/JWST/jw01571081001_02201_00004_nis/jw01571081001_02201_00004_nis_uncal.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU     172   ()      
  1  SCI           1 ImageHDU        65   (2048, 2048, 7, 1)   int16 (rescales to uint16)   
  2  GROUP         1 BinTableHDU     38   7R x 13C   [J, I, I, J, I, 26A, I, I, I, I, 36A, D, D]   
  3  INT_TIMES     1 BinTableHDU     24   1R x 7C   [J, D, D, D, D, D, D]   
  4  ASDF          1 BinTableHDU     11   1R x 1C   [7322B]   
✅ Found tabular data in HDU[2] with columns: ['integration_number', 'group_number', 'end_day', 'end_milliseconds', 'end_submilliseconds', 'group_end_time', 'number_of_columns', 'number_of_rows', 'number_of_gaps', 'completion_code_number', 'completion_code_text', 'bary_end_time', 'helio_end_time']
❌ Could not auto-detect wavelength/flux columns: "No 'WAVELENGTH' or 'lambda' column found."


KeyError: "No 'WAVELENGTH' or 'lambda' column found."

In [27]:
# Convert manifest['URL'] to a list and print first 5 entries
url_list = [str(url) for url in manifest['URL']]
print("\n🔍 Sample URLs from manifest:")
for url in url_list[:5]:
    print(url)



🔍 Sample URLs from manifest:
None


In [36]:
# Step 4: Parse the FITS file
print("\n📖 Reading FITS file...")
file_path = manifest['Local Path'][0]
hdul = fits.open(file_path)
hdul.info()

# Try to extract wavelength and flux from common extensions
try:
    data = hdul[1].data
    wavelength = data['WAVELENGTH'] if 'WAVELENGTH' in data.columns.names else data['lambda']
    flux = data['FLUX'] if 'FLUX' in data.columns.names else data['flux']
except Exception as e:
    print("Could not auto-detect wavelength/flux columns:", e)
    hdul.close()
    raise


📖 Reading FITS file...
Filename: ./mastDownload/JWST/jw01571081001_02201_00004_nis/jw01571081001_02201_00004_nis_uncal.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  PRIMARY       1 PrimaryHDU     172   ()      
  1  SCI           1 ImageHDU        65   (2048, 2048, 7, 1)   int16 (rescales to uint16)   
  2  GROUP         1 BinTableHDU     38   7R x 13C   [J, I, I, J, I, 26A, I, I, I, I, 36A, D, D]   
  3  INT_TIMES     1 BinTableHDU     24   1R x 7C   [J, D, D, D, D, D, D]   
  4  ASDF          1 BinTableHDU     11   1R x 1C   [7322B]   
Could not auto-detect wavelength/flux columns: 'numpy.ndarray' object has no attribute 'columns'


AttributeError: 'numpy.ndarray' object has no attribute 'columns'

In [37]:
# Try to extract wavelength and flux from common extensions
try:
    data = hdul[1].data
    column_names = data.dtype.names
    print("📊 Column names available:", column_names)

    # Flexible naming check
    wavelength = data['WAVELENGTH'] if 'WAVELENGTH' in column_names else data['lambda']
    flux = data['FLUX'] if 'FLUX' in column_names else data['flux']

except Exception as e:
    print("❌ Could not auto-detect wavelength/flux columns:", e)
    hdul.close()
    raise


📊 Column names available: None
❌ Could not auto-detect wavelength/flux columns: argument of type 'NoneType' is not iterable


TypeError: argument of type 'NoneType' is not iterable

In [38]:
# Step 5: Normalize and format data for ExoHabit pipeline
print("\n🧪 Normalizing data...")
flux_norm = (flux - np.min(flux)) / (np.max(flux) - np.min(flux))

# Add dummy environmental parameters (for now)
pressure = np.random.uniform(0.1, 2.0, size=len(flux))
temperature = np.random.uniform(150, 600, size=len(flux))
methane = np.random.uniform(0, 0.01, size=len(flux))
co2 = np.random.uniform(0, 0.01, size=len(flux))
water = np.random.uniform(0, 0.01, size=len(flux))



🧪 Normalizing data...


NameError: name 'flux' is not defined

In [39]:
# Create CSV dataframe
print("\n📁 Creating CSV file for ExoHabit...")
df = pd.DataFrame({
    'wavelength': wavelength,
    'intensity': flux_norm,
    'pressure': pressure,
    'temperature': temperature,
    'methane': methane,
    'co2': co2,
    'water': water
})

csv_path = "wasp39b_spectrogram_sample.csv"
df.to_csv(csv_path, index=False)
print(f"✅ CSV saved: {csv_path}")

hdul.close()



📁 Creating CSV file for ExoHabit...


NameError: name 'wavelength' is not defined