In [6]:
import requests
import pandas as pd
import io
from pathlib import Path
from urllib.parse import quote

# Try the modern CADC TAP (Table Access Protocol) service
def query_cadc_tap(object_name):
    """Query CADC TAP service for Solar System Object observations."""
    
    # CADC TAP service endpoint
    tap_url = "https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/tap/sync"
    
    # ADQL query to find observations of the specified object
    query = f"""
    SELECT TOP 100
        o.observationID,
        o.instrument_name,
        o.target_name,
        o.observationURI,
        p.productID,
        p.uri as download_uri
    FROM caom2.Observation o
    JOIN caom2.Plane p ON o.obsID = p.obsID
    WHERE UPPER(o.target_name) LIKE UPPER('%{object_name}%')
    AND o.collection = 'CFHT'
    ORDER BY o.observationID
    """
    
    params = {
        'REQUEST': 'doQuery',
        'LANG': 'ADQL',
        'FORMAT': 'csv',
        'QUERY': query
    }
    
    print(f"Querying CADC TAP for object: {object_name}")
    print(f"TAP URL: {tap_url}")
    print("-" * 50)
    
    try:
        response = requests.get(tap_url, params=params, timeout=30)
        response.raise_for_status()
        
        print(f"Response status: {response.status_code}")
        print(f"Content-Type: {response.headers.get('Content-Type', 'Not specified')}")
        print("-" * 50)
        
        csv_data = response.text
        
        if not csv_data.strip():
            print("No data returned from TAP service")
            return None
        
        # Check if we got HTML error page
        if csv_data.strip().startswith('<!DOCTYPE') or csv_data.strip().startswith('<html'):
            print("ERROR: Received HTML response instead of CSV")
            print("First 500 characters:")
            print(csv_data[:500])
            return None
        
        print("Raw CSV response preview:")
        print(csv_data[:500])
        print("\n" + "="*50 + "\n")
        
        # Parse CSV with pandas
        try:
            df = pd.read_csv(
                io.StringIO(csv_data),
                on_bad_lines='skip',
                engine='python'
            )
            
            if df.empty:
                print("Query returned no results")
                return None
                
            print(f"Found {len(df)} records")
            print("\nColumn names:")
            print(list(df.columns))
            print("\nFirst few rows:")
            print(df.head())
            
            return df
            
        except Exception as parse_error:
            print(f"Error parsing CSV: {parse_error}")
            print("Raw response:")
            print(csv_data)
            return None
            
    except requests.exceptions.RequestException as e:
        print(f"Error making TAP request: {e}")
        return None

# Try alternative: Direct SSOIS search with different parameters
def try_ssois_direct(object_name):
    """Try direct SSOIS query with various parameter combinations."""
    
    endpoints_to_try = [
        {
            'url': 'https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/ssois/find',
            'params': {
                'object': object_name,
                'format': 'tsv',
                'limit': '100'
            }
        },
        {
            'url': 'https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/cadcbin/ssos/ssosclf.pl',
            'params': {
                'object': object_name,
                'search': 'bynameCADC',
                'format': 'tsv',
                'lang': 'en'
            }
        }
    ]
    
    for i, endpoint in enumerate(endpoints_to_try, 1):
        print(f"\nTrying SSOIS endpoint #{i}:")
        print(f"URL: {endpoint['url']}")
        print(f"Parameters: {endpoint['params']}")
        
        try:
            response = requests.get(endpoint['url'], params=endpoint['params'], timeout=15)
            print(f"Status: {response.status_code}")
            
            if response.status_code == 200:
                data = response.text
                
                # Check if it's HTML
                if data.strip().startswith('<!DOCTYPE') or data.strip().startswith('<html'):
                    print("Returned HTML (likely an error page)")
                    continue
                
                # Check if it's empty or error message
                if not data.strip() or 'No data found' in data or 'Error' in data:
                    print("No data found or error message returned")
                    continue
                
                print(f"Success! Got {len(data)} characters of data")
                print("Preview:")
                print(data[:300])
                return data
                
        except Exception as e:
            print(f"Error: {e}")
            continue
    
    return None

# Main execution
object_name = 'CERES'

print("=" * 60)
print("ATTEMPT 1: Using CADC TAP Service")
print("=" * 60)

df = query_cadc_tap(object_name)

if df is None:
    print("\n" + "=" * 60)
    print("ATTEMPT 2: Trying Direct SSOIS Endpoints")
    print("=" * 60)
    
    tsv_data = try_ssois_direct(object_name)
    
    if tsv_data:
        try:
            df = pd.read_csv(
                io.StringIO(tsv_data), 
                sep='\t',
                on_bad_lines='skip',
                quoting=3,
                engine='python'
            )
            print(f"\nSuccessfully parsed {len(df)} records from SSOIS")
            print("Columns:", list(df.columns))
            print("\nFirst few rows:")
            print(df.head())
        except Exception as e:
            print(f"Error parsing SSOIS data: {e}")
    else:
        print("All endpoints failed. The SSOIS service may be temporarily unavailable.")
        print("\nTroubleshooting suggestions:")
        print("1. Check if the object name is correct")
        print("2. Try a different object (e.g., 'VESTA', 'PALLAS')")
        print("3. Check CADC service status at https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/")

# If we have data, try to download a file
if 'df' in locals() and df is not None and not df.empty:
    print("\n" + "=" * 60)
    print("ATTEMPTING DOWNLOAD")
    print("=" * 60)
    
    # Look for download URL columns
    url_columns = [col for col in df.columns if 'uri' in col.lower() or 'url' in col.lower()]
    
    if url_columns:
        url_col = url_columns[0]
        if pd.notna(df.iloc[0][url_col]):
            download_url = df.iloc[0][url_col]
            print(f"Found download URL in column '{url_col}': {download_url}")
            
            try:
                # Download the file
                file_response = requests.get(download_url, timeout=30)
                file_response.raise_for_status()
                
                # Create filename
                filename = f"{object_name}_observation.fits"
                
                with open(filename, 'wb') as f:
                    f.write(file_response.content)
                    
                print(f"Successfully downloaded: {filename} ({len(file_response.content)} bytes)")
                
            except Exception as e:
                print(f"Download failed: {e}")
        else:
            print(f"No valid URL found in column '{url_col}'")
    else:
        print("No URL/URI columns found for downloading")
        print("Available columns:", list(df.columns))

ATTEMPT 1: Using CADC TAP Service
Querying CADC TAP for object: CERES
TAP URL: https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/tap/sync
--------------------------------------------------
Error making TAP request: 404 Client Error: Not Found for url: https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/tap/sync?REQUEST=doQuery&LANG=ADQL&FORMAT=csv&QUERY=%0A++++SELECT+TOP+100%0A++++++++o.observationID%2C%0A++++++++o.instrument_name%2C%0A++++++++o.target_name%2C%0A++++++++o.observationURI%2C%0A++++++++p.productID%2C%0A++++++++p.uri+as+download_uri%0A++++FROM+caom2.Observation+o%0A++++JOIN+caom2.Plane+p+ON+o.obsID+%3D+p.obsID%0A++++WHERE+UPPER%28o.target_name%29+LIKE+UPPER%28%27%25CERES%25%27%29%0A++++AND+o.collection+%3D+%27CFHT%27%0A++++ORDER+BY+o.observationID%0A++++

ATTEMPT 2: Trying Direct SSOIS Endpoints

Trying SSOIS endpoint #1:
URL: https://www.cadc-ccda.hia-iha.nrc-cnrc.gc.ca/ssois/find
Parameters: {'object': 'CERES', 'format': 'tsv', 'limit': '100'}
Error making TAP request: 404 Client Er

In [7]:
# Download sample images using the MetaData URLs
if 'df' in locals() and df is not None and not df.empty:
    print("=" * 60)
    print("DOWNLOADING SAMPLE IMAGES")
    print("=" * 60)
    
    # Use MetaData column which contains the actual file URLs
    if 'MetaData' in df.columns:
        # Download first 3 images as samples
        download_count = min(3, len(df))
        
        for i in range(download_count):
            try:
                metadata_url = df.iloc[i]['MetaData']
                if pd.notna(metadata_url):
                    print(f"\nDownloading image {i+1}/{download_count}")
                    print(f"URL: {metadata_url}")
                    
                    # Extract filename from URL
                    filename = metadata_url.split('/')[-1]
                    if not filename.endswith(('.fit', '.fits', '.fz')):
                        filename += '.fits'
                    
                    # Make the request
                    response = requests.get(metadata_url, timeout=60)
                    response.raise_for_status()
                    
                    # Save the file
                    with open(filename, 'wb') as f:
                        f.write(response.content)
                    
                    file_size = len(response.content)
                    print(f"Downloaded: {filename} ({file_size:,} bytes)")
                    
                    # Print some details about this observation
                    row = df.iloc[i]
                    print(f"  Image ID: {row['Image']}")
                    print(f"  MJD: {row['MJD']}")
                    print(f"  Filter: {row['Filter']}")
                    print(f"  Telescope: {row['Telescope/Instrument']}")
                else:
                    print(f"No URL found for row {i}")
                    
            except Exception as e:
                print(f"Failed to download image {i+1}: {e}")
        
        print(f"\nDownload complete! Check your current directory for the image files.")
        print(f"Total observations found: {len(df)}")
    else:
        print("No MetaData column found for downloads")
        print("Available columns:", list(df.columns))

DOWNLOADING SAMPLE IMAGES

Downloading image 1/3
URL: https://sbnarchive.psi.edu/pds3/neat/geodss/data/g19960613/obsdata/960613110912a.fit.fz
Downloaded: 960613110912a.fit.fz (19,036,800 bytes)
  Image ID: 960613110912a
  MJD: 50247.4647222222
  Filter: none
  Telescope: NEAT-GEODSS-Maui

Downloading image 2/3
URL: https://sbnarchive.psi.edu/pds3/neat/geodss/data/g19960613/obsdata/960613113742a.fit.fz
Downloaded: 960613110912a.fit.fz (19,036,800 bytes)
  Image ID: 960613110912a
  MJD: 50247.4647222222
  Filter: none
  Telescope: NEAT-GEODSS-Maui

Downloading image 2/3
URL: https://sbnarchive.psi.edu/pds3/neat/geodss/data/g19960613/obsdata/960613113742a.fit.fz
Downloaded: 960613113742a.fit.fz (18,982,080 bytes)
  Image ID: 960613113742a
  MJD: 50247.4845138889
  Filter: none
  Telescope: NEAT-GEODSS-Maui

Downloading image 3/3
URL: https://sbnarchive.psi.edu/pds3/neat/geodss/data/g19960613/obsdata/960613120926a.fit.fz
Downloaded: 960613113742a.fit.fz (18,982,080 bytes)
  Image ID: 96061