In [1]:
import numpy
## There are a number of relatively unimportant warnings that 
## show up, so for now, suppress them:
import warnings
warnings.filterwarnings("ignore")

## For simple astropy tables
import astropy, io, requests

## For handling ordinary astropy Tables
from astropy.table import Table

## For handling VO table type objects
from astropy.io import votable as apvot

# Use the astroquery TapPlus library.
from astroquery.utils.tap.core import TapPlus

## Use NAVO utility for Registry and Cone searches
import sys
sys.path.append("../aas_workshop_2018_summer/navo_utils")
from navo_utils.registry import Registry
from navo_utils.cone import Cone
from navo_utils.tap import Tap


Just to start, get coordinates and service URL:

In [2]:
import astropy.coordinates as coord
coord=coord.SkyCoord.from_name("m51")
print(coord)
services=Registry.query(service_type='cone',source='heasarc%zcat')
services

<SkyCoord (ICRS): (ra, dec) in deg
    (202.469575, 47.1952583)>


waveband,short_name,ivoid,res_description,access_url,reference_url,publisher,service_type
str23,str9,str28,str24,str77,str57,str17,str10
optical,ABELLZCAT,ivo://nasa.heasarc/abellzcat,No Description Available,https://heasarc.gsfc.nasa.gov/cgi-bin/vo/cone/coneGet.pl?table=abellzcat&amp;,https://heasarc.gsfc.nasa.gov/W3Browse/all/abellzcat.html,NASA/GSFC HEASARC,conesearch
gamma-ray#optical#x-ray,ROMABZCAT,ivo://nasa.heasarc/romabzcat,No Description Available,https://heasarc.gsfc.nasa.gov/cgi-bin/vo/cone/coneGet.pl?table=romabzcat&amp;,https://heasarc.gsfc.nasa.gov/W3Browse/all/romabzcat.html,NASA/GSFC HEASARC,conesearch
optical,CFAZ,ivo://nasa.heasarc/zcat,No Description Available,https://heasarc.gsfc.nasa.gov/cgi-bin/vo/cone/coneGet.pl?table=zcat&amp;,https://heasarc.gsfc.nasa.gov/W3Browse/all/zcat.html,NASA/GSFC HEASARC,conesearch


Example with our Cone module

In [3]:
## Different tries seem to come back in different order!  Want CFAZ.  
table=Cone.query(service=services[2],coords=coord,radius=1)
table[0]

name,ra,dec,bmag,radial_velocity,radial_velocity_error,redshift,class,Search_Offset
Unnamed: 0_level_1,deg,deg,Unnamed: 3_level_1,km / s,km / s,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
str10,float64,float64,float32,int32,int32,float64,int32,float64
13267+4631,202.2065,46.2585,15.10,0,0,--,9999,57.2374
N5173,202.10529,46.59082,14.12,2467,26,--,6300,39.2217
N5198,202.54745,46.66996,13.30,2569,25,--,6300,31.679
N5169,202.04237,46.67154,14.70,2482,40,--,6200,35.9691
I4263,202.13883,46.92671,15.40,2663,25,--,6200,21.0327
N5194,202.46823,47.19815,9.03,474,23,--,6200,0.1819
N5195,202.49491,47.26792,10.94,558,23,--,6200,4.4802
1325+4754,201.87771,47.64124,--,18107,179,--,9999,35.9641
1325+4754,201.95234,47.64138,--,18296,11,--,9999,34.021


With the TAP, you can refine the search based on any other attribute in the given catalog.  

Now, we saw above with the results of the Registry query for cone services that the HEASARC lists every catalog as a separate cone service.  So we can find it with a search for source='heasarc%zcat', where the "%" is a wildcard.  But for TAP services, HEASARC lists in the Registry one TAP service that can access many catalogs:

How do we know which we want if we did not know it was called "public.zcat"?  You can also use TapPlus to get the names of all of the catalogs a given service provides.  This could be done in a loop over *all* the Registry results for table services if you don't know which service provides the one you want.  

Then after you've selected the *public.zcat* table, you want to know what columns you can query:  

### Trying to use TapPlus with HEASARC or Chandra's services fails to get the columns of the right table:


In [11]:
## Use the Registry to find all TAP services and select one, which CXC
tap_services=Registry.query(service_type='table',source='heasarc')
heasarc_url=tap_services[0]['access_url']
print(heasarc_url)

## Then get all the tables served by this service:
heasarc = TapPlus(url=heasarc_url)
tables = heasarc.load_tables()
for table in (tables):
    print(table.get_qualified_name())

## Then try to find the columns
#heasarc_table = heasarc.load_table('zcat')
heasarc_table = heasarc.load_table('public.zcat')
for column in (heasarc_table.get_columns()):
    print(column.get_name())


https://heasarc.gsfc.nasa.gov/xamin/vo/tap
Created TAP+ (v1.0.1) - Connection:
	Host: heasarc.gsfc.nasa.gov
	Use HTTPS: True
	Port: 443
	SSL Port: 443
Retrieving tables...
Parsing tables...
Done.
TAP_SCHEMA.TAP_SCHEMA.columns
TAP_SCHEMA.TAP_SCHEMA.key_columns
TAP_SCHEMA.TAP_SCHEMA.keys
TAP_SCHEMA.TAP_SCHEMA.schemas
TAP_SCHEMA.TAP_SCHEMA.tables
public.a1
public.a1point
public.a2lcpoint
public.a2lcscan
public.a2led
public.a2pic
public.a2point
public.a2rtraw
public.a2specback
public.a2spectra
public.a3
public.a4
public.a4spectra
public.aavsovsx
public.abell
public.abellzcat
public.acceptcat
public.acrs
public.actegsrcat
public.actssrcat
public.aegis20
public.aegis20id
public.aegisx
public.aegisxdcxo
public.agilecat
public.agileupvar
public.agnsdssxm2
public.agnsdssxmm
public.akaribsc
public.akaripsc
public.aknepdfcxo
public.alfperxmm
public.allwiseagn
public.ami10c15gz
public.amigps16gh
public.ansuvpscat
public.arcquincxo
public.ariel3a
public.ariel5
public.arxa
public.ascaegclus
public.a

Parsing table 'zcat'...
Done.
datatype
description
column_name
size
indexed
std
utype
ucd
unit
principal
table_name


In [5]:
## Use the Registry to find all TAP services and select one, which CXC
tap_services=Registry.query(service_type='table')
print(tap_services[20]['access_url'])

## Then get all the tables served by this service:
cxc = TapPlus(url=tap_services[20]['access_url'])
tables = cxc.load_tables()
for table in (tables):
    print(table.get_qualified_name())

## Then try to find the columns
cxc_table = cxc.load_table('cxc.cxc.image')
for column in (cxc_table.get_columns()):
    print(column.get_name())


http://voparis-rr.obspm.fr:80/tap
Created TAP+ (v1.0.1) - Connection:
	Host: voparis-rr.obspm.fr
	Use HTTPS: False
	Port: 80
	SSL Port: 443
Retrieving tables...
Parsing tables...
Done.
glots.glots.services
glots.glots.tables
glots.glots.columns
ivoa.ivoa.obscore
ivoa.ivoa.emptyobscore
rr.rr.registries
rr.rr.authorities
rr.rr.resource
rr.rr.res_role
rr.rr.res_subject
rr.rr.capability
rr.rr.res_schema
rr.rr.res_table
rr.rr.table_column
rr.rr.res_detail
rr.rr.interface
rr.rr.relationship
rr.rr.intf_param
rr.rr.validation
rr.rr.res_date
rr.rr.alt_identifier
rr.rr.stc_spatial
rr.rr.stc_temporal
rr.rr.stc_spectral
rr.rr.stc_redshift
tap_schema.tap_schema.schemas
tap_schema.tap_schema.tables
tap_schema.tap_schema.columns
tap_schema.tap_schema.keys
tap_schema.tap_schema.key_columns
tap_schema.tap_schema.groups
Retrieving table 'cxc.cxc.image'
Parsing table 'cxc.cxc.image'...
Done.
ivoid
accessurl
nextharvest
harvestinterval
lastsuccess


## Why does this work for this Gaia table but not for HEASARC's zcat or CXC stuff?

This example is from https://astroquery.readthedocs.io/en/latest/utils/tap.html though it had to be fixed since they had a typo in it.  

In [6]:
gaia = TapPlus(url="http://gea.esac.esa.int/tap-server/tap")
gaiadr1_table = gaia.load_table('gaiadr1.gaia_source')
for column in (gaiadr1_table.get_columns()):
    print(column.get_name())


Created TAP+ (v1.0.1) - Connection:
	Host: gea.esac.esa.int
	Use HTTPS: False
	Port: 80
	SSL Port: 443
Retrieving table 'gaiadr1.gaia_source'
Parsing table 'gaiadr1.gaia_source'...
Done.
solution_id
source_id
random_index
ref_epoch
ra
ra_error
dec
dec_error
parallax
parallax_error
pmra
pmra_error
pmdec
pmdec_error
ra_dec_corr
ra_parallax_corr
ra_pmra_corr
ra_pmdec_corr
dec_parallax_corr
dec_pmra_corr
dec_pmdec_corr
parallax_pmra_corr
parallax_pmdec_corr
pmra_pmdec_corr
astrometric_n_obs_al
astrometric_n_obs_ac
astrometric_n_good_obs_al
astrometric_n_good_obs_ac
astrometric_n_bad_obs_al
astrometric_n_bad_obs_ac
astrometric_delta_q
astrometric_excess_noise
astrometric_excess_noise_sig
astrometric_primary_flag
astrometric_relegation_factor
astrometric_weight_al
astrometric_weight_ac
astrometric_priors_used
matched_observations
duplicated_source
scan_direction_strength_k1
scan_direction_strength_k2
scan_direction_strength_k3
scan_direction_strength_k4
scan_direction_mean_k1
scan_direction_

If you look at this url, you'll see the raw XML that it's using.  

http://gea.esac.esa.int/tap-server/tap/tables 

For this Gaia example, it's parsing what you see correctly and finding the gaiadr1.gaia_source table somewhere down in the middle.

But if you look at the HEASARC XML, you see

view-source:https://heasarc.gsfc.nasa.gov/xamin/vo/tap/tables

and the very first table there is the TAP_SCHEMA.columns, and TapPlus is listing its columns instead of finding the public.zcat table which is further down.  So TapPlus and our TAP service tables endpoint are not playing correctly.  This is apparently the same for Chandra as well.

In [8]:
heasarc_tap = TapPlus(url=heasarc_url)
table=heasarc_tap.load_table('public.ascao')
for column in (table.get_columns()):
    print(column.get_name())

Created TAP+ (v1.0.1) - Connection:
	Host: heasarc.gsfc.nasa.gov
	Use HTTPS: True
	Port: 443
	SSL Port: 443
Retrieving table 'public.ascao'
Parsing table 'public.ascao'...
Done.
datatype
description
column_name
size
indexed
std
utype
ucd
unit
principal
table_name


What's in the Registry for Gaia?

In [19]:
gaia_taps=Registry.query(service_type='table',source='gaia')
print(gaia_taps['access_url'])
gaia_url2=gaia_taps[1]['access_url']
print(gaia_url2)

              access_url              
--------------------------------------
http://gea.esac.esa.int/tap-server/tap
   http://gaia.obspm.fr/tap-server/tap
 http://gaia.ari.uni-heidelberg.de/tap
http://gaia.obspm.fr/tap-server/tap


Using the second one, which is a different service than the above, get the same result, i.e., it parses the columns correctly.


In [17]:
gaia = TapPlus(url=gaia_url2)
gaiadr1_table = gaia.load_table('gaiadr1.gaia_source')
for column in (gaiadr1_table.get_columns()):
    print(column.get_name())


Created TAP+ (v1.0.1) - Connection:
	Host: gaia.obspm.fr
	Use HTTPS: False
	Port: 80
	SSL Port: 443
Retrieving table 'gaiadr1.gaia_source'
Parsing table 'gaiadr1.gaia_source'...
Done.
allwise_oid
cc_flags
dec
dec_error
designation
ext_flag
ph_qual
ra
radec_co_error
ra_error
tmass_key
var_flag
w1gmag
w1gmag_error
w1mjd_mean
w1mpro
w1mpro_error
w2gmag
w2gmag_error
w2mjd_mean
w2mpro
w2mpro_error
w3gmag
w3gmag_error
w3mjd_mean
w3mpro
w3mpro_error
w4gmag
w4gmag_error
w4mjd_mean
w4mpro
w4mpro_error


## So what's different about Gaia's TAP service?

Looking in the code for TapPlus, i.e. in 

astroquery.utils.tap.load_table()

it is adding to the TAP URL 

            response = connHandler.execute_get("tables?tables="+table)

This "tables?tables=foobar" is not in the TAP protocol.  So this is a special TapPlus thing.  So we cannot use that if we want to be generic.  The docs even do specify that the "Plus" indeed means things beyond the TAP protocol.  

https://astroquery.readthedocs.io/en/latest/utils/tap.html