<a href="https://colab.research.google.com/github/OIEIEIO/MoonWalker-HUD/blob/main/MAST_API_Tutorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import sys
import os
import time
import re
import json

import requests
from urllib.parse import quote as urlencode

from astropy.table import Table
import numpy as np

import pprint
# Shared pretty-printer (4-space indent) that later cells use to display parsed JSON responses
pp = pprint.PrettyPrinter(indent=4)

In [7]:
def mast_query(request, timeout=None):
    """Perform a MAST query.

        Parameters
        ----------
        request (dictionary): The MAST request json object
        timeout (float or (connect, read) tuple, optional): Forwarded unchanged to
            ``requests.post``. The default ``None`` keeps the previous behaviour of
            waiting for the server without a time limit.

        Returns
        -------
        head,content where head is the response HTTP headers, and content is the
        response body decoded as UTF-8 text (not yet parsed as JSON)

        Raises
        ------
        requests.HTTPError: if the server answers with a 4xx/5xx status code"""

    # Base API url
    request_url = 'https://mast.stsci.edu/api/v0/invoke'

    # Grab Python Version (major.minor.micro) for the User-agent header
    version = ".".join(map(str, sys.version_info[:3]))

    # Create Http Header Variables
    headers = {"Content-type": "application/x-www-form-urlencoded",
               "Accept": "text/plain",
               "User-agent": "python-requests/" + version}

    # Encoding the request as a json string, then percent-encoding it so it can
    # be sent as the value of the form field "request"
    req_string = urlencode(json.dumps(request))

    # Perform the HTTP request
    resp = requests.post(request_url,
                         data="request=" + req_string,
                         headers=headers,
                         timeout=timeout)

    # Fail loudly on an HTTP error instead of handing an error page to the
    # caller, who would otherwise try to json.loads() it
    resp.raise_for_status()

    # Pull out the headers and response content
    head = resp.headers
    content = resp.content.decode('utf-8')

    return head, content


In [8]:
object_of_interest = 'M101'

# Name-lookup request: hand the object name to the Mast.Name.Lookup service
# and ask for a JSON answer.
resolver_request = dict(service='Mast.Name.Lookup',
                        params=dict(input=object_of_interest,
                                    format='json'))

# mast_query returns (response headers, body text); the body is parsed here.
headers, resolved_object_string = mast_query(resolver_request)
resolved_object = json.loads(resolved_object_string)

pp.pprint(resolved_object)

{   'resolvedCoordinate': [   {   'cached': False,
                                  'canonicalName': 'MESSIER 101',
                                  'decl': 54.34895,
                                  'objectType': 'G',
                                  'ra': 210.80227,
                                  'radius': 0.24000000000000002,
                                  'resolver': 'NED',
                                  'resolverTime': 466,
                                  'searchRadius': -1.0,
                                  'searchString': 'm101'}],
    'status': ''}


In [9]:
# Use the first entry of 'resolvedCoordinate' and keep its 'ra' and 'decl' values.
first_match = resolved_object['resolvedCoordinate'][0]
obj_ra, obj_dec = first_match['ra'], first_match['decl']

In [10]:
# Cone-search request centred on the position resolved above.
mast_request = dict(
    service='Mast.Caom.Cone',
    params=dict(ra=obj_ra, dec=obj_dec, radius=0.2),
    format='json',
    pagesize=2000,
    page=1,
    removenullcolumns=True,
    removecache=True,
)

# Send the request and parse the returned body text as JSON.
headers, mast_data_str = mast_query(mast_request)
mast_data = json.loads(mast_data_str)

print(mast_data.keys())
print("Query status:", mast_data['status'])

dict_keys(['status', 'msg', 'data', 'fields', 'paging'])
Query status: COMPLETE


In [11]:
# Show the first five column descriptors (name/type pairs) of the cone-search response
pp.pprint(mast_data['fields'][:5])

[   {'name': 'intentType', 'type': 'string'},
    {'name': 'obs_collection', 'type': 'string'},
    {'name': 'provenance_name', 'type': 'string'},
    {'name': 'instrument_name', 'type': 'string'},
    {'name': 'project', 'type': 'string'}]


In [12]:
# Show the first row of the cone-search response
pp.pprint(mast_data['data'][0])

{   '_selected_': None,
    'calib_level': 3,
    'dataRights': 'PUBLIC',
    'dataURL': None,
    'dataproduct_type': 'image',
    'distance': 0,
    'em_max': 1000,
    'em_min': 600,
    'filters': 'TESS',
    'instrument_name': 'Photometer',
    'intentType': 'science',
    'jpegURL': None,
    'mtFlag': False,
    'obs_collection': 'TESS',
    'obs_id': 'tess-s0015-4-1',
    'obs_title': None,
    'obsid': 27507612,
    'project': 'TESS',
    'proposal_id': 'N/A',
    'proposal_pi': 'Ricker, George',
    'proposal_type': None,
    'provenance_name': 'SPOC',
    's_dec': 59.23577326662502,
    's_ra': 213.663757013405,
    's_region': 'POLYGON 227.56190400 55.89237000 210.04086200 50.98859500 '
                '197.01254300 60.84640200 220.28641600 67.20814100 '
                '227.56190400 55.89237000 ',
    'sequence_number': 15,
    'srcDen': None,
    't_exptime': 1425.599379,
    't_max': 58736.89295962,
    't_min': 58710.87239573,
    't_obs_release': 58756.3333334,
    'ta

In [13]:
# Convert the JSON response into an astropy Table, one column per entry in 'fields'.
mast_data_table = Table()

for col, atype in [(x['name'], x['type']) for x in mast_data['fields']]:
    # Translate the type names used in the response into numpy dtype names
    if atype == "string":
        atype = "str"
    if atype == "boolean":
        atype = "bool"

    # Rows that lack this column contribute None
    values = [x.get(col, None) for x in mast_data['data']]

    # numpy integer arrays cannot hold missing values: building one from a list
    # that contains None raises TypeError. Fall back to float (None -> nan) only
    # when a value is actually missing, so fully populated columns stay integer.
    if atype == "int" and any(v is None for v in values):
        atype = "float"

    mast_data_table[col] = np.array(values, dtype=atype)

print(mast_data_table)

intentType obs_collection provenance_name ...      distance      _selected_
---------- -------------- --------------- ... ------------------ ----------
   science           TESS            SPOC ...                0.0      False
   science           TESS            SPOC ...                0.0      False
   science           TESS            SPOC ...                0.0      False
   science           TESS            SPOC ...                0.0      False
   science           TESS            SPOC ...                0.0      False
   science           TESS            SPOC ...                0.0      False
   science           TESS            SPOC ...  407.3642445717816      False
   science           TESS            SPOC ...  407.3642445717816      False
   science           TESS            SPOC ...  407.3642445717816      False
   science           TESS            SPOC ...  407.3642445717816      False
       ...            ...             ... ...                ...        ...
   science  

In [14]:
# Filtered query that asks for COUNT_BIG(*) rather than the rows themselves.
mashup_request = dict(
    service="Mast.Caom.Filtered",
    format="json",
    params=dict(
        columns="COUNT_BIG(*)",
        filters=[
            dict(paramName="filters",
                 values=["NUV", "FUV"],
                 separator=";"),
            dict(paramName="t_max",
                 values=[dict(min=52264.4586, max=54452.8914)]),  # MJD
            dict(paramName="obsid",
                 values=[],
                 freeText="%200%"),
        ],
    ),
)

# Send the request and parse the returned body text as JSON.
headers, out_string = mast_query(mashup_request)
count = json.loads(out_string)

pp.pprint(count)

{   'data': [{'Column1': 2601}],
    'fields': [{'name': 'Column1', 'type': 'string'}],
    'msg': '',
    'paging': {   'page': 1,
                  'pageSize': 1,
                  'pagesFiltered': 1,
                  'rows': 1,
                  'rowsFiltered': 1,
                  'rowsTotal': 1},
    'status': 'COMPLETE'}


In [15]:
# Same three filters as the count query above, but requesting every column ("*").
mashup_request = dict(
    service="Mast.Caom.Filtered",
    format="json",
    params=dict(
        columns="*",
        filters=[
            dict(paramName="filters",
                 values=["NUV", "FUV"],
                 separator=";"),
            dict(paramName="t_max",
                 values=[dict(min=52264.4586, max=54452.8914)]),  # MJD
            dict(paramName="obsid",
                 values=[],
                 freeText="%200%"),
        ],
    ),
)

# Send the request and parse the returned body text as JSON.
headers, out_string = mast_query(mashup_request)
filtered_data = json.loads(out_string)

print(filtered_data.keys())
print("Query status:", filtered_data['status'])

dict_keys(['status', 'msg', 'data', 'fields', 'paging'])
Query status: COMPLETE


In [16]:
# Show the first row returned by the filtered query
pp.pprint(filtered_data['data'][0])

{   'calib_level': 2,
    'dataRights': 'PUBLIC',
    'dataURL': 'http://galex.stsci.edu/data/GR6/pipe/01-vsn/03200-MISDR1_16952_0422/d/01-main/0001-img/07-try/MISDR1_16952_0422-fd-cnt.fits.gz',
    'dataproduct_type': 'image',
    'em_max': 180600000000,
    'em_min': 134000000000,
    'filters': 'FUV',
    'instrument_name': 'GALEX',
    'intentType': 'science',
    'jpegURL': 'http://galex.stsci.edu/data/GR6/pipe/01-vsn/03200-MISDR1_16952_0422/d/01-main/0001-img/07-try/qa/MISDR1_16952_0422-xd-int_2color.jpg',
    'mtFlag': None,
    'objID': 391,
    'obs_collection': 'GALEX',
    'obs_id': '2418435336423079936',
    'obs_title': None,
    'obsid': 200,
    'project': 'MIS',
    'proposal_id': None,
    'proposal_pi': None,
    'proposal_type': 'MIS',
    'provenance_name': 'MIS',
    's_dec': 14.8739617983027,
    's_ra': 17.631372298039878,
    's_region': 'CIRCLE ICRS  17.63137230  14.87396180 0.625',
    'sequence_number': -999,
    'srcDen': 5885,
    't_exptime': 1632,
    't_

In [17]:
# Filtered query with an added "position" string; asks for COUNT_BIG(*)
# over entries whose dataproduct_type is "cube".
mashup_request = dict(
    service="Mast.Caom.Filtered.Position",
    format="json",
    params=dict(
        columns="COUNT_BIG(*)",
        filters=[
            dict(paramName="dataproduct_type",
                 values=["cube"]),
        ],
        position="210.8023, 54.349, 0.24",
    ),
)

# Send the request and parse the returned body text as JSON.
headers, out_string = mast_query(mashup_request)
count = json.loads(out_string)

pp.pprint(count)

{   'data': [{'Column1': 797}],
    'fields': [{'name': 'Column1', 'type': 'string'}],
    'msg': '',
    'paging': {   'page': 1,
                  'pageSize': 1,
                  'pagesFiltered': 1,
                  'rows': 1,
                  'rowsFiltered': 1,
                  'rowsTotal': 1},
    'status': 'COMPLETE'}


In [18]:
# Picking the first Hubble Space Telescope observation:
# mask the table down to rows whose obs_collection is "HST", then take row 0.
is_hst = mast_data_table["obs_collection"] == "HST"
interesting_observation = mast_data_table[is_hst][0]

# Print a few identifying columns of the chosen row.
summary_columns = ['dataproduct_type', 'obs_collection', 'instrument_name']
print("Observation:",
      [interesting_observation[name] for name in summary_columns])

Observation: ['image', 'HST', 'WFC3/UVIS']


In [19]:
obsid = interesting_observation['obsid']

# Ask Mast.Caom.Products for the data products of the selected observation.
product_request = dict(
    service='Mast.Caom.Products',
    params=dict(obsid=obsid),
    format='json',
    pagesize=100,
    page=1,
)

# Send the request and parse the returned body text as JSON.
headers, obs_products_string = mast_query(product_request)
obs_products = json.loads(obs_products_string)

product_count = len(obs_products["data"])
print("Number of data products:", product_count)
print("Product information column names:")
pp.pprint(obs_products['fields'])

Number of data products: 83
Product information column names:
[   {'name': 'obsID', 'type': 'string'},
    {'name': 'obs_collection', 'type': 'string'},
    {'name': 'dataproduct_type', 'type': 'string'},
    {'name': 'obs_id', 'type': 'string'},
    {'name': 'description', 'type': 'string'},
    {'name': 'type', 'type': 'string'},
    {'name': 'dataURI', 'type': 'string'},
    {'name': 'productType', 'type': 'string'},
    {'name': 'productGroupDescription', 'type': 'string'},
    {'name': 'productSubGroupDescription', 'type': 'string'},
    {'name': 'productDocumentationURL', 'type': 'string'},
    {'name': 'project', 'type': 'string'},
    {'name': 'prvversion', 'type': 'string'},
    {'name': 'proposal_id', 'type': 'string'},
    {'name': 'productFilename', 'type': 'string'},
    {'name': 'size', 'type': 'int'},
    {'name': 'parent_obsid', 'type': 'string'},
    {'name': 'dataRights', 'type': 'string'},
    {'name': 'calib_level', 'type': 'int'},
    {'name': '_selected_', 'type':

In [20]:
# List the productType of every data product ("" when a product has no such key)
pp.pprint([x.get('productType',"") for x in obs_products["data"]])

[   'AUXILIARY',
    'PREVIEW',
    'SCIENCE',
    'SCIENCE',
    'SCIENCE',
    'AUXILIARY',
    'AUXILIARY',
    'PREVIEW',
    'SCIENCE',
    'SCIENCE',
    'AUXILIARY',
    'AUXILIARY',
    'PREVIEW',
    'SCIENCE',
    'SCIENCE',
    'AUXILIARY',
    'AUXILIARY',
    'PREVIEW',
    'SCIENCE',
    'SCIENCE',
    'AUXILIARY',
    'AUXILIARY',
    'PREVIEW',
    'SCIENCE',
    'SCIENCE',
    'AUXILIARY',
    'AUXILIARY',
    'PREVIEW',
    'SCIENCE',
    'AUXILIARY',
    'AUXILIARY',
    'PREVIEW',
    'SCIENCE',
    'AUXILIARY',
    'AUXILIARY',
    'AUXILIARY',
    'AUXILIARY',
    'AUXILIARY',
    'INFO',
    'PREVIEW',
    'PREVIEW',
    'SCIENCE',
    'SCIENCE',
    'AUXILIARY',
    'AUXILIARY',
    'AUXILIARY',
    'INFO',
    'PREVIEW',
    'PREVIEW',
    'PREVIEW',
    'SCIENCE',
    'SCIENCE',
    'SCIENCE',
    'AUXILIARY',
    'AUXILIARY',
    'AUXILIARY',
    'INFO',
    'PREVIEW',
    'PREVIEW',
    'PREVIEW',
    'SCIENCE',
    'SCIENCE',
    'SCIENCE',
    'AUXILIARY',

In [21]:
# Keep only the products whose productType is exactly 'SCIENCE'.
sci_prod_arr = [prod for prod in obs_products['data']
                if prod.get("productType", None) == 'SCIENCE']
science_products = Table()

# Response type name -> numpy dtype name; any other type name is used unchanged.
# "int" maps to "float" because the array may contain nan values, and they do
# not exist in numpy integer arrays.
numpy_dtype = {"string": "str", "boolean": "bool", "int": "float"}

for field in obs_products['fields']:
    col = field['name']
    atype = numpy_dtype.get(field['type'], field['type'])
    # Products that lack this column contribute None
    column_values = [prod.get(col, None) for prod in sci_prod_arr]
    science_products[col] = np.array(column_values, dtype=atype)

print("Number of science products:", len(science_products))
print(science_products)

Number of science products: 27
 obsID   obs_collection dataproduct_type ... dataRights calib_level _selected_
-------- -------------- ---------------- ... ---------- ----------- ----------
26511449            HST            image ...     PUBLIC         3.0      False
26511449            HST            image ...     PUBLIC         3.0      False
26511449            HST            image ...     PUBLIC         3.0      False
26511442            HST            image ...     PUBLIC         2.0      False
26511442            HST            image ...     PUBLIC         2.0      False
26511431            HST            image ...     PUBLIC         2.0      False
26511431            HST            image ...     PUBLIC         2.0      False
26511411            HST            image ...     PUBLIC         2.0      False
26511411            HST            image ...     PUBLIC         2.0      False
26511418            HST            image ...     PUBLIC         2.0      False
     ...            .

In [22]:
download_url = 'https://mast.stsci.edu/api/v0.1/Download/file?'

# Download the first two science products, one HTTP request per file.
for row in science_products[:2]:

    # make file path: mastFiles/<obs_collection>/<obs_id>/<productFilename>
    # exist_ok=True replaces the separate exists() check and makes re-runs safe
    out_dir = os.path.join("mastFiles", row['obs_collection'], row['obs_id'])
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, os.path.basename(row['productFilename']))

    # Download the data
    payload = {"uri": row['dataURI']}
    resp = requests.get(download_url, params=payload)

    # An HTTP error still carries a body; writing it out would leave an error
    # page on disk under the product's name, so report the failure and move on.
    if not resp.ok:
        print("ERROR: " + out_path + " failed to download.")
        continue

    # save to file
    with open(out_path, 'wb') as FLE:
        FLE.write(resp.content)

    print("COMPLETE: ", out_path)

COMPLETE:  mastFiles/HST/hst_11635_11_wfc3_uvis_f469n_ib3p11/hst_11635_11_wfc3_uvis_f469n_ib3p11_drc.fits
COMPLETE:  mastFiles/HST/hst_11635_11_wfc3_uvis_f469n_ib3p11/hst_11635_11_wfc3_uvis_f469n_ib3p11_point-cat.ecsv


In [23]:
# One ("uri", <dataURI>) form field per product to include in the bundle.
url_list = [("uri", url) for url in science_products['dataURI'][:2]]
extension = ".tar.gz"

# The extension is appended to the endpoint URL and reused for the local file name.
download_url = 'https://mast.stsci.edu/api/v0.1/Download/bundle'
resp = requests.post(download_url + extension, data=url_list)

out_file = "mastDownload" + extension

# Only write the archive when the request succeeded; otherwise the body is an
# error response and saving it would produce a file that is not a real bundle.
if resp.ok:
    with open(out_file, 'wb') as FLE:
        FLE.write(resp.content)
    print("COMPLETE: ", out_file)
else:
    print("ERROR: " + out_file + " failed to download.")

COMPLETE:  mastDownload.tar.gz
