In [1]:
from maap.maap import MAAP
# import printing package to help display outputs
from pprint import pprint
import os
import requests
import shutil

In [3]:
# This cell determines which datasets in MAAP CMR we should propose to offer in STAC
# because they are not discoverable and accessible in NASA's operational CMR.
#
# Every collection returned by the MAAP CMR query below ends up in exactly one list:
#   not_in_cmr           - no granules were found in NASA's operational CMR
#   in_cmr_can_access    - granules were found and a test download looks like real data
#   in_cmr_cannot_access - granules were found, but the test download failed or the
#                          downloaded file is too small to be real data
# Each list entry is [title, short_name, version].
# NOTE(review): only one page of up to 100 collections is requested; confirm that MAAP CMR
# does not hold more collections than that, otherwise the remainder is silently skipped.
URL = 'https://cmr.maap-project.org/search/collections.json?page_size=100'
NASA_CMR_HOST = 'cmr.earthdata.nasa.gov'
DOWNLOAD_DIR = 'tmp'
# Downloads smaller than this many bytes are treated as invalid
# (presumably an error or login page rather than data - TODO confirm the threshold).
MIN_VALID_FILE_SIZE = 1000
REQUEST_TIMEOUT = 60  # seconds; keeps the cell from hanging forever on a stalled connection

# invoke the MAAP search client
maap = MAAP(maap_host='api.ops.maap-project.org')

response = requests.get(URL, timeout=REQUEST_TIMEOUT)
# Fail loudly on an HTTP error instead of hitting a confusing KeyError on the JSON below.
response.raise_for_status()

not_in_cmr = []
in_cmr_can_access = []
in_cmr_cannot_access = []

# The download directory must exist before the first test download and is removed in the
# `finally` clause, so a failure part-way through the loop does not leave large files behind.
os.makedirs(DOWNLOAD_DIR, exist_ok=True)
try:
    for collection in response.json()['feed']['entry']:
        short_name, version, title = collection['short_name'], collection['version_id'], collection['title']

        # search NASA's CMR for the collection
        # 'AfriSAR_AGB_Maps_1681'
        results = maap.searchGranule(
            cmr_host=NASA_CMR_HOST,
            short_name=short_name,
            version=version,
        )
        if len(results) == 0 and version.startswith("00"):
            # Retry without the leading "00" (e.g. "001" -> "1"). Only the prefix is dropped,
            # so zeros elsewhere in the version string are preserved, and the retry is
            # skipped entirely when there is no prefix to drop.
            version = version[2:]
            results = maap.searchGranule(
                cmr_host=NASA_CMR_HOST,
                short_name=short_name,
                version=version,
            )

        if len(results) > 0:
            # the collection IS in NASA's CMR; if we can also access it via maap-py, we don't need to catalog it in STAC
            print(f"Found granules for {short_name}, version {version}. Testing download...")
            print(f"Collection title is {title}")
            try:
                downloadedFile = results[0].getData(DOWNLOAD_DIR)
                print(f"Downloaded file {downloadedFile}")
                file_size = os.path.getsize(downloadedFile)
                print("File Size is :", file_size, "bytes")
                if file_size < MIN_VALID_FILE_SIZE:
                    # downloadedFile is a path, not a file object: open it to show its content.
                    # errors='replace' keeps undecodable bytes from raising here.
                    with open(downloadedFile, errors='replace') as small_file:
                        print(small_file.read())
                    print(f"Pretty sure {downloadedFile} is NOT a valid file and we should propose to catalog {short_name}, version {version}")
                    in_cmr_cannot_access.append([title, short_name, version])
                else:
                    in_cmr_can_access.append([title, short_name, version])
                    print(f"Pretty sure {downloadedFile} is a valid file.")
            except Exception as e:
                # Best effort: any failure while downloading or inspecting the file counts as "cannot access".
                in_cmr_cannot_access.append([title, short_name, version])
                print(f"Cannot access {results[0]._location} from {short_name}, version {version}")
                print(e)

        # if the collection isn't in NASA's CMR, we will propose to keep it
        else:
            not_in_cmr.append([title, short_name, version])
            print(f"{short_name}, version {version} not in NASA's operational CMR.")
finally:
    shutil.rmtree(DOWNLOAD_DIR, ignore_errors=True)


print("Not in CMR:")
print(*not_in_cmr, sep='\n')

print("Can access:")
print(*in_cmr_can_access, sep='\n')

print("Cannot access:")
print(*in_cmr_cannot_access, sep='\n')


Found granules for ABLVIS1B, version 1. Testing download...
Collection title is ABoVE LVIS L1B Geolocated Return Energy Waveforms V001
Downloaded file tmp/LVIS1B_ABoVE2017_0629_R1803_056233.h5
File Size is : 995295392 bytes
Pretty sure tmp/LVIS1B_ABoVE2017_0629_R1803_056233.h5 is a valid file.
Found granules for ABLVIS2, version 1. Testing download...
Collection title is ABoVE LVIS L2 Geolocated Surface Elevation Product V001
Downloaded file tmp/LVIS2_ABoVE2017_0629_R1803_056233.TXT
File Size is : 106921165 bytes
Pretty sure tmp/LVIS2_ABoVE2017_0629_R1803_056233.TXT is a valid file.
Found granules for ALOS_PSR_RTC_HIGH, version 1. Testing download...
Collection title is Advance Land Observing Satellite Phased Array type L-band Synthetic Aperture Radar Radiometric Terrain-Corrected High Resolution products, Equatorial Western Africa, May 2006-March 2011
Downloaded file tmp/AP_01635_FBS_F0340_RT1.zip
File Size is : 151904490 bytes
Pretty sure tmp/AP_01635_FBS_F0340_RT1.zip is a valid fil