## Imports and Functions

In [1]:
# this code allows us to run the internal packages

import os
import sys

parent_directory = os.path.split(os.getcwd())[0]

import_path = parent_directory + '/data_models'

if import_path not in sys.path:
    sys.path.insert(0, import_path)

In [2]:
from api import Api
from cmr.get_metadata import *

from datetime import datetime
import requests
import json

In [3]:
def convert_date(date_string):
    date_list = [int(x) for x in date_string.split('-')]
    return datetime(*date_list)

## Add Initial CMR files (only once)

In [5]:
old_list = [
        "ACES",
        "AirMOSS",
        "ARCTAS",
        "CARVE",
        "DC3",
        "GCPEx",
        "GOES-R PLT",
        "GRIP",
        "HS3",
        "OLYMPEX"
    ]
new_list=[
        "ACEPOL",
        "ACES",
        "ACT-America",
        "AfriSAR",
        "AirMOSS",
        "ARCTAS",
        "ATTREX",
        "BOREAS",
        "CARVE",
        "CLASIC07",
        "CPEX",
        "DC3",
        "GCPEx",
        "GOES-R PLT",
        "GRIP",
        "HS3",
        "IPHEx",
        "LPVEx",
        "NAMMA",
        "OLYMPEX",
        "OMG",
        "SEAC4RS",
        "SNF",
        "SnowEx",
        "TARFOX",
        "TCSP"
    ]

In [6]:
camps_to_add = [camp for camp in new_list if camp not in old_list]

In [7]:
for camp in camps_to_add:
    try:
        query_api(camp)
    except:
        print(camp)

## Query Database

In [8]:
failed_camps = ['BOREAS', 'DC3', 'SEAC4RS']

def build_dict(server):
    camp_data = {}
    
    # get the campaigns
    campaigns = server.get('campaign')
    camp_data = {camp['uuid']:{'short_name':camp['short_name'], 'uuid':camp['uuid'], 'deployments':[]} for camp in campaigns['data']}
    print(camp_data[list(camp_data.keys())[0]])

    # get the deployments 
    deployments = server.get('deployment')
    dep_data = {dep['uuid']:{'uuid':dep['uuid'], 'start_date':dep['start_date'], 'end_date':dep['end_date'], 'campaign': dep['campaign'], 'flights':[]} for dep in deployments['data']}
    print(dep_data[list(dep_data.keys())[0]])

    # get the flights
    flights = server.get('collection_period')
    flight_data = flights['data']

    # build the mega dictionary
    for flight in flights['data']:
        dep_data[flight['deployment']]['flights'].append(flight)
    for deployment_uuid, deployment_data in dep_data.items():
        camp_data[deployment_data['campaign']]['deployments'].append(deployment_data)

    # add CMR to the data tree
    # loop through all campaigns in database
    for campaign_uuid, campaign in camp_data.items():
        # skip DC3, since it fails
        if campaign['short_name'] in failed_camps:
            continue
        print(campaign['short_name'])
        for deployment in campaign['deployments']:
            start = convert_date(deployment['start_date'])
            end = convert_date(deployment['end_date'])
            print('    ', start, '-', end)
            cmr_data = get_deployment_and_cp(campaign['short_name'], start, end)
            cmr_data_alt = get_concepts(campaign['short_name'], start, end)
            deployment['cmr'] = cmr_data
            deployment['cmr_alt'] = cmr_data_alt
            
    return camp_data

In [9]:
api = Api('production')
camp_data = build_dict(api)

{'short_name': 'BOREAS', 'uuid': '20c2121f-bd91-4f7e-80ac-b729c1998ca9', 'deployments': []}
{'uuid': '54b4a66c-96ed-44e1-a1a0-d7aaa9b2c01b', 'start_date': '2012-08-28', 'end_date': '2012-11-06', 'campaign': '3671a147-4531-459e-92cf-3b15c0409426', 'flights': []}
GCPEx
     2012-01-15 00:00:00 - 2012-03-04 00:00:00
         C1625128636-GHRC_CLOUD success
         C1625128964-GHRC_CLOUD success
         C1625128999-GHRC_CLOUD success
         C1625128673-GHRC_CLOUD success
         C1625128701-GHRC_CLOUD success
         C1625128716-GHRC_CLOUD success
         C1625128607-GHRC_CLOUD success
         C1625128501-GHRC_CLOUD success
         C1625128649-GHRC_CLOUD success
         C1625128713-GHRC_CLOUD success
         C1625128767-GHRC_CLOUD success
         C1625128876-GHRC_CLOUD success
         C1625128398-GHRC_CLOUD success
         C1625128738-GHRC_CLOUD success
         C1625128916-GHRC_CLOUD success
         C1625128267-GHRC_CLOUD success
         C1625128683-GHRC_CLOUD success
     

         C1292501250-ORNL_DAAC success
         C1292500395-ORNL_DAAC success
         C1362316058-ORNL_DAAC success
         C1345709827-ORNL_DAAC success
         C1345709833-ORNL_DAAC success
         C1369107223-ORNL_DAAC success
         C1292501246-ORNL_DAAC success
         C1379758333-ORNL_DAAC success
         C1379758263-ORNL_DAAC success
         C1281783517-ORNL_DAAC success
         C1386601843-ORNL_DAAC success
         C1406946749-ORNL_DAAC success
{'concept_id': 'C1400101586-ORNL_DAAC', 'metadata': {'CollectionCitations': [{'OtherCitationDetails': 'Veraverbeke, S., B.M. Rogers, M.L. Goulden, R. Jandt, C.E. Miller, E.B. Wiggins, and J.T. Randerson. 2017. ABoVE: Ignitions, burned area and emissions of fires in AK, YT, and NWT, 2001-2015. ORNL DAAC, Oak Ridge, Tennessee, USA. https://doi.org/10.3334/ORNLDAAC/1341'}], 'SpatialExtent': {'SpatialCoverageType': 'HORIZONTAL', 'HorizontalSpatialDomain': {'Geometry': {'CoordinateSystem': 'CARTESIAN', 'BoundingRectangles': [{'West

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



## Add CMR Data

In [None]:
# # add CMR to the data tree


# # loop through all campaigns in database
# for campaign_uuid, campaign in camp_data.items():
#     if campaign['short_name']=='DC3':
#         continue
#     print(campaign['short_name'])
#     for deployment in campaign['deployments']:
#         start = convert_date(deployment['start_date'])
#         end = convert_date(deployment['end_date'])
#         print('    ', start, '-', end)
#         cmr_data = get_deployment_and_cp(campaign['short_name'], start, end)
#         deployment['cmr'] = cmr_data
# #         for flight in deployment['flights']:
# #             print('        ', flight['uuid'])

## Cross Checking

In [None]:
# ####################################################
# ############### OLD ################################
# ####################################################

# # loop through all campaigns in database
# for campaign_uuid, campaign in camp_data.items():
    
#     if campaign['short_name']=='DC3':
#         continue
        
#     print(campaign['short_name'])
#     for deployment in campaign['deployments']:
        
#         start = convert_date(deployment['start_date'])
#         end = convert_date(deployment['end_date'])
#         print('    ', start, '-', end)
        
#         print(len(deployment['flights']))
        
#         for cmr_plat_name, cmr_plat in deployment['cmr']['collection_period']['platforms'].items():
#             cmr_plat_short = cmr_plat['platform_names']['short_name']
#             print(' '*8, 'cmr plat:', cmr_plat_short)
#             cmr_inst_shorts = [data['short_name'] for key, data in cmr_plat['instruments'].items()]
#             print(' '*12, 'cmr inst:', cmr_inst_shorts)
#             dois = [data['DOI'] for data in cmr_plat['dois']]
#             print(' '*12, dois)            
            
#             for flight in deployment['flights']:
#                 flight_plat_shorts = test.gcmd_shorts('platform', flight['platform'])
# #                 print(' '*16,'camp plat:', flight_plat_shorts)
#                 flight_inst_shorts = [test.gcmd_shorts('instrument', inst_uuid) for inst_uuid in flight['instruments']]
# #               [print(' '*20, short) for short in flight_inst_shorts]
                
#                 plat_match = False
#                 if cmr_plat_short in flight_plat_shorts:
#                     print(' '*16, 'plat match found', cmr_plat_short)
#                     plat_match = True
                
#                 for cmr_inst_short in cmr_inst_shorts:
#                     if cmr_inst_short in flight_inst_shorts:
#                         print(' '*16, 'inst match found', cmr_inst_short)
#                         if plat_match == True:
#                             print(' '*20, 'flight match found', )
                
                
                
# #         cmr_data = get_deployment_and_cp(campaign['short_name'], start, end)
# #         deployment['cmr'].append(cmr_data)
# # #         for flight in deployment['flights']:
# # #             print('        ', flight['uuid'])

In [None]:
# # loop through all campaigns in database
# for campaign_uuid, campaign in camp_data.items():
    
#     if campaign['short_name']=='DC3':
#         continue
        
#     print(campaign['short_name'])
#     for deployment in campaign['deployments']:
        
#         start = convert_date(deployment['start_date'])
#         end = convert_date(deployment['end_date'])
#         print('    ', start, '-', end)
        
# #         print(len(deployment['flights']))
        
#         for cmr_plat_name, cmr_plat in deployment['cmr']['collection_period']['platforms'].items():
#             cmr_plat_short = cmr_plat['platform_names']['short_name']
# #             print(' '*8, 'cmr plat:', cmr_plat_short)
#             cmr_inst_shorts = [data['short_name'] for key, data in cmr_plat['instruments'].items()]
# #             print(' '*12, 'cmr inst:', cmr_inst_shorts)
#             dois = [data['DOI'] for data in cmr_plat['dois']]
# #             print(' '*12, dois)            
            
#             for flight in deployment['flights']:
#                 flight_plat_shorts = test.gcmd_shorts('platform', flight['platform'])
# #                 print(' '*16,'camp plat:', flight_plat_shorts)
#                 flight_inst_shorts = [test.gcmd_shorts('instrument', inst_uuid) for inst_uuid in flight['instruments']]
# #               [print(' '*20, short) for short in flight_inst_shorts]
                
#                 plat_match = False

#                 if cmr_plat_short in flight_plat_shorts:
#                     print(' '*16, 'plat match found', cmr_plat_short, dois)
#                     print(' '*16, 'plat match found', cmr_plat_short)
                    
#                     plat_match = True
#                 print(cmr_inst_shorts)
#                 print(flight_inst_shorts)
#                 print()                
#                 for cmr_inst_short in cmr_inst_shorts:
#                     if cmr_inst_short in flight_inst_shorts:
#                         print(' '*16, 'inst match found', cmr_inst_short)
#                         if plat_match == True:
#                             print(' '*20, 'flight match found', )
                
                
                
# #         cmr_data = get_deployment_and_cp(campaign['short_name'], start, end)
# #         deployment['cmr'].append(cmr_data)
# # #         for flight in deployment['flights']:
# # #             print('        ', flight['uuid'])

In [11]:
test = api

In [12]:
###############################
#### print out flight matches #
###############################

failed_camps = ['BOREAS', 'DC3', 'SEAC4RS', 'CARVE']


# loop through all campaigns in database
for campaign_uuid, campaign in camp_data.items():
    
    if campaign['short_name'] != 'OLYMPEX':
        continue
    
    if campaign['short_name'] in failed_camps:
        continue        
        
    print(campaign['short_name'])
    for deployment in campaign['deployments']:
        
        start = convert_date(deployment['start_date'])
        end = convert_date(deployment['end_date'])
        print('    ', start, '-', end)
        
#         print(len(deployment['flights']))
        for concept in deployment['cmr_alt']:
            doi = concept['doi']
            doi_short = concept['short_name']
#             print(' '*4, doi, doi_short)
            
            for cmr_plat_data in concept['platforms']:
                for cmr_plat_short, cmr_inst_short_data in cmr_plat_data.items():
                    cmr_inst_shorts = cmr_inst_short_data['instruments']
#                     print('cmr inst', cmr_inst_shorts)
#                     print(' '*8, cmr_plat_short)
#                     for cmr_inst_short in cmr_inst_shorts:
#                         print(' '*12, cmr_inst_short)     

                    for flight in deployment['flights']:
                        flight_plat_shorts = test.gcmd_shorts('platform', flight['platform'])
        #                 print(' '*16,'camp plat:', flight_plat_shorts)
                        flight_inst_shorts = [test.gcmd_shorts('instrument', inst_uuid) for inst_uuid in flight['instruments']]
                        flight_inst_shorts = [short for sub in flight_inst_shorts for short in sub]
        #               [print(' '*20, short) for short in flight_inst_shorts]
                        
                        plat_match = False

                        if cmr_plat_short in flight_plat_shorts:
                            print(' '*16, 'plat match found', cmr_plat_short, doi)

                            plat_match = True
#                         print(cmr_inst_shorts)
#                         print(flight_inst_shorts)
#                         print()       
#                         for 
                        for cmr_inst_short in cmr_inst_shorts:
                            if cmr_inst_short in flight_inst_shorts:
                                print(' '*16, 'inst match found', cmr_inst_short, doi)
                                if plat_match == True:
                                    print(' '*20, 'flight match found', flight['uuid'], doi )
                
                
                
# #         cmr_data = get_deployment_and_cp(campaign['short_name'], start, end)
# #         deployment['cmr'].append(cmr_data)
# # #         for flight in deployment['flights']:
# # #             print('        ', flight['uuid'])

OLYMPEX
     2015-11-01 00:00:00 - 2016-05-01 00:00:00
                 inst match found DOW 10.5067/GPMGV/OLYMPEX/DOW/DATA201
                 inst match found D3R 10.5067/GPMGV/OLYMPEX/D3R/DATA101
                 inst match found NEXRAD 10.5067/GPMGV/OLYMPEX/NEXRAD/DATA101
                 inst match found NEXRAD 10.5067/GPMGV/OLYMPEX/NEXRAD/DATA201
                 inst match found NEXRAD 10.5067/GPMGV/OLYMPEX/NEXRAD/DATA301


In [11]:
failed_camps = [
    'BOREAS', 
    'DC3', 
    'SEAC4RS', 
    'CARVE', 
    'ATTREX'
]

already_ingested = [
    "GCPEx",
    "GOES-R PLT",
    "GRIP",
    "OMG",
    "SNF",
    "SnowEx",
    "TARFOX",
    "ACEPOL",
    "ACES",
    "ACT-America",
    "AfriSAR",
    "AirMOSS",
    "ARCTAS",
]

In [19]:
###############################
####### add dois to db ########
###############################

try:
    # loop through all campaigns in database
    for campaign_uuid, campaign in camp_data.items():

        if campaign['short_name'] in failed_camps or campaign['short_name'] in already_ingested:
            continue


        print(campaign['short_name'])
        for deployment in campaign['deployments']:

            start = convert_date(deployment['start_date'])
            end = convert_date(deployment['end_date'])
            print('    ', start, '-', end)

    #         print(len(deployment['flights']))
            for concept in deployment['cmr_alt']:
                doi = concept['doi']
                doi_short = concept['short_name']
                doi_title = concept['EntryTitle'] # TODO: verify this line before integration
                doi_data = {'short_name': doi, 'long_name': doi_title}
    #             print(' '*4, doi, doi_short)

                for cmr_plat_data in concept['platforms']:
                    for cmr_plat_short, cmr_inst_short_data in cmr_plat_data.items():
                        cmr_inst_shorts = cmr_inst_short_data['instruments']
    #                     print('cmr inst', cmr_inst_shorts)
    #                     print(' '*8, cmr_plat_short)
    #                     for cmr_inst_short in cmr_inst_shorts:
    #                         print(' '*12, cmr_inst_short)     

                        for flight in deployment['flights']:
                            flight_plat_shorts = test.gcmd_shorts('platform', flight['platform'])
            #                 print(' '*16,'camp plat:', flight_plat_shorts)
    #                         flight_inst_shorts = [test.gcmd_shorts('instrument', inst_uuid) for inst_uuid in flight['instruments']]
    #                         flight_inst_shorts = [short for sub in flight_inst_shorts for short in sub]
            #               [print(' '*20, short) for short in flight_inst_shorts]

                            plat_match = False

                            if cmr_plat_short in flight_plat_shorts:
                                print(' '*16, 'plat match found', cmr_plat_short, flight['platform'], doi)
                                test.add_link_doi('platform', flight['platform'], doi_data)

                                plat_match = True
    #                         print(cmr_inst_shorts)
    #                         print(flight_inst_shorts)
    #                         print()       
    #                         for 
                            for flight_inst_uuid in flight['instruments']:
                                flight_inst_shorts = test.gcmd_shorts('instrument', flight_inst_uuid)

                                for cmr_inst_short in cmr_inst_shorts:
                                    if cmr_inst_short in flight_inst_shorts:
                                        print(' '*16, 'inst match found', cmr_inst_short, flight_inst_uuid, doi)
                                        test.add_link_doi('instrument', flight_inst_uuid, doi_data)
                                        if plat_match == True:
                                            print(' '*20, 'flight match found', flight['uuid'], doi )
                                            test.add_link_doi('collection_period', flight['uuid'], doi_data)



    # #         cmr_data = get_deployment_and_cp(campaign['short_name'], start, end)
    # #         deployment['cmr'].append(cmr_data)
    # # #         for flight in deployment['flights']:
    # # #             print('        ', flight['uuid'])
except:
    import ipdb; ipdb.set_trace()

CLASIC07
     2007-06-11 00:00:00 - 2007-07-06 00:00:00
                 inst match found PALS cb51e763-3128-48e8-b3dc-8521c6d7867f 10.5067/75ZB400QV98N
--Call--
> [0;32m/home/carson/anaconda3/envs/admg_aws/lib/python3.8/site-packages/urllib3/response.py[0m(623)[0;36mclosed[0;34m()[0m
[0;32m    622 [0;31m[0;34m[0m[0m
[0m[0;32m--> 623 [0;31m    [0;34m@[0m[0mproperty[0m[0;34m[0m[0;34m[0m[0m
[0m[0;32m    624 [0;31m    [0;32mdef[0m [0mclosed[0m[0;34m([0m[0mself[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0;34m[0m[0m
[0m
ipdb> quit
