## Imports and Functions

In [1]:
# this code allows us to run the internal packages

import os
import sys

# The project packages live in a ``data_models`` directory that sits next to
# the directory this notebook is run from, so that directory is placed at the
# front of the module search path (only once, even if the cell is re-run).
parent_directory = os.path.dirname(os.getcwd())

import_path = ''.join([parent_directory, '/data_models'])

if import_path not in sys.path:
    sys.path.insert(0, import_path)

In [2]:
from api import Api
from cmr.get_metadata import *

from datetime import datetime
import requests
import json

In [3]:
def convert_date(date_string):
    """Turn a dash-separated date string into a ``datetime``.

    Every dash-separated field is converted to ``int`` and the fields are
    passed positionally to ``datetime``, so ``'2012-08-28'`` becomes
    ``datetime(2012, 8, 28)`` (midnight). Any further fields would be taken
    as hour, minute, and so on.

    Args:
        date_string: text such as ``'YYYY-MM-DD'``.

    Returns:
        The corresponding ``datetime`` object.

    Raises:
        ValueError: if a field is not an integer or is out of range.
        TypeError: if fewer than three fields are supplied.
    """
    fields = map(int, date_string.split('-'))
    return datetime(*fields)

## Query Database

In [4]:
def build_dict(server, skip_campaigns=('DC3',)):
    """Build a campaign -> deployment -> flight tree and attach CMR metadata.

    The ``campaign``, ``deployment`` and ``collection_period`` tables are
    fetched from ``server``, nested inside one another, and every deployment
    of a non-skipped campaign gets two extra keys filled from CMR:
    ``'cmr'`` (from ``get_deployment_and_cp``) and ``'cmr_alt'`` (from
    ``get_concepts``). Progress is printed along the way.

    Args:
        server: object whose ``get(table_name)`` returns a dict with a
            ``'data'`` list of records. Campaign records need ``uuid`` and
            ``short_name``; deployment records need ``uuid``, ``start_date``,
            ``end_date`` and ``campaign``; collection period records need
            ``deployment``.
        skip_campaigns: collection (tuple/list/set) of campaign short names
            that are left without CMR data. Defaults to ``('DC3',)`` because
            the CMR query for DC3 fails.

    Returns:
        dict keyed by campaign uuid. Each value holds ``short_name``,
        ``uuid`` and ``deployments``; each deployment holds its own fields
        plus ``flights`` and, unless its campaign was skipped, ``cmr`` and
        ``cmr_alt``.

    Raises:
        KeyError: if a flight references an unknown deployment uuid or a
            deployment references an unknown campaign uuid.
    """
    # get the campaigns, keyed by uuid
    campaigns = server.get('campaign')
    camp_data = {
        camp['uuid']: {
            'short_name': camp['short_name'],
            'uuid': camp['uuid'],
            'deployments': [],
        }
        for camp in campaigns['data']
    }
    # print one sample record as a sanity check; guarded so that an empty
    # table no longer raises IndexError
    if camp_data:
        print(next(iter(camp_data.values())))

    # get the deployments, keyed by uuid
    deployments = server.get('deployment')
    dep_data = {
        dep['uuid']: {
            'uuid': dep['uuid'],
            'start_date': dep['start_date'],
            'end_date': dep['end_date'],
            'campaign': dep['campaign'],
            'flights': [],
        }
        for dep in deployments['data']
    }
    if dep_data:
        print(next(iter(dep_data.values())))

    # get the flights
    flights = server.get('collection_period')

    # build the mega dictionary: flights go under their deployment, and
    # deployments under their campaign
    for flight in flights['data']:
        dep_data[flight['deployment']]['flights'].append(flight)
    for deployment_data in dep_data.values():
        camp_data[deployment_data['campaign']]['deployments'].append(deployment_data)

    # add CMR to the data tree
    # loop through all campaigns in database
    for campaign in camp_data.values():
        # skipped campaigns keep their deployments but get no 'cmr' /
        # 'cmr_alt' keys, so later cells must skip them as well
        if campaign['short_name'] in skip_campaigns:
            continue
        print(campaign['short_name'])
        for deployment in campaign['deployments']:
            start = convert_date(deployment['start_date'])
            end = convert_date(deployment['end_date'])
            print('    ', start, '-', end)
            deployment['cmr'] = get_deployment_and_cp(campaign['short_name'], start, end)
            deployment['cmr_alt'] = get_concepts(campaign['short_name'], start, end)

    return camp_data

In [5]:
# Handle on the database API; reused by the cross-checking cells below.
# NOTE(review): 'test' presumably selects the test server/environment -
# confirm against the Api class.
test = Api('test')
# Builds the campaign/deployment/flight tree and queries CMR for every
# deployment. The progress output is large: the saved output of this cell was
# cut off by the notebook's IOPub data rate limit.
camp_data = build_dict(test)

{'short_name': 'CARVE', 'uuid': '3d42927c-4a55-4bea-9e6f-1d53d870965c', 'deployments': []}
{'uuid': 'a315c8a5-a1ef-40b1-8e34-9702cfe6b08b', 'start_date': '2012-08-28', 'end_date': '2012-11-06', 'campaign': 'ec77afcb-d280-4d77-a4c1-4bb79a5a76d7', 'flights': []}
CARVE
     2012-05-23 00:00:00 - 2012-10-30 00:00:00
         C1400101586-ORNL_DAAC success
         C1299758974-ORNL_DAAC success
         C1000000661-ORNL_DAAC success
         C1000000520-ORNL_DAAC success
         C1248454738-ORNL_DAAC success
         C1420459165-ORNL_DAAC success
         C1420654406-ORNL_DAAC success
         C1511758943-ORNL_DAAC success
         C1329921126-ORNL_DAAC success
         C1420654430-ORNL_DAAC success
         C1420654607-ORNL_DAAC success
         C1373782713-ORNL_DAAC success
         C1372809456-ORNL_DAAC success
         C1372808917-ORNL_DAAC success
         C1358003823-ORNL_DAAC success
         C1373450312-ORNL_DAAC success
         C1292501250-ORNL_DAAC success
         C1292500395-OR

         C1625128617-GHRC_CLOUD success
         C1625128634-GHRC_CLOUD success
         C1625128382-GHRC_CLOUD success
         C1625128643-GHRC_CLOUD success
         C1625128529-GHRC_CLOUD success
         C1625128530-GHRC_CLOUD success
         C1625128220-GHRC_CLOUD success
         C1625128614-GHRC_CLOUD success
         C1625128355-GHRC_CLOUD success
         C1625128303-GHRC_CLOUD success
         C1625128454-GHRC_CLOUD success
         C1625128289-GHRC_CLOUD success
         C1625128296-GHRC_CLOUD success
         C1625128498-GHRC_CLOUD success
         C1625128298-GHRC_CLOUD success
         C1625128359-GHRC_CLOUD success
         C1625128538-GHRC_CLOUD success
         C1625128285-GHRC_CLOUD success
         C1625128307-GHRC_CLOUD success
         C1625128752-GHRC_CLOUD success
         C1625128388-GHRC_CLOUD success
         C1625128560-GHRC_CLOUD success
         C1625128336-GHRC_CLOUD success
         C1625129048-GHRC_CLOUD success
         C1625128583-GHRC_CLOUD success


IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



## Add CMR Data

In [None]:
# # add CMR to the data tree


# # loop through all campaigns in database
# for campaign_uuid, campaign in camp_data.items():
#     if campaign['short_name']=='DC3':
#         continue
#     print(campaign['short_name'])
#     for deployment in campaign['deployments']:
#         start = convert_date(deployment['start_date'])
#         end = convert_date(deployment['end_date'])
#         print('    ', start, '-', end)
#         cmr_data = get_deployment_and_cp(campaign['short_name'], start, end)
#         deployment['cmr'] = cmr_data
# #         for flight in deployment['flights']:
# #             print('        ', flight['uuid'])

## Cross Checking

In [None]:
# ####################################################
# ############### OLD ################################
# ####################################################

# # loop through all campaigns in database
# for campaign_uuid, campaign in camp_data.items():
    
#     if campaign['short_name']=='DC3':
#         continue
        
#     print(campaign['short_name'])
#     for deployment in campaign['deployments']:
        
#         start = convert_date(deployment['start_date'])
#         end = convert_date(deployment['end_date'])
#         print('    ', start, '-', end)
        
#         print(len(deployment['flights']))
        
#         for cmr_plat_name, cmr_plat in deployment['cmr']['collection_period']['platforms'].items():
#             cmr_plat_short = cmr_plat['platform_names']['short_name']
#             print(' '*8, 'cmr plat:', cmr_plat_short)
#             cmr_inst_shorts = [data['short_name'] for key, data in cmr_plat['instruments'].items()]
#             print(' '*12, 'cmr inst:', cmr_inst_shorts)
#             dois = [data['DOI'] for data in cmr_plat['dois']]
#             print(' '*12, dois)            
            
#             for flight in deployment['flights']:
#                 flight_plat_shorts = test.gcmd_shorts('platform', flight['platform'])
# #                 print(' '*16,'camp plat:', flight_plat_shorts)
#                 flight_inst_shorts = [test.gcmd_shorts('instrument', inst_uuid) for inst_uuid in flight['instruments']]
# #               [print(' '*20, short) for short in flight_inst_shorts]
                
#                 plat_match = False
#                 if cmr_plat_short in flight_plat_shorts:
#                     print(' '*16, 'plat match found', cmr_plat_short)
#                     plat_match = True
                
#                 for cmr_inst_short in cmr_inst_shorts:
#                     if cmr_inst_short in flight_inst_shorts:
#                         print(' '*16, 'inst match found', cmr_inst_short)
#                         if plat_match == True:
#                             print(' '*20, 'flight match found', )
                
                
                
# #         cmr_data = get_deployment_and_cp(campaign['short_name'], start, end)
# #         deployment['cmr'].append(cmr_data)
# # #         for flight in deployment['flights']:
# # #             print('        ', flight['uuid'])

In [None]:
# # loop through all campaigns in database
# for campaign_uuid, campaign in camp_data.items():
    
#     if campaign['short_name']=='DC3':
#         continue
        
#     print(campaign['short_name'])
#     for deployment in campaign['deployments']:
        
#         start = convert_date(deployment['start_date'])
#         end = convert_date(deployment['end_date'])
#         print('    ', start, '-', end)
        
# #         print(len(deployment['flights']))
        
#         for cmr_plat_name, cmr_plat in deployment['cmr']['collection_period']['platforms'].items():
#             cmr_plat_short = cmr_plat['platform_names']['short_name']
# #             print(' '*8, 'cmr plat:', cmr_plat_short)
#             cmr_inst_shorts = [data['short_name'] for key, data in cmr_plat['instruments'].items()]
# #             print(' '*12, 'cmr inst:', cmr_inst_shorts)
#             dois = [data['DOI'] for data in cmr_plat['dois']]
# #             print(' '*12, dois)            
            
#             for flight in deployment['flights']:
#                 flight_plat_shorts = test.gcmd_shorts('platform', flight['platform'])
# #                 print(' '*16,'camp plat:', flight_plat_shorts)
#                 flight_inst_shorts = [test.gcmd_shorts('instrument', inst_uuid) for inst_uuid in flight['instruments']]
# #               [print(' '*20, short) for short in flight_inst_shorts]
                
#                 plat_match = False

#                 if cmr_plat_short in flight_plat_shorts:
#                     print(' '*16, 'plat match found', cmr_plat_short, dois)
#                     print(' '*16, 'plat match found', cmr_plat_short)
                    
#                     plat_match = True
#                 print(cmr_inst_shorts)
#                 print(flight_inst_shorts)
#                 print()                
#                 for cmr_inst_short in cmr_inst_shorts:
#                     if cmr_inst_short in flight_inst_shorts:
#                         print(' '*16, 'inst match found', cmr_inst_short)
#                         if plat_match == True:
#                             print(' '*20, 'flight match found', )
                
                
                
# #         cmr_data = get_deployment_and_cp(campaign['short_name'], start, end)
# #         deployment['cmr'].append(cmr_data)
# # #         for flight in deployment['flights']:
# # #             print('        ', flight['uuid'])

In [None]:
###############################
#### print out flight matches #
###############################

# Dry run of the matching logic: for one campaign, print every flight whose
# platform and/or instruments share a GCMD short name with a CMR concept
# stored under deployment['cmr_alt']. No DOI links are added in this cell.

# Only this campaign is inspected. Filtering on a single short name also
# keeps DC3 out of the loop; build_dict skips DC3, so its deployments have no
# 'cmr_alt' key.
campaign_to_check = 'CARVE'

# GCMD short names are looked up by uuid, so each uuid is resolved once and
# reused rather than re-queried for every concept/platform/flight combination.
# NOTE(review): assumes test.gcmd_shorts returns the same result for repeated
# calls with the same arguments - confirm against the Api class.
gcmd_platform_shorts = {}    # platform uuid -> result of gcmd_shorts
gcmd_instrument_shorts = {}  # instrument uuid -> result of gcmd_shorts

# loop through all campaigns in database
for campaign_uuid, campaign in camp_data.items():

    if campaign['short_name'] != campaign_to_check:
        continue

    print(campaign['short_name'])
    for deployment in campaign['deployments']:

        start = convert_date(deployment['start_date'])
        end = convert_date(deployment['end_date'])
        print('    ', start, '-', end)

        for concept in deployment['cmr_alt']:
            doi = concept['doi']

            # each entry of concept['platforms'] maps a CMR platform short
            # name to a dict holding that platform's instrument short names
            for cmr_plat_data in concept['platforms']:
                for cmr_plat_short, cmr_inst_short_data in cmr_plat_data.items():
                    cmr_inst_shorts = cmr_inst_short_data['instruments']

                    for flight in deployment['flights']:
                        plat_uuid = flight['platform']
                        if plat_uuid not in gcmd_platform_shorts:
                            gcmd_platform_shorts[plat_uuid] = test.gcmd_shorts('platform', plat_uuid)
                        flight_plat_shorts = gcmd_platform_shorts[plat_uuid]

                        # flatten the short names of all of the flight's
                        # instruments into one list
                        flight_inst_shorts = []
                        for inst_uuid in flight['instruments']:
                            if inst_uuid not in gcmd_instrument_shorts:
                                gcmd_instrument_shorts[inst_uuid] = test.gcmd_shorts('instrument', inst_uuid)
                            flight_inst_shorts.extend(gcmd_instrument_shorts[inst_uuid])

                        plat_match = cmr_plat_short in flight_plat_shorts
                        if plat_match:
                            print(' '*16, 'plat match found', cmr_plat_short, doi)

                        for cmr_inst_short in cmr_inst_shorts:
                            if cmr_inst_short in flight_inst_shorts:
                                print(' '*16, 'inst match found', cmr_inst_short, doi)
                                # a flight matches when both its platform and
                                # at least one instrument match the concept
                                if plat_match:
                                    print(' '*20, 'flight match found', flight['uuid'], doi )
                
                
                
# #         cmr_data = get_deployment_and_cp(campaign['short_name'], start, end)
# #         deployment['cmr'].append(cmr_data)
# # #         for flight in deployment['flights']:
# # #             print('        ', flight['uuid'])

In [8]:
###############################
####### add dois to db ########
###############################

# For every campaign except DC3, compare each CMR concept stored under
# deployment['cmr_alt'] with the deployment's flights and link the concept's
# DOI to every platform, instrument and collection period (flight) whose GCMD
# short names match.

# GCMD short names are looked up by uuid, so each uuid is resolved once and
# reused rather than re-queried for every concept/platform/flight combination.
# NOTE(review): assumes test.gcmd_shorts returns the same result for repeated
# calls with the same arguments - confirm against the Api class.
gcmd_platform_shorts = {}    # platform uuid -> result of gcmd_shorts
gcmd_instrument_shorts = {}  # instrument uuid -> result of gcmd_shorts

# (table, uuid, doi, title) combinations already sent to the database
linked_dois = set()


def link_doi_once(table_name, uuid, doi_data):
    """Call ``test.add_link_doi`` at most once per table/uuid/DOI combination.

    The same platform or instrument is matched again for every flight and
    deployment that uses it, which previously repeated identical requests.
    NOTE(review): assumes a repeated identical add_link_doi call adds nothing
    useful - confirm against the Api class.
    """
    link_key = (table_name, uuid, doi_data['short_name'], doi_data['long_name'])
    if link_key in linked_dois:
        return
    test.add_link_doi(table_name, uuid, doi_data)
    linked_dois.add(link_key)


# loop through all campaigns in database
for campaign_uuid, campaign in camp_data.items():

    # build_dict skips DC3, so its deployments have no 'cmr_alt' key
    if campaign['short_name'] == 'DC3':
        continue

    # to try the cell on one campaign first, also skip every campaign whose
    # short_name is not e.g. 'CARVE'

    print(campaign['short_name'])
    for deployment in campaign['deployments']:

        start = convert_date(deployment['start_date'])
        end = convert_date(deployment['end_date'])
        print('    ', start, '-', end)

        for concept in deployment['cmr_alt']:
            doi = concept['doi']
            doi_title = concept['EntryTitle'] # TODO: verify this line before integration
            # NOTE(review): the DOI string itself is stored as 'short_name';
            # the concept's own short_name is not used - confirm this is intended
            doi_data = {'short_name': doi, 'long_name': doi_title}

            # each entry of concept['platforms'] maps a CMR platform short
            # name to a dict holding that platform's instrument short names
            for cmr_plat_data in concept['platforms']:
                for cmr_plat_short, cmr_inst_short_data in cmr_plat_data.items():
                    cmr_inst_shorts = cmr_inst_short_data['instruments']

                    for flight in deployment['flights']:
                        plat_uuid = flight['platform']
                        if plat_uuid not in gcmd_platform_shorts:
                            gcmd_platform_shorts[plat_uuid] = test.gcmd_shorts('platform', plat_uuid)
                        flight_plat_shorts = gcmd_platform_shorts[plat_uuid]

                        plat_match = cmr_plat_short in flight_plat_shorts
                        if plat_match:
                            print(' '*16, 'plat match found', cmr_plat_short, flight['platform'], doi)
                            link_doi_once('platform', plat_uuid, doi_data)

                        for flight_inst_uuid in flight['instruments']:
                            if flight_inst_uuid not in gcmd_instrument_shorts:
                                gcmd_instrument_shorts[flight_inst_uuid] = test.gcmd_shorts('instrument', flight_inst_uuid)
                            flight_inst_shorts = gcmd_instrument_shorts[flight_inst_uuid]

                            for cmr_inst_short in cmr_inst_shorts:
                                if cmr_inst_short in flight_inst_shorts:
                                    print(' '*16, 'inst match found', cmr_inst_short, flight_inst_uuid, doi)
                                    link_doi_once('instrument', flight_inst_uuid, doi_data)
                                    # the flight itself is linked only when
                                    # both its platform and this instrument
                                    # match the concept
                                    if plat_match:
                                        print(' '*20, 'flight match found', flight['uuid'], doi )
                                        link_doi_once('collection_period', flight['uuid'], doi_data)
                
                
                
# #         cmr_data = get_deployment_and_cp(campaign['short_name'], start, end)
# #         deployment['cmr'].append(cmr_data)
# # #         for flight in deployment['flights']:
# # #             print('        ', flight['uuid'])

CARVE
     2012-05-23 00:00:00 - 2012-10-30 00:00:00
                 plat match found C-23 Sherpa e0f88864-3a65-4475-ae6d-ef86db5f2651 10.3334/ORNLDAAC/1300
                 plat match found C-23 Sherpa e0f88864-3a65-4475-ae6d-ef86db5f2651 10.3334/ORNLDAAC/1434
                 plat match found C-23 Sherpa e0f88864-3a65-4475-ae6d-ef86db5f2651 10.3334/ORNLDAAC/1433
                 plat match found C-23 Sherpa e0f88864-3a65-4475-ae6d-ef86db5f2651 10.3334/ORNLDAAC/1425
                 inst match found DADS 45abac7a-f9a0-4feb-acae-421ee5cc7ade 10.3334/ORNLDAAC/1425
                     flight match found 5a2156c5-8501-4ed8-a97b-6b0caea36677 10.3334/ORNLDAAC/1425
                 plat match found C-23 Sherpa e0f88864-3a65-4475-ae6d-ef86db5f2651 10.3334/ORNLDAAC/1429
                 plat match found C-23 Sherpa e0f88864-3a65-4475-ae6d-ef86db5f2651 10.3334/ORNLDAAC/1389
     2013-02-05 00:00:00 - 2013-11-25 00:00:00
                 plat match found C-23 Sherpa e0f88864-3a65-4475-ae6d-ef8

                 inst match found AVIRIS 3a57a6dd-6899-4f52-9f2f-e69bfb32a71f 10.5067/GOESRPLT/AVIRISNG/DATA101
                     flight match found c658eff9-c212-42f2-81bc-5dac2a3c7ebc 10.5067/GOESRPLT/AVIRISNG/DATA101
                 plat match found NASA ER-2 4c5aad6a-d7fb-4bec-ab1b-fb0abadf481b 10.5067/GOESRPLT/FEGS/DATA101
                 inst match found FEGS 01cd49de-428f-462f-98b4-bd1fb70601ec 10.5067/GOESRPLT/FEGS/DATA101
                     flight match found c658eff9-c212-42f2-81bc-5dac2a3c7ebc 10.5067/GOESRPLT/FEGS/DATA101
                 plat match found NASA ER-2 4c5aad6a-d7fb-4bec-ab1b-fb0abadf481b 10.5067/GOESRPLT/GCAS/DATA101
                 inst match found LMA f8718d9e-cacd-4ec1-ad5e-273358ae9af5 10.5067/GOESRPLT/LMA/DATA301
                 plat match found NASA ER-2 4c5aad6a-d7fb-4bec-ab1b-fb0abadf481b 10.5067/GOESRPLT/LIP/DATA101
                 inst match found LIP 2bebf989-3a1c-47a1-9509-6ee87704714b 10.5067/GOESRPLT/LIP/DATA101
                     fli

                 inst match found CPI PROBES 17611f92-9afc-4ea3-9ec8-bf52e7ac110f 10.5067/GPMGV/OLYMPEX/MULTIPLE/DATA201
                     flight match found 0912985c-818a-4de2-afa3-076da4d88795 10.5067/GPMGV/OLYMPEX/MULTIPLE/DATA201
                 plat match found UND CITATION II 2bc9548b-541c-4839-8570-1c0ebe713e95 10.5067/GPMGV/OLYMPEX/NAV/DATA101
                 plat match found NASA DC-8 ffd9d478-be13-4c5c-bef2-3c2cab3216ff 10.5067/GPMGV/OLYMPEX/RADIOSONDES/DATA101
ARCTAS
     2008-04-01 00:00:00 - 2008-04-21 00:00:00
     2008-06-18 00:00:00 - 2008-06-24 00:00:00
     2008-06-26 00:00:00 - 2008-07-14 00:00:00
AirMOSS
     2012-09-18 00:00:00 - 2012-09-20 00:00:00
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1412
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1415
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1420
          

                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1412
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1413
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1414
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1415
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1420
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1418
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1366
     2013-06-10 00:00:00 - 2013-06-21 00:00:00
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1406
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1407
                 plat match found A

                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1412
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1413
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1414
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1415
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1420
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1418
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1366
     2013-11-19 00:00:00 - 2013-11-21 00:00:00
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1406
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1407
                 plat match found A

                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1413
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1414
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1415
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1418
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1366
     2014-07-08 00:00:00 - 2014-07-19 00:00:00
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1406
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1407
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1408
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1409
                 plat match found A

                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1407
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1408
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1409
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1410
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1412
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1413
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1414
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1415
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10.3334/ORNLDAAC/1418
                 plat match found AIRCRAFT d23fda6b-92a0-4094-8217-66b3c49829da 10