# Imports

In [1]:
from helper_functions import * 
# explicit imports stay after the star import so it cannot shadow them
import json
from datetime import datetime

# User Input

In [3]:
# Campaign short name handed to the initial API query, plus the start/end
# datetimes handed to date_filter further down.
short_name = 'above'
dep_start = datetime(year=2015, month=1, day=2)
dep_end = datetime(year=2016, month=1, day=1)

# Pipeline

In [4]:
# initial api query
# Both helpers come from the star import of helper_functions; short_name is
# set in the User Input cell.
campaign_trees = ingest_campaign(short_name)
# campaign_metadata is later iterated as a sequence of data products, each
# indexed as data_product['metadata'][...].
# NOTE(review): "xlm" in the helper name looks like a typo for "xml" — confirm in helper_functions.
campaign_metadata = campaign_xlm_json(campaign_trees)

In [8]:
# for use in deployment/flight level assignments
# dep_start / dep_end are the datetimes from the User Input cell; the result is
# what extract_inst_plat consumes when building db['deployment'].
# NOTE(review): whether the bounds are inclusive depends on date_filter — confirm in helper_functions.
filtered_metadata = date_filter(campaign_metadata, dep_start, dep_end)

In [9]:
# Container for everything extracted by the cells below.
# The 'campaign' sub-dict is created here because later cells assign into
# db['campaign'][...] directly; with a bare {} those assignments raise KeyError
# on a fresh kernel (the only other initialisation is commented out).
db = {'campaign': {}}

In [13]:
# ContactPersons
# possible contact persons, might be suggested for several fields
# Campaign.technical_contact
# Campaign.project_lead
# Campaign.program_lead
# Instrument.technical_contact
# Instrument.lead_investigator
# flight.platform_technical_contact

# db['campaign']={}
# db['campaign']['contacts']=general_extractor(campaign_metadata, 'ContactPersons')

In [14]:
# # Abstract
# # could help when writing the description 
# db['campaign']['description_long']=general_extractor(campaign_metadata, 'Abstract')

In [15]:
# DOI
# No direct counterpart for this yet. Need to ask Stephanie if she wants it.
dois = general_extractor(campaign_metadata, 'DOI')

# Pull the 'DOI' value out of every extracted entry; entries without that key
# contribute None.
processed_dois = [doi_entry.get('DOI') for doi_entry in dois]

db['campaign']['dois'] = processed_dois

In [16]:
# Inspect the DOIs stored for the campaign (bare expression -> shown as cell output).
db['campaign']['dois']

['10.3334/ORNLDAAC/1617',
 '10.5067/TLOIRQYWX00H',
 '10.5067/4O5WY1ORYWK2',
 '10.5067/UMRAWS57QAFU',
 '10.5067/IA5WAX7K3YGY',
 '10.3334/ORNLDAAC/1657',
 '10.3334/ORNLDAAC/1676',
 '10.3334/ORNLDAAC/1760',
 '10.3334/ORNLDAAC/1643',
 '10.3334/ORNLDAAC/1646',
 '10.3334/ORNLDAAC/1655',
 '10.3334/ORNLDAAC/1707',
 '10.3334/ORNLDAAC/1698',
 '10.3334/ORNLDAAC/1724',
 '10.3334/ORNLDAAC/1717',
 '10.3334/ORNLDAAC/1658',
 '10.3334/ORNLDAAC/1545',
 '10.3334/ORNLDAAC/1694',
 '10.3334/ORNLDAAC/1548',
 '10.3334/ORNLDAAC/1307',
 '10.3334/ORNLDAAC/1740',
 '10.3334/ORNLDAAC/1595',
 '10.3334/ORNLDAAC/1664',
 '10.3334/ORNLDAAC/1606',
 '10.3334/ORNLDAAC/1562',
 '10.3334/ORNLDAAC/1700',
 '10.3334/ORNLDAAC/1526',
 '10.3334/ORNLDAAC/1640',
 '10.3334/ORNLDAAC/1602',
 '10.3334/ORNLDAAC/1583',
 '10.3334/ORNLDAAC/1582',
 '10.3334/ORNLDAAC/1761',
 '10.3334/ORNLDAAC/1695',
 '10.3334/ORNLDAAC/1702',
 '10.3334/ORNLDAAC/1705',
 '10.3334/ORNLDAAC/1739',
 '10.3334/ORNLDAAC/1362',
 '10.3334/ORNLDAAC/1565',
 '10.3334/ORNLDA

In [64]:
# SpatialExtents
# Stores the raw result of general_extractor for the 'SpatialExtent' key;
# requires db['campaign'] to already exist.
# NOTE(review): presumably one entry per data product — confirm in helper_functions.
db['campaign']['spatial_bounds']=general_extractor(campaign_metadata, 'SpatialExtent')
# TODO: this needs more processing in the combine_spatial_extents() function

In [55]:
# LocationKeywords
# this doesn't have a direct match. Need to ask Stephanie if she wants it
# otherwise, we can parse these and put them into the campaign.region_description

nested_regions = general_extractor(campaign_metadata, 'LocationKeywords')

# Dicts are unhashable, so each region is serialised to a JSON string in order
# to de-duplicate through a set. sort_keys gives equal dicts the same string
# regardless of their key order. The nested loops unpack the nested entries.
regions_json = {
    json.dumps(region, sort_keys=True)
    for region_list in nested_regions
    for region in region_list
}

# Fixed: this used to iterate over the undefined name `regions` (NameError).
# Sorting the serialised strings keeps the output order stable between runs.
regions_dict = [json.loads(region) for region in sorted(regions_json)]

db['campaign']['gcmd_region'] = regions_dict

In [57]:
# Platforms
# Uses the date-filtered metadata rather than the full campaign metadata.
# The displayed result maps platform names to sets of instrument names.
db['deployment'] = extract_inst_plat(filtered_metadata)
# TODO: maybe make a flight from each platform and assign it instruments?

In [58]:
# Inspect the platform -> instruments mapping built by extract_inst_plat.
db['deployment']

{'FIELD INVESTIGATION': {'Computer',
  'PROBES',
  'SOIL SAMPLER',
  'VISUAL OBSERVATIONS'},
 'G-III': {'P-SAR', 'RADAR', 'UAVSAR'},
 'Environmental Modeling': {'Computer'},
 'SATELLITES': {'MODIS'},
 'SMAP': {'SMAP L-BAND RADAR'},
 'B-200': {'AirSWOT', 'CAMERA'},
 'LANDSAT': {'DIGITIZER', 'MSS', 'TM'},
 'FIELD SURVEYS': {'CAMERAS',
  'CLINOMETERS',
  'CO2 ANALYZERS',
  'Computer',
  'GNSS RECEIVER',
  'GPS',
  'IR CO2 ANALYZER',
  'LICOR QUANTUM SENSOR',
  'LICOR SOIL GAS CHAMBER',
  'MAGNAPROBE',
  'NET RADIOMETERS',
  'PROBES',
  'SOIL DEPTH PROBE',
  'SOIL MOISTURE PROBE',
  'SOIL SAMPLER',
  'SOIL TEMPERATURE PROBE',
  'STEEL MEASURING TAPE',
  'TEMPERATURE LOGGERS',
  'THERMOCOUPLES',
  'THERMOMETERS',
  'VISUAL OBSERVATIONS'},
 'LANDSAT-8': {'OLI', 'TIRS'},
 'AQUA': {'AMSR-E', 'MODIS'},
 'TERRA': {'MODIS'},
 'LABORATORY': {'CARBON ANALYZERS', 'MASS SPECTROMETERS'},
 'NOAA POES': {'SSM/I'},
 'DMSP': {'AVHRR', 'SSM/I'},
 'GROUND STATIONS': {'ANEMOMETERS',
  'EDDY CORRELATION DEVIC

In [59]:
# Build a mapping "<platform ShortName>_&_<platform LongName>" -> set of
# instrument ShortNames, across every data product in the campaign metadata.
platforms = {}
for data_product in campaign_metadata:

    for platform_info in data_product['metadata']['Platforms']:
        platform_short = platform_info['ShortName']
        # Fixed: the key is 'LongName' (it was misspelled 'LongNameName', which
        # raised KeyError). Falls back to '' when a platform has no long name.
        platform_long = platform_info.get('LongName', '')
        platform_reference_name = platform_short + '_&_' + platform_long

        # Register the platform even when it ends up with no instruments.
        instrument_names = platforms.setdefault(platform_reference_name, set())

        # many satellites don't have instrument metadata
        # Fixed: instruments were appended under the undefined name
        # `platform_short_name`; they belong under the reference-name key.
        for instrument_info in platform_info.get('Instruments', []):
            instrument_names.add(instrument_info['ShortName'])

In [66]:
# Inspect the raw 'Platforms' entry of the second data product (index 1) to see
# which keys each platform/instrument record carries.
campaign_metadata[1]['metadata']['Platforms']

[{'Type': 'Aircraft',
  'ShortName': 'B-200',
  'LongName': 'Beechcraft King Air B-200',
  'Characteristics': [{'Name': 'AircraftID',
    'Description': 'The identifier of the airplane used by the FAA to uniquely identify each aircraft',
    'Value': 'N529NA',
    'Unit': 'Not Applicable',
    'DataType': 'STRING'}],
  'Instruments': [{'ShortName': 'LVIS',
    'LongName': 'Land, Vegetation, and Ice Sensor',
    'Technique': 'instrument',
    'NumberOfInstruments': 1,
    'ComposedOf': [{'ShortName': 'LVIS',
      'LongName': 'Land, Vegetation, and Ice Sensor'}]}]},
 {'Type': 'Aircraft',
  'ShortName': 'C-130',
  'LongName': 'Cessna 130',
  'Characteristics': [{'Name': 'AircraftID',
    'Description': 'The identifier of the airplane used by the FAA to uniquely identify each aircraft',
    'Value': 'N439NA',
    'Unit': 'Not Applicable',
    'DataType': 'STRING'}],
  'Instruments': [{'ShortName': 'LVIS',
    'LongName': 'Land, Vegetation, and Ice Sensor',
    'Technique': 'instrument',
 

In [79]:
# Scratch arithmetic: ratio of 2560*1440 to 1080*1920. The result is not
# assigned or used by any other cell.
# NOTE(review): looks like a display-resolution pixel-count comparison unrelated to the pipeline — consider removing.
(2560*1440)/(1080*1920)

1.7777777777777777

In [77]:
# Scratch arithmetic: ratio of 3840*2160 to 1080*1920. The result is not
# assigned or used by any other cell.
# NOTE(review): looks like a display-resolution pixel-count comparison unrelated to the pipeline — consider removing.
(3840*2160)/(1080*1920)

4.0

4.0

In [None]:
# CollectionCitations
citations = general_extractor(campaign_metadata, 'CollectionCitations')
db['campaign']['publications']=[
    citation 
        for citation_list in citations
            for citation in citation_list 
                if citation.get('Title')
]
# this has more information in it than we currently track for campaign.publications...
# maybe we can add some more granularity to that field?

In [None]:
# ScienceKeywords
# Stores the raw general_extractor result with no flattening or de-duplication;
# requires db['campaign'] to already exist.
db['campaign']['gcmd_phenomena']=general_extractor(campaign_metadata, 'ScienceKeywords')

In [None]:
# RelatedUrls
# Stores the raw general_extractor result with no flattening or de-duplication;
# requires db['campaign'] to already exist.
db['campaign']['other_resources']=general_extractor(campaign_metadata, 'RelatedUrls')
# this contains a 'Description' of the resource as well as the 'URL'
# perhaps we should implement an other_resource description in our db?

In [None]:
# DataCenters
# There are multiple DAAC roles, ARCHIVER, DISTRIBUTOR, PROCESSOR, ORIGINATOR.
# Which ones do we care about? Trim role_filter to narrow the selection.

role_filter=['ARCHIVER', 'DISTRIBUTOR', 'PROCESSOR', 'ORIGINATOR']

mega_daac_list = general_extractor(campaign_metadata, 'DataCenters')

# Keep a data center when ANY of its roles is in role_filter. Only Roles[0] was
# checked before, which dropped centers whose matching role was not listed
# first and raised on an empty or missing 'Roles' list.
# NOTE(review): one ShortName is emitted per matching data center per product,
# so names can repeat — de-duplicate here if a unique list is intended.
db['campaign']['repositories'] = [
    daac['ShortName']
    for daac_list in mega_daac_list
    for daac in daac_list
    if any(role in role_filter for role in daac.get('Roles', []))
]