In [40]:
import requests
import time
import glob
import os
import fnmatch
import xml
import xml.etree.ElementTree as ET
import argparse
import NETCDF4

In [41]:
def companion_missing(granule_name, companion_names, granule_pattern, companion_pattern):
    """Report whether a granule has no matching companion file.

    The expected companion file name is the granule's file name with its
    final extension (e.g. ``.nc``) removed and ``companion_pattern``
    appended. Only file names are compared, so the granule and its
    companion may live in different folders.

    Parameters
    ----------
    granule_name : str
        Path or file name of the granule.
    companion_names : iterable of str
        Paths or file names of the available companion files.
    granule_pattern : str
        Unused; kept so existing callers keep working.
    companion_pattern : str
        Suffix of the companion file name (e.g. ``'_stare.nc'``). May
        contain fnmatch wildcards.

    Returns
    -------
    bool
        True if no companion matches, False otherwise.
    """
    # Strip only the last extension; earlier dots are part of the granule name.
    stem = os.path.splitext(os.path.basename(granule_name))[0]
    # Escape wildcard characters in the stem so only companion_pattern can act as a pattern.
    pattern = glob.escape(stem) + companion_pattern
    return not any(
        fnmatch.fnmatch(os.path.basename(companion_name), pattern)
        for companion_name in companion_names
    )

In [42]:
def get_lonely_granules(granule_folder, companion_folder, granule_pattern, companion_pattern):
    """List the granules that have no companion file.

    Parameters
    ----------
    granule_folder : str
        Folder searched for granules; ``~`` is expanded. A trailing path
        separator is optional.
    companion_folder : str
        Folder searched for companion files; ``~`` is expanded.
    granule_pattern : str
        Prefix of the granule file names (e.g. ``'CLD'``).
    companion_pattern : str
        Suffix of the companion file names (e.g. ``'_stare.nc'``).

    Returns
    -------
    list of str
        File names (without folder) of the granules lacking a companion,
        in sorted path order. Each one is also printed.
    """
    # os.path.join makes the search work with or without a trailing separator.
    granule_glob = os.path.join(os.path.expanduser(granule_folder), granule_pattern + '*')
    companion_glob = os.path.join(os.path.expanduser(companion_folder), '*' + companion_pattern)

    # Companion files can share the granule prefix; keep them out of the granule list.
    granule_names = sorted(
        path for path in glob.glob(granule_glob) if not path.endswith('stare.nc')
    )
    companion_names = sorted(glob.glob(companion_glob))

    missing = []
    for granule_name in granule_names:
        if companion_missing(granule_name, companion_names, granule_pattern, companion_pattern):
            base_name = os.path.basename(granule_name)
            missing.append(base_name)
            print('missing companion for: ' + base_name)
    return missing

In [43]:
# Inputs for the companion check below.
# The folders end in '/' because get_lonely_granules builds its glob
# patterns by plain string concatenation.
granule_folder = 'data/'
companion_folder = 'data/'
# Prefix that granule file names must start with.
granule_pattern = 'CLD'
# Suffix that companion file names must end with.
companion_pattern = '_stare.nc'

In [44]:
# List the granules matching granule_pattern that have no companion file.
get_lonely_granules(granule_folder, companion_folder, granule_pattern, companion_pattern)

missing companion for: CLDMSK_L2_VIIRS_SNPP.A2018167.0712.001.2019064063933.nc
missing companion for: CLDMSK_L2_VIIRS_SNPP.A2020099.0518.001.2020099131351.nc
missing companion for: CLDMSK_L2_VIIRS_SNPP.A2020219.0742.001.2020219190616.nc
missing companion for: CLDMSK_L2_VIIRS_SNPP.A2020234.0442.001.2020234130517.nc


['CLDMSK_L2_VIIRS_SNPP.A2018167.0712.001.2019064063933.nc',
 'CLDMSK_L2_VIIRS_SNPP.A2020099.0518.001.2020099131351.nc',
 'CLDMSK_L2_VIIRS_SNPP.A2020219.0742.001.2020219190616.nc',
 'CLDMSK_L2_VIIRS_SNPP.A2020234.0442.001.2020234130517.nc']

In [45]:
# Repeat the companion check for granules whose names start with 'VNP02'.
granule_pattern = 'VNP02'
get_lonely_granules(granule_folder, companion_folder, granule_pattern, companion_pattern)

missing companion for: VNP02DNB.A2018167.0712.001.2018170061449.nc
missing companion for: VNP02DNB.A2020099.0518.001.2020105152822.nc
missing companion for: VNP02DNB.A2020219.0742.001.2020219125654.nc
missing companion for: VNP02DNB.A2020234.0442.001.2020234113021.nc


['VNP02DNB.A2018167.0712.001.2018170061449.nc',
 'VNP02DNB.A2020099.0518.001.2020105152822.nc',
 'VNP02DNB.A2020219.0742.001.2020219125654.nc',
 'VNP02DNB.A2020234.0442.001.2020234113021.nc']

In [46]:
# Repeat the companion check for granules whose names start with 'VNP03'.
granule_pattern = 'VNP03'
get_lonely_granules(granule_folder, companion_folder, granule_pattern, companion_pattern)

[]

In [47]:
# Widen the prefix to every granule whose name starts with 'VNP'.
granule_pattern = 'VNP'
# Same value as assigned earlier; restated so this cell is self-contained.
companion_pattern = '_stare.nc'

In [48]:
# Same check with the folders given literally instead of via the folder variables.
get_lonely_granules('data/', 'data/', granule_pattern, companion_pattern)

missing companion for: VNP02DNB.A2018167.0712.001.2018170061449.nc
missing companion for: VNP02DNB.A2020099.0518.001.2020105152822.nc
missing companion for: VNP02DNB.A2020219.0742.001.2020219125654.nc
missing companion for: VNP02DNB.A2020234.0442.001.2020234113021.nc


['VNP02DNB.A2018167.0712.001.2018170061449.nc',
 'VNP02DNB.A2020099.0518.001.2020105152822.nc',
 'VNP02DNB.A2020219.0742.001.2020219125654.nc',
 'VNP02DNB.A2020234.0442.001.2020234113021.nc']

In [49]:
# NOTE(review): this import sits mid-notebook; the functions defined below rely on it.
import netCDF4
# Open one companion file in read mode.
# NOTE(review): the handle is not closed in the visible cells.
# NOTE(review): `format` presumably only matters when creating a file - confirm against the netCDF4 docs.
netcdf = netCDF4.Dataset('data/VNP03DNB.A2020234.0442.001.2020234111916_stare.nc', 'r', format = 'NETCDF4')

In [50]:
def variable_checker(netcdf):
    """Report whether a dataset lacks any of the expected STARE variables.

    Parameters
    ----------
    netcdf : object
        Any object with a mapping-like ``variables`` attribute keyed by
        variable name (e.g. an open ``netCDF4.Dataset``).

    Returns
    -------
    bool
        True if at least one expected variable is absent (or stored as
        None), False if all are present.
    """
    stare_variables = ['Latitude_750m', 'Longitude_750m', 'STARE_cover_750m', 'STARE_index_750m']
    variables = netcdf.variables
    for name in stare_variables:
        # Test membership first: indexing a missing key raises KeyError
        # instead of yielding None.
        if name not in variables or variables[name] is None:
            return True
    return False


In [51]:
def find_missing_variables(companion_folder, companion_pattern):
    """List the companion files that lack expected variables.

    Parameters
    ----------
    companion_folder : str
        Folder searched for companion files; ``~`` is expanded. A trailing
        path separator is optional.
    companion_pattern : str
        Suffix of the companion file names (e.g. ``'_stare.nc'``).

    Returns
    -------
    list of str
        File names (without folder) of every companion file flagged by
        ``variable_checker``; empty if none are flagged or no file matches.
    """
    companion_glob = os.path.join(os.path.expanduser(companion_folder), '*' + companion_pattern)
    # Create the result list once, before the loop, so results from every
    # file accumulate and an empty folder still returns a list.
    missing = []
    for companion_name in sorted(glob.glob(companion_glob)):
        # The context manager closes each dataset as soon as it has been checked.
        with netCDF4.Dataset(companion_name, 'r') as netcdf:
            incomplete = variable_checker(netcdf)
        if incomplete:
            missing.append(os.path.basename(companion_name))
    return missing
        

In [52]:
find_missing_variables(companion_folder, companion_pattern)

[]

In [54]:
if __name__ == '__main__':
    # Command-line entry point: report granules without a companion file,
    # then companion files that are missing variables.
    parser = argparse.ArgumentParser(
        description='Finds and retrieves missing geolocation companion files'
    )
    parser.add_argument(
        '--granule_folder',
        required=True,
        type=str,
        help='Granule folder (e.g. location of VNP02DNB, VNP03DNB, or CLDMSK)',
    )
    parser.add_argument(
        '--companion_folder',
        type=str,
        help='Companion folder (e.g. location of *_stare.nc). Default: granule_folder',
    )
    parser.add_argument(
        '--granule_pattern',
        required=True,
        type=str,
        help='Pattern of the granule name (e.g. VNP02DNB, VNP03DNB, or CLDMSK)',
    )
    parser.add_argument(
        '--companion_pattern',
        required=True,
        type=str,
        help='Pattern of the companion name (e.g _stare.nc)',
    )
    args = parser.parse_args()

    # Companions are looked up next to the granules unless a folder was given.
    if args.companion_folder is None:
        args.companion_folder = args.granule_folder

    lonely_granules = get_lonely_granules(
        granule_folder=args.granule_folder,
        companion_folder=args.companion_folder,
        granule_pattern=args.granule_pattern,
        companion_pattern=args.companion_pattern,
    )
    print('{n} missing companions'.format(n=len(lonely_granules)))

    missing_variables = find_missing_variables(
        companion_folder=args.companion_folder,
        companion_pattern=args.companion_pattern,
    )
    print('{n} files are missing variables'.format(n=len(missing_variables)))

usage: ipykernel_launcher.py [-h] --granule_folder GRANULE_FOLDER
                             [--companion_folder COMPANION_FOLDER]
                             --granule_pattern GRANULE_PATTERN
                             --companion_pattern COMPANION_PATTERN
ipykernel_launcher.py: error: the following arguments are required: --granule_folder, --granule_pattern, --companion_pattern


SystemExit: 2