In [1]:
from pathlib import Path

import pandas as pd

from pdr_tests.utilz.new_coverage_utilz import (
    MetricLoader, pathtable_to_treeframe, tf_pathcounts
)

#### Load summary products for one or more manifests

**Note**: These summary products are generated ahead of time, so they may be stale: they don't update automatically when pdr-tests changes. You'll want to recreate them occasionally (TODO: add/point to instructions on how to do that) so you have up-to-date metrics.

In [2]:
# Root directory for the coverage files. Edit COVERAGE_ROOT_PATH if you
# have put them somewhere other than ~/datascratch/pdr_coverage.
COVERAGE_ROOT_PATH = Path.home().joinpath("datascratch", "pdr_coverage")
# the loader is pointed at the "coverage_metrics" subdirectory of that root
loader = MetricLoader(COVERAGE_ROOT_PATH.joinpath("coverage_metrics"))

In [None]:
# now you can load products of whatever type, pivot, 
# and filter you like, for one or more manifests at once.

# TODO: explain explain etc.

# options are:
# pivot = one of ("dataset_pds", "dataset_ix", "ptype", "volume")
# filt = one of ("cov", "ucov", "inc", "all") 
# mtype = ("paths", "stats")

# don't specify 'filt' for stats files.

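# run this if you'd like to see the names of all manifests
# with available metrics (note: neither `measured_manifest_names`
# nor `metrics_path` is imported or defined in the cells above, so
# import/define them first -- `metrics_path` is presumably the
# directory passed to MetricLoader; TODO confirm):
# ```
# measured_manifest_names(metrics_path)
# ```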

#### The cell below shows PDS datasets with no coverage.

The default settings for 'loader' are `pivot="dataset_pds"` and `mtype="stats"`.
This is the most useful product type for this kind of analysis.

**Note**: You can change the `== 0` to `<= 0.3` to get, for example, every dataset with 30% coverage or less. You'll see that for this particular example there is no change in the number of rows. This means that any PDS datasets we have coverage for are covered at a rate higher than 30%. Which is good! We should have very high coverage in any volume we've already added definitions for!

In [3]:
# Load the stats products for PDS datasets at IMG-USGS.
# No manifest is literally called 'img_usgs' (they are
# 'img_usgs_clementine' and so on), but the loader accepts
# a partial name, a list of (partial) names, or a complete name.

statstable = loader.load("img_usgs")
# keep only the rows whose coverage is zero, then display them
nocoverage = statstable[statstable["coverage"] == 0]
nocoverage

Unnamed: 0,dataset_pds,n_inc,n_cov,n_ucov,coverage,n,volume,ptype,dataset_ix,manifest
0,dawn-a-fc2-2-edr-ceres-images,251074,0,251074,0.0,423170,"[Missions/Dawn/Ceres/DWNCAFC2_1A, Missions/Daw...",,,img_usgs_dawn
1,dawn-a-fc2-2-edr-vesta-images;dawn-a-fc2-3-rdr...,61330,0,61330,0.0,61340,"[Missions/Dawn/DWNVFC2_1B, Missions/Dawn/Vesta...",,,img_usgs_dawn
0,vg1/vg2-s-iss-2/3/4/6-processed,65396,0,65396,0.0,66789,"[Missions/Voyager/VGISS_0004, Missions/Voyager...",,,img_usgs_voyager
0,mess-h-mdis-5-dem-elevation,1531,0,1531,0.0,1643,[Missions/MESSENGER/MESSDEM_1001],,,img_usgs_messenger
1,mess-e/v/h-mdis-2-edr-rawdata,291008,0,291008,0.0,291331,[Missions/MESSENGER/MSGRMDS_1001],,,img_usgs_messenger
...,...,...,...,...,...,...,...,...,...,...
3,rs_moon_vis_15;a15lcl-l-vis-6-rock-sample-images,17884,0,17884,0.0,44738,[Missions/Apollo/Lunar_Sample_Photographs/A15V...,,,img_usgs_apollo
4,a17lcl-l-vis-6-rock-sample-images,11426,0,11426,0.0,28924,[Missions/Apollo/Lunar_Sample_Photographs/A17V...,,,img_usgs_apollo
5,a15c-l-mc-2-scanned-images,9768,0,9768,0.0,13422,"[Missions/Apollo/Metric_Camera/A15MC_0001, Mis...",,,img_usgs_apollo
6,a16c-l-mc-2-scanned-images,4994,0,4994,0.0,6815,[Missions/Apollo/Metric_Camera/A16MC_0001],,,img_usgs_apollo


In [4]:
# In dataset_pds stats products, the 'volume' column holds lists of volumes
# (partial URLs at the respective node), which can be hard to read.
# Exploding that column and de-duplicating it gives a flat array instead:
vols_for_uncov_datasets = (
    nocoverage["volume"]
    .explode()
    .unique()
)
# peek at the first ten entries
vols_for_uncov_datasets[:10]

array(['Missions/Dawn/Ceres/DWNCAFC2_1A',
       'Missions/Dawn/Ceres/DWNCHFC2_1A',
       'Missions/Dawn/Ceres/DWNCLFC2_1A',
       'Missions/Dawn/Ceres/DWNCSFC2_1A', 'Missions/Dawn/DWNCAFC2_1A',
       'Missions/Dawn/DWNCHFC2_1A', 'Missions/Dawn/DWNCLFC2_1A',
       'Missions/Dawn/DWNCSFC2_1A', 'Missions/Dawn/DWNVFC2_1B',
       'Missions/Dawn/Vesta/DWNVFC2_1B'], dtype=object)

In [5]:
# Some PDS volumes are really, really large and mix many different
# datasets and product types, so the paths tables are the way to
# drill down.

# Load the paths products for uncovered datasets at IMG-USGS.
# Expect a big table!
img_ucov_paths = loader.load("img_usgs", filt="ucov", mtype="paths")
img_ucov_paths

Unnamed: 0,field,name,path,extension,inc,n,manifest
0,dataset_pds,vg1/vg2-s-iss-2/3/4/6-processed,Missions/Voyager/VGISS_0004/DIONE,IMG,56,56,img_usgs_voyager
1,dataset_pds,vg1/vg2-s-iss-2/3/4/6-processed,Missions/Voyager/VGISS_0004/DIONE,LBL,56,56,img_usgs_voyager
2,dataset_pds,vg1/vg2-s-iss-2/3/4/6-processed,Missions/Voyager/VGISS_0004/ENCELADU,IMG,31,31,img_usgs_voyager
3,dataset_pds,vg1/vg2-s-iss-2/3/4/6-processed,Missions/Voyager/VGISS_0004/ENCELADU,LBL,31,31,img_usgs_voyager
4,dataset_pds,vg1/vg2-s-iss-2/3/4/6-processed,Missions/Voyager/VGISS_0004/EPIMETHE,IMG,10,10,img_usgs_voyager
...,...,...,...,...,...,...,...
4817,dataset_pds,a17c-l-mc-2-scanned-images,Missions/Apollo/Metric_Camera/A17MC_0001/DATA/...,tif,145,145,img_usgs_apollo
4818,dataset_pds,a17c-l-mc-2-scanned-images,Missions/Apollo/Metric_Camera/A17MC_0001/DATA/...,lbl,148,148,img_usgs_apollo
4819,dataset_pds,a17c-l-mc-2-scanned-images,Missions/Apollo/Metric_Camera/A17MC_0001/DATA/...,tif,148,148,img_usgs_apollo
4820,dataset_pds,a17c-l-mc-2-scanned-images,Missions/Apollo/Metric_Camera/A17MC_0001/DATA/...,lbl,107,107,img_usgs_apollo


In [6]:
# Then, if you'd like to drill down to one specific dataset...

# The choice does not need to be random; sampling is just a convenient way
# to pick one. A fixed random_state makes the pick (and everything derived
# from it in later cells) repeatable across Restart & Run All. Change the
# seed, or assign a 'dataset_pds' value directly, to look at another dataset.
SAMPLE_SEED = 0
uncovered_set = nocoverage['dataset_pds'].sample(random_state=SAMPLE_SEED).iloc[0]
print(uncovered_set)

# the 'name' field of a paths table always contains the pivot value
set_paths = img_ucov_paths.loc[img_ucov_paths['name'] == uncovered_set]
set_paths

ody-m-thm-3-visrdr;ody-m-thm-2-visedr;ody-m-thm-2-iredr;ody-m-thm-3-visabr;ody-m-thm-3-irrdr;ody-m-thm-3-irbtr


Unnamed: 0,field,name,path,extension,inc,n,manifest
0,dataset_pds,ody-m-thm-3-visrdr;ody-m-thm-2-visedr;ody-m-th...,Missions/Mars_Odyssey/THEMIS/USA_NASA_PDS_ODTS...,IMG,92,92,img_usgs_mars_odyssey
1,dataset_pds,ody-m-thm-3-visrdr;ody-m-thm-2-visedr;ody-m-th...,Missions/Mars_Odyssey/THEMIS/USA_NASA_PDS_ODTS...,IMG,73,73,img_usgs_mars_odyssey
2,dataset_pds,ody-m-thm-3-visrdr;ody-m-thm-2-visedr;ody-m-th...,Missions/Mars_Odyssey/THEMIS/USA_NASA_PDS_ODTS...,IMG,71,71,img_usgs_mars_odyssey
3,dataset_pds,ody-m-thm-3-visrdr;ody-m-thm-2-visedr;ody-m-th...,Missions/Mars_Odyssey/THEMIS/USA_NASA_PDS_ODTS...,IMG,79,79,img_usgs_mars_odyssey
4,dataset_pds,ody-m-thm-3-visrdr;ody-m-thm-2-visedr;ody-m-th...,Missions/Mars_Odyssey/THEMIS/USA_NASA_PDS_ODTS...,IMG,198,198,img_usgs_mars_odyssey
...,...,...,...,...,...,...,...
7469,dataset_pds,ody-m-thm-3-visrdr;ody-m-thm-2-visedr;ody-m-th...,Missions/Mars_Odyssey/THEMIS/USA_NASA_PDS_ODTS...,QUB,555,555,img_usgs_mars_odyssey
7470,dataset_pds,ody-m-thm-3-visrdr;ody-m-thm-2-visedr;ody-m-th...,Missions/Mars_Odyssey/THEMIS/USA_NASA_PDS_ODTS...,QUB,405,405,img_usgs_mars_odyssey
7471,dataset_pds,ody-m-thm-3-visrdr;ody-m-thm-2-visedr;ody-m-th...,Missions/Mars_Odyssey/THEMIS/USA_NASA_PDS_ODTS...,QUB,490,490,img_usgs_mars_odyssey
7472,dataset_pds,ody-m-thm-3-visrdr;ody-m-thm-2-visedr;ody-m-th...,Missions/Mars_Odyssey/THEMIS/USA_NASA_PDS_ODTS...,QUB,259,259,img_usgs_mars_odyssey


In [7]:
# You can use this table to find paths worth exploring -- for instance, to
# spot things we should be excluding but aren't (or should be including
# but aren't), etc.

# For example, here's a summary of the file extensions:
set_paths['extension'].value_counts()

extension
QUB          5794
IMG          1666
btupd           6
backup          5
IMG_v1          1
gapdb           1
QUB_delGQ       1
Name: count, dtype: int64

In [9]:
# pdr-tests also includes a convenience function that helps you slice the
# paths table up and get a look at the directory structure:
tf = pathtable_to_treeframe(set_paths)
tf

Unnamed: 0,0,1,2,3,4,5,6,filename,extension
0,Missions,Mars_Odyssey,THEMIS,USA_NASA_PDS_ODTSDP_100XX,ODTSDP_10001,data,odtib1_0001,i008xxbtr,IMG
1,Missions,Mars_Odyssey,THEMIS,USA_NASA_PDS_ODTSDP_100XX,ODTSDP_10001,data,odtib1_0001,i009xxbtr,IMG
2,Missions,Mars_Odyssey,THEMIS,USA_NASA_PDS_ODTSDP_100XX,ODTSDP_10001,data,odtib1_0001,i010xxbtr,IMG
3,Missions,Mars_Odyssey,THEMIS,USA_NASA_PDS_ODTSDP_100XX,ODTSDP_10001,data,odtib1_0001,i011xxbtr,IMG
4,Missions,Mars_Odyssey,THEMIS,USA_NASA_PDS_ODTSDP_100XX,ODTSDP_10001,data,odtib1_0001,i012xxbtr,IMG
...,...,...,...,...,...,...,...,...,...
7469,Missions,Mars_Odyssey,THEMIS,USA_NASA_PDS_ODTSDP_100XX,ODTSDP_10077,data,odtie1_0077,i841xxedr,QUB
7470,Missions,Mars_Odyssey,THEMIS,USA_NASA_PDS_ODTSDP_100XX,ODTSDP_10077,data,odtie1_0077,i842xxedr,QUB
7471,Missions,Mars_Odyssey,THEMIS,USA_NASA_PDS_ODTSDP_100XX,ODTSDP_10077,data,odtie1_0077,i843xxedr,QUB
7472,Missions,Mars_Odyssey,THEMIS,USA_NASA_PDS_ODTSDP_100XX,ODTSDP_10077,data,odtie1_0077,i844xxedr,QUB
