In [1]:
from pathlib import Path
import re
import pandas as pd
from gdstools import image_collection, find

In [2]:

def paths_to_dict(paths_list, idx):
    """
    Group file paths by collection and, where the file name carries one, by year.

    Each path is split on '/' and the collection name is taken from element
    ``idx + 1`` of that split. For an absolute path the split starts with an
    empty string, so ``idx`` effectively counts directory components after
    the root.

    The year is the second '_'-separated field of the file stem. Paths whose
    stem has a purely numeric second field are stored per year; every other
    path (non-numeric second field, or no underscore at all) is stored in a
    flat list for its collection.

    :param paths_list: A list of '/'-separated file paths (strings).
    :type paths_list: list
    :param idx: Position used to locate the collection name; the name is read
        from element ``idx + 1`` of the '/'-split path.
    :type idx: int
    :return: A dictionary keyed by collection name. The value is either a
        dictionary mapping year (str) to a list of paths, or a plain list of
        paths when the file names carry no numeric year.
    :rtype: dict
    :raises IndexError: If a path has fewer than ``idx + 2`` '/'-separated parts.
    :raises AttributeError: If one collection mixes dated and undated file
        names, because its value cannot be both a dict and a list.
    """
    _dict = {}
    for p in paths_list:
        plist = p.split('/')
        collection = plist[idx + 1]
        # expects name in format cellid_year_state_agency/dataset
        nameparts = Path(p).stem.split('_')
        # A stem without any underscore has no year field; treat it as
        # "no year" instead of failing with IndexError.
        year = nameparts[1] if len(nameparts) > 1 else ''
        if re.match(r'^\d+$', year):
            _dict.setdefault(collection, {}).setdefault(year, []).append(p)
        else:
            _dict.setdefault(collection, []).append(p)
    return _dict


In [3]:
# Gather the '*.tif' files under the NAIP data directory.
# NOTE(review): the return type of gdstools.image_collection is not visible
# here; later cells treat it as an iterable of '/'-separated path strings.
naip = image_collection(
    '/mnt/data/FESDataRepo/stac_plots/data/naip/',
    file_pattern='*.tif',
)

In [4]:
# paths_to_dict reads the collection name from element idx + 1 of the
# '/'-split path; with idx=5 and the absolute NAIP directory used to build
# `naip`, that element is 'naip'.
naip_dict = paths_to_dict(paths_list=naip, idx=5)

In [5]:
# Build one row per year: total image count plus counts of paths containing
# each tag. The fourth entry is the remainder after the DOQQ count, i.e.
# every path that does not contain 'DOQQ'.
naip_counts = []
for year, year_paths in naip_dict['naip'].items():
    total = len(year_paths)
    # Each tag is used as a regex searched against the full path string.
    tallies = {
        tag: sum(1 for path in year_paths if re.search(tag, path))
        for tag in ('DOQQ', 'USFS', 'BLM', 'WA-DNR')
    }
    naip_counts.append([
        year,
        total,
        tallies['DOQQ'],
        total - tallies['DOQQ'],
        tallies['USFS'],
        tallies['BLM'],
        tallies['WA-DNR'],
    ])

In [6]:
# Display the per-year counts as a table ordered by year. Column order
# matches the order of the entries in each naip_counts row.
(
    pd.DataFrame(
        naip_counts,
        columns=['year', 'total', 'gee', 'noaa', 'usfs', 'blm', 'wadnr'],
    )
    .sort_values(by='year')
)

Unnamed: 0,year,total,gee,noaa,usfs,blm,wadnr
4,2009,5086,5086,0,1223,1376,2487
3,2011,5088,5088,0,1223,1379,2486
9,2013,2920,2920,0,433,0,2487
10,2014,2179,2179,0,788,1379,12
8,2015,2926,2926,0,439,0,2487
11,2016,2179,2179,0,788,1379,12
0,2017,2925,2925,0,438,0,2487
7,2018,2,2,0,2,0,0
5,2019,2621,2621,0,386,0,2235
6,2020,4331,2178,2153,1562,2758,11


In [7]:
# Build one DataFrame per year describing every NAIP image of that year.
# File names are split on '_': field 0 is used as the uuid and field 2 as the
# survey.
res = []
for year, year_paths in naip_dict['naip'].items():
    names = [Path(path).name for path in year_paths]
    name_fields = [name.split('_') for name in names]
    year_frame = pd.DataFrame(
        {
            'year': [year] * len(year_paths),
            'survey': [fields[2] for fields in name_fields],
            'uuid': [fields[0] for fields in name_fields],
            'filename': names,
            'path': year_paths,
        }
    )
    res.append(year_frame)


In [8]:
# Stack the per-year frames into one table ordered by year. The index is not
# reset, so index labels repeat across years.
df = (
    pd.concat(res)
    .sort_values(by='year')
)
df

Unnamed: 0,year,survey,uuid,filename,path
4165,2009,WA-DNR,77fdf844,77fdf844_2009_WA-DNR_NAIP_DOQQ-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/200...
1691,2009,BLM-ROGUE,b2794b40,b2794b40_2009_BLM-ROGUE_NAIP_DOQQ-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/200...
1692,2009,BLM-ROGUE,82adc9ca,82adc9ca_2009_BLM-ROGUE_NAIP_DOQQ-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/200...
1693,2009,WA-DNR,8e5adec6,8e5adec6_2009_WA-DNR_NAIP_DOQQ-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/200...
1694,2009,BLM-COOS,d77ccad5,d77ccad5_2009_BLM-COOS_NAIP_DOQQ-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/200...
...,...,...,...,...,...
1010,2022,BLM-LANE,a40ae4c6,a40ae4c6_2022_BLM-LANE_NAIP_DOQQ-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/202...
1011,2022,USFS-UMPQUA,a2cfa176,a2cfa176_2022_USFS-UMPQUA_NAIP_DOQQ-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/202...
1012,2022,USFS-FREMONT-WINEMA,71ed4d9c,71ed4d9c_2022_USFS-FREMONT-WINEMA_NAIP_DOQQ-co...,/mnt/data/FESDataRepo/stac_plots/data/naip/202...
1005,2022,USFS-WENATCHEE,7f4bb474,7f4bb474_2022_USFS-WENATCHEE_NAIP_NOAA-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/202...


In [9]:
# Show the rows selected by searching the file names for 'NOAA'.
# NOTE(review): `find` is imported from gdstools and its return value is not
# visible here; presumably it yields a row selector (e.g. a boolean mask)
# aligned with df.filename — confirm against gdstools.
df[find('NOAA', df.filename)]

Unnamed: 0,year,survey,uuid,filename,path
441,2020,BLM-COOS,24083172,24083172_2020_BLM-COOS_NAIP_NOAA-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/202...
1130,2020,BLM-COOS,62e8e024,62e8e024_2020_BLM-COOS_NAIP_NOAA-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/202...
1129,2020,USFS-UMPQUA,fe1ec4b9,fe1ec4b9_2020_USFS-UMPQUA_NAIP_NOAA-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/202...
1128,2020,BLM-LANE,b806aeb5,b806aeb5_2020_BLM-LANE_NAIP_NOAA-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/202...
1127,2020,BLM-COOS,1af9a249,1af9a249_2020_BLM-COOS_NAIP_NOAA-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/202...
...,...,...,...,...,...
1060,2022,USFS-WENATCHEE,0d70865d,0d70865d_2022_USFS-WENATCHEE_NAIP_NOAA-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/202...
1109,2022,WA-DNR,02297a4c,02297a4c_2022_WA-DNR_NAIP_NOAA-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/202...
1019,2022,USFS-WENATCHEE,29e85728,29e85728_2022_USFS-WENATCHEE_NAIP_NOAA-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/202...
1020,2022,WA-DNR,536d7a64,536d7a64_2022_WA-DNR_NAIP_NOAA-cog.tif,/mnt/data/FESDataRepo/stac_plots/data/naip/202...


In [10]:
# Do agency plots intersect with other agency plots?
import geopandas as gpd

plots = gpd.read_file('../data/dev/features/plot_features.geojson')

agency = 'WADNR'
# Split once, outside the loop: neither subset changes from one geometry to
# the next.
agency_plots = plots[plots.agency == agency]
other_plots = plots[plots.agency != agency]

# Number of other-agency plots intersected by each plot of `agency`. Stored
# under its own name so the `res` list of per-year DataFrames built earlier
# in the notebook is not overwritten (re-running the concat cell would
# otherwise fail).
intersect_counts = [
    int(other_plots.intersects(geom).sum()) for geom in agency_plots.geometry
]

n_intersections = sum(intersect_counts)
if n_intersections == 0:
    print(f'No {agency} plots intersect with other agency plots.')
else:
    # Report the positive case too, so an empty output is never ambiguous.
    print(f'{n_intersections} intersections found between {agency} plots and other agency plots.')

No WADNR plots intersect with other agency plots.


In [11]:
# List all plot years
# NOTE(review): the recorded output runs through 2027 — confirm that years
# beyond the available imagery years are expected in this table.
pd.read_csv(
    '../data/dev/features/inventory_features.csv'
)['year'].unique()

array([2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020,
       2021, 2022, 2023, 2024, 2025, 2026, 2027])