In [1]:
from skmap.io import RasterData
from eumap.misc import ttprint

bands = ['blue','green','red','nir','swir1','swir2','thermal']
tile = '007E_48N'
raster_files = {}

for b in bands:
    raster_files[b] = f'http://192.168.49.30:8333/prod-landsat-ard2/{tile}/seasconv/{b}_glad.SeasConv.ard2_m_30m_s_' + '{dt}_go_epsg.4326_v20230908.tif'

rdata = RasterData(raster_files,verbose=True
        ).timespan('20000101', '20011231', date_unit='months', date_step=2, ignore_29feb=True)

rdata.info['input_path'] = rdata.info.apply(lambda x: x['input_path'].replace('192.168.49.30', f'192.168.49.{30+x.name%13}'), axis=1)

rdata = rdata.read(n_jobs=60)

# rescale the values to [0,1]
rdata.array = rdata.array/251  #https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/ard_metadata_v1_0.xsd_.xml


In [None]:
def pixel_phenology(pixel, rdata):
    i, j = pixel
    ndvi = rdata[i, j, :]
    min_height = 0.5*10000  # NDVI higher than 0.5 could be seen as a peak
    min_prominence = 0.25*10000  # Minimum prominence is 0.2 (peak must be at least 0.2 more than neibouring troughs)
    min_distance = 1  # 1*2 months, minimum between each peak
    
    # ndvi = pd.Series(ndvi_series).interpolate()
    
    peaks, _ = find_peaks(ndvi, height=min_height, prominence=min_prominence, distance=min_distance)
    nos = len(peaks)
    
    if nos > 0:
        mean = np.mean(ndvi[peaks])
        los = np.sum(ndvi>mean*0.5) / len(ndvi)
    else:
        los = 0

    # mean = np.mean(ndvi_series[peaks])
    return i, j, nos, los

In [4]:
from skmap.io import process

rdata = rdata.run(
                process.Calc( #
                    expressions = {
                        'ndvi': '((nir - red) / (nir + red))',
                        'ndwi': '((green - swir2)/(green + swir2))',
                        'savi': '(nir - red)*1.5 / (nir + red + 0.5)',
                        'msavi': '(2 * nir + 1 - sqrt((2 * nir + 1)**2 - 8 * (nir - red))) / 2',
                        'nbr': '(nir - thermal) / (nir + thermal)',
                        'ndmi': '(nir - swir1) / (nir + swir1)',
                        'nbr2': '(swir1 - thermal) / (swir1 + thermal)',
                        'rei': '(nir-blue)/(nir+blue*nir)',
                        'bsi': '((swir1+red)-(nir+blue))/((swir1+red)+(nir+blue))',
                        'ndti': '(swir1-swir2)/(swir1+swir2)',
                        'ndsi': '(green-swir1)/(green+swir1)',
                        'ndsmi': '(nir-swir2)/(nir-swir2)',
                        'nirv': '(ndvi - 0.08)*nir',
                        'evi': '2.5 * (nir-red) / (nir + 6 * red - 7.5 * blue + 1)',
                        'fapar': '(((ndvi - 0.03)*(0.95 - 0.001))/(0.96 - 0.03)) + 0.001',
                        'bs': 'where(ndvi <= 0.35, 1, 0)'
                    },
                    n_jobs = 60
                )
            ).run(process.TimeAggregate(time=[process.TimeEnum.YEARLY], operations = ['p25', 'p50', 'p75'], n_jobs = 60), group=bands, drop_input=True
            ).run(process.TimeAggregate(time=[process.TimeEnum.YEARLY], operations = ['sum'], n_jobs = 60), group='bs', drop_input=True
            # ).rename(groups = {'bs.yearly.sum':'bsf'}
            ).run(process.Calc(expressions = {'bsf': 'bs.yearly.sum * 100 / 6'}, n_jobs = 60), drop_input=True)



[13:18:15] Running Calc on (4004, 4004, 84)
[13:20:04] Execution time for Calc: 108.79 segs
[13:20:07] Running TimeAggregate on (4004, 4004, 12) for blue group
[13:20:22] Dropping data and info for blue group
[13:20:22] Execution time for TimeAggregate: 17.63 segs
[13:20:24] Running TimeAggregate on (4004, 4004, 12) for green group
[13:20:39] Dropping data and info for green group
[13:20:39] Execution time for TimeAggregate: 17.49 segs
[13:20:42] Running TimeAggregate on (4004, 4004, 12) for nir group
[13:20:57] Dropping data and info for nir group
[13:20:57] Execution time for TimeAggregate: 17.42 segs
[13:20:59] Running TimeAggregate on (4004, 4004, 12) for red group
[13:21:14] Dropping data and info for red group
[13:21:14] Execution time for TimeAggregate: 17.49 segs
[13:21:17] Running TimeAggregate on (4004, 4004, 12) for swir1 group
[13:21:32] Dropping data and info for swir1 group
[13:21:32] Execution time for TimeAggregate: 17.47 segs
[13:21:34] Running TimeAggregate on (4004, 

AttributeError: 'VariableNode' object has no attribute 'yearly'

In [None]:
rdata = rdata.rename(groups = {'bs.yearly.sum':'bss'}
            ).run(process.Calc(expressions = {'bsf': 'bss * 100 / 6'}, n_jobs = 60), drop_input=True)

In [5]:
rdata.info['group'].unique()


array(['ndvi', 'ndwi', 'savi', 'msavi', 'nbr', 'ndmi', 'nbr2', 'rei',
       'bsi', 'ndti', 'ndsi', 'ndsmi', 'nirv', 'evi', 'fapar',
       'blue.yearly.p25', 'blue.yearly.p50', 'blue.yearly.p75',
       'green.yearly.p25', 'green.yearly.p50', 'green.yearly.p75',
       'nir.yearly.p25', 'nir.yearly.p50', 'nir.yearly.p75',
       'red.yearly.p25', 'red.yearly.p50', 'red.yearly.p75',
       'swir1.yearly.p25', 'swir1.yearly.p50', 'swir1.yearly.p75',
       'swir2.yearly.p25', 'swir2.yearly.p50', 'swir2.yearly.p75',
       'thermal.yearly.p25', 'thermal.yearly.p50', 'thermal.yearly.p75',
       'bs.yearly.sum'], dtype=object)

In [None]:
ttprint('start saving files')
rdata.to_dir('/mnt/apollo/bare_soil_xuemeng/tmp_files/skmap-indices')
ttprint('finish saving')

array(['ndvi', 'ndwi', 'savi', 'nbr', 'ndmi', 'nbr2', 'rei', 'bsi',
       'ndti', 'ndsi', 'ndsmi', 'nirv', 'evi', 'fapar', 'blue.yearly.p25',
       'blue.yearly.p50', 'blue.yearly.p75', 'green.yearly.p25',
       'green.yearly.p50', 'green.yearly.p75', 'nir.yearly.p25',
       'nir.yearly.p50', 'nir.yearly.p75', 'red.yearly.p25',
       'red.yearly.p50', 'red.yearly.p75', 'swir1.yearly.p25',
       'swir1.yearly.p50', 'swir1.yearly.p75', 'swir2.yearly.p25',
       'swir2.yearly.p50', 'swir2.yearly.p75', 'thermal.yearly.p25',
       'thermal.yearly.p50', 'thermal.yearly.p75'], dtype=object)

In [1]:
import sys

import warnings
# warnings.filterwarnings('ignore')

from skmap.io import read_rasters, save_rasters #, RasterData
from skmap.io import process
from skmap.misc import date_range, ttprint , new_memmap # load_memmap, del_memmap, ref_memmap
# from eumap.misc import find_files, nan_percentile, ttprint
# from eumap.raster import read_rasters, save_rasters
import os

from pathlib import Path
from minio import Minio
# from eumap.misc import find_files, nan_percentile
import numpy as np
import gc
import numexpr as ne
import bottleneck as bn

from minio.commonconfig import REPLACE, CopySource
import time as tm
import geopandas as gpd
import matplotlib.pyplot as plt 
import fiona
from osgeo import gdal

bands = [('blue',1),('green',2),('red',3),('NIR',4),('SWIR1',5),('SWIR2',6),('thermal',7),('qa',8)]
tiles = ['005E_52N', '006E_51N', '023E_52N', '007E_46N', '024E_38N', '004W_36N'] #['016E_63N', '006W_53N', '003E_52N', '004E_51N',
# tiles = ['016E_63N','017E_63N','006W_53N','006W_52N','003E_52N','004E_52N','003E_51N','004E_51N','005E_52N','006E_52N','005E_51N',
#          '006E_51N','023E_52N','007E_46N','008E_46N','009E_44N','010E_44N','023E_38N','024E_38N','023E_37N','004W_36N']

gdal_opts = {
     'GDAL_HTTP_MULTIRANGE': 'SINGLE_GET',
     'GDAL_HTTP_MERGE_CONSECUTIVE_RANGES': 'NO',
     'GDAL_HTTP_VERSION': '1.0',
     'GDAL_DISABLE_READDIR_ON_OPEN': 'EMPTY_DIR',
     'VSI_CACHE': 'FALSE',
     'CPL_VSIL_CURL_ALLOWED_EXTENSIONS': '.tif',
     'GDAL_HTTP_CONNECTTIMEOUT': '320',
     'CPL_VSIL_CURL_USE_HEAD': 'NO',
     'GDAL_HTTP_TIMEOUT': '320',
     'CPL_CURL_GZIP': 'NO',
    'CPL_CURL_VERBOSE': 'YES'
}

itvls = []
years = list(np.arange(2000,2022,1))
for year in years:
    for j in np.arange(1,24,1):
        itvls.append((year,(year-1980)*23+j))
        
ddoy = ['001','017','033','049','065','081','097','113','129','145','161','177','193','209','225','241','257','273','289','305','321','337','353']
dioy = list(range(1,24,1))
doy = dict(zip(dioy,ddoy))

filler = process.SeasConvFill(season_size = 23,att_seas = 60,att_env = 20,return_qa = True, n_jobs=40,verbose = True)

def ard2_urls(tile,year,band):
    urls = []
    for doy in ddoy:
        urls.append(f'http://192.168.1.30:8333/prod-landsat-ard2/{tile}/seasconv/thermal_glad.SeasConv.ard2_m_30m_s_20210901_20211031_go_epsg.4326_v20230908.tif')
    return urls

tile = '016E.63N'
band = 'blue'
year = 2000
urls = ard2_urls(tile,year,band)
print(urls[0:3])
rdata = read_rasters(raster_files=urls[0:3], n_jobs=3, verbose=True, gdal_opts=gdal_opts)
# urls = ard2_urls(tile, hosts = s3['hosts'], bucket=s3['bucket_r'], itvls = itvls)
# rdata = read_rasters(raster_files=urls, band=band[1], dtype='uint16', n_jobs=50, expected_shape=(4004,4004), verbose=True)

['http://192.168.1.30:8333/tmp-bare-soil/gapfilled/016E.63N/landsat_gapfilled_blue_016E.63N_2000001.tif', 'http://192.168.1.30:8333/tmp-bare-soil/gapfilled/016E.63N/landsat_gapfilled_blue_016E.63N_2000017.tif', 'http://192.168.1.30:8333/tmp-bare-soil/gapfilled/016E.63N/landsat_gapfilled_blue_016E.63N_2000033.tif']
[11:23:49] Reading 3 raster file(s) using 3 workers
[11:23:49] [11:23:49] Exception: CURL error: Received HTTP/0.9 when not allowedException: CURL error: Received HTTP/0.9 when not allowed



* Couldn't find host 192.168.1.30 in the (nil) file; using defaults
* Found bundle for host: 0x55ca3132a4b0 [serially]
* Re-using existing connection #0 with host 192.168.1.30
> GET /tmp-bare-soil/gapfilled/016E.63N/landsat_gapfilled_blue_016E.63N_2000001.tif HTTP/1.0
Host: 192.168.1.30:8333
Accept: */*

* Couldn't find host 192.168.1.30 in the (nil) file; using defaults
* Found bundle for host: 0x55ca3132a4b0 [serially]
* Re-using existing connection #0 with host 192.168.1.30
> GET /tmp-bare-soil/gapfilled/016E.63N/landsat_gapfilled_blue_016E.63N_2000017.tif HTTP/1.0
Host: 192.168.1.30:8333
Accept: */*

* Couldn't find host 192.168.1.30 in the (nil) file; using defaults
* Found bundle for host: 0x55ca3132a4b0 [serially]
* Re-using existing connection #0 with host 192.168.1.30
> GET /tmp-bare-soil/gapfilled/016E.63N/landsat_gapfilled_blue_016E.63N_2000033.tif HTTP/1.0
Host: 192.168.1.30:8333
Range: bytes=32410-10515905
Accept: */*

* * HTTP 1.0, assume close after body
Received HTTP/0.

Exception: The raster http://192.168.1.30:8333/tmp-bare-soil/gapfilled/016E.63N/landsat_gapfilled_blue_016E.63N_2000001.tif not exists

In [3]:
tile = '016E.63N'
band = 'blue'
year = 2000
urls = ard2_urls(tile,year,band)
urls[0]

'http://192.168.49.30:8333/tmp-bare-soil/gapfilled/016E.63N/landsat_gapfilled_blue_016E.63N_2000001.tif'

In [5]:
import rasterio
from osgeo import gdal

for key in gdal_opts.keys():
    gdal.SetConfigOption(key,gdal_opts[key])

for url in urls:
    ds = rasterio.open(url)
    data = ds.read(1)

In [5]:
gdal_opts

{'GDAL_HTTP_MULTIRANGE': 'SINGLE_GET',
 'GDAL_HTTP_MERGE_CONSECUTIVE_RANGES': 'NO',
 'GDAL_HTTP_VERSION': '1.1',
 'GDAL_DISABLE_READDIR_ON_OPEN': 'EMPTY_DIR',
 'VSI_CACHE': 'FALSE',
 'CPL_VSIL_CURL_ALLOWED_EXTENSIONS': '.tif',
 'GDAL_HTTP_CONNECTTIMEOUT': '320',
 'CPL_VSIL_CURL_USE_HEAD': 'NO',
 'GDAL_HTTP_TIMEOUT': '320',
 'CPL_CURL_GZIP': 'NO'}

In [4]:
rdata = read_rasters(raster_files=urls[0:3], band=1, dtype='uint8', n_jobs=3, expected_shape=(4004,4004), verbose=True, gdal_opts=gdal_opts)

[11:03:09] Reading 3 raster file(s) using 3 workers
[11:03:09] Exception: CURL error: Received HTTP/0.9 when not allowed
[11:03:09] Full nan image for http://192.168.49.30:8333/tmp-bare-soil/gapfilled/016E.63N/landsat_gapfilled_blue_016E.63N_2000001.tif


IndexError: index 4004 is out of bounds for axis 0 with size 4004

In [None]:
RasterData({
    'blue': [
        ('http://192.168.49.30:8333/landsat-ard2/2000/006W_53N/461.tif', 1, '2000-01-01', , '2000-01-16'),
        ('http://192.168.49.30:8333/landsat-ard2/2000/006W_53N/462.tif', 1, '2000-01-16', , '2000-02-02')
    ],
    'green': [
        ('http://192.168.49.30:8333/landsat-ard2/2000/006W_53N/461.tif', 2, '2000-01-01', , '2000-01-16'),
        ('http://192.168.49.30:8333/landsat-ard2/2000/006W_53N/462.tif', 2, '2000-01-16', , '2000-02-02')
    ],
    'red': [
        ('http://192.168.49.30:8333/landsat-ard2/2000/006W_53N/461.tif', 3, '2000-01-01', , '2000-01-16'),
        ('http://192.168.49.30:8333/landsat-ard2/2000/006W_53N/462.tif', 3, '2000-01-16', , '2000-02-02')
    ]
})

RasterData({
    'red': 'http.._{dt}_'
}).timespan()