In [2]:
import digitalhub as dh
# conda install -c conda-forge gdal
# Name of the digitalhub project used by the rest of this notebook.
PROJECT_NAME = "OPT_project"
# Project handle; later cells register and run the processing function through it.
# NOTE(review): the trailing comment below is a truncated `source=` URL — complete it or drop it.
proj = dh.get_or_create_project(PROJECT_NAME) # source="git://github.com/scc-digitalhu

In [None]:
# Install GDAL into the kernel's conda environment (unpinned, default channels).
# NOTE(review): the first cell's comment mentions `-c conda-forge`; confirm which channel is intended.
%conda install gdal

In [3]:
%%writefile "opt_main_file.py"

from digitalhub_runtime_python import handler
import sys, os, time, json
import configparser
import argparse
from datetime import timedelta

# from multiprocessing import freeze_support, set_start_method #some stuff for multi-processing support
from joblib import Parallel, delayed, parallel_backend

from libs.RSdatamanager import filemanager as fm
from libs.RSdatamanager.Sentinel2.S2L2A import L2Atile, getTileList
from libs.RSdatamanager.Landsat.LandsatL2SP import L2SPtile, getL2SPTileList
from libs.ToolboxModules import featurext as m1


def tile_reading_1(tileDatapath, maindir, sensor):
    """Build the tile object that matches ``sensor``.

    Args:
        tileDatapath: Forwarded as the second argument of the tile class.
        maindir: Forwarded as the first argument of the tile class.
        sensor: ``'S2'`` selects ``L2Atile``; ``'Landsat'`` selects ``L2SPtile``.

    Returns:
        The constructed ``L2Atile`` or ``L2SPtile`` instance.

    Raises:
        ValueError: If ``sensor`` is neither ``'S2'`` nor ``'Landsat'``.
    """
    if sensor == 'S2':
        return L2Atile(maindir, tileDatapath)
    if sensor == 'Landsat':
        return L2SPtile(maindir, tileDatapath)
    # Fail loudly instead of hitting an UnboundLocalError on an unassigned local.
    raise ValueError(
        "Invalid sensor: {!r} (expected 'S2' or 'Landsat')".format(sensor)
    )


def tile_reading_2(tile, outpath, tilename, year, **kwargs):
    """Run module 1 (``m1.manager``) on one tile for one year.

    Args:
        tile: Tile object forwarded to ``m1.manager``.
        outpath: Base output directory; the per-run folder is resolved via
            ``fm.check_folder(outpath, "<tilename>_<year>")``.
        tilename: Tile identifier used as the folder-name prefix.
        year: Year being processed; forwarded unchanged as the ``year`` option.
        **kwargs: Only ``module1`` is read. A dict is used as the base set of
            options for ``m1.manager``; any other value (e.g. the boolean
            enable flag) contributes no options.
    """
    #UPDATE OPTIONS
    name = str(tilename) + '_' + str(year)
    update = {
        'year': year,
        'savepath': fm.check_folder(outpath, name)
    }
    #MODULE 1
    module1 = kwargs.get('module1')
    # Copy so per-run values never leak into the caller's dict, and tolerate
    # ``module1`` being a plain truthy flag rather than an options dict.
    options = dict(module1) if isinstance(module1, dict) else {}
    options.update(update)
    m1.manager(tile, **options)


def parallel_tile_reading(tiledict, maindir, sensor, tile_keys, outpath, tilename, years, **kwargs):
    """Load the selected tiles in parallel, then process each (tile, year) pair in parallel.

    Stage 1 calls ``tile_reading_1`` once per key in ``tile_keys``; stage 2
    calls ``tile_reading_2`` for every loaded tile combined with every entry
    of ``years``. Both stages use ``Parallel(n_jobs=-1)``. ``**kwargs`` is
    forwarded untouched to ``tile_reading_2``. Nothing is returned.
    """
    # Stage 1: one loading job per requested tile key.
    load_jobs = [
        delayed(tile_reading_1)(tiledict[key], maindir, sensor)
        for key in tile_keys
    ]
    loaded_tiles = Parallel(n_jobs=-1)(load_jobs)

    # Stage 2: tiles in the outer loop, years in the inner loop.
    extract_jobs = []
    for loaded in loaded_tiles:
        for yr in years:
            extract_jobs.append(
                delayed(tile_reading_2)(loaded, outpath, tilename, yr, **kwargs)
            )
    Parallel(n_jobs=-1)(extract_jobs)

@handler()
def main(**kwargs):
    """Handler entry point: run module 1 over every tile listed for ``datapath``.

    Expected keyword arguments:
        options (dict): Toolbox settings. Keys read here: ``'sensor'``
            (``'S2'`` or ``'Landsat'``), ``'tilename'``, ``'years'``,
            ``'maindir'`` and ``'outpath'``; each defaults to ``None``.
        datapath: Passed to ``getTileList`` / ``getL2SPTileList`` to obtain
            the tile dictionary.
        module1: Truthy to run module 1; when falsy nothing is done.

    Side effects:
        Prints the elapsed time and writes it as JSON to
        ``logging_MODULE.txt`` under ``outpath``.

    Raises:
        KeyError: If ``options``, ``datapath`` or ``module1`` is missing.
        IOError: If ``sensor`` is neither ``'S2'`` nor ``'Landsat'``.
    """
    #PREPARE SOME TOOLBOX PARAMETERS
    options = kwargs['options']
    sensor = options.get('sensor', None)
    tilename = options.get('tilename', None)
    years = options.get('years', None)
    maindir = options.get('maindir', None)
    outpath = options.get('outpath', None)
    datapath = kwargs['datapath']
    module1 = kwargs['module1']

    if not module1:
        # Module 1 is the only stage handled here; nothing to do when disabled.
        return

    with parallel_backend('loky'):
        # Named `timings` (not `logging`) to avoid shadowing the stdlib module name.
        timings = {}
        started = time.time()

        #READ DATASETS
        if sensor == 'S2':
            tiledict = getTileList(datapath)
        elif sensor == 'Landsat':
            tiledict = getL2SPTileList(datapath)
        else:
            raise IOError('Invalid sensor')

        # The full kwargs are forwarded so the workers can read 'module1'.
        parallel_tile_reading(
            tiledict, maindir, sensor, tiledict.keys(),
            outpath, tilename, years, **kwargs
        )

        elapsed = timedelta(seconds=(time.time() - started))
        print("MOD1 TIME = ", elapsed, flush=True)
        timings['MODULE 1'] = {'TIME': str(elapsed)}

        # Resolve the output folder before opening the log file inside it.
        fm.check_folder(outpath)
        log_file = fm.joinpath(outpath, "logging_MODULE.txt")
        with open(log_file, 'w') as json_file:
            json.dump(timings, json_file)

Writing opt_main_file.py


In [58]:
# NOTE(review): FUNCTION_NAME is defined but not used below — the function is
# registered under the literal name "python-function". Confirm which is intended.
FUNCTION_NAME = "execute-preprocessing"

# Register the module written by the %%writefile cell as a Python function
# whose entry point is `main`.
func = proj.new_function(
    name="python-function",
    kind="python",
    code_src="opt_main_file.py",
    handler="main",
    python_version="PYTHON3_9",
    requirements=[
        "astroid==2.4.2",
        "certifi==2020.6.20",
        "cycler==0.10.0",
        "decorator==4.4.2",
        "h5py==3.10.0",
        "imageio==2.9.0",
        "isort==5.5.1",
        "joblib==0.16.0",
        "kiwisolver==1.2.0",
        "lazy-object-proxy==1.4.3",
        "matplotlib==3.4.0",
        "mccabe==0.6.1",
        "networkx==2.5",
        "numpy==1.23.5",
        "pandas",
        "Pillow==7.2.0",
        "pylint==2.6.0",
        "pyparsing==2.4.7",
        "python-dateutil==2.8.2",
        "pytz==2020.1",
        "PyWavelets==1.1.1",
        "scikit-image",
        "scikit-learn",
        "scipy",
        "six==1.15.0",
        "threadpoolctl==2.1.0",
        "toml==0.10.1",
        "wrapt==1.11.2",
    ],
)

In [59]:
# Toolbox settings, passed to the handler as the `options` keyword argument.
options = {
    'sensor': "S2",
    'tilename': "42WXS",
    'years': ["2017", "2018"],
    'maindir': "/",
    'outpath': "outputs/",
    'info': False,
    'deltemp': False,
}

datapath = "inputs/"
m1options = True

# Keyword arguments for the handler: `main` reads 'datapath', 'options' and 'module1'.
input_ab = {
    "datapath": datapath,
    "options": options,
    "module1": m1options,
}

# NOTE(review): a previous run was logged as failing with
#   main() missing 3 required positional arguments: 'datapath', 'options', and 'module1'
# The handler in opt_main_file.py now takes **kwargs; presumably that error
# came from an earlier signature — confirm and drop this note.

# Execute the registered function as a job, in the local environment.
run_download = func.run(
    action="job",
    parameters=input_ab,
    local_execution=True,
)