# ImageProcessingUtils
[![DOI](https://zenodo.org/badge/287286230.svg)](https://zenodo.org/badge/latestdoi/287286230)

Author: g.nodjoumi@jacobs-university.de
This repo contain image processing utilities that i used for prepare images before Deep Learning training
____________________________________________________________________________
ImageProcessingUtils is a Jupyter Notebook for processing georeferenced images such as GeoTiff, JP2, png/jpeg+world file, CUB (USGS ISIS).
With this tool is possible to perform single to multiple tasks including:

* convert to GeoTiff, Cloud Optimize GeoTiff (COG), JP2, png/jpeg+world file, CUB (USGS ISIS)
* rescale images pixel resolution
* create tiles for images larger than user-defined size limit
* remove black borders for images/tiles
* crop images/tiles with a 1:1 centered aspect ration

## Usage
Edit the general configuration dictionary in [General Config](#General_Config).
```
config = {
'PATH':"../data/",
'DST_PATH':"../data/",
'IXT':'cub',
'OXT':'tiff',
'BC':'n',
'SQCRP':'n',
'RES':'y',
'CELL_SIZE':'2',
'LIM':'n',
'LIM_SIZE':None,
'COG':'y',
'8bit':'y',
'dem':'n'
}
```
**PATH and DST_PATH must be edited if the data is contained in subfolders**
**other parameters, if not set here, will be asked interactively**
**dem is a flag to avoid JPEG compression and 8bit conversion when source image is a DEM**


## Acknowledgment
This work is within the Europlanet 2024 RI and EXPLORE project, and it has received funding from the European Union’s Horizon 2020 research and innovation programme under grant agreement No 871149 and No 101004214.


In [1]:
import os
from copy import copy
import gc
import numpy as np
import pandas as pd
import rasterio as rio
from rasterio.enums import Resampling
from rasterio.windows import Window
from osgeo import gdal
gdal.UseExceptions()
from osgeo.gdal import gdalconst
import cv2 as cv
from utils.GenUtils import get_paths, chunk_creator, folder_file_size, question
from utils.ImgUtils import square_crop, geoslicer, borderCropper, CellSizeScale, cogCreator, _translate

In [2]:
global PATH
global dst_path
global ixt
global oxt
global res, cell_size
global sqcrp
global bc
global lim, lim_size
global proc_df
global cog
global bit
global dem

<a id='General_Config'></a>
***EDIT THIS***
**or use interactive configuration**

In [3]:
config = {
'PATH':"../data/MARSPIT_v2/test_pit",
'DST_PATH':"../data/MARSPIT_v2/test_pit",
'IXT':'jp2',
'OXT':'tiff',
'BC':'n',
'SQCRP':'n',
'RES':'n',
'CELL_SIZE':None,
'LIM':'y',
'LIM_SIZE':8000,
'COG':'n',
'8bit':'y',
'dem':'n',
'overlap':5
}

**end of config**

In [5]:
def cropper(image, bc, sqcrp, res, cell_size, lim, limit_size, cog, cog_cfg, bit, dem):
    image_name = os.path.basename(image).split('.')[0]#+ixt
    savename=dst_folder+'/'+image_name
    data_dict = {'Name': image_name, 'Status':None,'Errors':None} 
    tmp_df = pd.DataFrame.from_dict([data_dict])
    try:
        with rio.open(image) as src:
          
            src_height, src_width = src.shape
            crs = src.crs
            cnt = src.count
            src_trs = src.transform
            dst_trs = copy(src_trs)
            xoff = 0
            yoff = 0
            src_win = Window(xoff,yoff,src.width,src_height)
            # dst_win = copy(src_win)
            if cell_size == None:
                cell_size = src.transform[0]
            elif float(cell_size) < src.transform[0]:
                cell_size = src.transform[0]
                data_dict['Errors']='Processed with source cell size. Cannot process cell size values lower than source'
            if lim in ['Y','y']:
                if src_width > int(limit_size) or src_height >int(limit_size):
                    # src_crs = src.crs
                    max_dim = int(limit_size)
                elif src_width > src_height:
                    max_dim = src_width
                else:
                    max_dim = src_height
                tmp_df = geoslicer(image, max_dim, savename, bc, sqcrp, res, cell_size, oxt, cog, cog_cfg, bit, data_dict, dem, ixt, overlap)

            else:
                
                if bc in ['y','y']:
                    try:
                        src_width, src_height, src_win, dst_trs, savename =  borderCropper(src, src_win, savename, oxt)
                    except Exception as e:
                        print(e)
                        data_dict['Status']=e
                        pass
                    
                if sqcrp in ['Y','y']:
                    try:
                        src_width, src_height, src_win, dst_trs, savename = square_crop(src,
                                                                              src_width,
                                                                              src_height,
                                                                              src_win,
                                                                              # xoff,
                                                                              # yoff,
                                                                              savename,
                                                                              oxt)
                    except Exception as e:
                        print(e)
                        data_dict['Status']=e
                        pass
               
                if res in ['Y', 'y']:                    
                    try:
                        src_height, src_width, dst_trs, savename = CellSizeScale(src,
                                                                                 src_height,
                                                                                 src_width,
                                                                                 float(cell_size),
                                                                                 dst_trs,
                                                                                 savename)
                    except Exception as e:
                        print(e)
                        data_dict['Status']=e
                        pass                    
                
                #if any(item in [el.lower() for el in [bc, sqcrp, res, lim]] for item in ['yes','ye','y']):
                try:
                    img = src.read(window=src_win,
                                   out_shape=(cnt, src_height, src_width),
                                   resampling=Resampling.cubic,
                                  masked=True)
                    noData=src.nodata
                    dt = src.dtypes[0]
                    if noData == None:
                        noData = 0
                    if bit in ['yes','ye','y']:
                        noData=0
                        img = cv.convertScaleAbs(img,alpha=(255.0/img.max()))
                        dt = img.dtype
                    if dem.lower() in ['yes','ye','y']:

                        print('DEM cannot be 8bit')
                        bit = 'n'          


                    savename = savename+'.'+oxt
                    with rio.open(savename,'w',
                              driver='GTiff',
                              window=src_win,
                              width=src_width,
                              height=src_height,
                              count=cnt,
                              nodata=noData,
                              dtype=dt,
                              transform=dst_trs,
                              crs=crs) as dst:
                        dst.write(img)

                    _ = gc.collect()                
                    #data_dict['Status']='Done'
                except Exception as e:
                    print(e)
                    del img
                    _ = gc.collect()                
                    data_dict['Errors']=e
                data_dict['Status']='Done'
#                tmp_df = pd.DataFrame.from_dict([data_dict])     
                
                if cog in ['Yes','yes','Y','y']:
                    print('cog')
                    try:
                        if any(item in [el.lower() for el in [bc, sqcrp, res, lim]] for item in ['yes','ye','y']):
                            print('a')
                            source = savename
                            #dest = savename.split('.'+ixt)[0]+'-cog.'+oxt
                            dest = savename.split('.'+oxt)[0]+'-cog.'+oxt  
                        else:
                            source = image+'.'+ixt
                            dest = savename.split('.'+ixt)[0]+'-cog.'+oxt  
                        _translate(source, dest,profile='DEFLATE', profile_options=cog_cfg)
                    except Exception as e:
                        print(e)
                        data_dict['Errors']=e
            
                data_dict['Status']='Done'
                tmp_df = pd.DataFrame.from_dict([data_dict])     
    except Exception as e:
        print(e)
        data_dict['Errors']=e
        data_dict['Status']='Error'
        pass
    return tmp_df

In [6]:
def parallel_crops(files, JOBS, bc, sqcrp, res, cell_size, lim, limit_size, cog, cog_cfg, bit, dem):
    from joblib import Parallel, delayed, parallel_backend
    with parallel_backend("loky", inner_max_num_threads=2):
    
        tmp_df = Parallel (n_jobs=JOBS)(delayed(cropper)(files[i],
                                                     bc, sqcrp,
                                                     res, cell_size,
                                                     lim, limit_size,
                                                     cog,
                                                     cog_cfg,
                                                     bit,
                                                     dem)
                            for i in range(len(files)))
    return tmp_df

In [7]:
def main():
        
    image_list = get_paths(PATH, ixt) 
    total_size, max_size, av_fsize = folder_file_size(PATH,image_list)

    from tqdm import tqdm
    import psutil
    
    avram=psutil.virtual_memory().total >> 30
    avcores=psutil.cpu_count(logical=False)
    reqram = avcores*max_size
    JOBS = avcores   
    if ixt in ['jp2','JP2','Jp2'] and avcores < len(image_list):
        min_ramcore=av_fsize*10
        JOBS=round(avram/min_ramcore)
        if JOBS > avcores:
            JOBS = avcores

    # cols = ['Name','Processed']
    proc_csv = dst_folder+'/Processed.csv'
    try:
        proc_df = pd.read_csv(proc_csv)
    except Exception as e:
        print(e)
        proc_df = pd.DataFrame(columns=['Name','Status','Errors'])
    pass
    
    with tqdm(total=len(image_list),
             desc = 'Generating Images',
             unit='File') as pbar:
        
        filerange = len(image_list)
        chunksize = round(filerange/JOBS)
        if chunksize <1:
            chunksize=1
            JOBS = filerange
        chunks = []
        for c in chunk_creator(image_list, JOBS):
            chunks.append(c)
        
        
        for i in range(len(chunks)):
            files = chunks[i]
            lambda_f = lambda element:(os.path.basename(element).split('.')[0]) not in proc_df['Name'].to_list()
            # filtered = [path for path in files if os.path.basename(path).split(ixt)[0] not in proc_df['Name'].to_list()]
            filtered = filter(lambda_f, files)
            #for proc in proc_df['Name'].tolist():
                #chunk_filter = lambda element: element != proc
                #filtered = filter(chunk_filter, proc)
            chunk = list(filtered)
            if len(chunk)>0:
               tmp_df = parallel_crops(files, JOBS, bc, sqcrp, res, cell_size, lim, limit_size, cog, cog_cfg, bit, dem)
               for df in tmp_df:
                   proc_df = proc_df.append(df,ignore_index=True)
               #proc_df = proc_df.append(tmp_df,ignore_index=True)
               proc_df.to_csv(proc_csv, index=False)
               pbar.update(JOBS)
            else:
                pbar.update(len(files))
                continue

In [8]:
if __name__ == "__main__":

    for cfg in config:
        if cfg in ['CELL_SIZE','LIM_SIZE']:
            pass
        else:
            if config[cfg]==None:
                if cfg in ['IXT']:            
                    while True:
                        config[cfg] = input("Insert value for  {}".format(cfg))
                        if config[cfg].lower() in ['cub','jp2','tiff','tif']:
                            break
                        else:
                            print('Only supported: CUB/cub, JP2/jp2, TIFF/tiff, TIF,Tif')
                            continue
                elif cfg in ['OXT']:            
                    while True:
                        config[cfg] = input("Insert value for  {}".format(cfg))
                        if config[cfg].lower() in ['tiff','tif','jpeg','png']:
                            break
                        else:
                            print('Cannot export in cub\nOnly supported: TIFF, TIF, PNG, JPEG')
                            continue

                else:
                    while True:
                        config[cfg] = input("Insert value for  {}".format(cfg))
                        if config[cfg].lower() in ['yes','y','ye','no','n']:
                            break
                        else:
                            continue        

                if cfg in ['RES']:
                    if config[cfg].lower() in ['yes','y','ye']:
                        while True:
                            try:
                                cfg='CELL_SIZE'
                                config[cfg] = float(input("Insert value for  {}".format(cfg)))
                                break
                            except:
                                print("That's not a valid option! Only float numbers")
                    elif config[cfg].lower() in ['n','no']:
                        pass
                if cfg in ['LIM']:
                    if config[cfg].lower() in ['yes','y','ye']:
                        while True:
                            try:
                                cfg='LIM_SIZE'
                                config[cfg] = int(input("Insert value for  {}".format(cfg)))
                                break
                            except:
                                print("That's not a valid option! Only float numbers")
                    elif config[cfg].lower() in ['n','no']:
                        pass

    if config['COG'].lower() in ['yes','y','ye'] and config['OXT'].lower() in ['png','jpeg']:
        print('COG not compatible with png or jpeg format')
        config['COG'] = 'n'

    PATH = config['PATH']
    dst_folder = config['DST_PATH']
    ixt = config['IXT']
    oxt = config['OXT']
    bc = config['BC']
    sqcrp = config['SQCRP']
    res = config['RES']
    cell_size = config['CELL_SIZE']
    lim = config['LIM']
    limit_size = BC = config['LIM_SIZE']
    cog=config['COG']
    bit=config['8bit']
    dem=config['dem']
    overlap=config['overlap']
    
    cfg_params = [bc, sqcrp, lim, res, cog]
    if all(cf == '' for cf in cfg_params) == True and ixt == oxt:
        print('Please select at least one task or different file format for input and output')
    else:
        fold_name = 'BC_'+str(bc)+'_SQCRP_'+str(sqcrp)+'_'+'CellSize_'+str(cell_size).replace('.','-')+'_m_'+'_LIM_'+str(lim)+'_'+str(limit_size)+'_px_'+'cog_'+cog
        
        if dst_folder == None:
            dst_folder = PATH+'/'+fold_name
        else:
            
            dst_folder = dst_folder+'/'+fold_name
        os.makedirs(dst_folder, exist_ok=True)
    
        main()

[Errno 2] No such file or directory: '../data/MARSPIT_v2/test_pit/BC_n_SQCRP_n_CellSize_None_m__LIM_y_8000_px_cog_n/Processed.csv'


  proc_df = proc_df.append(df,ignore_index=True)
Generating Images: 100%|██████████| 1/1 [01:32<00:00, 92.76s/File]
