# ImageProcessingUtils
[![DOI](https://zenodo.org/badge/287286230.svg)](https://zenodo.org/badge/latestdoi/287286230)

Author: g.nodjoumi@jacobs-university.de
This repo contain image processing utilities that i used for prepare images before Deep Learning training
   
_____________________________________________________________________________

# MIP-SCR - Multi Image Parallel Square Crop Resize
This script do:

- Crop images to 1:1 aspect ratio from the center of the image
- Remove black borders from image
- Resize cell-size of image to user-defined size
- Create tiles if images are above a user-defined limit
- Convert from JP2 format to Geotiff

## Usage
These tools can be used in a conda environment or in docker container.
For Conda environment, just install the required packages, activate the env, otherwise build the container using the provided Dockerfile. In both cases do as follows:
** Command Line Interface **
- Create provided environment or install required packages
- Just execute the script and interactively insert all parameters.
** Jupyter Notebook **
- Create provided environment or install required packages
- Run the notebook and change the config dictionary or just run and pass configs interactively
If using CLI execute the script passing at least --PATH argument

The script will automatically create subfolders containing the results. 

In [1]:
import os
from copy import copy
import pandas as pd
import rasterio as rio
from rasterio.enums import Resampling
from rasterio.windows import Window
import cv2 as cv
from utils.GenUtils import get_paths, chunk_creator, folder_file_size, question
from utils.ImgUtils import square_crop, geoslicer, borderCropper, CellSizeScale

In [2]:
global PATH
global DST_PATH
global IXT
global OXT
global RES, CELL_SIZE
global SQRCP
global BC
global LIM, LIM_SIZE
global proc_df

***EDIT THIS***
**or use interactive configuration**

In [3]:
config = {
'PATH':"../data",
'DST_PATH':"../data",
'IXT':None,
'OXT':None,
'BC':None,
'SQCRP':None,
'RES':None,
'CELL_SIZE':None,
'LIM':None,
'LIM_SIZE':None,
}

**end of config**

In [4]:
def cropper(image, bc, sqcrp, res, cell_size, lim, limit_size):
    image_name = os.path.basename(image).split('.')[0]#+ixt
    savename=dst_folder+'/'+image_name
    data_dict = {'Name': image_name} 
    tmp_df = pd.DataFrame.from_dict([data_dict])
    try:
        with rio.open(image) as src:
          
            src_height, src_width = src.shape
            crs = src.crs
            cnt = src.count
            src_trs = src.transform
            dst_trs = copy(src_trs)
            xoff = 0
            yoff = 0
            src_win = Window(xoff,yoff,src.width,src_height)
            # dst_win = copy(src_win)
            if cell_size == None:
               cell_size = src.transform[0]
            
            if lim in ['Y','y']:
                if src_width > int(limit_size) or src_height >int(limit_size):
                    # src_crs = src.crs
                    max_dim = int(limit_size)
                geoslicer(image, max_dim, savename, bc, sqcrp, res, cell_size, oxt)

            else:
                
                if bc in ['y','y']:
                    try:
                        src_width, src_height, src_win, dst_trs, savename =  borderCropper(src, src_win, savename)
                    except Exception as e:
                        print(e)
                        pass
                    
                if sqcrp in ['Y','y']:

                    try:
                       src_width, src_height, src_win, dst_trs, savename = square_crop(src,
                                                                              src_width,
                                                                              src_height,
                                                                              src_win,
                                                                              # xoff,
                                                                              # yoff,
                                                                              savename)
                    except Exception as e:
                        print(e)
                        pass
                    
                    
                    
               
                if res in ['Y', 'y']:
                    try:
                        src_height, src_width, dst_trs, savename = CellSizeScale(src,
                                                                                 src_height,
                                                                                 src_width,
                                                                                 float(cell_size),
                                                                                 dst_trs,
                                                                                 savename)
                    except Exception as e:
                        print(e)
                        pass
    
                try:
                    img = src.read(window=src_win,
                                   out_shape=(cnt, src_height, src_width),
                                   resampling=Resampling.cubic)
                    img = cv.normalize(img, None, 0, 255, cv.NORM_MINMAX, cv.CV_8U)
                    maxval = img.max()
                    
                    if maxval != 0:
                        alpha = alpha=(255.0/maxval)
                        img = cv.convertScaleAbs(img, alpha=alpha) 
                        savename = savename+'.'+oxt
                        print(savename)
                        with rio.open(savename,'w',
                                  driver='GTiff',
                                  window=src_win,
                                  width=src_width,
                                  height=src_height,
                                  count=cnt,
                                  dtype=img.dtype,
                                  transform=dst_trs,
                                  crs=crs) as dst:
                            dst.write(img)
                except Exception as e:
                    print(e)
                
                tmp_df = pd.DataFrame.from_dict([data_dict])                    
                return tmp_df
    except Exception as e:
        print(e)
        pass

In [5]:
def parallel_crops(files, JOBS, bc, sqcrp, res, cell_size, lim, limit_size):
    from joblib import Parallel, delayed
    tmp_df = Parallel (n_jobs=JOBS)(delayed(cropper)(files[i],
                                                     bc, sqcrp,
                                                     res, cell_size,
                                                     lim, limit_size)
                            for i in range(len(files)))
    return tmp_df

In [6]:
def main():
        
    image_list = get_paths(PATH, ixt) 
    total_size, max_size, av_fsize = folder_file_size(PATH,image_list)

    from tqdm import tqdm
    import psutil
    
    avram=psutil.virtual_memory().total >> 30
    avcores=psutil.cpu_count(logical=False)
    avthreads=psutil.cpu_count(logical=True)
    ram_thread = avram/avthreads
    req_mem = avthreads*max_size
    if req_mem > avcores and req_mem > avram:
        JOBS = avcores
    else:
        JOBS = avcores
    
        
    if ram_thread > 2:
        JOBS=avcores
    
    # cols = ['Name','Processed']
    proc_csv = dst_folder+'/Processed.csv'
    try:
        proc_df = pd.read_csv(proc_csv)
    except Exception as e:
        print(e)
        proc_df = pd.DataFrame(columns=['Name'])
    pass
    
    with tqdm(total=len(image_list),
             desc = 'Generating Images',
             unit='File') as pbar:
        
        filerange = len(image_list)
        chunksize = round(filerange/JOBS)
        if chunksize <1:
            chunksize=1
            JOBS = filerange
        chunks = []
        for c in chunk_creator(image_list, JOBS):
            chunks.append(c)
        
        
        for i in range(len(chunks)):
            files = chunks[i]
            lambda_f = lambda element:(os.path.basename(element).split('.')[0]) not in proc_df['Name'].to_list()
            # filtered = [path for path in files if os.path.basename(path).split(ixt)[0] not in proc_df['Name'].to_list()]
            filtered = filter(lambda_f, files)
            #for proc in proc_df['Name'].tolist():
                #chunk_filter = lambda element: element != proc
                #filtered = filter(chunk_filter, proc)
            chunk = list(filtered)
            if len(chunk)>0:
               tmp_df = parallel_crops(files, JOBS, bc, sqcrp, res, cell_size, lim, limit_size)
               for df in tmp_df:
                   proc_df = proc_df.append(df,ignore_index=True)
               #proc_df = proc_df.append(tmp_df,ignore_index=True)
               proc_df.to_csv(proc_csv, index=False)
               pbar.update(JOBS)
            else:
                pbar.update(len(files))
                continue

In [7]:
if __name__ == "__main__":
    for cfg in config:
        if config[cfg] == None:
            if cfg in ['CELL_SIZE']:
                if config['RES'] in ['y','yes','YES','Y']:
                    config[cfg] = input("Insert value for  {}".format(cfg))
            elif cfg in ['LIM_SIZE']:
                if config['LIM'] in ['y','yes','YES','Y']:
                    config[cfg] = input("Insert value for  {}".format(cfg))
            else:
                config[cfg] = input("Insert value for  {}".format(cfg))
        
    fold_name = 'BC_'+str(config['BC'])+'_SQCRP_'+str(config['SQCRP'])+'_'+'CellSize_'+str(config['CELL_SIZE']).replace('.','-')+'_m_'+'_LIM_'+str(config['LIM'])+'_'+str(config['LIM_SIZE'])+'_px'
    
    PATH = config['PATH']
    dst_folder = config['DST_PATH']
    ixt = config['IXT']
    oxt = config['OXT']
    bc = config['BC']
    sqcrp = config['SQCRP']
    res = config['RES']
    cell_size = config['CELL_SIZE']
    lim = config['LIM']
    limit_size = BC = config['LIM_SIZE']
    
    
    if bc == None and sqcrp == None and cell_size == None and lim == None and res == None:
        print('Please select at least one task')
    else:
        fold_name = 'BC_'+str(bc)+'_SQCRP_'+str(sqcrp)+'_'+'CellSize_'+str(cell_size).replace('.','-')+'_m_'+'_LIM_'+str(lim)+'_'+str(limit_size)+'_px'
        
        if dst_folder == None:
            dst_folder = PATH+'/'+fold_name
        else:
            
            dst_folder = dst_folder+'/'+fold_name
        os.makedirs(dst_folder, exist_ok=True)
    
    main()

Insert value for  IXT jp2
Insert value for  OXT tiff
Insert value for  BC y
Insert value for  SQCRP n
Insert value for  RES y
Insert value for  CELL_SIZE 2
Insert value for  LIM y
Insert value for  LIM_SIZE 5000


[Errno 2] No such file or directory: '../data/BC_y_SQCRP_n_CellSize_2_m__LIM_y_5000_px/Processed.csv'


Generating Images:   0%|                                | 0/1 [00:40<?, ?File/s]
ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/home/hyradus/anaconda3/envs/dev39/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3441, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "/tmp/ipykernel_103342/1728841714.py", line 39, in <module>
    main()
  File "/tmp/ipykernel_103342/1291007132.py", line 56, in main
    tmp_df = parallel_crops(files, JOBS, bc, sqcrp, res, cell_size, lim, limit_size)
  File "/tmp/ipykernel_103342/3733375019.py", line 3, in parallel_crops
    tmp_df = Parallel (n_jobs=JOBS)(delayed(cropper)(files[i],
  File "/home/hyradus/anaconda3/envs/dev39/lib/python3.9/site-packages/joblib/parallel.py", line 1041, in __call__
    if self.dispatch_one_batch(iterator):
  File "/home/hyradus/anaconda3/envs/dev39/lib/python3.9/site-packages/joblib/parallel.py", line 859, in dispatch_one_batch
    self._dispatch(tasks)
  File "/home/hyradus/anaconda3/envs/dev39/lib/python3.9/site-packages/joblib/parallel.py", line 777, in _dispatch

TypeError: object of type 'NoneType' has no len()