## Multiple-format CRS Converter

This notebook converts all files with a supported extension in a user-selected folder from their current CRS, if present, to a user-selected CRS.
If a source file has no CRS, it is assigned the user-selected CRS.

- **All files with an extension other than [shp/SHP, tiff/TIFF, tif/TIF, gpkg/GPKG] will be ignored**
- **At the moment the search is not recursive**
### TO-DO
- [ ] Add recursive search
- [ ] Add a function to move all unsupported files to the destination path, reconstructing the original directory structure

For problems contact: g.nodjoumi@jacobs-university.de

Imports

In [None]:
from argparse import ArgumentParser
from tkinter import Tk,filedialog
import os
from tqdm.notebook import tqdm
import itertools
import gdal
from gdal import Warp
import shutil
import datetime
import geopandas as gpd

Utilities functions

In [None]:
def question(question, answers):
    """Prompt on stdin until the reply is one of ``answers`` and return it.

    Before every prompt the list of accepted replies is printed. The prompt
    text is ``question`` followed by ``'Answer: '``.
    """
    reply = None
    while True:
        # Checked before prompting, exactly like the loop condition it replaces.
        if reply in answers:
            return reply
        print('Please enter only: ')
        print(', '.join(map(str, answers)))

        reply = input(question+'Answer: ')

def make_folder(path, name):
    """Create the directory ``path/name`` and return the path actually created.

    If the directory does not exist it is simply created. If it already
    exists the user is asked whether to remove it:

    * 'yes' / 'y': the existing directory tree is deleted and recreated empty.
    * 'no' / 'n': the existing directory is left untouched and a new one
      named ``name_<ddmmYYYY_HHMMSS>`` is created next to it.

    Returns:
        The path of the directory that was created. In the 'no' case this is
        the timestamped path, so callers should use the return value.
    """
    folder = path+'/'+name
    if not os.path.exists(folder):
        print(name, ' Folder not exist, creating.')
        os.mkdir(folder)
        print('Created new ', name,' Folder')
        return folder

    qst = name + ' Folder exist, remove it? '
    answ = question(qst,['yes','y','no','n'])
    if answ in ['yes', 'y']:
        shutil.rmtree(folder)
        os.mkdir(folder)
        print(name, 'Folder created')
    else:
        # The file imports the ``datetime`` *module*, so the class has to be
        # qualified; a bare ``datetime.now()`` raises AttributeError.
        now = datetime.datetime.now()
        folder = path+'/'+name +'_' + now.strftime("%d%m%Y_%H%M%S")
        print(folder, ' Folder not exist, creating.')
        os.mkdir(folder)
        print('Created new ', name,' Folder')
    return folder

def get_paths(PATH, ixt):
    """List the entries of ``PATH`` whose extension is in ``ixt`` (any case).

    The working directory is changed to ``PATH`` as a side effect. The
    returned entries are bare names, grouped in the order the extensions
    appear in ``ixt``.
    """
    import fnmatch
    import re
    os.chdir(PATH)
    matched = []
    for suffix in ixt:
        # Glob '*.<suffix>' turned into a regex so it can ignore case.
        pattern = re.compile(fnmatch.translate('*.' + suffix), re.IGNORECASE)
        matched += filter(pattern.match, os.listdir(PATH))

    return matched

Converter core function

In [None]:
def converter(file, OUT_CRS):
    """Reproject a single vector or raster file into ``OUT_CRS``.

    The result is written to ``DST_PATH/<extension>/<file name>``; that
    folder is expected to exist already. Shapefiles and GeoPackages are
    handled with geopandas, every other file is passed to GDAL ``Warp``.

    Args:
        file: Path of the input file.
        OUT_CRS: Target coordinate reference system.
    """
    # splitext takes the text after the LAST dot, so a name such as
    # 'area.v2.tif' yields 'tif'.
    xt = os.path.splitext(file)[1][1:]
    name = os.path.basename(file)
    outfile = DST_PATH+'/'+xt+'/'+ name
    # Dispatch on the lower-cased extension; the output folder keeps the
    # original spelling.
    kind = xt.lower()
    if kind in ('shp', 'gpkg'):
        gdf = gpd.read_file(file)
        if gdf.crs is None:
            # No source CRS: nothing to reproject from, so just label the data.
            gdf.crs = OUT_CRS
        else:
            try:
                gdf = gdf.to_crs(OUT_CRS)
            except Exception as err:
                # Best-effort fallback kept from the original code, but no
                # longer silent and no longer swallowing KeyboardInterrupt.
                print('Reprojection of', file, 'failed (', err,
                      '); assigning the output CRS without transforming')
                gdf.crs = OUT_CRS
        drv = "GPKG" if kind == 'gpkg' else 'ESRI Shapefile'
        gdf.to_file(outfile, driver=drv)
    else:
        openfile = gdal.Open(file)
        src_crs = openfile.GetSpatialRef()
        Warp(outfile, openfile, srcSRS=src_crs, dstSRS=OUT_CRS)

Parallel functions

In [None]:
def parallel_converter(files, OUT_CRS, JOBS):
    """Run ``converter`` on every entry of ``files`` using ``JOBS`` joblib workers."""
    from joblib import Parallel, delayed
    tasks = (delayed(converter)(path, OUT_CRS) for path in files)
    Parallel(n_jobs=JOBS)(tasks)
    

In [None]:
def chunk_creator(item_list, chunksize):
    """Yield consecutive tuples of at most ``chunksize`` items from ``item_list``.

    The last tuple may be shorter. Nothing is yielded for an empty input.
    """
    source = iter(item_list)

    def take():
        return tuple(itertools.islice(source, chunksize))

    # Two-argument iter(): keep calling take() until it returns an empty tuple.
    yield from iter(take, ())

Main function

In [None]:
def main():
    """Convert every supported file found in ``DATA_PATH``.

    Steps:
      1. List the tiff/tif/gpkg/shp files in ``DATA_PATH``.
      2. Create one sub-folder per extension found inside ``DST_PATH``.
      3. Choose the number of parallel workers from total RAM and file count.
      4. Convert the files in batches of that size, updating a progress bar
         after each batch.

    Reads the module-level names ``DATA_PATH``, ``DST_PATH`` and ``OUT_CRS``.
    """
    import psutil

    # List all files
    all_files = get_paths(DATA_PATH, ['tiff','tif','gpkg','shp'])
    if not all_files:
        print('No supported files found in', DATA_PATH)
        return

    # One output folder per distinct extension (leading dot removed).
    extensions = {os.path.splitext(file)[1] for file in all_files}
    for exts in extensions:
        exts = exts[1:]
        print(exts)
        make_folder(DST_PATH, exts)

    # Check available resources.
    # virtual_memory().total is in bytes; >> 30 converts it to whole GiB.
    avram = psutil.virtual_memory().total >> 30
    # Only the "little RAM, many files" case falls back to physical cores;
    # every other combination (including exactly 5000 files) uses logical cores.
    limited = avram <= 31 and len(all_files) > 5000
    # cpu_count() may return None when the count cannot be determined.
    JOBS = psutil.cpu_count(logical=not limited) or 1
    # Never start more workers than there are files.
    JOBS = min(JOBS, len(all_files))

    # Parallel processing, one batch of JOBS files at a time
    with tqdm(total=len(all_files),
              desc = 'Generating files',
              unit='File') as pbar:

        for files in chunk_creator(all_files, JOBS):
            parallel_converter(files, OUT_CRS, JOBS)
            # The last batch can be shorter than JOBS.
            pbar.update(len(files))

In [None]:

if __name__ == "__main__":

    ## PATHS
    # Ask for the output root and the input data folder.
    WORK_PATH = input('Path of the output folder ')
    DATA_PATH = input('Path to data files folder:')
    # normpath drops a trailing separator; without it basename() of
    # 'some/dir/' would be the empty string.
    orig_dir = os.path.basename(os.path.normpath(DATA_PATH))
    # make_folder returns the folder it actually created, which can differ
    # from WORK_PATH/orig_dir, so use its return value as the destination.
    DST_PATH = make_folder(WORK_PATH, orig_dir)
    OUT_CRS = '+proj=utm +zone=33 +datum=WGS84 +units=m +no_defs'
    #OUT_CRS = input(str('Output crs string'))

    main()