In [3]:
import os
import re
from tqdm.auto import tqdm
import time
import gc
import geopandas as gpd
import rasterio
from rasterio.crs import CRS
from imageprocessing import convert_SHPtoPNG

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
def dropna_SHPs(folder, pattern = r'^tile_shp.*\.shp$'):
    '''
    Delete every shapefile in `folder` that contains no geometries, together
    with its paired raster and its sidecar files.

    Only directory entries whose name matches `pattern` are opened. For each
    empty shapefile the following files are removed when they exist:
        -   the shapefile itself
        -   the raster obtained by replacing every 'shp' in the file name
            with 'tif' (tile_shp_24_11.shp -> tile_tif_24_11.tif)
        -   the sidecars sharing the shapefile's stem (.cpg, .dbf, .prj, .shx)

    args:
        -   folder: directory that is scanned (not recursively)
        -   pattern: regular expression matched against each file name

    returns:
        -   list with one tuple per removed image-mask pair, holding the third
            and fourth underscore-separated fields of the file stem as strings
            (e.g. ('24', '11') for tile_shp_24_11.shp)

    raises:
        -   IndexError if a matching file name has fewer than four
            underscore-separated fields; nothing is deleted for that file
    '''

    ad_extensions = ['.cpg','.dbf','.prj','.shx']

    bin_list = []

    # Snapshot of the directory; files deleted below are still iterated, but
    # they never match `pattern` with the default naming scheme.
    filenames = os.listdir(folder)
    with tqdm(total=len(filenames)) as pbar:
        for filename in filenames:
            if re.match(pattern, filename):
                shapefile = gpd.read_file(os.path.join(folder, filename))
                if len(shapefile.geometry) == 0:
                    # Parse the tile indices first so that a malformed name
                    # fails before anything is removed from disk.
                    splitted = filename.split('.')[0].split('_')
                    bin_list.append((splitted[2], splitted[3]))

                    stem = filename[:-4]
                    doomed = [filename, filename.replace('shp', 'tif')]
                    doomed.extend(stem + ext for ext in ad_extensions)

                    # Every member of the pair is optional on disk: a missing
                    # raster must not abort the sweep halfway and leave
                    # orphaned sidecar files behind.
                    for name in doomed:
                        path = os.path.join(folder, name)
                        if os.path.exists(path):
                            os.remove(path)
            pbar.update(1)

    # Explicit collection after reading a potentially large number of files.
    gc.collect()
    return bin_list

100%|██████████| 2700/2700 [00:05<00:00, 451.63it/s]


[('24', '11'), ('0', '16'), ('23', '7'), ('21', '5'), ('0', '17'), ('18', '0'), ('2', '12'), ('24', '10'), ('2', '10'), ('24', '12'), ('0', '15'), ('4', '9'), ('21', '7'), ('10', '17'), ('24', '13'), ('2', '11'), ('0', '8'), ('24', '17'), ('23', '1'), ('0', '10'), ('0', '11'), ('23', '0'), ('24', '16'), ('0', '9'), ('2', '9'), ('24', '14'), ('2', '16'), ('23', '2'), ('21', '0'), ('0', '12'), ('23', '3'), ('18', '5'), ('2', '17'), ('24', '15'), ('2', '8'), ('20', '16'), ('16', '17'), ('19', '2'), ('24', '2'), ('22', '13'), ('4', '11'), ('4', '10'), ('22', '12'), ('24', '3'), ('19', '3'), ('20', '7'), ('20', '17'), ('6', '17'), ('20', '15'), ('24', '1'), ('4', '12'), ('22', '10'), ('22', '11'), ('4', '13'), ('24', '0'), ('20', '14'), ('6', '16'), ('20', '0'), ('8', '17'), ('6', '12'), ('24', '4'), ('3', '9'), ('4', '17'), ('22', '15'), ('22', '14'), ('4', '16'), ('24', '5'), ('3', '8'), ('8', '16'), ('6', '11'), ('18', '17'), ('22', '1'), ('14', '17'), ('1', '8'), ('22', '16'), ('4', '14

In [6]:
def set_valid_CRS(folder, pattern = r'^tile_tif.*\.tif$', desired_crs_epsg=23700):
    '''
    Assign a coordinate reference system to every raster in `folder` that
    does not have one.

    Only directory entries whose name matches `pattern` are opened. Rasters
    that already carry a CRS are left untouched.

    args:
        -   folder: directory that is scanned (not recursively)
        -   pattern: regular expression matched against each file name
        -   desired_crs_epsg: EPSG code written to rasters without a CRS

    returns:
        -   list with one tuple per updated raster, holding the third and
            fourth underscore-separated fields of the file stem as strings
            (e.g. ('24', '11') for tile_tif_24_11.tif)
    '''
    out_list = []
    filenames = os.listdir(folder)
    with tqdm(total=len(filenames)) as pbar:
        for filename in filenames:
            if re.match(pattern, filename):
                path = os.path.join(folder, filename)

                # Inspect in read mode first so rasters that are already
                # georeferenced are never opened for writing.
                with rasterio.open(path) as src:
                    # A missing CRS is reported as None (or a falsy CRS object).
                    crs_missing = not src.crs

                if crs_missing:
                    # The crs attribute is read-only on a dataset opened in
                    # the default 'r' mode; update mode is required to store it.
                    with rasterio.open(path, 'r+') as dst:
                        dst.crs = CRS.from_epsg(desired_crs_epsg)
                    print(f'{filename} set to EPSG:{desired_crs_epsg}')
                    splitted = filename.split('.')[0].split('_')
                    out_list.append((splitted[2], splitted[3]))
            pbar.update(1)

    # Explicit collection after opening a potentially large number of files.
    gc.collect()
    return out_list

100%|██████████| 2700/2700 [00:01<00:00, 1692.47it/s]
