In [1]:
import os
import geopandas as gpd
import sys
wos = 'win' in sys.platform
if wos:
    from osgeo import gdal
    from multiprocess import Pool
    from funcs import get_bounds
else:
    import gdal
    from multiprocessing import Pool
import numpy as np
import numba
from tqdm.autonotebook import tqdm
import math
from shapely import geometry

  


In [2]:
import pkg_resources
# Record the geopandas version this notebook was run with.
# Read it from the already-imported package rather than through the
# deprecated pkg_resources API.
gpd.__version__

'0.9.0'

In [17]:
# ---- tiling parameters -------------------------------------------------
# tile size in pixels, as [x, y]
tile_size_px = [1000,1000] #x,y
# overlap in pixels between neighbouring tiles; the name is a typo of
# "overlap" but is kept because later cells reference it as spelled
tile_oxerlap_px = 150
# NOTE(review): output_downsample and output_folder are not read by any of
# the cells shown here; presumably consumed further on. Confirm.
output_downsample = 0.5
output_folder = r''

# ---- input imagery -----------------------------------------------------
# os.path.join is used for the relative test-imagery location so the path
# gets exactly one platform separator between components instead of the
# doubled backslashes a raw-string concatenation produced.
if wos:
    geotiff_folder = os.path.join(os.getcwd(), 'test_imagery', 'VIVID_Landgate_20190910_112102323')
else:
    geotiff_folder = r'/media/nick/2TB Working 2/Projects/GeoTIFF_tiler_data/VIVID_Landgate_20190910_112102323'
# only files whose name ends with this suffix are picked up
input_file_ext = '.tif'

In [18]:
%%time
# Walk geotiff_folder recursively and keep the full path of every file whose
# name ends with input_file_ext.
geo_tiff_list = [
    os.path.join(folder, file_name)
    for folder, _sub_folders, file_names in os.walk(geotiff_folder)
    for file_name in file_names
    if file_name.endswith(input_file_ext)
]

len(geo_tiff_list)

CPU times: user 1.27 ms, sys: 257 µs, total: 1.53 ms
Wall time: 888 µs


17

In [19]:
if not wos:
    def get_bounds(tif_path):
        """Return the extent of a raster as ``[top, left, bottom, right]``.

        The extent is derived from the dataset's geotransform and its size
        in pixels. Only elements 0, 1, 3 and 5 of the geotransform are used,
        so any rotation/shear terms (elements 2 and 4) are ignored; the
        result is only the true extent for north-up rasters.

        Parameters
        ----------
        tif_path : str
            Path of the raster file to open with GDAL.

        Returns
        -------
        list
            ``[top, left, bottom, right]`` in the raster's coordinate units.

        Raises
        ------
        RuntimeError
            If GDAL cannot open ``tif_path``.
        """
        # open file
        data = gdal.Open(tif_path)
        # gdal.Open hands back None (rather than raising) when exceptions
        # are not enabled; fail here with a message that names the file
        if data is None:
            raise RuntimeError('GDAL could not open raster: {}'.format(tif_path))
        # grab bounds
        geoTransform = data.GetGeoTransform()
        left = geoTransform[0]
        top = geoTransform[3]
        right = left + geoTransform[1] * data.RasterXSize
        bottom = top + geoTransform[5] * data.RasterYSize
        return [top, left, bottom, right]

In [20]:
# Sanity check on a single file: bounds of the first GeoTIFF found.
first_tiff_path = geo_tiff_list[0]
geo_tiff_bounds = get_bounds(first_tiff_path)
geo_tiff_bounds

[-34.27734375, 116.455078125, -34.365234375, 116.54296875]

In [21]:
# Smoke test: push the first three files through a 3-worker pool to confirm
# get_bounds can be dispatched to worker processes. The result is discarded.
with Pool(3) as pool:
    sample_bounds = pool.imap(get_bounds, geo_tiff_list[:3])
    list(tqdm(sample_bounds, total=3))

  0%|          | 0/3 [00:00<?, ?it/s]

In [22]:
%%time
# Extract the bounds of every raster in parallel.
# With only a handful of GeoTIFFs a plain loop is slightly quicker, but past
# roughly a hundred files the pool is much faster.
with Pool() as pool:
    bounds_iter = pool.imap(get_bounds, geo_tiff_list)
    geo_tiff_bounds = list(tqdm(bounds_iter, total=len(geo_tiff_list)))

# one row per raster, columns are top, left, bottom, right
geo_tiff_bounds = np.array(geo_tiff_bounds)

# overall extent covered by all rasters together
bound_y_max = float(np.max(geo_tiff_bounds[:, 0]))  # top
bound_x_min = float(np.min(geo_tiff_bounds[:, 1]))  # left
bound_y_min = float(np.min(geo_tiff_bounds[:, 2]))  # bottom
bound_x_max = float(np.max(geo_tiff_bounds[:, 3]))  # right

  0%|          | 0/17 [00:00<?, ?it/s]

CPU times: user 22.1 ms, sys: 51.8 ms, total: 73.9 ms
Wall time: 167 ms


In [23]:
# Read the pixel size from the first image's geotransform
# (element 1 is the x step, element 5 is the y step).
test_raster = gdal.Open(geo_tiff_list[0])
test_raster_gt = test_raster.GetGeoTransform()
pixel_size_x, pixel_size_y = test_raster_gt[1], test_raster_gt[5]
print(pixel_size_x, pixel_size_y)

4.487879136029412e-06 -4.487879136029412e-06


In [24]:
# full footprint of one tile, in map units
x_tile_size = pixel_size_x*tile_size_px[0]
y_tile_size = pixel_size_y*tile_size_px[1]

# distance between the origins of neighbouring tiles, in map units:
# the tile size minus the overlap, scaled by the pixel size
x_move = pixel_size_x*(tile_size_px[0]-tile_oxerlap_px)
y_move = pixel_size_y*(tile_size_px[1]-tile_oxerlap_px)
print(x_move, y_move)

0.003814697265625 -0.003814697265625


In [25]:
# columns needed to step across the full x extent, one x_move per column
x_steps = abs((bound_x_max-bound_x_min)/x_move)
number_of_cols = math.ceil(x_steps)
number_of_cols

139

In [26]:
# rows needed to step across the full y extent, one y_move per row
y_steps = abs((bound_y_max-bound_y_min)/y_move)
number_of_rows = math.ceil(y_steps)
number_of_rows

93

In [27]:
# pygeos must be installed for this option; to install it:
# !conda install -c conda-forge pygeos
# Switch on the geopandas `use_pygeos` option, then print it back to confirm
# the setting took effect.
gpd.options.use_pygeos = True
print(gpd.options.use_pygeos)

True


In [28]:
def is_tile_in_bounds(tile_dict,geo_tiff_bounds):
    """Return True if the tile overlaps at least one raster extent.

    Parameters
    ----------
    tile_dict : dict
        Tile extent with the keys 'top', 'left', 'bottom' and 'right'.
    geo_tiff_bounds : iterable
        One entry per raster, each indexable as [top, left, bottom, right].

    Returns
    -------
    bool
        True when the tile and any raster share an area of non-zero size,
        False otherwise (including when ``geo_tiff_bounds`` is empty).

    Notes
    -----
    Assumes top > bottom and left < right for both tiles and rasters.
    Two rectangles overlap exactly when their x ranges overlap and their y
    ranges overlap. Testing only whether a tile edge falls strictly inside a
    raster misses a tile that fully encloses a raster, and a tile whose
    edges coincide with the raster's edges.
    """
    tile_top = tile_dict['top']
    tile_left = tile_dict['left']
    tile_bottom = tile_dict['bottom']
    tile_right = tile_dict['right']

    for geo_bounds in geo_tiff_bounds:
        tif_top = geo_bounds[0]
        tif_left = geo_bounds[1]
        tif_bottom = geo_bounds[2]
        tif_right = geo_bounds[3]

        # strict comparisons: rectangles that only touch along an edge or
        # at a corner share no imagery and are not counted as overlapping
        overlaps_y = tile_bottom < tif_top and tile_top > tif_bottom
        overlaps_x = tile_left < tif_right and tile_right > tif_left
        if overlaps_y and overlaps_x:
            return True
    return False
            

In [29]:
# probs worth trying to speed this up with PyGEOS
# tile_coord_list = []
def make_polygons(row):
    """Build the tile polygons for one row of the tile grid.

    The row's top edge is ``bound_y_max + y_move*row`` and its bottom edge is
    one ``y_tile_size`` further on. Columns start at ``bound_x_min`` and
    advance by ``x_move``. A tile is kept only when ``is_tile_in_bounds``
    accepts it against ``geo_tiff_bounds``.

    Parameters
    ----------
    row : int
        Index of the grid row, counted from the top of the overall extent.

    Returns
    -------
    list of dict
        One ``{'geometry': Polygon}`` entry per kept tile, in column order.
    """
    top_edge = bound_y_max + y_move*row
    bottom_edge = top_edge + y_tile_size

    row_polygons = []
    for col in range(number_of_cols):
        left_edge = bound_x_min + col*x_move
        right_edge = left_edge + x_tile_size
        extent = {'top':top_edge,'left':left_edge,'bottom':bottom_edge,'right':right_edge}

        # skip tiles that do not fall on any raster
        if not is_tile_in_bounds(extent,geo_tiff_bounds):
            continue

        # corners listed top-left, top-right, bottom-right, bottom-left
        corners = [
            [left_edge, top_edge],
            [right_edge, top_edge],
            [right_edge, bottom_edge],
            [left_edge, bottom_edge],
        ]
        row_polygons.append({'geometry':geometry.Polygon(corners)})
    return row_polygons


In [37]:
%%time
# Build the polygons in parallel, one task per grid row.
with Pool() as pool:
    polygons_by_row = pool.map(make_polygons, range(0,number_of_rows))

# pool.map returns one list per row; flatten into a single list of
# {'geometry': Polygon} records. A plain comprehension keeps the records as
# they are and also copes with there being no rows at all.
tile_polygon_list = [tile for row_tiles in polygons_by_row for tile in row_tiles]

CPU times: user 77.8 ms, sys: 32.8 ms, total: 111 ms
Wall time: 159 ms


In [39]:
%%time
# Turn the list of {'geometry': Polygon} records into a GeoDataFrame,
# tagging the coordinates as EPSG:4326.
polygon_tiles_gpd = gpd.GeoDataFrame(
    tile_polygon_list,
    geometry='geometry',
    crs='EPSG:4326',
)

CPU times: user 20.5 ms, sys: 4.09 ms, total: 24.6 ms
Wall time: 23.3 ms


In [41]:
# Write the tile grid to a GeoPackage.
# The relative test-imagery location is assembled with os.path.join so the
# path gets exactly one platform separator between components instead of the
# doubled backslashes a raw-string concatenation produced.
if not wos:
    output_gpkg_path = "/media/nick/2TB Working 2/Projects/GeoTIFF_tiler_data/output.gpkg"
else:
    output_gpkg_path = os.path.join(
        os.getcwd(), 'test_imagery', 'VIVID_Landgate_20190910_112102323', 'output.gpkg'
    )
polygon_tiles_gpd.to_file(output_gpkg_path, driver="GPKG")

