In [76]:
import os
import geopandas as gpd
import gdal
import numpy as np
import numba
# import glob
from multiprocessing import Pool
from tqdm.auto import tqdm
import math
from shapely import geometry

In [2]:
# --- Tiling configuration -------------------------------------------------
# Tile dimensions in pixels, ordered [x, y].
tile_size_px = [1000, 1000]
# Overlap between neighbouring tiles, in pixels.
# NOTE(review): the name is a misspelling of "overlap"; it is kept because
# later cells reference it under this exact name.
tile_oxerlap_px = 150
# NOTE(review): output_downsample and output_folder are not referenced by
# any cell visible in this notebook - confirm whether they are still needed.
output_downsample = 0.5
output_folder = r''
# Root folder searched (recursively) for input rasters.
geotiff_folder = r'/media/nick/2TB Working 2/Projects/GeoTIFF_tiler_data/VIVID_Landgate_20190910_112102323 (copy)'
# File-name suffix identifying an input raster.
input_file_ext = '.tif'

In [3]:
%%time
# Walk geotiff_folder and every sub-folder, keeping the full path of each
# file whose name ends with input_file_ext (paths stay in os.walk order).
geo_tiff_list = [
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk(geotiff_folder)
    for name in names
    if name.endswith(input_file_ext)
]

# show how many rasters were found
len(geo_tiff_list)

CPU times: user 1.81 ms, sys: 0 ns, total: 1.81 ms
Wall time: 1.09 ms


272

In [15]:
def get_bounds(tif_path):
    """Return the extent of a raster as ``[top, left, bottom, right]``.

    The extent is derived from the dataset's geotransform: the origin
    (elements 0 and 3) plus the pixel size (elements 1 and 5) multiplied by
    the raster width/height in pixels.

    NOTE(review): the geotransform terms at indices 2 and 4 are not used, so
    this presumably only gives the true extent for north-up (unrotated)
    rasters - confirm for the input data.

    Args:
        tif_path: path of the raster file, passed straight to ``gdal.Open``.

    Returns:
        list: ``[top, left, bottom, right]`` in the raster's own coordinates.

    Raises:
        RuntimeError: if GDAL could not open ``tif_path``.
    """
#     open file
    data = gdal.Open(tif_path)
    # gdal.Open hands back None (rather than raising) when exceptions are not
    # enabled; fail with a message naming the file instead of an opaque
    # AttributeError surfacing from a pool worker.
    if data is None:
        raise RuntimeError(f'GDAL could not open raster: {tif_path}')
#     grab bounds
    geoTransform = data.GetGeoTransform()
    left = geoTransform[0]
    top = geoTransform[3]
    right = left + geoTransform[1] * data.RasterXSize
    bottom = top + geoTransform[5] * data.RasterYSize
    return [top, left, bottom, right]

In [58]:
%%time
# use multiprocessing to extract raster bounds
# interestingly, when using a small number of geotiffs it is slightly quicker to just run this as a loop
# however once you get over 100 or so this method is much quicker

# Fail early with a clear message: an empty list would otherwise become a
# 1-D array and the column slicing below would raise an obscure IndexError.
if not geo_tiff_list:
    raise ValueError('geo_tiff_list is empty - no rasters were found to compute bounds from')

with Pool() as pool:
    geo_tiff_bounds = list(tqdm(pool.imap(get_bounds, geo_tiff_list), total=len(geo_tiff_list)))
# convert list to np array; one row per raster, columns are [top, left, bottom, right]
geo_tiff_bounds = np.array(geo_tiff_bounds)
# grab max extents across all rasters
bound_y_max = float(geo_tiff_bounds[:,0].max()) #top
bound_x_min = float(geo_tiff_bounds[:,1].min()) #left
bound_y_min = float(geo_tiff_bounds[:,2].min()) #bottom
bound_x_max = float(geo_tiff_bounds[:,3].max()) #right

  0%|          | 0/272 [00:00<?, ?it/s]

CPU times: user 32.9 ms, sys: 56.9 ms, total: 89.8 ms
Wall time: 174 ms


In [59]:
# Read the pixel size from the geotransform of the first raster in the list.
# NOTE(review): this assumes every raster shares the same pixel size - confirm.
test_raster = gdal.Open(geo_tiff_list[0])
test_raster_gt = test_raster.GetGeoTransform()
# geotransform element 1 is used as the x pixel size, element 5 as the y pixel size
pixel_size_x, pixel_size_y = test_raster_gt[1], test_raster_gt[5]
print(pixel_size_x, pixel_size_y)

4.487879136029412e-06 -4.487879136029412e-06


In [68]:
# Distance between the origins of neighbouring tiles: tile size minus the
# overlap, converted from pixels to map units.
x_move = pixel_size_x * (tile_size_px[0] - tile_oxerlap_px)
y_move = pixel_size_y * (tile_size_px[1] - tile_oxerlap_px)

# Full extent of a single tile in map units.
x_tile_size = pixel_size_x * tile_size_px[0]
y_tile_size = pixel_size_y * tile_size_px[1]
print(x_move, y_move)

0.003814697265625 -0.003814697265625


In [102]:
# Columns needed to span the full x extent when stepping by x_move (rounded up).
x_extent_in_steps = (bound_x_max - bound_x_min) / x_move
number_of_cols = math.ceil(abs(x_extent_in_steps))
number_of_cols

139

In [103]:
# Rows needed to span the full y extent when stepping by y_move (rounded up).
y_extent_in_steps = (bound_y_max - bound_y_min) / y_move
number_of_rows = math.ceil(abs(y_extent_in_steps))
number_of_rows

93

In [108]:
# probs worth trying to speed this up with PyGEOS
%%time
tile_coord_list = []

tile_top = bound_y_max

for row in tqdm(range(0,number_of_rows)):
    tile_top = bound_y_max + y_move*row
    tile_bottom = tile_top + y_tile_size
    
    tile_left = bound_x_min
    
    for col in range(0,number_of_cols):
        tile_left = bound_x_min + col*x_move
        tile_right = tile_left + x_tile_size        
        polygon = {'geometry':geometry.Polygon([[tile_left, tile_top], [tile_right, tile_top], [tile_right, tile_bottom], [tile_left, tile_bottom]])}
        tile_coord_list.append(polygon)



  0%|          | 0/93 [00:00<?, ?it/s]

CPU times: user 176 ms, sys: 26.3 ms, total: 202 ms
Wall time: 169 ms


In [105]:
# Wrap the tile records in a GeoDataFrame, using their 'geometry' entries as
# the geometry column.
# NOTE(review): the CRS is hard-coded; this assumes the source rasters are in
# EPSG:4326 - confirm against the input data.
polygon_tiles = gpd.GeoDataFrame(
    data=tile_coord_list,
    geometry='geometry',
    crs='EPSG:4326',
)

In [107]:
# Write the tile grid to disk as a GeoPackage.
# NOTE(review): the destination is a hard-coded absolute path.
polygon_tiles.to_file(
    "/media/nick/2TB Working 2/Projects/GeoTIFF_tiler_data/output.gpkg",
    driver="GPKG",
)

