In [18]:
import os
import geopandas as gpd
import sys
# True only on Windows. A plain substring test ('win' in sys.platform) would
# also match macOS, whose platform string is 'darwin'.
wos = sys.platform.startswith('win')
# The bare top-level `import gdal` is deprecated; the osgeo namespace works on every platform.
from osgeo import gdal
if wos:
    # On Windows the pool comes from `multiprocess` and get_bounds from the
    # local `funcs` module (presumably so worker processes can import it -- confirm).
    from multiprocess import Pool
    from funcs import get_bounds
else:
    from multiprocessing import Pool
import numpy as np
import numba
# import glob
from tqdm.autonotebook import tqdm
import math
from shapely import geometry
  

In [8]:
# Tile dimensions in pixels, ordered [x, y].
tile_size_px = [1000, 1000]
# Overlap between neighbouring tiles in pixels (subtracted from the tile size
# to get the step between tile origins).
tile_overlap_px = 150
# Misspelt legacy name kept as an alias because later cells still read it.
tile_oxerlap_px = tile_overlap_px
# NOTE(review): output_downsample and output_folder are not used by any cell
# visible here -- confirm whether they are still needed.
output_downsample = 0.5
output_folder = r''
# Folder that is searched recursively for input rasters.
# os.path.join avoids the literal doubled backslashes that the previous
# raw-string concatenation put into the Windows path.
if wos:
    geotiff_folder = os.path.join(os.getcwd(), 'test_imagery', 'VIVID_Landgate_20190910_112102323')
else:
    geotiff_folder = r'/media/nick/2TB Working 2/Projects/GeoTIFF_tiler_data/VIVID_Landgate_20190910_112102323 (copy)'
# File-name suffix that marks a file as an input raster.
input_file_ext = '.tif'

In [9]:
%%time
# search folder and all sub folders for '.tif' files
geo_tiff_list = []
for root, dirs, files in os.walk(geotiff_folder):
    for file in files:
        if file.endswith(input_file_ext):
            geo_tiff_list.append(os.path.join(root, file))
            
len(geo_tiff_list) 

Wall time: 1 ms


17

In [10]:
if not wos:
    def get_bounds(tif_path):
        """Return the bounds of a raster as ``[top, left, bottom, right]``.

        The bounds are derived from the dataset's geotransform origin and
        pixel size together with its raster dimensions; the rotation terms of
        the geotransform are ignored.

        Args:
            tif_path: Path of a raster file that GDAL can open.

        Returns:
            list: ``[top, left, bottom, right]`` in the raster's coordinate units.

        Raises:
            RuntimeError: If GDAL cannot open ``tif_path``.
        """
        data = gdal.Open(tif_path)
        # Without gdal.UseExceptions(), a failed open returns None instead of raising.
        if data is None:
            raise RuntimeError('GDAL could not open raster: {}'.format(tif_path))
        # Geotransform layout: (origin x, pixel width, rotation, origin y, rotation, pixel height).
        left, pixel_width, _, top, _, pixel_height = data.GetGeoTransform()
        right = left + pixel_width * data.RasterXSize
        bottom = top + pixel_height * data.RasterYSize
        return [top, left, bottom, right]

In [19]:
# Quick check on a single file: bounds of the first raster found.
first_tiff_path = geo_tiff_list[0]
geo_tiff_bounds = get_bounds(first_tiff_path)
geo_tiff_bounds

[-34.27734375, 116.103515625, -34.365234375, 116.19140625]

In [17]:
# Smoke test of the worker pool on (at most) the first three rasters.
# The progress-bar total follows the actual sample size, so it stays correct
# when fewer than three files were found.
sample_paths = geo_tiff_list[:3]
with Pool(3) as pool:
    list(tqdm(pool.imap(get_bounds, sample_paths), total=len(sample_paths)))

  0%|          | 0/3 [00:00<?, ?it/s]

In [26]:
%%time
# use multiprocessing to extract raster bounds
# interesting when using a small number of geotiffs its is slightly quicker to just run this as a loop
# however once you get over a 100 or so this method is much quicker
with Pool() as pool:
    geo_tiff_bounds = list(tqdm(pool.imap(get_bounds, geo_tiff_list), total=len(geo_tiff_list)))
# convert list to np array
geo_tiff_bounds = np.array(geo_tiff_bounds)
# grab max extents
bound_y_max = float(geo_tiff_bounds[:,0].max()) #top
bound_x_min = float(geo_tiff_bounds[:,1].min()) #left
bound_y_min = float(geo_tiff_bounds[:,2].min()) #bottom
bound_x_max = float(geo_tiff_bounds[:,3].max()) #right

  0%|          | 0/17 [00:00<?, ?it/s]

Wall time: 782 ms


In [21]:
# open one image to get the pixel size
test_raster = gdal.Open(geo_tiff_list[0])
test_raster_gt =test_raster.GetGeoTransform()
pixel_size_x = test_raster_gt[1]
pixel_size_y = test_raster_gt[5]
print(pixel_size_x,pixel_size_y)

4.487879136029412e-06 -4.487879136029412e-06


In [22]:
# Step between neighbouring tile origins: (tile size - overlap) pixels,
# scaled by the pixel size. 'tile_oxerlap_px' (sic) is the overlap setting.
step_px_x = tile_size_px[0] - tile_oxerlap_px
step_px_y = tile_size_px[1] - tile_oxerlap_px
x_move = pixel_size_x * step_px_x
y_move = pixel_size_y * step_px_y

# Full extent of one tile, again scaled by the pixel size.
x_tile_size = pixel_size_x * tile_size_px[0]
y_tile_size = pixel_size_y * tile_size_px[1]
print(x_move, y_move)

0.003814697265625 -0.003814697265625


In [23]:
# Number of tile columns: horizontal extent divided by the x step, rounded up.
x_extent = bound_x_max - bound_x_min
number_of_cols = math.ceil(abs(x_extent / x_move))
number_of_cols

139

In [24]:
# Number of tile rows: vertical extent divided by the y step, rounded up.
y_extent = bound_y_max - bound_y_min
number_of_rows = math.ceil(abs(y_extent / y_move))
number_of_rows

93

In [33]:
%%time
# probs worth trying to speed this up with PyGEOS
tile_coord_list = []

tile_top = bound_y_max

for row in tqdm(range(0,number_of_rows)):
    tile_top = bound_y_max + y_move*row
    tile_bottom = tile_top + y_tile_size
    
    tile_left = bound_x_min
    
    for col in range(0,number_of_cols):
        tile_left = bound_x_min + col*x_move
        tile_right = tile_left + x_tile_size        
        polygon = {'geometry':geometry.Polygon([[tile_left, tile_top], [tile_right, tile_top], [tile_right, tile_bottom], [tile_left, tile_bottom]])}
        tile_coord_list.append(polygon)



  0%|          | 0/93 [00:00<?, ?it/s]

Wall time: 178 ms


In [29]:
# Collect the tile polygons into a GeoDataFrame.
# NOTE(review): the CRS is hard-coded to EPSG:4326 -- confirm it matches the input rasters.
polygon_tiles = gpd.GeoDataFrame(
    tile_coord_list,
    geometry='geometry',
    crs='EPSG:4326',
)

In [31]:
# Write the tile grid to a GeoPackage. The destination depends on the platform.
if not wos:
    output_gpkg = "/media/nick/2TB Working 2/Projects/GeoTIFF_tiler_data/output.gpkg"
else:
    # os.path.join avoids the literal doubled backslashes that the previous
    # raw-string concatenation put into the path.
    output_gpkg = os.path.join(os.getcwd(), 'test_imagery', 'VIVID_Landgate_20190910_112102323', 'output.gpkg')
polygon_tiles.to_file(output_gpkg, driver="GPKG")

