In [1]:
import os
import geopandas as gpd
import sys
wos = 'win' in sys.platform
if wos:
    from osgeo import gdal
    from multiprocess import Pool
    from funcs import get_bounds
else:
    import gdal
    from multiprocessing import Pool
import numpy as np
from numba import jit
from tqdm.autonotebook import tqdm
import math
from shapely import geometry

  


In [100]:
# Tile dimensions in pixels, ordered [x, y].
tile_size_px = [1000, 1000]
# Number of pixels shared between neighbouring tiles.
tile_oxerlap_px = 150
# NOTE(review): not read by any cell visible here - presumably an output scale factor; confirm.
output_downsample = 0.5
# Folder that clipped tiles are written to.
output_folder = r'/media/nick/2TB Working 2/Projects/GeoTIFF_tiler_data/tiles'
# Folder that is searched for input rasters; on Windows it is resolved relative to the working directory.
if wos:
    geotiff_folder = r'{}'.format(os.getcwd())+r'\\test_imagery\\VIVID_Landgate_20190910_112102323'
else:
    geotiff_folder = r'/media/nick/2TB Working 2/Projects/GeoTIFF_tiler_data/VIVID_Landgate_20190910_112102323'
# Only files whose name ends with this extension are picked up.
input_file_ext = '.tif'

In [3]:
# The use_pygeos option needs geopandas v0.8 or newer, so display the installed version.
import pkg_resources
pkg_resources.get_distribution('geopandas').version

'0.9.0'

In [4]:
# Install command kept for reference (pygeos is needed for the option below):
# !conda install -c conda-forge pygeos
# Switch geopandas to the pygeos backend, then print the option back to confirm its value.
gpd.options.use_pygeos = True
print(gpd.options.use_pygeos)

True


In [5]:
%%time
# Walk geotiff_folder recursively and collect every file whose name ends with input_file_ext.
geo_tiff_list = []
for root, dirs, files in os.walk(geotiff_folder):
    geo_tiff_list.extend(
        os.path.join(root, name) for name in files if name.endswith(input_file_ext)
    )

len(geo_tiff_list)

CPU times: user 3.33 ms, sys: 283 µs, total: 3.62 ms
Wall time: 1.86 ms


17

In [6]:
if not wos:
    def get_bounds(tif_path):
        """Return the georeferenced bounding box of a raster.

        Parameters
        ----------
        tif_path : str
            Path to a raster that GDAL can open.

        Returns
        -------
        dict
            Keys 'top', 'left', 'bottom' and 'right' computed from the
            geotransform origin, pixel size and raster dimensions, plus
            'tif_path' holding the input path.

        Raises
        ------
        RuntimeError
            If GDAL cannot open ``tif_path``.
        """
        data = gdal.Open(tif_path)
        # gdal.Open hands back None instead of raising when GDAL exceptions
        # are not enabled; fail here with a message that names the file.
        if data is None:
            raise RuntimeError('GDAL could not open raster: {}'.format(tif_path))

        # Only the origin (indices 0, 3) and pixel size (indices 1, 5) are used;
        # the rotation terms (indices 2, 4) are ignored.
        geoTransform = data.GetGeoTransform()
        left = geoTransform[0]
        top = geoTransform[3]
        right = left + geoTransform[1] * data.RasterXSize
        bottom = top + geoTransform[5] * data.RasterYSize
        geo_tiff_bounds_dict = {'top':top,'left':left,'bottom':bottom,'right':right,'tif_path':tif_path}
        return geo_tiff_bounds_dict

In [7]:
# Sanity check: bounds of the first raster only.
# Note this name holds a single dict here; a later cell rebinds it to a list of dicts.
geo_tiff_bounds =get_bounds(geo_tiff_list[0])
geo_tiff_bounds

{'top': -34.27734375,
 'left': 116.455078125,
 'bottom': -34.365234375,
 'right': 116.54296875,
 'tif_path': '/media/nick/2TB Working 2/Projects/GeoTIFF_tiler_data/VIVID_Landgate_20190910_112102323/VIVID_Landgate_20190910_112102323103.tif'}

In [8]:
# Smoke-test the worker pool on the first three rasters, showing a progress bar.
with Pool(processes=3) as pool:
    list(tqdm(pool.imap(get_bounds, geo_tiff_list[:3]), total=3))

  0%|          | 0/3 [00:00<?, ?it/s]

In [9]:
%%time
# Serial baseline for the timing comparison: read bounds one raster at a time (results are discarded).
for i in geo_tiff_list:
    get_bounds(i)

CPU times: user 2.85 ms, sys: 0 ns, total: 2.85 ms
Wall time: 2.32 ms


In [10]:
%%time
# Extract the raster bounds with a worker pool.
# With only a few GeoTIFFs a plain loop is slightly quicker,
# but beyond roughly 100 files the pool is much faster.
with Pool() as pool:
    geo_tiff_bounds = list(tqdm(pool.imap(get_bounds, geo_tiff_list), total=len(geo_tiff_list)))

# One [top, left, bottom, right] row per raster, then as a numpy array.
pure_bounds = [
    [bounds['top'], bounds['left'], bounds['bottom'], bounds['right']]
    for bounds in geo_tiff_bounds
]
pure_bounds_np = np.array(pure_bounds)

# Overall extent covered by all rasters together.
bound_y_max = float(pure_bounds_np[:, 0].max())  # top
bound_x_min = float(pure_bounds_np[:, 1].min())  # left
bound_y_min = float(pure_bounds_np[:, 2].min())  # bottom
bound_x_max = float(pure_bounds_np[:, 3].max())  # right

  0%|          | 0/17 [00:00<?, ?it/s]

CPU times: user 27.2 ms, sys: 37.2 ms, total: 64.4 ms
Wall time: 169 ms


In [11]:
# Read the pixel size from the first raster's geotransform.
# NOTE(review): this presumes every raster shares the same pixel size - confirm.
test_raster = gdal.Open(geo_tiff_list[0])
test_raster_gt = test_raster.GetGeoTransform()
pixel_size_x, pixel_size_y = test_raster_gt[1], test_raster_gt[5]
print(pixel_size_x, pixel_size_y)

4.487879136029412e-06 -4.487879136029412e-06


In [12]:
# Offset between the origins of neighbouring tiles: (tile size - overlap) pixels
# converted to georeferenced units. y_move carries the sign of pixel_size_y.
x_move = pixel_size_x*(tile_size_px[0]-tile_oxerlap_px)
y_move = pixel_size_y*(tile_size_px[1]-tile_oxerlap_px)

# Full extent of a single tile in georeferenced units.
x_tile_size = pixel_size_x*tile_size_px[0]
y_tile_size = pixel_size_y*tile_size_px[1]
print(x_move,y_move)

0.003814697265625 -0.003814697265625


In [13]:
# Number of tile columns needed to span the x extent at a stride of x_move
# (computed up front so the tiling can use for loops instead of while loops).
number_of_cols = math.ceil(abs((bound_x_max-bound_x_min)/x_move))
number_of_cols

139

In [14]:
# Number of tile rows needed to span the y extent at a stride of y_move
# (computed up front so the tiling can use for loops instead of while loops).
number_of_rows = math.ceil(abs((bound_y_max-bound_y_min)/y_move))
number_of_rows

93

In [20]:
# Spot check: top edge recorded for the first raster.
geo_tiff_bounds[0]['top']

-34.27734375

In [47]:
# will return a list of geotiffs which intersect
def intersect_tile_with_geotiffs(tile_dict,geo_tiff_bounds):
    """Return the paths of the geotiffs whose bounds overlap a tile.

    Two rectangles overlap when their extents overlap on both axes. Edges
    that merely touch do not count as an overlap.

    Parameters
    ----------
    tile_dict : dict
        Tile bounds with keys 'top', 'left', 'bottom' and 'right'.
    geo_tiff_bounds : iterable of dict
        One dict per geotiff with keys 'top', 'left', 'bottom', 'right'
        and 'tif_path'.

    Returns
    -------
    list
        The 'tif_path' of every overlapping geotiff, in input order, with
        each entry of ``geo_tiff_bounds`` contributing at most once.

    Notes
    -----
    Assumes 'top' > 'bottom' and 'right' > 'left' for both the tile and the
    geotiffs.
    """
    intersecting_geotiffs = []

    for geo_bounds in geo_tiff_bounds:
        # Interval overlap on the y axis; also true when the tile spans the
        # whole geotiff or shares both of its edges.
        overlaps_vertically = (geo_bounds['bottom'] < tile_dict['top'] and
                               tile_dict['bottom'] < geo_bounds['top'])
        # Interval overlap on the x axis.
        overlaps_horizontally = (geo_bounds['left'] < tile_dict['right'] and
                                 tile_dict['left'] < geo_bounds['right'])
        if overlaps_vertically and overlaps_horizontally:
            # Appended once, even when both tile edges sit inside the geotiff.
            intersecting_geotiffs.append(geo_bounds['tif_path'])
    return intersecting_geotiffs

In [52]:
# will take tile bounds and only export them if they fall within a geotiff
# this is called row by row by pool below
def make_polygons(row):
    """Build the tile records for one row of the tiling grid.

    Reads the module-level grid settings (bound_y_max, bound_x_min, x_move,
    y_move, x_tile_size, y_tile_size, number_of_cols) and the module-level
    geo_tiff_bounds list.

    Parameters
    ----------
    row : int
        Zero-based row index; the row's top edge is bound_y_max + y_move*row.

    Returns
    -------
    list of dict
        One dict per tile in this row that intersects at least one geotiff,
        with keys 'geometry' (a shapely Polygon of the tile) and 'intersect'
        (the list returned by intersect_tile_with_geotiffs).
    """
    tile_polygon_list = []
    # Vertical extent is the same for every tile in the row.
    tile_top = bound_y_max + y_move*row
    tile_bottom = tile_top + y_tile_size

    for col in range(0,number_of_cols):
        tile_left = bound_x_min + col*x_move
        tile_right = tile_left + x_tile_size
        tile_dict = {'top':tile_top,'left':tile_left,'bottom':tile_bottom,'right':tile_right}
        # Keep the tile only if it touches at least one geotiff.
        intersect = intersect_tile_with_geotiffs(tile_dict,geo_tiff_bounds)
        if len(intersect) > 0:
            polygon = {'geometry':geometry.Polygon([[tile_left, tile_top], [tile_right, tile_top], [tile_right, tile_bottom], [tile_left, tile_bottom]]),
                      'intersect':intersect}
            tile_polygon_list.append(polygon)
    return tile_polygon_list


In [53]:
%%time
# Serial version for timing: one list of tile records per grid row (left unflattened).
tile_polygon_list = [make_polygons(i) for i in range(0, number_of_rows)]

CPU times: user 157 ms, sys: 4.17 ms, total: 162 ms
Wall time: 155 ms


In [76]:
%%time
# Build the tile records for every grid row in parallel.
with Pool() as pool:
    tile_polygon_list = pool.map(make_polygons, range(0,number_of_rows))

# pool.map returns one list per row; flatten into a single list of tile records.
# A plain list flatten keeps the dicts untouched and also works when there are
# no rows at all (np.concatenate raises on an empty sequence).
tile_polygon_list = [tile for row_tiles in tile_polygon_list for tile in row_tiles]

CPU times: user 69.5 ms, sys: 44.6 ms, total: 114 ms
Wall time: 157 ms


In [77]:
%%time
# Build a GeoDataFrame from the tile records and keep only the geometry column.
polygon_tiles_gpd = gpd.GeoDataFrame(
    tile_polygon_list, geometry='geometry', crs='EPSG:4326'
).drop(columns=['intersect'])

CPU times: user 18 ms, sys: 3.67 ms, total: 21.6 ms
Wall time: 19.8 ms


In [78]:
# Display the tile geometries.
polygon_tiles_gpd

Unnamed: 0,geometry
0,"POLYGON ((116.01562 -34.10156, 116.02011 -34.1..."
1,"POLYGON ((116.01944 -34.10156, 116.02393 -34.1..."
2,"POLYGON ((116.02325 -34.10156, 116.02774 -34.1..."
3,"POLYGON ((116.02707 -34.10156, 116.03156 -34.1..."
4,"POLYGON ((116.03088 -34.10156, 116.03537 -34.1..."
...,...
9311,"POLYGON ((116.52679 -34.45251, 116.53128 -34.4..."
9312,"POLYGON ((116.53061 -34.45251, 116.53510 -34.4..."
9313,"POLYGON ((116.53442 -34.45251, 116.53891 -34.4..."
9314,"POLYGON ((116.53824 -34.45251, 116.54273 -34.4..."


In [79]:
# Save the tile grid as a GeoPackage; the destination depends on the platform.
if wos:
    polygon_tiles_gpd.to_file(r'{}'.format(os.getcwd())+r'\\test_imagery\\VIVID_Landgate_20190910_112102323\\output.gpkg', driver="GPKG")
else:
    polygon_tiles_gpd.to_file("/media/nick/2TB Working 2/Projects/GeoTIFF_tiler_data/output.gpkg", driver="GPKG")



In [80]:
%%time
# Pair each geotiff path with the geometries of the tiles whose 'intersect' list names it.
geo_tiff_with_tiles = [
    [
        geo_tiff,
        [tile['geometry'] for tile in tile_polygon_list if geo_tiff in tile['intersect']],
    ]
    for geo_tiff in geo_tiff_list
]

CPU times: user 21.6 ms, sys: 154 µs, total: 21.7 ms
Wall time: 21.1 ms


In [101]:
# Clip tiles out of their source geotiff with gdal.Translate.
# NOTE(review): both `break`s stop after the first tile of the first geotiff, and the
# output name is a fixed placeholder - this looks like a work-in-progress test run.
for geotiff in geo_tiff_with_tiles:
    geotiff_open = gdal.Open(geotiff[0])
    for tile in geotiff[1]:
        # tile.bounds is indexed as [0]=left, [1]=bottom, [2]=right, [3]=top
        left, bottom, right, top = tile.bounds

        save_path = os.path.join(output_folder,'some_name.tif')
        # projWin takes the window as [left, top, right, bottom]
        tile_clip = gdal.Translate(save_path, geotiff_open, projWin = [left,top,right,bottom])
        # drop the reference to the returned dataset
        tile_clip = None
        break
    break
    