In [1]:
import os
import rasterio
import geopandas as gpd
import pandas as pd
from shapely.geometry import box
from multiprocessing import Pool
from tqdm import tqdm

In [2]:
# Folder that is searched recursively for rasters.
input_dir = '/home/nick/Desktop/test'
# File extensions that are treated as rasters.
file_types = ('.ers','.tif')
# The index files are written next to the input folder and are named after it.
# normpath strips any trailing separator first; without it a path such as
# '/data/test/' would give an empty file name and export into the folder itself.
export_dir, export_file_name = os.path.split(os.path.normpath(input_dir))

In [3]:
# Recursively collect the full path of every file under input_dir whose
# extension is listed in file_types. The comparison is case-insensitive so
# that files such as 'DEM.TIF' are not silently skipped.
wanted_suffixes = tuple(ext.lower() for ext in file_types)
raster_list = [
    os.path.join(root, file)
    for root, dirs, files in os.walk(input_dir)
    for file in files
    if file.lower().endswith(wanted_suffixes)
]

len(raster_list)

5

In [4]:
def index(raster):
    """Build a footprint record for a single raster file.

    Parameters
    ----------
    raster : str
        Path of the raster to open with rasterio.

    Returns
    -------
    dict or None
        A dict with the keys 'name' (file name), 'path' (the path as given),
        'crs' (the dataset CRS as a string) and 'geometry' (a box built from
        the dataset bounds). None is returned when the raster cannot be read
        or has no CRS; the failure is printed instead of raised so that one
        bad file does not abort the whole run.
    """
    try:
        # The context manager closes the dataset even if reading its metadata fails.
        with rasterio.open(raster) as dataset:
            if dataset.crs is None:
                raise ValueError('raster has no CRS')
            proj = dataset.crs.to_string()
            geom = box(*dataset.bounds)
    except Exception as exc:
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit propagate.
        print('could not open', raster, '-', exc)
        return None
    return {'name': os.path.basename(raster), 'path': raster, 'crs': proj, 'geometry': geom}

In [5]:
# Run index() over every raster in a pool of worker processes. imap yields the
# results lazily and in input order, which lets tqdm show progress as they arrive.
with Pool() as pool:
    footprint_iter = pool.imap(index, raster_list)
    polygons = list(tqdm(footprint_iter, total=len(raster_list)))

100%|██████████| 5/5 [00:00<00:00, 77.18it/s]


In [6]:
# index() returns None for rasters it could not read; drop those entries before
# building the table. Naming the columns explicitly keeps 'crs' available even
# when no raster was indexed, so the lookup below cannot raise a KeyError.
all_polygons = pd.DataFrame(
    [record for record in polygons if record is not None],
    columns=['name', 'path', 'crs', 'geometry'],
)
# Distinct CRS strings found across the rasters; one index file is written per CRS.
crs_list = all_polygons['crs'].unique()
print(crs_list)
all_polygons.head()

['EPSG:28350']


Unnamed: 0,name,path,crs,geometry
0,South West 50m DEM (another copy).tif,/home/nick/Desktop/test/South West 50m DEM (an...,EPSG:28350,"POLYGON ((1113190 6105440, 1113190 6961000, 21..."
1,South West 50m DEM (4th copy).tif,/home/nick/Desktop/test/South West 50m DEM (4t...,EPSG:28350,"POLYGON ((1113190 6105440, 1113190 6961000, 21..."
2,South West 50m DEM (copy).tif,/home/nick/Desktop/test/South West 50m DEM (co...,EPSG:28350,"POLYGON ((1113190 6105440, 1113190 6961000, 21..."
3,South West 50m DEM.tif,/home/nick/Desktop/test/South West 50m DEM.tif,EPSG:28350,"POLYGON ((1113190 6105440, 1113190 6961000, 21..."
4,South West 50m DEM (3rd copy).tif,/home/nick/Desktop/test/South West 50m DEM (3r...,EPSG:28350,"POLYGON ((1113190 6105440, 1113190 6961000, 21..."


In [7]:
# Write one GeoPackage per CRS: each GeoDataFrame is created with a single crs,
# so footprints are grouped by their CRS string before export.
for position, crs in enumerate(tqdm(crs_list)):
    one_crs_df = all_polygons[all_polygons['crs']==crs]
    one_crs_gdf = gpd.GeoDataFrame(one_crs_df,crs=crs)
    # 'EPSG:28350' -> '28350'. When the CRS string is not a simple
    # 'AUTHORITY:CODE' pair (e.g. a WKT or PROJ string), fall back to a
    # positional label instead of failing or producing an unusable file name.
    authority, sep, code = crs.partition(':')
    crs_label = code if sep and code.isalnum() else 'custom{}'.format(position)
    save_name = os.path.join(export_dir,(export_file_name+" crs "+crs_label+'.gpkg'))
    one_crs_gdf.to_file(save_name, driver ="GPKG")
    print(save_name)

100%|██████████| 1/1 [00:00<00:00, 24.25it/s]

/home/nick/Desktop/test crs 28350.gpkg



