In [1]:
import os
import gc
import rasterio
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm import tqdm
from rasterstats import zonal_stats

In [35]:
def reclass_reproject_raster(in_path, out_path, dst_crs, settlement_class=23):
    """Write a reprojected binary mask of a single settlement class.

    Band 1 of ``in_path`` is reclassified so that cells equal to
    ``settlement_class`` become 1 and every other cell becomes 0. The mask is
    then reprojected to ``dst_crs`` with nearest-neighbour resampling and
    written as band 1 of ``out_path``.

    Parameters
    ----------
    in_path : str
        Path of the raster to read.
    out_path : str
        Path of the raster to create (opened in ``'w'`` mode, so an existing
        file at that path is replaced).
    dst_crs : str
        Target coordinate reference system, e.g. ``'EPSG:3035'``.
    settlement_class : int, optional
        Cell value to keep as 1 in the output mask. Defaults to 23.
    """
    # Import the warp submodule explicitly: a bare `import rasterio` does not
    # by itself guarantee that `rasterio.warp` is loaded, so attribute access
    # only works if some other package happened to import it first.
    from rasterio.warp import Resampling, calculate_default_transform, reproject

    with rasterio.open(in_path) as src:
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)

        # Start from the source metadata and override only the georeferencing.
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': dst_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        # Keep only the cells labelled settlement_class: 1 where the label
        # matches, 0 everywhere else. The source dtype is retained.
        band = src.read(1)
        new_src = (band == settlement_class).astype(band.dtype)

        with rasterio.open(out_path, 'w', **kwargs) as dst:
            reproject(
                source=new_src,
                destination=rasterio.band(dst, 1),
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=dst_crs,
                # Nearest neighbour keeps the mask strictly 0/1.
                resampling=Resampling.nearest)


In [17]:
# Processing GHS-SMOD for NUTS regions:
# for every settlement class, sum the class mask rasters over each NUTS region
# and write one CSV per (NUTS shapefile, settlement class).
settlement_dict = {30: 'URBAN CENTRE GRID CELL',
                   23: 'DENSE URBAN CLUSTER GRID CELL',
                   22: 'SEMI-DENSE URBAN CLUSTER GRID CELL',
                   21: 'SUBURBAN OR PERI-URBAN GRID CELL',
                   13: 'RURAL CLUSTER GRID CELL',
                   12: 'LOW DENSITY RURAL GRID CELL',
                   11: 'VERY LOW DENSITY RURAL GRID CELL',
                   10: 'WATER GRID CELL'}

# Paths and settings shared by every settlement class (hoisted out of the loop).
read_root = r'C:\1-Data\GHS_SMOD\raw data'
reclass_root = r'C:\1-Data\GHS_SMOD\reclassify_nuts'
nuts_folder = r'C:\1-Data\NUTS'
result_folder = r'C:\2-Case studies\GHS_SMOD'
dst_crs = 'EPSG:3035'

# Set to True to (re)build the per-class mask rasters before the zonal
# statistics. Off by default, matching the previously disabled block.
# NOTE(review): the disabled block called reclass_reproject_raster without
# settlement_class, so any rasters it produced encode the default class (23)
# in every class folder -- regenerate them if that block was ever used.
run_reclassify = False

os.makedirs(result_folder, exist_ok=True)

for settlement_class, indicator in tqdm(settlement_dict.items()):
    out_folder = os.path.join(reclass_root, str(settlement_class))
    os.makedirs(out_folder, exist_ok=True)

    if run_reclassify:
        # Reproject and reclass only the files not processed before, to avoid
        # repeating work when the script is interrupted and rerun.
        for folder in tqdm(os.listdir(read_root)):
            folder_path = os.path.join(read_root, folder)
            if not os.path.isdir(folder_path):
                continue
            for pop_file in os.listdir(folder_path):
                if not pop_file.endswith('.tif'):
                    continue
                out_path = os.path.join(out_folder, pop_file)
                # NOTE(review): a file left half-written by an interrupted run
                # would also be skipped here; delete it manually to redo it.
                if os.path.exists(out_path):
                    continue
                reclass_reproject_raster(os.path.join(folder_path, pop_file), out_path, dst_crs,
                                         settlement_class=settlement_class)

    # Zonal statistics looping all years of NUTS regions
    for nuts_file in tqdm(os.listdir(nuts_folder)):
        if not nuts_file.endswith('.shp'):
            continue
        nuts_path = os.path.join(nuts_folder, nuts_file)
        nuts = gpd.read_file(nuts_path)
        # The second-to-last '_'-separated token of the file name is used as the NUTS year.
        nuts_year = nuts_file.split('_')[-2]

        # Collect one frame per raster and concatenate once at the end.
        frames = []
        for tif in os.listdir(out_folder):
            if not tif.endswith('.tif'):
                continue
            zs_temp = pd.DataFrame(zonal_stats(nuts_path, os.path.join(out_folder, tif), stats="sum"))
            # zonal_stats rows are joined to the NUTS features by row position.
            df_temp = pd.merge(nuts[['NUTS_ID']], zs_temp, left_index=True, right_index=True)
            df_temp.columns = ['geo', 'ObsValue']
            df_temp['Indicator'] = indicator
            df_temp['freq'] = '5 year'
            # Third '_'-separated token of the raster name, minus its first character.
            df_temp['ObsTime'] = tif.split('_')[2][1:]
            # ObsValue is a sum of raster cell values; 'km2' presumes ~1 km2 cells -- TODO confirm.
            df_temp['unit'] = 'km2'
            frames.append(df_temp)

        if not frames:
            # No rasters for this class: nothing to write (previously this
            # crashed when assigning a column on None).
            tqdm.write('No .tif files in ' + out_folder + '; skipping ' + nuts_file)
            continue

        df_comb = pd.concat(frames)
        geo_source = 'NUTS' + nuts_year
        df_comb['geo_source'] = geo_source
        df_comb.set_index('geo').to_csv(
            os.path.join(result_folder, 'GHS_SMOD_' + geo_source + '_' + indicator + '.csv'))


  0%|                                                                                            | 0/8 [00:00<?, ?it/s]
  0%|                                                                                           | 0/31 [00:00<?, ?it/s][A
 16%|█████████████▍                                                                     | 5/31 [07:00<36:25, 84.04s/it][A
 32%|██████████████████████████▏                                                      | 10/31 [17:56<39:07, 111.80s/it][A
 48%|███████████████████████████████████████▏                                         | 15/31 [25:50<27:44, 104.05s/it][A
 65%|████████████████████████████████████████████████████▉                             | 20/31 [32:31<17:21, 94.66s/it][A
 81%|██████████████████████████████████████████████████████████████████▏               | 25/31 [40:37<09:33, 95.55s/it][A
100%|██████████████████████████████████████████████████████████████████████████████████| 31/31 [49:01<00:00, 94.89s/it][A
 12%|██████████    

KeyboardInterrupt: 