In [1]:
import os
import gc
import rasterio
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm import tqdm
from osgeo import gdal
from rasterstats import zonal_stats

In [2]:
# Target pixel size of the merged rasters, in the units of EPSG:3035 (metres).
# Later cells reuse `res` for file names and for the pixel-area conversion.
res = 200
for Character in tqdm(['MSZ','FUN']):
    read_folder = os.path.join(r'D:\0-data\GHS_BUILT_C', Character)
    # Collect every GeoTIFF tile of this layer; sorted so the mosaic input
    # order does not depend on the arbitrary order returned by os.listdir.
    raster_list = sorted(
        os.path.join(read_folder, tile_name)
        for tile_name in os.listdir(read_folder)
        if tile_name.endswith('.tif')
    )
    if not raster_list:
        raise FileNotFoundError('No .tif tiles found in ' + read_folder)

    # Merge the raster tiles into one mosaic resampled to `res`.
    save_folder = os.path.join(r'D:\0-data\GHS_BUILT_eu\GHS_BUILT_C', Character)
    os.makedirs(save_folder, exist_ok=True)
    save_path = os.path.join(save_folder, 'GHS_BUILT_C_2018_' + Character + '_' + str(res) + '.tif')
    # GeoTIFF creation options must be passed through `creationOptions`;
    # the `options` keyword is reserved for raw gdalwarp command-line
    # arguments, so 'COMPRESS=LZW' given there is not applied as a creation option.
    merged = gdal.Warp(save_path, raster_list, format='GTiff', dstSRS='EPSG:3035',
                       xRes=res, yRes=res, outputType=gdal.GDT_Byte,
                       creationOptions=['COMPRESS=LZW'])
    if merged is None:
        # Without gdal.UseExceptions() a failed warp only returns None.
        raise RuntimeError('gdal.Warp failed for ' + save_path)
    merged = None  # drop the dataset handle so GDAL flushes and closes the file

100%|████████████████████████████████████████████████████████████████████████████████████| 2/2 [03:15<00:00, 97.86s/it]


In [3]:
def reclass_reproject_raster(in_path, out_path, dst_crs, settlement_class):
    """Write a 0/1 mask of one class value of a raster, warped to ``dst_crs``.

    Band 1 of ``in_path`` is read; pixels equal to ``settlement_class`` become
    1 and all other pixels become 0. The mask is then reprojected with
    nearest-neighbour resampling and written as band 1 of ``out_path``.

    Parameters
    ----------
    in_path : str
        Path of the raster to read (only band 1 is used).
    out_path : str
        Path of the raster to create; an existing file is overwritten.
    dst_crs : str
        Target coordinate reference system, e.g. ``'EPSG:3035'``.
    settlement_class : int
        Pixel value to keep (written as 1 in the output).

    Returns
    -------
    None
        The result is written to ``out_path``.
    """
    # Import the submodule explicitly: a plain ``import rasterio`` only exposes
    # ``rasterio.warp`` if some other library happened to import it already.
    from rasterio.warp import Resampling, calculate_default_transform, reproject

    with rasterio.open(in_path) as src:
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': dst_crs,
            'transform': transform,
            'width': width,
            'height': height,
            'count': 1,  # only band 1 is written below
        })

        # Keep only the pixels labelled settlement_class: 1 where the label
        # matches, 0 elsewhere, in the same dtype as the source band.
        a = src.read(1)
        new_src = (a == settlement_class).astype(a.dtype)

        with rasterio.open(out_path, 'w', **kwargs) as dst:
            reproject(
                source=new_src,
                destination=rasterio.band(dst, 1),
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=dst_crs,
                resampling=Resampling.nearest)

In [14]:
# Processing GHS-C-MSZ for NUTs
# Maps each MSZ class value in the raster to the indicator label written to the CSV.
settlement_dict = {1:'MSZ, open spaces, low vegetation surfaces NDVI <= 0.3',
                   2:'MSZ, open spaces, medium vegetation surfaces 0.3 < NDVI <=0.5',
                   3:'MSZ, open spaces, high vegetation surfaces NDVI > 0.5',
                   4:'MSZ, open spaces, water surfaces LAND < 0.5',
                   5:'MSZ, open spaces, road surfaces',
                   11:'MSZ, built spaces, residential, building height <= 3m',
                   12:'MSZ, built spaces, residential, 3m < building height <= 6m',
                   13:'MSZ, built spaces, residential, 6m < building height <= 15m',
                   14:'MSZ, built spaces, residential, 15m < building height <= 30m',
                   15:'MSZ, built spaces, residential, building height > 30m',
                   21:'MSZ, built spaces, non-residential, building height <= 3m',
                   22:'MSZ, built spaces, non-residential, 3m < building height <= 6m',
                   23:'MSZ, built spaces, non-residential, 6m < building height <= 15m',
                   24:'MSZ, built spaces, non-residential, 15m < building height <= 30m',
                   25:'MSZ, built spaces, non-residential, building height > 30m'}
Character = 'MSZ'

# Values that do not change between classes are computed once.
dst_crs = 'EPSG:3035'
read_root = os.path.join(r'D:\0-data\GHS_BUILT_eu\GHS_BUILT_C', Character)
file = 'GHS_BUILT_C_2018_' + Character + '_' + str(res) + '.tif'
in_path = os.path.join(read_root, file)
nuts_folder = r'C:\1-Data\NUTS'
csv_folder = r'D:\1-Case studies\GHS'
# Each mask pixel is 1, so a zonal "sum" is a pixel count; multiplying by the
# pixel area converts it to km2 (assumes `res` is in metres - TODO confirm).
pixel_area_km2 = (res / 1000) ** 2

os.makedirs(csv_folder, exist_ok=True)

for settlement_class, indicator in tqdm(settlement_dict.items()):
    out_folder = os.path.join(read_root, str(settlement_class))
    os.makedirs(out_folder, exist_ok=True)
    out_path = os.path.join(out_folder, file)
    reclass_reproject_raster(in_path, out_path, dst_crs, settlement_class)

    # Zonal statistics looping all years of NUTS regions
    for nuts_file in tqdm(os.listdir(nuts_folder)):
        if not nuts_file.endswith('.shp'):
            continue
        nuts_path = os.path.join(nuts_folder, nuts_file)
        nuts = gpd.read_file(nuts_path)
        # Presumably the second-to-last '_' token of the file name is the
        # NUTS version year - verify against the files in nuts_folder.
        nuts_year = nuts_file.split('_')[-2]
        geo_source = 'NUTS' + nuts_year

        # NOTE(review): the shapefile is passed to zonal_stats as-is; this
        # assumes it is already in the raster CRS (dst_crs) - confirm.
        frames = []
        for tif in os.listdir(out_folder):
            if not tif.endswith('.tif'):
                continue
            zs_temp = pd.DataFrame(zonal_stats(nuts_path, os.path.join(out_folder, tif), stats="sum"))
            # Rows are matched by position: zonal_stats returns one entry per feature.
            df_temp = pd.merge(nuts[['NUTS_ID']], zs_temp, left_index=True, right_index=True)
            df_temp.columns = ['geo','ObsValue']
            # Coerce first so zones with no raster coverage (sum is None)
            # become NaN instead of breaking the multiplication.
            df_temp['ObsValue'] = pd.to_numeric(df_temp['ObsValue'], errors='coerce') * pixel_area_km2
            df_temp['Indicator'] = indicator
            df_temp['freq'] = 'NA'
            df_temp['ObsTime'] = '2018'
            df_temp['unit'] = 'km2'
            frames.append(df_temp)

        if not frames:
            # No raster found for this class: nothing to write for this shapefile.
            continue
        df_comb = pd.concat(frames)
        df_comb['geo_source'] = geo_source
        # File name follows the same 'GHS_C_<layer>_<geo_source>_<class>.csv'
        # pattern as the FUN cell (the '_' after the layer name was missing).
        csv_name = 'GHS_C_' + Character + '_' + geo_source + '_' + str(settlement_class) + '.csv'
        df_comb.set_index('geo').to_csv(os.path.join(csv_folder, csv_name))

  0%|                                                                                           | 0/15 [00:00<?, ?it/s]
  0%|                                                                                           | 0/31 [00:00<?, ?it/s][A
 16%|█████████████▍                                                                     | 5/31 [02:07<11:02, 25.49s/it][A
 32%|██████████████████████████▍                                                       | 10/31 [05:00<10:47, 30.82s/it][A
 48%|███████████████████████████████████████▋                                          | 15/31 [07:08<07:35, 28.50s/it][A
 65%|████████████████████████████████████████████████████▉                             | 20/31 [09:03<04:49, 26.29s/it][A
 81%|██████████████████████████████████████████████████████████████████▏               | 25/31 [11:09<02:35, 25.93s/it][A
100%|██████████████████████████████████████████████████████████████████████████████████| 31/31 [13:21<00:00, 25.87s/it][A
  7%|█████▎        

In [15]:
# Processing GHS-C-FUN for NUTs
# Maps each FUN class value in the raster to the indicator label written to the CSV.
settlement_dict = {0:'non-built-up',
                   1:'built-up residential (RES)',
                   2:'built-up non-residential (NRES)'}
Character = 'FUN'

# Values that do not change between classes are computed once.
dst_crs = 'EPSG:3035'
read_root = os.path.join(r'D:\0-data\GHS_BUILT_eu\GHS_BUILT_C', Character)
file = 'GHS_BUILT_C_2018_' + Character + '_' + str(res) + '.tif'
in_path = os.path.join(read_root, file)
nuts_folder = r'C:\1-Data\NUTS'
csv_folder = r'D:\1-Case studies\GHS'
# Each mask pixel is 1, so a zonal "sum" is a pixel count; multiplying by the
# pixel area converts it to km2 (assumes `res` is in metres - TODO confirm).
pixel_area_km2 = (res / 1000) ** 2

os.makedirs(csv_folder, exist_ok=True)

for settlement_class, indicator in tqdm(settlement_dict.items()):
    out_folder = os.path.join(read_root, str(settlement_class))
    os.makedirs(out_folder, exist_ok=True)
    out_path = os.path.join(out_folder, file)
    reclass_reproject_raster(in_path, out_path, dst_crs, settlement_class)

    # Zonal statistics looping all years of NUTS regions
    for nuts_file in tqdm(os.listdir(nuts_folder)):
        if not nuts_file.endswith('.shp'):
            continue
        nuts_path = os.path.join(nuts_folder, nuts_file)
        nuts = gpd.read_file(nuts_path)
        # Presumably the second-to-last '_' token of the file name is the
        # NUTS version year - verify against the files in nuts_folder.
        nuts_year = nuts_file.split('_')[-2]
        geo_source = 'NUTS' + nuts_year

        # NOTE(review): the shapefile is passed to zonal_stats as-is; this
        # assumes it is already in the raster CRS (dst_crs) - confirm.
        frames = []
        for tif in os.listdir(out_folder):
            if not tif.endswith('.tif'):
                continue
            zs_temp = pd.DataFrame(zonal_stats(nuts_path, os.path.join(out_folder, tif), stats="sum"))
            # Rows are matched by position: zonal_stats returns one entry per feature.
            df_temp = pd.merge(nuts[['NUTS_ID']], zs_temp, left_index=True, right_index=True)
            df_temp.columns = ['geo','ObsValue']
            # Coerce first so zones with no raster coverage (sum is None)
            # become NaN instead of breaking the multiplication.
            df_temp['ObsValue'] = pd.to_numeric(df_temp['ObsValue'], errors='coerce') * pixel_area_km2
            df_temp['Indicator'] = indicator
            df_temp['freq'] = 'NA'
            df_temp['ObsTime'] = '2018'
            df_temp['unit'] = 'km2'
            frames.append(df_temp)

        if not frames:
            # No raster found for this class: nothing to write for this shapefile.
            continue
        df_comb = pd.concat(frames)
        df_comb['geo_source'] = geo_source
        csv_name = 'GHS_C_' + Character + '_' + geo_source + '_' + str(settlement_class) + '.csv'
        df_comb.set_index('geo').to_csv(os.path.join(csv_folder, csv_name))

  0%|                                                                                            | 0/3 [00:00<?, ?it/s]
  0%|                                                                                           | 0/31 [00:00<?, ?it/s][A
 16%|█████████████▍                                                                     | 5/31 [02:15<11:42, 27.03s/it][A
 32%|██████████████████████████▍                                                       | 10/31 [05:09<11:05, 31.69s/it][A
 48%|███████████████████████████████████████▋                                          | 15/31 [07:33<08:05, 30.33s/it][A
 65%|████████████████████████████████████████████████████▉                             | 20/31 [09:44<05:15, 28.69s/it][A
 81%|██████████████████████████████████████████████████████████████████▏               | 25/31 [12:07<02:51, 28.63s/it][A
100%|██████████████████████████████████████████████████████████████████████████████████| 31/31 [14:32<00:00, 28.14s/it][A
 33%|██████████████