In [1]:
import os
import gc
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm import tqdm
from osgeo import gdal
from rasterstats import zonal_stats

In [2]:
# --- Configuration -----------------------------------------------------------
res = 500  # output pixel size handed to gdal.Warp as xRes/yRes (units of the target CRS)
GHS = 'GHS_BUILT_V'
indicator_list = {'Total':'Cubic metres of total built-up volume',
                  'NRES':'Cubic metres of NRES built-up volume allocated to dominant non-residential'}

data_root = r'C:\1-Data'
nuts_folder = os.path.join(data_root, 'NUTS')
table_folder = r'C:\2-Case studies\GHS'
# The per-layer CSVs are written here; create it up front so to_csv cannot fail
# on a missing directory after a long warp.
os.makedirs(table_folder, exist_ok=True)

# --- Load every NUTS layer once ----------------------------------------------
# The set of shapefiles does not depend on year or indicator, so list and read
# them a single time instead of once per (year, indicator) combination.
# Only the NUTS_ID column is kept; geometries are re-read by zonal_stats from the path.
nuts_layers = []
for nuts_file in sorted(f for f in os.listdir(nuts_folder) if f.endswith('.shp')):
    nuts_path = os.path.join(nuts_folder, nuts_file)
    # The second-to-last '_' separated token of the file name is used as the NUTS version tag.
    geo_source = 'NUTS' + nuts_file.split('_')[-2]
    nuts_ids = gpd.read_file(nuts_path)[['NUTS_ID']]
    nuts_layers.append((nuts_path, geo_source, nuts_ids))

# --- Main loop: mosaic/resample each raster set, then zonal statistics --------
for year in tqdm(range(1975, 2035, 5), desc='year'):
    for Character in tqdm(['Total','NRES'], desc='indicator'):
        read_folder = os.path.join(data_root, GHS, Character, str(year))
        raster_list = [os.path.join(read_folder, i) for i in os.listdir(read_folder) if i.endswith('.tif')]
        if not raster_list:
            raise FileNotFoundError('No .tif tiles found in ' + read_folder)

        # Set save path for the merged raster
        save_folder = os.path.join(data_root, 'GHS_BUILT_eu', GHS, Character)
        os.makedirs(save_folder, exist_ok=True)
        # NOTE(review): the file prefix is hard-coded as 'GHS_BUILT_C_' although GHS is
        # 'GHS_BUILT_V' -- kept as-is; confirm whether it should follow GHS.
        save_path = os.path.join(save_folder, 'GHS_BUILT_C_' + str(year) + '_' + Character + '_' + str(res) + '.tif')

        # Set resampling method to average, because this is a dataset with continuous values.
        # GeoTIFF creation options must go through `creationOptions`; the `options`
        # keyword is interpreted as gdalwarp command-line tokens, so passing
        # 'COMPRESS=LZW' there does not compress the output.
        warped = gdal.Warp(save_path, raster_list, format='GTiff', dstSRS='EPSG:3035', xRes=res, yRes=res,
                           outputType=gdal.GDT_Float32, resampleAlg='average',
                           creationOptions=['COMPRESS=LZW'])
        if warped is None:
            raise RuntimeError('gdal.Warp failed for ' + save_path)
        # Drop the dataset handle so the file is flushed and closed before it is read back.
        del warped

        # Zonal statistics looping all years of NUTS regions
        # NOTE(review): 'average' resampling plus stats="mean" produce per-cell means,
        # while Indicator/unit describe cubic metres of volume -- confirm a mean
        # (rather than a sum) is the intended observation value.
        for nuts_path, geo_source, nuts_ids in tqdm(nuts_layers, desc='NUTS layer'):
            # Conduct zonal statistics and add other needed information.
            # Rows are joined to NUTS_ID by position (feature order in the shapefile).
            zs_temp = pd.DataFrame(zonal_stats(nuts_path, save_path, stats="mean"))
            df_temp = pd.merge(nuts_ids, zs_temp, left_index=True, right_index=True)
            df_temp.columns = ['geo','ObsValue']
            df_temp['geo_source'] = geo_source
            df_temp['Indicator'] = indicator_list[Character]
            df_temp['freq'] = '5 years'
            df_temp['ObsTime'] = str(year)
            df_temp['unit'] = 'm3'
            # Save the table
            table_name = GHS + '_' + Character + '_' + str(year) + '_' + geo_source + '.csv'
            df_temp.set_index('geo').to_csv(os.path.join(table_folder, table_name))

  0%|                                                                                           | 0/12 [00:00<?, ?it/s]
  0%|                                                                                            | 0/2 [00:00<?, ?it/s][A

  0%|                                                                                            | 0/6 [00:00<?, ?it/s][A[A

 17%|██████████████                                                                      | 1/6 [00:49<04:06, 49.40s/it][A[A

 33%|████████████████████████████                                                        | 2/6 [02:05<04:20, 65.15s/it][A[A

 50%|██████████████████████████████████████████                                          | 3/6 [02:54<02:53, 57.91s/it][A[A

 67%|████████████████████████████████████████████████████████                            | 4/6 [03:40<01:46, 53.26s/it][A[A

 83%|██████████████████████████████████████████████████████████████████████              | 5/6 [04:31<00:52, 52.18s/it][

In [33]:
# Merge the data: combine the per-year / per-indicator tables into one CSV per NUTS version.
# Relies on `GHS` being defined by the configuration cell above.
read_folder = r'C:\2-Case studies\GHS'
save_folder = r'C:\2-Case studies'

# List the folder once; sorting makes the row order of the merged file reproducible
# (os.listdir returns entries in arbitrary order).
table_files = sorted(os.listdir(read_folder))

for nuts in tqdm(['NUTS2003', 'NUTS2006', 'NUTS2010', 'NUTS2013', 'NUTS2016', 'NUTS2021']):
    suffix = '_' + nuts + '.csv'
    # Only pick up tables of the current GHS product: the merged file is named after
    # GHS, so tables of other products stored in the same folder must not be mixed in.
    matching = [f for f in table_files if f.startswith(GHS + '_') and f.endswith(suffix)]
    if not matching:
        # Previously this case crashed with AttributeError on None.to_csv.
        tqdm.write('No tables found for ' + nuts + ' in ' + read_folder + '; skipping.')
        continue
    # Read everything first and concatenate once instead of growing a frame in a loop.
    df_comb = pd.concat([pd.read_csv(os.path.join(read_folder, f)) for f in matching],
                        ignore_index=True)
    # index=False: the positional index carries no information; 'geo' is already a column.
    df_comb.to_csv(os.path.join(save_folder, GHS + '_' + nuts + '.csv'), index=False)
        

100%|████████████████████████████████████████████████████████████████████████████████████| 6/6 [00:04<00:00,  1.47it/s]
