In [1]:
import os
import gc
import rasterio
import geopandas as gpd
import numpy as np
import pandas as pd
from tqdm import tqdm
from rasterstats import zonal_stats
from rasterio.io import MemoryFile
from contextlib import contextmanager

In [1]:
def reclass_reproject_raster(in_path, out_path, dst_crs):
    """Binarise band 1 of a raster on class 23 and write it reprojected.

    Band 1 of ``in_path`` is read; cells equal to 23 become 1 and every other
    cell becomes 0. The result is warped to ``dst_crs`` with nearest-neighbour
    resampling and written to band 1 of a new file at ``out_path``.

    Parameters
    ----------
    in_path : str
        Path of the raster to read.
    out_path : str
        Path of the raster to create (opened in write mode).
    dst_crs : str
        Target coordinate reference system, e.g. ``'EPSG:3035'``.

    Notes
    -----
    Apart from CRS, transform, width and height, the output metadata is
    copied unchanged from the source dataset.
    """
    # Imported here because `rasterio.warp` is a submodule that a bare
    # `import rasterio` is not guaranteed to load; relying on it being
    # present as an attribute can fail with AttributeError on a fresh kernel.
    from rasterio.warp import Resampling, calculate_default_transform, reproject

    with rasterio.open(in_path) as src:
        # Grid (transform and size) of the source extent expressed in dst_crs.
        transform, width, height = calculate_default_transform(
            src.crs, dst_crs, src.width, src.height, *src.bounds)
        kwargs = src.meta.copy()
        kwargs.update({
            'crs': dst_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        # Keep only the cells with a label of 23: they become 1, all others 0.
        # Casting back to the source dtype keeps the array consistent with the
        # dtype declared in the copied metadata.
        # NOTE(review): cells holding the source nodata value are mapped to 0
        # as well, while the nodata value in the metadata is carried over
        # unchanged -- confirm this is intended.
        band = src.read(1)
        new_src = (band == 23).astype(band.dtype)

        with rasterio.open(out_path, 'w', **kwargs) as dst:
            reproject(
                source=new_src,
                destination=rasterio.band(dst, 1),
                src_transform=src.transform,
                src_crs=src.crs,
                dst_transform=transform,
                dst_crs=dst_crs,
                # Nearest neighbour keeps the output strictly categorical.
                resampling=Resampling.nearest)


In [None]:
# Reclassify and reproject all rasters
# Every '.tif' found one level below `read_root` is binarised and reprojected
# by reclass_reproject_raster and written, under its own file name, to
# `out_folder`.
read_root = r'C:\1-Data\GHS_SMOD'
out_folder = r'C:\1-Data\GHS_SMOD_re'
dst_crs = 'EPSG:3035'

# The output directory has to exist before any raster can be written into it.
os.makedirs(out_folder, exist_ok=True)

# Sorted listings give a reproducible processing order across runs.
for folder in tqdm(sorted(os.listdir(read_root))):
    folder_path = os.path.join(read_root, folder)
    # A stray file next to the sub-folders would make os.listdir raise.
    if not os.path.isdir(folder_path):
        continue
    for pop_file in tqdm(sorted(os.listdir(folder_path)), leave=False):
        if pop_file.endswith('.tif'):
            in_path = os.path.join(folder_path, pop_file)
            # NOTE(review): outputs are keyed by file name only, so equally
            # named rasters in different sub-folders overwrite each other.
            out_path = os.path.join(out_folder, pop_file)
            reclass_reproject_raster(in_path, out_path, dst_crs)
            

In [None]:
# zonal statistics looping all years of NUTS regions
# For every NUTS shapefile, the per-region cell count of each reclassified
# raster in `out_folder` is collected and written to one CSV per NUTS file.
nuts_folder = r'C:\1-Data\NUTS'
for nuts_file in sorted(os.listdir(nuts_folder)):
    if not nuts_file.endswith('.shp'):
        continue
    nuts_path = os.path.join(nuts_folder, nuts_file)
    nuts = gpd.read_file(nuts_path)
    # The year is the second-to-last '_'-separated token of the file name.
    year = nuts_file.split('_')[-2]
    geo_source = 'NUTS' + year

    # Collect the per-raster frames and concatenate once after the loop;
    # concatenating inside the loop re-copies all earlier rows every time.
    frames = []
    for tif in tqdm(sorted(os.listdir(out_folder))):
        if not tif.endswith('.tif'):
            continue
        # Only 'count' is used below, so the other default statistics are
        # not requested.
        # NOTE(review): 'count' is the number of valid (non-nodata) cells per
        # zone, which presumably includes the 0-valued cells of the
        # reclassified raster -- confirm whether 'sum' was intended.
        zs_temp = pd.DataFrame(
            zonal_stats(nuts_path, os.path.join(out_folder, tif), stats=['count'])
        )['count']
        # Zonal results come back in feature order, so they are matched to
        # the NUTS ids by positional index.
        df_temp = pd.merge(nuts[['NUTS_ID']], zs_temp, left_index=True, right_index=True)
        df_temp.columns = ['geo', 'ObsValue']
        df_temp['Indicator'] = 'DENSE URBAN CLUSTER GRID CELL'
        df_temp['freq'] = '5 year'
        # NOTE(review): ObsTime is the NUTS year for every raster, so rows
        # coming from different rasters cannot be told apart in the output --
        # confirm whether the raster's own epoch should be used instead.
        df_temp['ObsTime'] = year
        df_temp['unit'] = 'km2'
        frames.append(df_temp)

    # Without any raster there is nothing to write; previously this case
    # failed with a TypeError when assigning a column on None.
    if not frames:
        print('No .tif rasters found in ' + out_folder + '; skipping ' + nuts_file)
        continue

    df_comb = pd.concat(frames)
    df_comb['geo_source'] = geo_source
    df_comb.set_index('geo').to_csv(r'C:\Users\DemSc\Documents\GitHub\MapIneq\src\data-wrangling\Xiang\1-case studies\GHS_SMOD\GHS_SMOD_' + geo_source + '.csv')