In [3]:
import os
import numpy as np
import pandas as pd
import netCDF4 as nc
import geopandas as gpd
from tqdm import tqdm
import rasterio
from osgeo import gdal, osr
from rasterstats import zonal_stats

# 1. Convert NC to raster

In [2]:
def nc_to_geotiff(read_path, save_path):
    nc_file = nc.Dataset(read_path)
    lat = nc_file.variables['latitude'][:]
    lon = nc_file.variables['longitude'][:]
    PM25 = np.asanyarray(nc_file.variables['PM25'])

    # get the spatial range of the netcdf
    Lonmin, Latmax, Lonmax, Latmin = [lon.min(), lat.max(), lon.max(), lat.min()]

    # calculate the resolution
    Num_lat = len(lat)
    Num_lon = len(lon)
    Lat_res = (Latmax - Latmin) / (float(Num_lat) - 1)
    Lon_res = (Lonmax - Lonmin) / (float(Num_lon) - 1)

    # create the tif file and save it into the virtual file system in memory
    driver = gdal.GetDriverByName('GTiff')
    out_tif = driver.Create('/vsimem/PM25.tif', Num_lon, Num_lat, 1, gdal.GDT_Float32)

    # set the spatial range of the tif file
    geotransform = (Lonmin, Lon_res, 0.0, Latmax, 0.0, -Lat_res)
    out_tif.SetGeoTransform(geotransform)

    # set the projection system
    prj = osr.SpatialReference()
    prj.ImportFromEPSG(4326)
    out_tif.SetProjection(prj.ExportToWkt())

    # check is the data are flipped and correct the data if yes
    if lat[0] <= lat[-1]: 
        PM25 = PM25[::-1]
    else:
        pass

    # write data into tif and close the file
    out_tif.GetRasterBand(1).WriteArray(PM25)
    # transform the projection to 3035 and save
    gdal.Warp(save_path, out_tif, srcSRS='EPSG:4326', dstSRS='EPSG:3035')
    out_tif.FlushCache() 
    out_tif = None

In [3]:
read_folder = '/data/xiang/1-Data/PM2.5/netcdf'
save_folder = '/data/xiang/1-Data/PM2.5/raster'

if __name__ == '__main__':
    for netc in tqdm(os.listdir(read_folder)):
        if netc.endswith('.nc'):
            read_path = read_folder + r'/' + netc
            save_path = save_folder + r'/' + netc[:-2] + 'tif'
            nc_to_geotiff(read_path, save_path)

  0%|                                                    | 0/25 [00:00<?, ?it/s]ERROR 1: Too many points (529 out of 529) failed to transform, unable to compute output bounds.
ERROR 1: Too many points (529 out of 529) failed to transform, unable to compute output bounds.
ERROR 1: Too many points (529 out of 529) failed to transform, unable to compute output bounds.
ERROR 1: Too many points (529 out of 529) failed to transform, unable to compute output bounds.
ERROR 1: Too many points (529 out of 529) failed to transform, unable to compute output bounds.
ERROR 1: Too many points (529 out of 529) failed to transform, unable to compute output bounds.
ERROR 1: Too many points (529 out of 529) failed to transform, unable to compute output bounds.
ERROR 1: Too many points (529 out of 529) failed to transform, unable to compute output bounds.
ERROR 1: Too many points (529 out of 529) failed to transform, unable to compute output bounds.
ERROR 1: Too many points (529 out of 529) failed to tran

# 2. Zonal statistics

In [None]:
nuts_folder = '/data/xiang/1-Data/NUTS/nuts version'
read_folder = '/data/xiang/1-Data/PM2.5/raster'
df_comb = None
for nuts_file in os.listdir(nuts_folder):
    if nuts_file.endswith('.shp'):
        nuts_file = nuts_folder + r'/' + nuts_file
        nuts = gpd.read_file(nuts_file)
        for PM_file in tqdm(os.listdir(read_folder)):
            read_file = read_folder + r'/' + PM_file
            year = PM_file.split('.')[-3][:4]
            # link the zonal statistics table to the boundary index
            zs_temp = pd.DataFrame(zonal_stats(nuts_file, read_file))['mean']
            df_temp = pd.merge(nuts[['NUTS_ID']], zs_temp, left_index=True, right_index=True)
            df_temp.columns = ['geo','ObsValue']
            df_temp['Indicator'] = 'PM2.5'
            df_temp['freq'] = '1 year'
            df_temp['ObsTime'] = year
            df_temp['unit'] = 'ug/m3'
            df_comb = df_temp if df_comb is None else pd.concat([df_comb, df_temp])
            # save the table
            geo_source = 'NUTS' + nuts_file.split('_')[-2]
            df_comb['geo_source'] = geo_source
            df_comb.set_index('geo').to_csv('/data/xiang/3-case studies/2-PM2.5/PM25_' + geo_source + '.csv')

100%|███████████████████████████████████████████| 25/25 [13:41<00:00, 32.87s/it]
 68%|█████████████████████████████▏             | 17/25 [13:40<05:38, 42.33s/it]