In [1]:
# import needed libraries
import os
# `__file__` is not defined inside a notebook kernel, so the literal name
# '__file__' is resolved against the current working directory instead;
# stepping up two levels from that placeholder path gives the parent of the
# working directory, which serves as the project root.
BASE_DIR = os.path.abspath(os.path.join('__file__', os.pardir, os.pardir))

from pathlib import Path
import glob

import xarray as xr
import rioxarray as rxr
import geopandas as gpd

In [2]:
# Folder under BASE_DIR that is searched for the input NetCDF (*.nc) files.
nc_folder = f'{BASE_DIR}/geos'
nc_folder

'/Users/biplovbhandari/UAH/GRA_Work/AQ_Downscaling/geos'

In [3]:
# Root folder for the exported rasters; create it (and any missing parent
# folders) if it does not exist yet, and leave it alone if it does.
output_path = f'{BASE_DIR}/forecast_data_thailand'
os.makedirs(output_path, exist_ok=True)
output_path

'/Users/biplovbhandari/UAH/GRA_Work/AQ_Downscaling/forecast_data_thailand'

In [4]:
# Every NetCDF file in the input folder, in lexicographic filename order.
nc_files = sorted(glob.glob(f'{nc_folder}/*.nc'))
len(nc_files)

551

In [18]:
# Extent of the clipping bounding box, listed as MINX, MAXX, MINY, MAXY:
# 97.031250000,105.806600545,5.366368082,20.625000000

# Load the bounding-box shapefile and print it with the geometry as WKT text.
bbox_shapefile = f'{BASE_DIR}/BBOX/ThailandBBOX_final_upscaled.shp'
bbox = gpd.read_file(bbox_shapefile)
print(bbox.to_wkt())

       MINX      MINY        MAXX    MAXY        CNTX       CNTY        AREA  \
0  97.03125  5.366368  105.806601  20.625  101.418925  12.995684  133.899844   

       PERIM     HEIGHT     WIDTH  \
0  48.067965  15.258632  8.775351   

                                            geometry  
0  POLYGON ((97.03125 5.366368081767842, 97.03125...  


In [6]:
# For every NetCDF file, export the first 8 PM2.5 forecast time steps as
# GeoTIFFs clipped to the bounding box. Rasters are grouped in one folder per
# date: <output_path>/<YYYY-MM-DD>/<YYYY-MM-DD>T<HH-MM-SS>.tif
completed = 0

for j, nc_file in enumerate(nc_files):
    completed += 1
    if j % 10 == 0:
        print(f'Completed {completed} out of {len(nc_files)}: {round(completed / len(nc_files) * 100, 2)}%')

    try:
        nc = xr.open_dataset(nc_file)
    except Exception:
        # Skip files that cannot be opened. Without this `continue` the code
        # below would silently reuse the dataset left over from the previous
        # iteration (re-exporting stale data), or raise NameError if the very
        # first file fails. Catching `Exception` rather than using a bare
        # `except` keeps Ctrl-C / kernel interrupts working.
        print(f'No data found in the file: {nc_file}')
        continue

    # Use the dataset as a context manager so it is closed as soon as its
    # rasters are written instead of staying open for the rest of the run.
    with nc:
        pm25 = nc.PM25
        # The forecasts cover the next 3 days at 3-hour intervals (i.e. 24 / 3 * 3 = 24 time steps);
        # only the same day is extracted (i.e. the first 8 time steps).
        for i in range(8):
            hour_i = pm25.isel(time=i)
            # Timestamp used as the file name; 'T' separates the date and time parts.
            str_rep = str(hour_i.time.dt.strftime('%Y-%m-%dT%H-%M-%S').values)
            # The first 10 characters are the date (YYYY-MM-DD), used as the folder name.
            date_str = str_rep[:10]
            Path(f'{output_path}/{date_str}').mkdir(parents=True, exist_ok=True)
            # rioxarray expects the spatial dimensions to be named x / y.
            hour_i = hour_i.rename({'lat': 'y', 'lon': 'x'})
            hour_i = hour_i.rio.write_crs('epsg:4326')
            clipped = hour_i.rio.clip(bbox.geometry, bbox.crs, drop=True, invert=False)
            clipped.rio.to_raster(f'{output_path}/{date_str}/{str_rep}.tif')


Completed 1 out of 551: 0.18%
Completed 11 out of 551: 2.0%
Completed 21 out of 551: 3.81%
Completed 31 out of 551: 5.63%
Completed 41 out of 551: 7.44%
Completed 51 out of 551: 9.26%
Completed 61 out of 551: 11.07%
Completed 71 out of 551: 12.89%
Completed 81 out of 551: 14.7%
Completed 91 out of 551: 16.52%
Completed 101 out of 551: 18.33%
Completed 111 out of 551: 20.15%
Completed 121 out of 551: 21.96%
Completed 131 out of 551: 23.77%
Completed 141 out of 551: 25.59%
Completed 151 out of 551: 27.4%
Completed 161 out of 551: 29.22%
Completed 171 out of 551: 31.03%
Completed 181 out of 551: 32.85%
Completed 191 out of 551: 34.66%
Completed 201 out of 551: 36.48%
Completed 211 out of 551: 38.29%
No data found in the file: /Users/biplovbhandari/UAH/GRA_Work/AQ_Downscaling/geos/20200716.nc
Completed 221 out of 551: 40.11%
Completed 231 out of 551: 41.92%
Completed 241 out of 551: 43.74%
Completed 251 out of 551: 45.55%
Completed 261 out of 551: 47.37%
No data found in the file: /Users/b