In [1]:
import cartopy.crs as ccrs
import dask
import geopandas as gpd
import glob
import json
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import time
import xarray as xr
import yaml
import sys

In [2]:
location = {
     'Mexico City' : dict(lon=-99.0833, lat=19.4667, domain = 'CAM-22', #1
                          vmin= -5, vmax = 5, valid_t = 0.8, maxdis = 0.5, vtmin = -10, vtmax = 10),
     'Buenos Aires' : dict(lon=-58.416, lat=-34.559, domain = 'SAM-22', #2
                           vmin= -3, vmax = 3, valid_t = 0.5, maxdis = 0.5, vtmin = -10, vtmax = 10),
     'New York' : dict(lon=-74.2261, lat=40.8858, domain = 'NAM-22',#3
                       vmin= -3, vmax = 3, valid_t = 0.5, maxdis = 0.5, vtmin = -10, vtmax = 10),
     'Sydney' : dict(lon=151.01810, lat=-33.79170, domain = 'AUS-22', #4
                     vmin= -3, vmax = 3, valid_t = 0.6, maxdis = 0.5, vtmin = -10, vtmax = 10),
     'Beijing' : dict(lon=116.41, lat=39.90, domain = 'EAS-22', #5
                      vmin= -3, vmax = 3, valid_t = 0.5, maxdis = 0.5, vtmin = -10, vtmax = 10),
     'Tokyo' : dict(lon = 139.84, lat = 35.65, domain = 'EAS-22', #6
                    vmin= -3, vmax = 3, valid_t = 0.5, maxdis = 0.5, vtmin = -10, vtmax = 10),
     'Jakarta' : dict(lon = 106.81, lat = -6.2, domain = 'SEA-22', #7
                      vmin= -3, vmax = 3, valid_t = 0.35, maxdis = 1, vtmin = -10, vtmax = 10), 
     'Johannesburg' : dict(lon=28.183, lat=-25.733, domain = 'AFR-22', #8
                           vmin= -3, vmax = 3, valid_t = 0.5, maxdis = 0.5, vtmin = -10, vtmax = 10), # Pretoria center station
     'Riyadh' : dict(lon=46.73300, lat=24.7000, domain = 'WAS-22', #9
                     vmin= -3, vmax = 3, valid_t = 0.5, maxdis = 0.5, vtmin = -10, vtmax = 10), #Urban frac
     'Berlin' : dict(lon=13.4039, lat=52.4683, domain = 'EUR-11', #10
                     vmin= -3, vmax = 3, valid_t = 0.5, maxdis = 1, vtmin = -10, vtmax = 10),
     'Paris' : dict(lon=  2.35, lat=48.85, domain = 'EUR-11',  #11
                    vmin= -3, vmax = 3, valid_t = 0.5, maxdis = 2, vtmin = -10, vtmax = 10), # Problems with the city (ucdb_city)
     'London' : dict(lon= -0.13, lat=51.50, domain = 'EUR-11', #12
                     vmin= -3, vmax = 3, valid_t = 0.5, maxdis = 1, vtmin = -10, vtmax = 10),
     'Madrid' : dict(lon= -3.70, lat=40.42, domain = 'EUR-11', #13
                     vmin= -3, vmax = 3, valid_t = 0.5, maxdis = 0.5, vtmin = -10, vtmax = 10),
     'Los Angeles': dict(lon = -118.24, lat = 34.05, domain = 'NAM-22', #14
                         vmin= -3, vmax = 3, valid_t = 0.5, maxdis = 1, vtmin = -10, vtmax = 10),
     'Montreal': dict(lon = -73.56, lat = 45.50, domain = 'NAM-22', #15
                      vmin= -3, vmax = 3, valid_t = 0.5, maxdis = 0.5, vtmin = -10, vtmax = 10),
}

In [3]:
urban_thres = 0.10
orog_thres = -9999
tasmin_thres = -9999

In [4]:
rcms = {
  'AFR-22' : [
    'REMO2015',
    'RegCM4-7',
  ],
  'AUS-22' : [
    'REMO2015',
    'RegCM4-7',
  ],
  'CAM-22' : [
    'REMO2015',
    'RegCM4-7',
  ],
  'SAM-22' : [
    'REMO2015',
    'RegCM4-7',
  ],
  'WAS-22' : [
    'REMO2015',
    'RegCM4-7',
  ],
  'EUR-11': [
    'REMO2015',
    'RegCM4-6',
  ],
  'EAS-22': [
#    'RegCM4-0', # No opendap access
    'REMO2015'
  ],
  'SEA-22' : [
    'REMO2015',
    'RegCM4-7',
  ],
  'NAM-22': [
    'RegCM4_v4-4-rc8',
    'REMO2015',
  ]
}

In [5]:
def search_dic(var, dom, rcm):
  return(dict(
    project='CORDEX',
    experiment=['evaluation',],
    rcm_name = rcm,
    domain = dom,
    variable=[var,],
    time_frequency = 'day',
    facets = 'dataset_id'
  ))

In [6]:
def get_local_urls(var, vardic, ires='unused option'):
  dom = vardic['domain']
  rcm = vardic['rcm_name']
  var_ori =  vardic['variable'][0]
  if 'tasmax' in var:
      return(sorted(
        np.sort(glob.glob(f'/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/{var}/{var_ori}_{dom}_*_*-{rcm}_*'))
        ))
  elif 'tasmin' in var:
       return(sorted(
        np.sort(glob.glob(f'/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/{var}/*/*/{var_ori}_{dom}_*_*-{rcm}_*'))
        ))    

In [7]:
def dump_netcdf(var, location, opendap_urls):
  # Crop the area around the city and persist the data on a NetCDF file.
  ds = xr.open_dataset(opendap_urls[0])
  if 'time' in ds.dims: # No fixed variable   
      ds = xr.open_mfdataset(opendap_urls,
        parallel=True, chunks={'time':366},
        combine='nested', concat_dim='time', coords='minimal',
        drop_variables = ['time_bnds']
      )
  if 'x' in ds.dims and 'lon' in ds.dims:
      ds = ds.rename({'x': 'lon','y': 'lat'})# Fix during the interpolation
  if 'jx' in ds.dims and 'lon' in ds.dims:
      ds = ds.rename({'jx': 'lon','iy': 'lat'})# Fix during the interpolation

  # Fix longitudes beyond 180
  if (ds['lon'].values>180).any():
      ds['lon'][:] = np.where(ds['lon'].values > 180, ds['lon'].values-360, ds['lon'].values)
  # Crop area around the city
  if len(np.shape(ds.lon)) == 2:
      dist = (ds['lon']-location[city]['lon'])**2 + (ds['lat']-location[city]['lat'])**2
  elif len(np.shape(ds.lon)) == 1:
      xx, yy = np.meshgrid(ds['lon'], ds['lat'])
      dist = (xx-location[city]['lon'])**2 + (yy-location[city]['lat'])**2
  #[ilat], [ilon] = np.where(dist == np.min(dist))
  posi = np.where(dist == np.min(dist))
  if len(posi[0]) > 1:
      ilat, ilon =   ilat, ilon = [posi[0][0], posi[1][0]]
  else:
      [ilat], [ilon] = posi
  ds_city = ds.isel(**{
    select_name(rlat_names, ds.coords): slice(ilat-dlat,ilat+dlat),
    select_name(rlon_names, ds.coords): slice(ilon-dlon,ilon+dlon)
  }) 
  return ds_city

In [8]:
def select_name(names, avail_names):
  # Select variable/coordinate names among a list of potential names.
  # Potential names are matched against those available in the data set.
  return(list(names.intersection(list(avail_names)))[0])

In [9]:
def plot_index(ds_city_mean_R, ds_city_mean_C, ds_city_mean_N, var, dest, thres, vmin, vmax):
    
    if var in ['tasmin', 'tasmax']:
        cmap = plt.cm.RdBu_r
    elif var == 'orog':
        cmap = plt.cm.terrain
    elif var == 'sftuf':
        cmap = plt.cm.gray_r
    
    lonlat = ccrs.PlateCarree() 
    proj = lonlat                  ####
    fig, axs = plt.subplots(1, 3, figsize = (10, 20), sharex=True, sharey=True,
                            subplot_kw=dict(projection=lonlat))

    if rcm == 'REMO' and var == 'sftuf':
        var = 'urban'
    else: 
        var = var
    vmin = np.min(ds_city_mean_R[var])
    vmax = np.max(ds_city_mean_R[var])
    #Rotated
    ds_city_mean_R[var].where(ds_city_mean_R[var] > urban_thres).plot.pcolormesh(ax=axs[0], 
                    x='lon', y='lat', transform=lonlat, cmap=cmap, 
                    vmin = vmin, vmax = vmax, add_colorbar=False)
    axs[0].coastlines(resolution='10m', linewidth=1, color='gray')
    ucdb_city.plot(ax=axs[0], transform=lonlat, facecolor="none", edgecolor="red")
    (ds_city_mean_N[var]*np.nan).plot.pcolormesh(ax=axs[0], 
                    x='lon', y='lat', transform=lonlat, cmap=cmap, 
                    vmin = vmin, vmax = vmax, add_colorbar=False, edgecolors='k', linewidths=0.5)
    axs[0].set_title('Rotated')

    #Conservarive
    ds_city_mean_C[var].where(ds_city_mean_C[var] > urban_thres).plot.pcolormesh(ax=axs[1], 
                    x='lon', y='lat', transform=lonlat, cmap=cmap, 
                    vmin = vmin, vmax = vmax, add_colorbar=False)
    axs[1].coastlines(resolution='10m', linewidth=1, color='gray')
    axs[1].plot(ds_city_mean_R.lon, ds_city_mean_R.lat, color = 'lightgray', 
                marker='.', linewidth = 0, markersize=2)
    axs[1].set_title('Conservative')
    ucdb_city.plot(ax=axs[1], transform=lonlat, facecolor="none", edgecolor="red")
    #Neartes
    dd = ds_city_mean_N[var].where(ds_city_mean_N[var] > urban_thres).plot.pcolormesh(ax=axs[2], 
                    x='lon', y='lat', transform=lonlat, cmap=cmap, 
                    vmin = vmin, vmax = vmax, add_colorbar=False)
    axs[2].plot(ds_city_mean_R.lon, ds_city_mean_R.lat, color = 'lightgray', 
                marker='.', linewidth = 0, markersize=2)
    axs[2].set_title('Nearest')
    axs[2].coastlines(resolution='10m', linewidth=1, color='gray')
    ucdb_city.plot(ax=axs[2], transform=lonlat, facecolor="none", edgecolor="red")
    
    fig.subplots_adjust(right=0.8)
    cbar_ax = fig.add_axes([0.82, 0.445, 0.02, 0.10])
    fig.colorbar(dd, cax=cbar_ax, label = var)
    
    plt.savefig(dest + city + '_' + rcm + '.pdf', bbox_inches='tight')
    plt.close()

In [10]:
root = '/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/nextcloud/'
ucdb_info = gpd.read_file(root  + 'CORDEX-CORE-WG/GHS_FUA_UCD/GHS_STAT_UCDB2015MT_GLOBE_R2019A_V1_2.gpkg')

## tasmin and tasmax

In [21]:
root = '/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/nextcloud/CORDEX-CORE-WG/'
dest = '/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/'
vars = ['tasmax', 'tasmin']

for var in vars:
    dest = '/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/'
    dest = dest + var + '/'

    os.makedirs(dest,  exist_ok=True)

    for city in location.keys():
        domain = location[city]['domain']
        dlon = dlat = int(10 / (int(location[city]['domain'].split('-')[1])/11))
        ucdb_city = ucdb_info.query(f'UC_NM_MN =="{city}"').to_crs(crs = 'EPSG:4326')
        if city == 'London':
            ucdb_city = ucdb_city[ucdb_city['CTR_MN_NM'] == 'United Kingdom']
        for rcm in rcms[domain]:
    
            print(dest + city + '_' + rcm + '.png')
    
            # Rotated
            if not os.path.isfile(dest + city + '_' + rcm + '_rotated'  + '.nc'):
                files = get_local_urls(var, search_dic(var, domain, rcm))
                rlat_names = {'rlat', 'y'}; rlon_names = {'rlon', 'x'}
                ds_city = dump_netcdf(var, location, files)
                ds_city[var] = ds_city[var]-273.15
                ds_city_mean_R = ds_city.sel(time = slice('1980-01-01', '2000-01-01')).mean('time').compute()
                ds_city_mean_R.to_netcdf(dest + city + '_' + rcm + '_rotated'  + '.nc')
            else:
                ds_city_mean_R = xr.open_dataset(dest + city + '_' + rcm + '_rotated'  + '.nc')
            
            # Conservative
            if not os.path.isfile(dest + city + '_' + rcm + '_conservative'  + '.nc'):
                files = get_local_urls(var + '_C', search_dic(var, domain, rcm))
                rlat_names = {'lat'}; rlon_names = {'lon'}
                ds_city = dump_netcdf(var, location, files)
                ds_city[var] = ds_city[var]-273.15
                ds_city_mean_C = ds_city.sel(time = slice('1980-01-01', '2000-01-01')).mean('time').compute()
                ds_city_mean_C.to_netcdf(dest + city + '_' + rcm + '_conservative'  + '.nc')
            else:
                ds_city_mean_C = xr.open_dataset(dest + city + '_' + rcm + '_conservative'  + '.nc')
    
            # Nearest
            if not os.path.isfile(dest + city + '_' + rcm + '_nearest'  + '.nc'):
                files = get_local_urls(var + '_N', search_dic(var, domain, rcm))
                rlat_names = {'lat'}; rlon_names = {'lon'}
                ds_city = dump_netcdf(var, location, files)
                ds_city[var] = ds_city[var]-273.15
                ds_city_mean_N = ds_city.sel(time = slice('1980-01-01', '2000-01-01')).mean('time').compute()
                ds_city_mean_N.to_netcdf(dest + city + '_' + rcm + '_nearest'  + '.nc')
            else:
                ds_city_mean_N = xr.open_dataset(dest + city + '_' + rcm + '_nearest'  + '.nc')
    
            plot_index(ds_city_mean_R, ds_city_mean_C, ds_city_mean_N, var, dest, tasmin_thres, None, None)

/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/tasmax/Mexico City_REMO2015.png
/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/tasmax/Mexico City_RegCM4-7.png
/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/tasmax/Buenos Aires_REMO2015.png
/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/tasmax/Buenos Aires_RegCM4-7.png
/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/tasmax/New York_RegCM4_v4-4-rc8.png
/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/tasmax/New York_REMO2015.png
/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/tasmax/Sydney_REMO2015.png
/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/tasmax/Sydney_RegCM4-7.png
/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/tasmax/Beijing_REMO2015.png
/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/tasmax/Tokyo_REMO2015.p

In [28]:
for f in files:
    ds = xr.open_dataset(f)
    if not 'height' in ds.variables:
        print(f)
        sys.exit()

/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/tasmin_C/download/v20230811/tasmin_NAM-22_ECMWF-ERAINT_evaluation_r1i1p1_ISU-RegCM4_v4-4-rc8_day_20080101-20081231.nc


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


'/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/tasmin_C/download/v20230811/tasmin_NAM-22_ECMWF-ERAINT_evaluation_r1i1p1_ISU-RegCM4_v4-4-rc8_day_20080101-20081231.nc'

## Orography

In [22]:
root = '/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/nextcloud/CORDEX-CORE-WG/'
dest = '/home/javi/Research/cordex-fps-urb-rcc/git/CORDEX-CORE-WG/pictures/interp/orog/'
dest = '/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/orog/'
var = 'orog'

In [23]:
for city in location.keys():
    domain = location[city]['domain']
    dlon = dlat = int(10 / (int(location[city]['domain'].split('-')[1])/11))
    ucdb_city = ucdb_info.query(f'UC_NM_MN =="{city}"').to_crs(crs = 'EPSG:4326')
    if city == 'London':
        ucdb_city = ucdb_city[ucdb_city['CTR_MN_NM'] == 'United Kingdom']

    os.makedirs(dest,  exist_ok=True)

    for rcm in ['REMO', 'RegCM']:
        # Rotated
        files = glob.glob(f'{root}{rcm}/orography/orog_{domain}*')
        rlat_names = {'rlat', 'y'}; rlon_names = {'rlon', 'x'}
        ds_city_R = dump_netcdf(var, location, files)
        if 'time' in ds_city_R.dims:
            ds_city_mean_R = ds_city_R.mean('time').compute()
        else:
            ds_city_mean_R = ds_city_R.copy()

        # Conservative
        files = glob.glob(f'{root}{rcm}/orography_C/orog_{domain}*')
        rlat_names = {'lat'}; rlon_names = {'lon'}
        ds_city_C = dump_netcdf(var, location, files)
        if 'time' in ds_city_R.dims:
            ds_city_mean_C = ds_city_C.mean('time').compute()
        else:
            ds_city_mean_C = ds_city_C.copy()

        # Nearest 
        files = glob.glob(f'{root}{rcm}/orography_N/orog_{domain}*')
        rlat_names = {'lat'}; rlon_names = {'lon'}
        ds_city_N = dump_netcdf(var, location, files)
        if 'time' in ds_city_R.dims:
            ds_city_mean_N = ds_city_N.mean('time').compute()
        else:
            ds_city_mean_N = ds_city_N.copy()

        plot_index(ds_city_mean_R, ds_city_mean_C, ds_city_mean_N, var, dest, orog_thres, 0, 3000)

## Urban Fraction

In [11]:
root = '/lustre/gmeteo/WORK/DATA/CORDEX-FPS-URB-RCC/nextcloud/CORDEX-CORE-WG/'
dest = '/home/javi/Research/cordex-fps-urb-rcc/git/CORDEX-CORE-WG/pictures/interp/sftuf/'
dest = '/lustre/gmeteo/WORK/diezsj/research/cordex-fps-urb-rcc/results/interp/sftuf/'

var = {'REMO': 'urban',
       'RegCM': 'sftuf'}

In [12]:
for city in location.keys():
    domain = location[city]['domain']
    dlon = dlat = int(10 / (int(location[city]['domain'].split('-')[1])/11))
    ucdb_city = ucdb_info.query(f'UC_NM_MN =="{city}"').to_crs(crs = 'EPSG:4326')
    if city == 'London':
        ucdb_city = ucdb_city[ucdb_city['CTR_MN_NM'] == 'United Kingdom']

    os.makedirs(dest,  exist_ok=True)
    
    for rcm in ['REMO', 'RegCM']:
        
        print(city)
        # Rotated
        if rcm == 'REMO':
            files = glob.glob(f'{root}{rcm}/urbanfraction/orig_v3/{domain}*')
        else:
            files = glob.glob(f'{root}{rcm}/urbanfraction/{domain}*')
        rlat_names = {'rlat', 'y', 'iy'}; rlon_names = {'rlon', 'x', 'jx'}
        if 'RegCM' in files[0] and 'NAM-22' in files[0]:
            continue
        ds_city_R = dump_netcdf(var[rcm], location, files)
        if 'time' in ds_city_R.dims:
            ds_city_mean_R = ds_city_R.mean('time').compute()
        else:
            ds_city_mean_R = ds_city_R.copy()

        # Conservative
        if rcm == 'REMO':
            files = glob.glob(f'{root}{rcm}/urbanfraction_C/orig_v3/{domain}*')
        else:
            files = glob.glob(f'{root}{rcm}/urbanfraction_C/{domain}*')
        rlat_names = {'lat'}; rlon_names = {'lon'}
        ds_city_C = dump_netcdf(var[rcm], location, files)
        if 'time' in ds_city_C.dims:
            ds_city_mean_C = ds_city_C.mean('time').compute()
        else:
            ds_city_mean_C = ds_city_C.copy()

        # Nearest 
        if rcm == 'REMO':
            files = glob.glob(f'{root}{rcm}/urbanfraction_N/orig_v3/{domain}*')
        else:
            files = glob.glob(f'{root}{rcm}/urbanfraction_N/{domain}*')
        rlat_names = {'lat'}; rlon_names = {'lon'}
        ds_city_N = dump_netcdf(var[rcm], location, files)
        if 'time' in ds_city_R.dims:
            ds_city_mean_N = ds_city_N.mean('time').compute()
        else:
            ds_city_mean_N = ds_city_N.copy()

        plot_index(ds_city_mean_R, ds_city_mean_C, ds_city_mean_N, 'sftuf', dest, urban_thres, 0, 0.5)

Mexico City
Mexico City
Buenos Aires
Buenos Aires
New York
New York
Sydney
Sydney
Beijing
Beijing
Tokyo
Tokyo
Jakarta
Jakarta
Johannesburg
Johannesburg
Riyadh
Riyadh
Berlin
Berlin
Paris
Paris
London
London
Madrid
Madrid
Los Angeles
Los Angeles
Montreal
Montreal
