In [4]:
### automatically reload edited modules before running each cell

%load_ext autoreload
%autoreload 2

### solve the auto-complete issue

%config Completer.use_jedi = False

%matplotlib inline

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter(action='ignore', category=FutureWarning)

### lvl 2 setups (system)

import os
import numpy as np
import pandas as pd
import xarray as xr

import matplotlib as mpl
import cartopy.crs as ccrs
import cartopy.feature as cfeature

import cartopy.feature as cfeature
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import warnings
warnings.filterwarnings('ignore')
from pylab import *
from matplotlib.colors import ListedColormap,LinearSegmentedColormap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib.patches import Wedge, Circle
import geopandas as gpd
from shapely.geometry import Point
# keep this after `from pylab import *` so the class is not replaced by a star-imported name
from datetime import datetime
import glob
# used by the processing loops for progress timestamps; imported explicitly
# rather than relying on the pylab star import to provide it
import time

## PREC dataset output

In [2]:
def generate_the_99_quantile(prec_filtered,latt,lonn):
    """Wrap the per-grid-cell 0.99 quantile of a stacked field in a Dataset.

    Parameters
    ----------
    prec_filtered : array-like
        Stack whose first axis is reduced away; NaN entries are skipped by
        the quantile.  The reduced result must be 2-D, labelled (lat, lon).
    latt, lonn : array-like
        1-D coordinate values attached to the 'lat' and 'lon' dimensions.

    Returns
    -------
    xarray.Dataset
        Single variable 'p' on dims ('lat', 'lon').
    """
    quantile_map = np.nanquantile(prec_filtered, 0.99, axis=0)

    grid_coords = {
        'lon': (['lon'], lonn),
        'lat': (['lat'], latt),
    }
    variables = {'p': (['lat', 'lon'], quantile_map)}

    return xr.Dataset(variables, coords=grid_coords)


In [3]:
def create_max_datasets(arr_ep, latt, lonn, start_year=1980, n_years=43,
                        days_per_season=92, hours_per_day=24):
    """Summarise an hourly stack per season: peak value, peak hour, valid-day counts.

    The flat time axis of ``arr_ep`` is folded into (year, day, hour) before
    reducing, so the input must contain whole seasons of whole days.

    Parameters
    ----------
    arr_ep : numpy.ndarray
        3-D array ordered (time, lat, lon).  The time axis must hold exactly
        ``n_years * days_per_season * hours_per_day`` steps.  NaN marks
        entries to be ignored.
    latt, lonn : array-like
        1-D coordinate values for the 'lat' and 'lon' dimensions.
    start_year : int, optional
        Label of the first season (default 1980).
    n_years : int, optional
        Number of seasons in ``arr_ep`` (default 43, i.e. 1980-2022).
    days_per_season : int, optional
        Days per season (default 92).
    hours_per_day : int, optional
        Time steps per day (default 24).

    Returns
    -------
    ds_maxp : xarray.Dataset
        'p' on ('time', lat, lon): largest value of each season; the 'time'
        coordinate carries the year labels.
    ds_maxh : xarray.Dataset
        'h' on ('time', lat, lon): hour index at which the seasonal maximum
        occurs, NaN where the whole season is NaN.
    ds_hour_count : xarray.Dataset
        'c' on ('year', 'hour', lat, lon): number of days in the season with
        a non-NaN value at that hour (stored as float).

    Raises
    ------
    ValueError
        If the time axis length does not match the requested layout.
    """
    expected_steps = n_years * days_per_season * hours_per_day
    if arr_ep.shape[0] != expected_steps:
        raise ValueError(
            'time axis has %d steps, expected %d (= %d years x %d days x %d hours)'
            % (arr_ep.shape[0], expected_steps, n_years, days_per_season, hours_per_day)
        )

    years = np.arange(start_year, start_year + n_years)
    arr_freq = arr_ep.reshape(n_years, days_per_season, hours_per_day,
                              arr_ep.shape[1], arr_ep.shape[2])

    # Largest value of each season, over every day and hour.
    arr_maxp = np.nanmax(arr_freq, axis=(1, 2))

    # Largest value per hour-of-day, then the hour where that is greatest.
    # NaN hours become -1 so they can never win the argmax; this assumes valid
    # values are greater than -1 (presumably precipitation amounts - confirm).
    arr_maxh = np.nanmax(arr_freq, axis=1)
    arr_maxh = np.nan_to_num(arr_maxh, nan=-1)
    arr_maxh_indices = np.nanargmax(arr_maxh, axis=1).astype(float)
    # A season with no valid value at all has no meaningful peak hour.
    arr_maxh_indices[np.isnan(arr_maxp)] = np.nan

    ds_maxp = xr.Dataset(
        {'p': (['time', 'lat', 'lon'], arr_maxp)},
        coords={
            'lon': (['lon'], lonn),
            'lat': (['lat'], latt),
            'time': ('time', years)
        }
    )

    ds_maxh = xr.Dataset(
        {'h': (['time', 'lat', 'lon'], arr_maxh_indices)},
        coords={
            'time': (['time'], years),
            'lat': (['lat'], latt),
            'lon': (['lon'], lonn)
        }
    )

    # Count valid days directly instead of mean * number_of_days, which ties
    # the result to a fixed season length and is exposed to float round-off.
    arr_h = np.count_nonzero(~np.isnan(arr_freq), axis=1).astype(float)

    ds_hour_count = xr.Dataset(
        {'c': (['year', 'hour', 'lat', 'lon'], arr_h)},
        coords={
            'year': (['year'], years),
            'hour': (['hour'], np.arange(0, hours_per_day)),
            'lat': (['lat'], latt),
            'lon': (['lon'], lonn)
        }
    )

    return ds_maxp, ds_maxh, ds_hour_count

In [4]:
def create_season_mean_dataset(arr_ep, latt, lonn, start_year=1980, n_years=43,
                               days_per_season=92, hours_per_day=24):
    """Average an hourly stack over the days of each season, per hour of day.

    Parameters
    ----------
    arr_ep : numpy.ndarray
        3-D array ordered (time, lat, lon).  The time axis must hold exactly
        ``n_years * days_per_season * hours_per_day`` steps.  NaN entries are
        ignored by the mean.
    latt, lonn : array-like
        1-D coordinate values for the 'lat' and 'lon' dimensions.
    start_year : int, optional
        Label of the first season (default 1980).
    n_years : int, optional
        Number of seasons in ``arr_ep`` (default 43, i.e. 1980-2022).
    days_per_season : int, optional
        Days per season (default 92).
    hours_per_day : int, optional
        Time steps per day (default 24).

    Returns
    -------
    xarray.Dataset
        'p' on ('year', 'hour', lat, lon): NaN-aware mean over the days of
        each season for every hour of day.

    Raises
    ------
    ValueError
        If the time axis length does not match the requested layout.
    """
    expected_steps = n_years * days_per_season * hours_per_day
    if arr_ep.shape[0] != expected_steps:
        raise ValueError(
            'time axis has %d steps, expected %d (= %d years x %d days x %d hours)'
            % (arr_ep.shape[0], expected_steps, n_years, days_per_season, hours_per_day)
        )

    arr_freq = arr_ep.reshape(n_years, days_per_season, hours_per_day,
                              arr_ep.shape[1], arr_ep.shape[2])
    # Collapse the day axis only, keeping the hour-of-day resolution.
    arr_season = np.nanmean(arr_freq, axis=1)

    ds_season_mean = xr.Dataset(
        {'p': (['year', 'hour', 'lat', 'lon'], arr_season)},
        coords={
            'year': (['year'], np.arange(start_year, start_year + n_years)),
            'hour': (['hour'], np.arange(0, hours_per_day)),
            'lat': (['lat'], latt),
            'lon': (['lon'], lonn)
        }
    )

    return ds_season_mean

In [79]:
# Per-folder JJA precipitation processing: load the hourly files, keep wet
# hours, derive the 99th-percentile threshold and the extreme-hour stack, and
# write the summary datasets produced by the helper functions defined above.
base_path = '/N/project/Zli_lab/gongg/CONUS404_data/LST/UTC/'
file_pattern = 'PREC_ACC_NC.wrf2d_d01_????-??-??.nc'
output_folder = '/N/project/Zli_lab/gongg/CONUS404_data/LST/JJA/'

folder_names = [
  
    'U-80', 'U-81', 'U-82', 'U-83', 'U-84', 'U-85', 'U-86', 
    'U-87', 'U-88'
]


for folder in folder_names:
    # progress timestamp, one line per folder
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
    full_path = os.path.join(base_path, folder, file_pattern)
    all_files = sorted(glob.glob(full_path))
    # June-September files are opened; the month filter below keeps JJA only.
    # NOTE(review): September is presumably needed because of the time shift
    # applied to these files - confirm.
    summer_files = [f for f in all_files
                    if '-06-' in f or '-07-' in f or '-08-' in f or '-09-' in f]
    if not summer_files:
        raise FileNotFoundError('no summer files matched ' + full_path)

    # Close the multi-file dataset as soon as the arrays are in memory so file
    # handles do not accumulate across folders.
    with xr.open_mfdataset(summer_files) as ds_summer:
        ds_jja = ds_summer.sel(time=ds_summer['time'].dt.month.isin([6, 7, 8]))
        lonn = ds_jja.lon.values
        latt = ds_jja.lat.values
        prec = ds_jja.p.values

    # Wet hours only: anything below 0.1 is treated as missing.
    prec_filtered = np.where(prec >= 0.1, prec, np.nan)

    # Per-grid-cell 99th quantile of the wet hours.  It is taken from the full
    # wet-hour sample (not from the already-thresholded extremes) so that the
    # saved 'prec_the_*' file holds the same threshold used to build arr_ep.
    ds_the = generate_the_99_quantile(prec_filtered, latt, lonn)
    arr_the = ds_the.p.values

    # Extreme hours: wet hours at or above the local threshold.
    arr_ep = np.where(prec_filtered >= arr_the, prec_filtered, np.nan)

    ds_maxp, ds_maxh, ds_hour_count = create_max_datasets(arr_ep, latt, lonn)
    ds_season_mean = create_season_mean_dataset(arr_ep, latt, lonn)

    ds_the.to_netcdf(output_folder+'prec_the_'+folder+'.nc')
    ds_maxp.to_netcdf(output_folder+'prec_maxp_'+folder+'.nc')
    ds_maxh.to_netcdf(output_folder+'prec_maxh_'+folder+'.nc')
    ds_hour_count.to_netcdf(output_folder+'hour_count_'+folder+'.nc')
    ds_season_mean.to_netcdf(output_folder+'season_mean_'+folder+'.nc')

2024-10-07 10:11:52
2024-10-07 10:19:08
2024-10-07 10:25:55
2024-10-07 10:32:10
2024-10-07 10:38:26
2024-10-07 10:44:29
2024-10-07 10:50:45
2024-10-07 10:56:36
2024-10-07 11:03:07


## nighttime Temp dataset output

In [7]:
def create_temp_dataset(arr_t, latt, lonn,
                        hours=(0, 1, 2, 3, 4, 5, 18, 19, 20, 21, 22, 23),
                        start_year=1980, n_years=43,
                        days_per_season=92, hours_per_day=24):
    """Seasonal means of the daily minimum and daily mean within selected hours.

    The default ``hours`` window (0-5 and 18-23 of the same calendar day) is
    the nighttime selection used in this section of the notebook.

    Parameters
    ----------
    arr_t : numpy.ndarray
        3-D array ordered (time, lat, lon).  The time axis must hold exactly
        ``n_years * days_per_season * hours_per_day`` steps.
    latt, lonn : array-like
        1-D coordinate values for the 'lat' and 'lon' dimensions.
    hours : sequence of int, optional
        Hour-of-day indices entering the daily statistics.
    start_year : int, optional
        Label of the first season (default 1980).
    n_years : int, optional
        Number of seasons in ``arr_t`` (default 43, i.e. 1980-2022).
    days_per_season : int, optional
        Days per season (default 92).
    hours_per_day : int, optional
        Time steps per day (default 24).

    Returns
    -------
    ds_tmin, ds_tmean : xarray.Dataset
        Each holds 't' on ('year', lat, lon): the season average of the daily
        minimum / daily mean over the selected hours (NaN-aware).

    Raises
    ------
    ValueError
        If ``hours`` is empty or the time axis length does not match the
        requested layout.
    """
    hour_index = list(hours)
    if not hour_index:
        raise ValueError('hours must contain at least one hour index')

    expected_steps = n_years * days_per_season * hours_per_day
    if arr_t.shape[0] != expected_steps:
        raise ValueError(
            'time axis has %d steps, expected %d (= %d years x %d days x %d hours)'
            % (arr_t.shape[0], expected_steps, n_years, days_per_season, hours_per_day)
        )

    arr_ntt = arr_t.reshape(n_years, days_per_season, hours_per_day,
                            arr_t.shape[1], arr_t.shape[2])
    # Keep only the requested hours of every day (axis 2 is hour-of-day).
    selected_data = np.take(arr_ntt, hour_index, axis=2)

    # Reduce within each day first (axis 2), then average the days (axis 1).
    arr_tmin = np.nanmean(np.nanmin(selected_data, axis=2), axis=1)
    arr_tmean = np.nanmean(np.nanmean(selected_data, axis=2), axis=1)

    years = np.arange(start_year, start_year + n_years)

    ds_tmin = xr.Dataset(
        {'t': (['year',  'lat', 'lon'], arr_tmin)},
        coords={
            'year': (['year'], years),
            'lat': (['lat'], latt),
            'lon': (['lon'], lonn)
        }
    )

    ds_tmean = xr.Dataset(
        {'t': (['year',  'lat', 'lon'], arr_tmean)},
        coords={
            'year': (['year'], years),
            'lat': (['lat'], latt),
            'lon': (['lon'], lonn)
        }
    )

    return ds_tmin,ds_tmean



def create_dtemp_dataset(arr_t, latt, lonn,
                         hours=(0, 1, 2, 3, 4, 5, 18, 19, 20, 21, 22, 23),
                         start_year=1980, n_years=43,
                         days_per_season=92, hours_per_day=24):
    """Seasonal means of the daily minimum and daily mean, stored as 'dt'.

    Same reduction as the 't' variant in this section, but the output
    variable is named 'dt'.  In this notebook it is called with the ``td2``
    field (presumably dew-point temperature - confirm).  The default
    ``hours`` window (0-5 and 18-23 of the same calendar day) is the
    nighttime selection used in this section.

    Parameters
    ----------
    arr_t : numpy.ndarray
        3-D array ordered (time, lat, lon).  The time axis must hold exactly
        ``n_years * days_per_season * hours_per_day`` steps.
    latt, lonn : array-like
        1-D coordinate values for the 'lat' and 'lon' dimensions.
    hours : sequence of int, optional
        Hour-of-day indices entering the daily statistics.
    start_year : int, optional
        Label of the first season (default 1980).
    n_years : int, optional
        Number of seasons in ``arr_t`` (default 43, i.e. 1980-2022).
    days_per_season : int, optional
        Days per season (default 92).
    hours_per_day : int, optional
        Time steps per day (default 24).

    Returns
    -------
    ds_dtmin, ds_dtmean : xarray.Dataset
        Each holds 'dt' on ('year', lat, lon): the season average of the
        daily minimum / daily mean over the selected hours (NaN-aware).

    Raises
    ------
    ValueError
        If ``hours`` is empty or the time axis length does not match the
        requested layout.
    """
    hour_index = list(hours)
    if not hour_index:
        raise ValueError('hours must contain at least one hour index')

    expected_steps = n_years * days_per_season * hours_per_day
    if arr_t.shape[0] != expected_steps:
        raise ValueError(
            'time axis has %d steps, expected %d (= %d years x %d days x %d hours)'
            % (arr_t.shape[0], expected_steps, n_years, days_per_season, hours_per_day)
        )

    arr_ntt = arr_t.reshape(n_years, days_per_season, hours_per_day,
                            arr_t.shape[1], arr_t.shape[2])
    # Keep only the requested hours of every day (axis 2 is hour-of-day).
    selected_data = np.take(arr_ntt, hour_index, axis=2)

    # Reduce within each day first (axis 2), then average the days (axis 1).
    arr_tmin = np.nanmean(np.nanmin(selected_data, axis=2), axis=1)
    arr_tmean = np.nanmean(np.nanmean(selected_data, axis=2), axis=1)

    years = np.arange(start_year, start_year + n_years)

    ds_dtmin = xr.Dataset(
        {'dt': (['year',  'lat', 'lon'], arr_tmin)},
        coords={
            'year': (['year'], years),
            'lat': (['lat'], latt),
            'lon': (['lon'], lonn)
        }
    )

    ds_dtmean = xr.Dataset(
        {'dt': (['year',  'lat', 'lon'], arr_tmean)},
        coords={
            'year': (['year'], years),
            'lat': (['lat'], latt),
            'lon': (['lon'], lonn)
        }
    )

    return ds_dtmin,ds_dtmean

In [8]:
# Per-folder nighttime T2 processing: load the hourly JJA field and write the
# seasonal min/mean datasets.  This relies on the nighttime definition of
# create_temp_dataset from this section; the same name is defined again in the
# daytime section, so this cell must run before that redefinition.
base_path = '/N/project/Zli_lab/gongg/CONUS404_data/LST/UTC/'
file_pattern = 'T2.wrf2d_d01_????-??-??.nc'
output_folder = '/N/project/Zli_lab/gongg/CONUS404_data/LST/JJA/'

folder_names = [
    'U-50', 'U-51', 'U-52', 'U-53', 'U-54', 'U-55', 'U-56', 'U-57', 'U-58',

]


for folder in folder_names:
    # progress timestamp, one line per folder
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
    full_path = os.path.join(base_path, folder, file_pattern)
    all_files = sorted(glob.glob(full_path))
    # June-September files are opened; the month filter below keeps JJA only.
    summer_files = [f for f in all_files
                    if '-06-' in f or '-07-' in f or '-08-' in f or '-09-' in f]
    if not summer_files:
        raise FileNotFoundError('no summer files matched ' + full_path)

    # Close the multi-file dataset as soon as the arrays are in memory so file
    # handles do not accumulate across folders.
    with xr.open_mfdataset(summer_files) as ds_summer:
        ds_jja = ds_summer.sel(time=ds_summer['time'].dt.month.isin([6, 7, 8]))
        lonn = ds_jja.lon.values
        latt = ds_jja.lat.values
        arr_t = ds_jja.t2.values

    ds_tmin,ds_tmean = create_temp_dataset(arr_t, latt, lonn)

    ds_tmin.to_netcdf(output_folder+'temp_min_'+folder+'.nc')
    ds_tmean.to_netcdf(output_folder+'temp_mean_'+folder+'.nc')

2024-11-03 21:14:04
2024-11-03 21:18:56
2024-11-03 21:24:07
2024-11-03 21:29:20
2024-11-03 21:34:57
2024-11-03 21:39:56
2024-11-03 21:45:14
2024-11-03 21:50:38
2024-11-03 21:56:19


In [13]:
# Per-folder nighttime TD2 processing: load the hourly JJA field and write the
# seasonal min/mean datasets.  This relies on the nighttime definition of
# create_dtemp_dataset from this section; the same name is defined again in
# the daytime section, so this cell must run before that redefinition.
base_path = '/N/project/Zli_lab/gongg/CONUS404_data/LST/UTC/'
file_pattern = 'TD2.wrf2d_d01_????-??-??.nc'
output_folder = '/N/project/Zli_lab/gongg/CONUS404_data/LST/JJA/'

folder_names = [
    'U-50', 'U-51', 'U-52', 'U-53', 'U-54', 'U-55', 'U-56', 'U-57', 'U-58',
    'U-60', 'U-61', 'U-62', 'U-63', 'U-64', 'U-65', 'U-66', 'U-67', 'U-68',
    'U-70', 'U-71', 'U-72', 'U-73', 'U-74', 'U-75', 'U-76', 'U-77', 'U-78',
    'U-80', 'U-81', 'U-82', 'U-83', 'U-84', 'U-85', 'U-86', 'U-87', 'U-88',
]


for folder in folder_names:
    # progress timestamp, one line per folder
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
    full_path = os.path.join(base_path, folder, file_pattern)
    all_files = sorted(glob.glob(full_path))
    # June-September files are opened; the month filter below keeps JJA only.
    summer_files = [f for f in all_files
                    if '-06-' in f or '-07-' in f or '-08-' in f or '-09-' in f]
    if not summer_files:
        raise FileNotFoundError('no summer files matched ' + full_path)

    # Close the multi-file dataset as soon as the arrays are in memory so file
    # handles do not accumulate across the 36 folders.
    with xr.open_mfdataset(summer_files) as ds_summer:
        ds_jja = ds_summer.sel(time=ds_summer['time'].dt.month.isin([6, 7, 8]))
        lonn = ds_jja.lon.values
        latt = ds_jja.lat.values
        arr_t = ds_jja.td2.values

    ds_dtmin,ds_dtmean = create_dtemp_dataset(arr_t, latt, lonn)

    ds_dtmin.to_netcdf(output_folder+'dtemp_min_'+folder+'.nc')
    ds_dtmean.to_netcdf(output_folder+'dtemp_mean_'+folder+'.nc')

2024-11-03 15:47:42
2024-11-03 15:53:06
2024-11-03 16:00:17
2024-11-03 16:06:08
2024-11-03 16:11:56
2024-11-03 16:17:39
2024-11-03 16:24:13
2024-11-03 16:30:08
2024-11-03 16:36:07
2024-11-03 16:41:49
2024-11-03 16:47:39
2024-11-03 16:53:20
2024-11-03 16:59:03
2024-11-03 17:04:42
2024-11-03 17:10:27
2024-11-03 17:16:02
2024-11-03 17:21:45
2024-11-03 17:27:36
2024-11-03 17:33:21
2024-11-03 17:38:53
2024-11-03 17:44:17
2024-11-03 17:49:34
2024-11-03 17:55:01
2024-11-03 18:00:22
2024-11-03 18:05:38
2024-11-03 18:10:50
2024-11-03 18:16:02
2024-11-03 18:21:19
2024-11-03 18:26:27
2024-11-03 18:31:22
2024-11-03 18:36:26
2024-11-03 18:41:27
2024-11-03 18:46:11
2024-11-03 18:51:06
2024-11-03 18:55:49
2024-11-03 19:00:38


## daytime temp

In [3]:
def create_temp_dataset(arr_t, latt, lonn,
                        hours=(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17),
                        start_year=1980, n_years=43,
                        days_per_season=92, hours_per_day=24):
    """Seasonal means of the daily minimum and daily mean within selected hours.

    The default ``hours`` window (6-17) is the daytime selection used in this
    section of the notebook.  Note that this definition replaces any earlier
    function of the same name in the running kernel.

    Parameters
    ----------
    arr_t : numpy.ndarray
        3-D array ordered (time, lat, lon).  The time axis must hold exactly
        ``n_years * days_per_season * hours_per_day`` steps.
    latt, lonn : array-like
        1-D coordinate values for the 'lat' and 'lon' dimensions.
    hours : sequence of int, optional
        Hour-of-day indices entering the daily statistics.
    start_year : int, optional
        Label of the first season (default 1980).
    n_years : int, optional
        Number of seasons in ``arr_t`` (default 43, i.e. 1980-2022).
    days_per_season : int, optional
        Days per season (default 92).
    hours_per_day : int, optional
        Time steps per day (default 24).

    Returns
    -------
    ds_tmin, ds_tmean : xarray.Dataset
        Each holds 't' on ('year', lat, lon): the season average of the daily
        minimum / daily mean over the selected hours (NaN-aware).

    Raises
    ------
    ValueError
        If ``hours`` is empty or the time axis length does not match the
        requested layout.
    """
    hour_index = list(hours)
    if not hour_index:
        raise ValueError('hours must contain at least one hour index')

    expected_steps = n_years * days_per_season * hours_per_day
    if arr_t.shape[0] != expected_steps:
        raise ValueError(
            'time axis has %d steps, expected %d (= %d years x %d days x %d hours)'
            % (arr_t.shape[0], expected_steps, n_years, days_per_season, hours_per_day)
        )

    arr_ntt = arr_t.reshape(n_years, days_per_season, hours_per_day,
                            arr_t.shape[1], arr_t.shape[2])
    # Keep only the requested hours of every day (axis 2 is hour-of-day).
    selected_data = np.take(arr_ntt, hour_index, axis=2)

    # Reduce within each day first (axis 2), then average the days (axis 1).
    arr_tmin = np.nanmean(np.nanmin(selected_data, axis=2), axis=1)
    arr_tmean = np.nanmean(np.nanmean(selected_data, axis=2), axis=1)

    years = np.arange(start_year, start_year + n_years)

    ds_tmin = xr.Dataset(
        {'t': (['year',  'lat', 'lon'], arr_tmin)},
        coords={
            'year': (['year'], years),
            'lat': (['lat'], latt),
            'lon': (['lon'], lonn)
        }
    )

    ds_tmean = xr.Dataset(
        {'t': (['year',  'lat', 'lon'], arr_tmean)},
        coords={
            'year': (['year'], years),
            'lat': (['lat'], latt),
            'lon': (['lon'], lonn)
        }
    )

    return ds_tmin,ds_tmean



def create_dtemp_dataset(arr_t, latt, lonn,
                         hours=(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17),
                         start_year=1980, n_years=43,
                         days_per_season=92, hours_per_day=24):
    """Seasonal means of the daily minimum and daily mean, stored as 'dt'.

    Same reduction as the 't' variant in this section, but the output
    variable is named 'dt'.  The default ``hours`` window (6-17) is the
    daytime selection used in this section.  Note that this definition
    replaces any earlier function of the same name in the running kernel.

    Parameters
    ----------
    arr_t : numpy.ndarray
        3-D array ordered (time, lat, lon).  The time axis must hold exactly
        ``n_years * days_per_season * hours_per_day`` steps.
    latt, lonn : array-like
        1-D coordinate values for the 'lat' and 'lon' dimensions.
    hours : sequence of int, optional
        Hour-of-day indices entering the daily statistics.
    start_year : int, optional
        Label of the first season (default 1980).
    n_years : int, optional
        Number of seasons in ``arr_t`` (default 43, i.e. 1980-2022).
    days_per_season : int, optional
        Days per season (default 92).
    hours_per_day : int, optional
        Time steps per day (default 24).

    Returns
    -------
    ds_dtmin, ds_dtmean : xarray.Dataset
        Each holds 'dt' on ('year', lat, lon): the season average of the
        daily minimum / daily mean over the selected hours (NaN-aware).

    Raises
    ------
    ValueError
        If ``hours`` is empty or the time axis length does not match the
        requested layout.
    """
    hour_index = list(hours)
    if not hour_index:
        raise ValueError('hours must contain at least one hour index')

    expected_steps = n_years * days_per_season * hours_per_day
    if arr_t.shape[0] != expected_steps:
        raise ValueError(
            'time axis has %d steps, expected %d (= %d years x %d days x %d hours)'
            % (arr_t.shape[0], expected_steps, n_years, days_per_season, hours_per_day)
        )

    arr_ntt = arr_t.reshape(n_years, days_per_season, hours_per_day,
                            arr_t.shape[1], arr_t.shape[2])
    # Keep only the requested hours of every day (axis 2 is hour-of-day).
    selected_data = np.take(arr_ntt, hour_index, axis=2)

    # Reduce within each day first (axis 2), then average the days (axis 1).
    arr_tmin = np.nanmean(np.nanmin(selected_data, axis=2), axis=1)
    arr_tmean = np.nanmean(np.nanmean(selected_data, axis=2), axis=1)

    years = np.arange(start_year, start_year + n_years)

    ds_dtmin = xr.Dataset(
        {'dt': (['year',  'lat', 'lon'], arr_tmin)},
        coords={
            'year': (['year'], years),
            'lat': (['lat'], latt),
            'lon': (['lon'], lonn)
        }
    )

    ds_dtmean = xr.Dataset(
        {'dt': (['year',  'lat', 'lon'], arr_tmean)},
        coords={
            'year': (['year'], years),
            'lat': (['lat'], latt),
            'lon': (['lon'], lonn)
        }
    )

    return ds_dtmin,ds_dtmean

In [5]:
# Per-folder daytime T2 processing: load the hourly JJA field and write the
# seasonal min/mean datasets.  This relies on the daytime definition of
# create_temp_dataset from this section being the one currently live in the
# kernel (an earlier section defines the same name with a nighttime window).
base_path = '/N/project/Zli_lab/gongg/CONUS404_data/LST/UTC/'
file_pattern = 'T2.wrf2d_d01_????-??-??.nc'
output_folder = '/N/project/Zli_lab/gongg/CONUS404_data/LST/JJA/'

folder_names = [
    'U-50', 'U-51', 'U-52', 'U-53', 'U-54', 'U-55', 'U-56', 'U-57', 'U-58',
    'U-60', 'U-61', 'U-62', 'U-63', 'U-64', 'U-65', 'U-66', 'U-67', 'U-68',
    'U-70', 'U-71', 'U-72', 'U-73', 'U-74', 'U-75', 'U-76', 'U-77', 'U-78',
    'U-80', 'U-81', 'U-82', 'U-83', 'U-84', 'U-85', 'U-86', 'U-87', 'U-88',

]


for folder in folder_names:
    # progress timestamp, one line per folder
    print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime()))
    full_path = os.path.join(base_path, folder, file_pattern)
    all_files = sorted(glob.glob(full_path))
    # June-September files are opened; the month filter below keeps JJA only.
    summer_files = [f for f in all_files
                    if '-06-' in f or '-07-' in f or '-08-' in f or '-09-' in f]
    if not summer_files:
        raise FileNotFoundError('no summer files matched ' + full_path)

    # Close the multi-file dataset as soon as the arrays are in memory so file
    # handles do not accumulate across the 36 folders.
    with xr.open_mfdataset(summer_files) as ds_summer:
        ds_jja = ds_summer.sel(time=ds_summer['time'].dt.month.isin([6, 7, 8]))
        lonn = ds_jja.lon.values
        latt = ds_jja.lat.values
        arr_t = ds_jja.t2.values

    ds_tmin,ds_tmean = create_temp_dataset(arr_t, latt, lonn)

    ds_tmin.to_netcdf(output_folder+'daytime_temp_min_'+folder+'.nc')
    ds_tmean.to_netcdf(output_folder+'daytime_temp_mean_'+folder+'.nc')

2024-11-04 21:53:30
2024-11-04 22:00:25
2024-11-04 22:05:59
2024-11-04 22:11:13
2024-11-04 22:16:37
2024-11-04 22:21:57
2024-11-04 22:27:50
2024-11-04 22:33:24
2024-11-04 22:38:55
2024-11-04 22:44:17
2024-11-04 22:49:41
2024-11-04 22:55:06
2024-11-04 23:00:28
2024-11-04 23:05:47
2024-11-04 23:11:12
2024-11-04 23:16:34
2024-11-04 23:21:49
2024-11-04 23:27:25
2024-11-04 23:32:52
2024-11-04 23:38:08
2024-11-04 23:43:23
2024-11-04 23:48:29
2024-11-04 23:53:36
2024-11-04 23:58:47
2024-11-05 00:04:07
2024-11-05 00:09:17
2024-11-05 00:14:34
2024-11-05 00:19:48
2024-11-05 00:24:45
2024-11-05 00:29:44
2024-11-05 00:34:18
2024-11-05 00:39:02
2024-11-05 00:43:44
2024-11-05 00:48:27
2024-11-05 00:53:00
2024-11-05 00:57:43
