In [5]:
### automatically refresh the buffer

%load_ext autoreload
%autoreload 2

### solve the auto-complete issue

%config Completer.use_jedi = False

%matplotlib inline

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter(action='ignore', category=FutureWarning)

### lvl 2 setups (system)

import os
import numpy as np
import pandas as pd
import xarray as xr

import matplotlib as mpl
import cartopy.crs as ccrs
import cartopy.feature as cfeature

import cartopy.feature as cfeature
from cartopy.mpl.ticker import LongitudeFormatter, LatitudeFormatter
import warnings
warnings.filterwarnings('ignore')
from pylab import *
from matplotlib.colors import ListedColormap,LinearSegmentedColormap
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from matplotlib.patches import Wedge, Circle
import geopandas as gpd
from shapely.geometry import Point
from datetime import datetime
import glob

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
def generate_the_99_quantile(prec_filtered, latt, lonn):
    """Return a (lat, lon) dataset with the NaN-ignoring 0.99 quantile along axis 0.

    Parameters
    ----------
    prec_filtered : array-like
        Values to reduce along axis 0; NaNs are ignored.
        Presumably shaped (time, lat, lon) -- TODO confirm against the caller.
    latt : array-like
        1-D values stored as the ``lat`` coordinate.
    lonn : array-like
        1-D values stored as the ``lon`` coordinate.

    Returns
    -------
    xr.Dataset
        Dataset with a single variable ``p`` on dimensions ``(lat, lon)``.
    """
    quantile_99 = np.nanquantile(prec_filtered, 0.99, axis=0)

    coords = {
        'lon': (['lon'], lonn),
        'lat': (['lat'], latt),
    }
    data_vars = {'p': (['lat', 'lon'], quantile_99)}
    return xr.Dataset(data_vars, coords=coords)


In [22]:
def create_max_datasets(arr_ep, latt, lonn, n_years=43, n_days=90, n_hours=24,
                        first_year=1979):
    """Summarise an hourly event array per year: maximum, hour of maximum, hour counts.

    The leading axis of ``arr_ep`` is split into ``(n_years, n_days, n_hours)``,
    so the input must hold exactly ``n_years * n_days * n_hours`` steps in
    chronological order. NaN marks "no event" at a given step and grid cell.

    Parameters
    ----------
    arr_ep : np.ndarray
        3-D array ``(step, lat, lon)``; non-event steps are NaN.
    latt, lonn : array-like
        1-D coordinate values for the ``lat`` and ``lon`` dimensions.
    n_years, n_days, n_hours : int, optional
        Layout of the leading axis. Defaults (43, 90, 24) reproduce the
        original hard-coded reshape.
    first_year : int, optional
        Label of the first year; the year coordinate is
        ``first_year .. first_year + n_years - 1`` (default 1979..2021).

    Returns
    -------
    ds_maxp : xr.Dataset
        ``p(time, lat, lon)``: largest value of each year (NaN if the year
        has no event at that cell).
    ds_maxh : xr.Dataset
        ``h(time, lat, lon)``: index along the hour axis at which the yearly
        maximum occurs (NaN if the year has no event at that cell).
    ds_hour_count : xr.Dataset
        ``c(year, hour, lat, lon)``: number of days per year with an event at
        each hour index.

    Raises
    ------
    ValueError
        If the leading axis of ``arr_ep`` does not match
        ``n_years * n_days * n_hours``.
    """
    n_lat, n_lon = arr_ep.shape[1], arr_ep.shape[2]
    expected_steps = n_years * n_days * n_hours
    if arr_ep.shape[0] != expected_steps:
        raise ValueError(
            f'arr_ep has {arr_ep.shape[0]} steps along axis 0, expected '
            f'{n_years} * {n_days} * {n_hours} = {expected_steps}'
        )

    arr_freq = arr_ep.reshape(n_years, n_days, n_hours, n_lat, n_lon)
    years = np.arange(first_year, first_year + n_years)
    hours = np.arange(0, n_hours)

    # All-NaN slices are expected (cells with no event in a year); silence the
    # resulting RuntimeWarning locally instead of relying on a global filter.
    with warnings.catch_warnings():
        warnings.simplefilter('ignore', category=RuntimeWarning)
        arr_maxp = np.nanmax(arr_freq, axis=(1, 2))   # (year, lat, lon)
        arr_maxh = np.nanmax(arr_freq, axis=1)        # (year, hour, lat, lon)

    # Replace NaN by a sentinel below any event value so argmax is well defined,
    # then blank out the cells that had no event at all in that year.
    arr_maxh = np.nan_to_num(arr_maxh, nan=-1)
    arr_maxh_indices = np.argmax(arr_maxh, axis=1).astype(float)
    arr_maxh_indices[np.isnan(arr_maxp)] = np.nan

    ds_maxp = xr.Dataset(
        {'p': (['time', 'lat', 'lon'], arr_maxp)},
        coords={
            'lon': (['lon'], lonn),
            'lat': (['lat'], latt),
            'time': ('time', years)
        }
    )

    ds_maxh = xr.Dataset(
        {'h': (['time', 'lat', 'lon'], arr_maxh_indices)},
        coords={
            'time': (['time'], years),
            'lat': (['lat'], latt),
            'lon': (['lon'], lonn)
        }
    )

    # Exact number of event days per (year, hour, cell). The previous
    # `nanmean(valid) * 92` assumed a 92-day season although the data are
    # reshaped with n_days days, which inflated every count by 92 / n_days.
    arr_h = np.count_nonzero(~np.isnan(arr_freq), axis=1).astype(float)

    ds_hour_count = xr.Dataset(
        {'c': (['year', 'hour', 'lat', 'lon'], arr_h)},
        coords={
            'year': (['year'], years),
            'hour': (['hour'], hours),
            'lat': (['lat'], latt),
            'lon': (['lon'], lonn)
        }
    )

    return ds_maxp, ds_maxh, ds_hour_count

In [23]:
def create_season_mean_dataset(arr_ep, latt, lonn):
    """Average the event array over the day axis for every year and hour index.

    The leading axis of ``arr_ep`` is reshaped to ``(43, 90, 24)`` and the
    NaN-ignoring mean is taken over the middle (size-90) axis.

    Parameters
    ----------
    arr_ep : np.ndarray
        3-D array whose first axis has 43 * 90 * 24 entries; NaNs are ignored.
    latt, lonn : array-like
        1-D coordinate values for the ``lat`` and ``lon`` dimensions.

    Returns
    -------
    xr.Dataset
        Dataset with variable ``p`` on ``(year, hour, lat, lon)``, with
        ``year`` running 1979..2021 and ``hour`` running 0..23.
    """
    n_lat = arr_ep.shape[1]
    n_lon = arr_ep.shape[2]

    per_year_day_hour = arr_ep.reshape(43, 90, 24, n_lat, n_lon)
    hourly_mean = np.nanmean(per_year_day_hour, axis=1)

    coords = {
        'year': (['year'], np.arange(1979, 2022)),
        'hour': (['hour'], np.arange(0, 24)),
        'lat': (['lat'], latt),
        'lon': (['lon'], lonn),
    }
    data_vars = {'p': (['year', 'hour', 'lat', 'lon'], hourly_mean)}

    return xr.Dataset(data_vars, coords=coords)

In [24]:
# Input root (one sub-folder per entry in `folder_names`), daily-file glob
# pattern, and the folder receiving the DJF products.
base_path = '/N/project/Zli_lab/gongg/CONUS404_data/LST/UTC/'
file_pattern = 'PREC_ACC_NC.wrf2d_d01_????-??-??.nc'
output_folder = '/N/project/Zli_lab/gongg/CONUS404_data/LST/DJF/'

folder_names = [
    'U-80', 'U-81', 'U-82', 'U-83', 'U-84', 'U-85', 'U-86',
    'U-87', 'U-88'
]

# Month tokens looked for in the file names. March files are opened as well --
# presumably because time stamps inside a file can fall outside the file's own
# date (TODO confirm); March time steps are removed by the month filter below.
month_tags = ('-12-', '-01-', '-02-', '-03-')

for folder in folder_names:
    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    full_path = os.path.join(base_path, folder, file_pattern)
    all_files = sorted(glob.glob(full_path))
    djf_files = [
        f for f in all_files
        if any(tag in os.path.basename(f) for tag in month_tags)
    ]
    if not djf_files:
        # open_mfdataset raises on an empty list; report and move on instead.
        print(f'No files matched {full_path!r}; skipping {folder}')
        continue

    # The context manager closes the underlying files once the values are loaded.
    with xr.open_mfdataset(djf_files) as ds_all:
        month = ds_all['time'].dt.month
        day = ds_all['time'].dt.day
        # Keep Dec/Jan/Feb and drop 29 February so every year has 90 days.
        # (The former `month != 2 | (day != 29)` parsed as
        # `month != (2 | (day != 29))` and only worked by accident.)
        keep = month.isin([12, 1, 2]) & ~((month == 2) & (day == 29))
        ds_djf = ds_all.sel(time=keep)
        lonn = ds_djf.lon.values
        latt = ds_djf.lat.values
        prec = ds_djf.p.values

    # Mask values below 0.1 (presumably a wet-hour threshold -- TODO confirm units).
    prec_filtered = np.where(prec >= 0.1, prec, np.nan)

    # 99th-percentile threshold of the filtered values per grid cell. It is
    # computed once, from `prec_filtered`, and reused for the event mask;
    # previously the saved "threshold" was derived from the already-thresholded
    # `arr_ep`, i.e. it was the 99th percentile of the extremes themselves.
    ds_the = generate_the_99_quantile(prec_filtered, latt, lonn)
    arr_the = ds_the['p'].values
    arr_ep = np.where(prec_filtered >= arr_the, prec_filtered, np.nan)

    ds_maxp, ds_maxh, ds_hour_count = create_max_datasets(arr_ep, latt, lonn)
    ds_season_mean = create_season_mean_dataset(arr_ep, latt, lonn)

    ds_the.to_netcdf(os.path.join(output_folder, 'prec_the_' + folder + '.nc'))
    ds_maxp.to_netcdf(os.path.join(output_folder, 'prec_maxp_' + folder + '.nc'))
    ds_maxh.to_netcdf(os.path.join(output_folder, 'prec_maxh_' + folder + '.nc'))
    ds_hour_count.to_netcdf(os.path.join(output_folder, 'hour_count_' + folder + '.nc'))
    ds_season_mean.to_netcdf(os.path.join(output_folder, 'season_mean_' + folder + '.nc'))

2024-10-14 01:30:41
2024-10-14 01:32:57
2024-10-14 01:39:31
2024-10-14 01:45:14
2024-10-14 01:51:17
2024-10-14 01:57:07
2024-10-14 02:02:49
2024-10-14 02:08:16
2024-10-14 02:13:52


In [29]:
# Input root (one sub-folder per entry in `folder_names`), daily-file glob
# pattern, and the folder receiving the DJF products.
base_path = '/N/project/Zli_lab/gongg/CONUS404_data/LST/UTC/'
file_pattern = 'PREC_ACC_NC.wrf2d_d01_????-??-??.nc'
output_folder = '/N/project/Zli_lab/gongg/CONUS404_data/LST/DJF/'

folder_names = [
    'U-68',
    'U-70', 'U-71', 'U-72', 'U-73', 'U-74', 'U-75', 'U-76', 'U-77', 'U-78'
]

# Month tokens looked for in the file names. March files are opened as well --
# presumably because time stamps inside a file can fall outside the file's own
# date (TODO confirm); March time steps are removed by the month filter below.
month_tags = ('-12-', '-01-', '-02-', '-03-')

for folder in folder_names:
    print(datetime.now().strftime('%Y-%m-%d %H:%M:%S'))
    full_path = os.path.join(base_path, folder, file_pattern)
    all_files = sorted(glob.glob(full_path))
    djf_files = [
        f for f in all_files
        if any(tag in os.path.basename(f) for tag in month_tags)
    ]
    if not djf_files:
        # open_mfdataset raises on an empty list; report and move on instead.
        print(f'No files matched {full_path!r}; skipping {folder}')
        continue

    # The context manager closes the underlying files once the values are loaded.
    with xr.open_mfdataset(djf_files) as ds_all:
        month = ds_all['time'].dt.month
        day = ds_all['time'].dt.day
        # Keep Dec/Jan/Feb and drop 29 February so every year has 90 days.
        # (The former `month != 2 | (day != 29)` parsed as
        # `month != (2 | (day != 29))` and only worked by accident.)
        keep = month.isin([12, 1, 2]) & ~((month == 2) & (day == 29))
        ds_djf = ds_all.sel(time=keep)
        lonn = ds_djf.lon.values
        latt = ds_djf.lat.values
        prec = ds_djf.p.values

    # Mask values below 0.1 (presumably a wet-hour threshold -- TODO confirm units).
    prec_filtered = np.where(prec >= 0.1, prec, np.nan)

    # 99th-percentile threshold of the filtered values per grid cell. It is
    # computed once, from `prec_filtered`, and reused for the event mask;
    # previously the saved "threshold" was derived from the already-thresholded
    # `arr_ep`, i.e. it was the 99th percentile of the extremes themselves.
    ds_the = generate_the_99_quantile(prec_filtered, latt, lonn)
    arr_the = ds_the['p'].values
    arr_ep = np.where(prec_filtered >= arr_the, prec_filtered, np.nan)

    ds_maxp, ds_maxh, ds_hour_count = create_max_datasets(arr_ep, latt, lonn)
    ds_season_mean = create_season_mean_dataset(arr_ep, latt, lonn)

    ds_the.to_netcdf(os.path.join(output_folder, 'prec_the_' + folder + '.nc'))
    ds_maxp.to_netcdf(os.path.join(output_folder, 'prec_maxp_' + folder + '.nc'))
    ds_maxh.to_netcdf(os.path.join(output_folder, 'prec_maxh_' + folder + '.nc'))
    ds_hour_count.to_netcdf(os.path.join(output_folder, 'hour_count_' + folder + '.nc'))
    ds_season_mean.to_netcdf(os.path.join(output_folder, 'season_mean_' + folder + '.nc'))

2024-10-14 07:26:32
2024-10-14 07:32:28
2024-10-14 07:38:19
2024-10-14 07:43:53
2024-10-14 07:49:28
2024-10-14 07:55:11
2024-10-14 08:00:42
2024-10-14 08:06:19
2024-10-14 08:11:50
2024-10-14 08:17:15


In [27]:
# Debugging aid: display the file list produced by the most recent
# `glob.glob(full_path)` call in the processing loops. An empty list means
# the glob pattern matched no files for that folder.
all_files

[]