# Notebook for resampling WRF data

### Import necessary libraries

In [1]:
import xarray as xr
import numpy as np
import dask
import dask.dataframe as dd
import netCDF4 as nc
from netCDF4 import Dataset
import pandas as pd

## Import WRF HIST dataset: 15 years (1990-2005) of hourly data (00Z-00Z) for 365 (366 in leap years) Julian calendar days

In [2]:
# Open every historical HAIL_MAX2D file (hourly, 1990-2005) as a single dataset.
hist_hail = xr.open_mfdataset(
    paths='/home/scratch/ahaberlie/AFWA_HAIL/HIST/*/HAIL_MAX2D_historical_*.nc'
)

In [3]:
hist_hail  # Display the dataset summary as the cell output

Unnamed: 0,Array,Chunk
Bytes,613.81 GiB,115.15 MiB
Shape,"(131007, 899, 1399)","(24, 899, 1399)"
Count,16416 Tasks,5472 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 613.81 GiB 115.15 MiB Shape (131007, 899, 1399) (24, 899, 1399) Count 16416 Tasks 5472 Chunks Type float32 numpy.ndarray",1399  899  131007,

Unnamed: 0,Array,Chunk
Bytes,613.81 GiB,115.15 MiB
Shape,"(131007, 899, 1399)","(24, 899, 1399)"
Count,16416 Tasks,5472 Chunks
Type,float32,numpy.ndarray


### Convert values of HAIL_MAX2D to units of inches (given in m)

In [4]:
# HAIL_MAX2D is given in metres; multiply by 39.3701 to express it in inches.
hail = hist_hail['HAIL_MAX2D'] * 39.3701

## Resample by year- returns 16 years

In [5]:
# Group the hourly series by calendar year and reduce over Time with the maximum.
# Swap .max for .min, .mean, .median or .std to compute a different statistic.
yearly_max_resample = (
    hail
    .groupby("Time.year")
    .max(dim="Time")
)

In [6]:
yearly_max_resample  # Display the result; the leading (year) dimension should have length 16

Unnamed: 0,Array,Chunk
Bytes,76.76 MiB,4.80 MiB
Shape,"(16, 899, 1399)","(1, 899, 1399)"
Count,34709 Tasks,16 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 76.76 MiB 4.80 MiB Shape (16, 899, 1399) (1, 899, 1399) Count 34709 Tasks 16 Chunks Type float32 numpy.ndarray",1399  899  16,

Unnamed: 0,Array,Chunk
Bytes,76.76 MiB,4.80 MiB
Shape,"(16, 899, 1399)","(1, 899, 1399)"
Count,34709 Tasks,16 Chunks
Type,float32,numpy.ndarray


In [7]:
# Write the yearly maxima to disk as a netCDF file.
yearly_max_resample.to_netcdf(
    path='/home/scratch/jgoodin/yearly_resamples/WRF_yearly_max_resample.nc'
)

## Resample by season (DJF, MAM, JJA, SON)- returns 4 seasons

In [5]:
# Group the hourly series by season (DJF, MAM, JJA, SON) and reduce over Time
# with the median. Swap .median for .min, .max, .mean or .std as needed.
seasonal_median_resample = (
    hail
    .groupby("Time.season")
    .median(dim="Time")
)

In [6]:
seasonal_median_resample  # Display the result; the leading (season) dimension should have length 4

Unnamed: 0,Array,Chunk
Bytes,19.19 MiB,3.81 kiB
Shape,"(4, 899, 1399)","(1, 25, 39)"
Count,366412 Tasks,10080 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 19.19 MiB 3.81 kiB Shape (4, 899, 1399) (1, 25, 39) Count 366412 Tasks 10080 Chunks Type float32 numpy.ndarray",1399  899  4,

Unnamed: 0,Array,Chunk
Bytes,19.19 MiB,3.81 kiB
Shape,"(4, 899, 1399)","(1, 25, 39)"
Count,366412 Tasks,10080 Chunks
Type,float32,numpy.ndarray


In [9]:
# Write the seasonal medians to disk as netCDF. The file name now says
# "median" to match the statistic actually stored; it previously said "max",
# which mislabelled the contents.
seasonal_median_resample.to_netcdf('/home/scratch/jgoodin/seasonal_resamples/WRF_seasonal_median_resample.nc')

## Resample by month- returns 12 months

In [7]:
# Group the hourly series by calendar month and reduce over Time with the
# median. Swap .median for .min, .max, .mean or .std as needed.
monthly_median_resample = (
    hail
    .groupby("Time.month")
    .median(dim="Time")
)

In [8]:
monthly_median_resample  # Display the result; the leading (month) dimension should have length 12

Unnamed: 0,Array,Chunk
Bytes,57.57 MiB,11.50 kiB
Shape,"(12, 899, 1399)","(1, 31, 95)"
Count,297037 Tasks,19140 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 57.57 MiB 11.50 kiB Shape (12, 899, 1399) (1, 31, 95) Count 297037 Tasks 19140 Chunks Type float32 numpy.ndarray",1399  899  12,

Unnamed: 0,Array,Chunk
Bytes,57.57 MiB,11.50 kiB
Shape,"(12, 899, 1399)","(1, 31, 95)"
Count,297037 Tasks,19140 Chunks
Type,float32,numpy.ndarray


In [8]:
# Write the monthly medians to disk as netCDF.
# The original cell referenced `monthly_max_resample`, which is never defined
# in this notebook (the groupby above produces `monthly_median_resample`), so
# it raised NameError. The file name is aligned with the stored statistic.
monthly_median_resample.to_netcdf('/home/scratch/jgoodin/monthly_resamples/WRF_monthly_median_resample.nc')

## Resample by day- returns 365 (366, leap year) Julian calendar days (00Z - 00Z)

In [26]:
# Group the hourly series by day of year and reduce over Time with the maximum.
# Swap .max for .min, .mean, .median or .std as needed.
daily_max_resample = (
    hail
    .groupby("Time.dayofyear")
    .max(dim="Time")
)

In [27]:
daily_max_resample  # Display the result; the leading (dayofyear) dimension should have length 366

Unnamed: 0,Array,Chunk
Bytes,1.71 GiB,4.80 MiB
Shape,"(366, 899, 1399)","(1, 899, 1399)"
Count,35023 Tasks,366 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 1.71 GiB 4.80 MiB Shape (366, 899, 1399) (1, 899, 1399) Count 35023 Tasks 366 Chunks Type float32 numpy.ndarray",1399  899  366,

Unnamed: 0,Array,Chunk
Bytes,1.71 GiB,4.80 MiB
Shape,"(366, 899, 1399)","(1, 899, 1399)"
Count,35023 Tasks,366 Chunks
Type,float32,numpy.ndarray


In [28]:
# Write the day-of-year maxima to disk as netCDF. The file name now says
# "max" to match the statistic computed above; it previously said "min",
# which mislabelled the contents.
daily_max_resample.to_netcdf('/home/scratch/jgoodin/daily_resamples/WRF_daily_max_resample.nc')

## Convective resampling (12Z - 12Z)

### Import datetime and timedelta libraries for adjusting time series

In [5]:
import datetime
from datetime import datetime, timedelta

## Subtract timedelta of 12 hours from dataset to adjust time series to 12Z-12Z instead of default 00Z-00Z

In [6]:
# Shift every timestamp back by 12 hours so that calendar-day grouping covers
# 12Z-12Z rather than 00Z-00Z.
# NOTE(review): `hail` was derived before this shift; presumably it has to be
# recomputed from `hist_hail` before running 12Z-12Z statistics - confirm.
hist_hail['Time'] = hist_hail['Time'] - pd.Timedelta(12, unit='h')

In [7]:
hist_hail  # Display the dataset; the first and last Time stamps should now contain "T12"

Unnamed: 0,Array,Chunk
Bytes,613.81 GiB,115.15 MiB
Shape,"(131007, 899, 1399)","(24, 899, 1399)"
Count,16416 Tasks,5472 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 613.81 GiB 115.15 MiB Shape (131007, 899, 1399) (24, 899, 1399) Count 16416 Tasks 5472 Chunks Type float32 numpy.ndarray",1399  899  131007,

Unnamed: 0,Array,Chunk
Bytes,613.81 GiB,115.15 MiB
Shape,"(131007, 899, 1399)","(24, 899, 1399)"
Count,16416 Tasks,5472 Chunks
Type,float32,numpy.ndarray


In [8]:
# Write the time-shifted dataset to netCDF so the resampling steps above can
# be re-run on it later.
hist_hail.to_netcdf(
    path='/home/scratch/jgoodin/WRF_HIST_conv_resample.nc'
)