# ICON_NWP_CRM time and spatial averaging
Anna Mackie, 2022

Processing for long channel simulations from CM1. Three SSTs (295, 300 and 305K). Please see Wing et al. (2018) for simulation and variable descriptions.

This script, for different data:
- reads in data from CEDA archive
- takes the average over 24 hours and 32 x 32 grid points (equivilent to 96 km x 96km). NB last blocks may have more grid points to ensure all grid points used
- saves as npy files

This is done for a number of variables. The code is grouped for
1. 2D variables
2. 3D variables
3. Cloud fraction

Cloud fraction follows the method of Wing et al., (2020) which uses a threshold value for cloud condensate.

In [1]:
# activate virtual environmnet required for metpy
import sys
import pathlib
import platform
venv_path = '~/nb-venvs/metpy_venv'
sys.path.append(str(pathlib.Path(f'{venv_path}/lib/python{platform.python_version_tuple()[0]}.{platform.python_version_tuple()[1]}/site-packages/').expanduser()))

import metpy
print(metpy.__file__)

/home/users/arm33/nb-venvs/metpy_venv/lib/python3.10/site-packages/metpy/__init__.py


In [2]:
# Activate the venv for metpy
import virtualenv
import pip
import os

venv_dir = '/home/users/arm33/nb-venvs/venv-notebook'
activate_file = os.path.join(venv_dir, "bin", "activate_this.py")
exec(open(activate_file).read(), dict(__file__=activate_file))

# First let's assert that we cannot import `fixnc`
try:
    import metpy
except ModuleNotFoundError as err:
    print('Failed to import "metpy" as expected')
    
# pip install a package using the venv as a prefix
pip.main(["install", "--prefix", venv_dir, "metpy"])

import metpy
print(metpy.__file__)

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.


/home/users/arm33/nb-venvs/metpy_venv/lib/python3.10/site-packages/metpy/__init__.py


In [3]:
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt
from netCDF4 import Dataset
from metpy.calc import saturation_mixing_ratio
from metpy.units import units
import numpy.ma as ma
import os
import sys
sys.path.append('../../')
import funcs


## Model specific inputs
Different models have slightly different set ups (eg grid points, file names etc)

'Blocks' refer to the grid post-spatial averaging

In [4]:
model = 'ICON_NWP_CRM' 

#temp labels
tempsIn =  ['large_295', 'large_300', 'large_305']
tempsOut =  ['large295', 'large300', 'large305']

In [5]:
#read in sample nc file to get dimensions
datapath3D = '/badc/rcemip/data/'+ model +'/RCE_' + tempsIn[0]+ '/3D/'
nc_ta = Dataset(datapath3D + model + '-RCE_' + tempsIn[0] + '-3D_last25d.nc')
ta = nc_ta.variables['ta'][:]
tsize, levsize , ysize, xsize = np.shape(ta) # UCLA has levels in 4th dimension
nc_ta.close()
print(tsize, levsize, ysize, xsize)


101 75 2000 134


In [6]:
#set time, levels, y and x dimensions for this model
tsize, levsize, ysize, xsize = 101, 75, 2000, 134

## Parameters for all models

In [7]:
bk = 32 # number of x/y gridpoints in a block
nodays = 25 # number of days

In [8]:
# set up spaitial averaging
x_orig = np.arange(xsize) # total number of x points (rows)
y_orig = np.arange(ysize) # total number of y points (columns)

ny = int(len(y_orig)/bk) # number of x/y blocks in the grid
y_new = np.arange(ny*bk)
ybk = np.asarray(np.split(y_new, ny))

nx = int(len(x_orig)/bk) # number of x/y blocks in the grid
x_new = np.arange(nx*bk)
xbk = np.asarray(np.split(x_new, nx))
print('no blocks in x direction: ', nx, '; no in y direction: ',ny)


no blocks in x direction:  4 ; no in y direction:  62


## 2D data

- lwcrf - longwave cloud radiative effect, calculated from rlut (outgoing longwave radiation, all sky) and rlutcs (clear sky)
- swcrf - as above, but for shortwave
- pr - surface precipitation rate
- tas - near surface air temperature

In [6]:
#time params for 2D data
#set time params
ts = 24 # number of hours want to integrate over
nd = int(nodays*ts)
totalt = np.arange(nd)# total number of hours 
tbk = np.asarray(np.split(totalt, nodays))
print('takes the average over ' + str(nodays) + ' periods of ' + str(ts)+ ' hour averages')    

takes the average over 25 periods of 24 hour averages


In [7]:

dp = '/home/users/arm33/RCEMIP/'+ model + '/processed_new'

temp_count = 0
for temp in tempsIn:
    datapath = '/badc/rcemip/data/'+ model + '/RCE_' + temp + '/2D/'
    nc = Dataset( datapath + model + '-RCE_' + temp + '-2D.nc')
    lwas_full = nc.variables['rlut'][-nd:,]
    print('done lwas')
    lwcs_full = nc.variables['rlutcs'][-nd:,]
    print('done lwcs')
    swas_full = nc.variables['rsut'][-nd:,]
    swcs_full = nc.variables['rsutcs'][-nd:,]
    print('done swcs')
    pr_full = nc.variables['pr'][-nd:,]
    nc.close()
    print('finished reading')
    lwcrf_full = lwcs_full - lwas_full
    swcrf_full = swcs_full - swas_full
    pr_full = nc.variables['pr'][-nd:,]
    lwcrf_bk, swcrf_bk= np.empty((nodays, ny, nx)), np.empty((nodays, ny, nx)) 
    pr_bk= np.empty((nodays, ny, nx))
    for t in range(nodays):
        
        for i in range(ny):
                if i == ny-1: # check if it's the last block, if yes then use all remaining gridpoints
                    endy = y_orig[-1]+1
                else:
                    endy = ybk[i,-1]+1
                for j in range(nx): # ditto
                    if j == nx-1:
                        endx = x_orig[-1]+1
                    else:
                        endx = xbk[j,-1]+1

                    lwcrf_bk[t,i,j] = np.nanmean(lwcrf_full[tbk[t,0]:tbk[t, -1]+1,  ybk[i,0]:endy, xbk[j,0]: endx])
                    swcrf_bk[t,i,j] = np.nanmean(swcrf_full[tbk[t,0]:tbk[t, -1]+1,  ybk[i,0]:endy, xbk[j,0]: endx])
                    pr_bk[t,i,j] = np.nanmean(pr_full[tbk[t,0]:tbk[t, -1]+1,  ybk[i,0]:endy, xbk[j,0]: endx])


    lwcrf_bk.dump(dp + '/2D/'+ tempsOut[temp_count] + 'lwcrf' + str(ts) + 'hrs.npy')
    swcrf_bk.dump(dp + '/2D/'+ tempsOut[temp_count] + 'swcrf' + str(ts) + 'hrs.npy')
    pr_bk.dump(dp + '/2D/'+ tempsOut[temp_count] + 'pr' + str(ts) + 'hrs.npy')
    temp_count +=1

## 3D data

Note that the 3D data is 6 hourly, so require different time parameters

- ua - eastward wind velocity
- va - northward wind velocity
- wa - vertical velocity
- pa - pressure
- cli - mass fraction of cloud liqid ice
- clw - mass fraction of cloud liquid water
- hus - specific humidity
- hur - relative humidity
- tntr - tendency of air temperature due to radiative heating
- ta - atmospheric temperature

In [6]:
#time params for 3D data
ts = 4 # number of timesteps want to integrate over
nd = int(nodays*ts)
totalt = np.arange(nd)# total number of timesteps 
tbk = np.asarray(np.split(totalt, nodays))
print('takes the average over ' + str(nodays) + ' periods of ' + str(ts)+ ' hour averages')    

takes the average over 25 periods of 4 hour averages


In [7]:
vars = ['ua', 'va','pa','cli', 'clw','wa','hus','hur', 'tntr', 'ta']

dp = '/home/users/arm33/RCEMIP/'+ model + '/processed_new'

temp_count = 0
for temp in tempsIn:
    print('-----------' + temp + '----------')
    for var in vars:
        print(var)
    
        var_bk = np.empty((nodays, levsize, ny, nx))
        datapath = '/badc/rcemip/data/'+ model +'/RCE_' + temp + '/3D/'
        for t in range(nodays):
            nc = Dataset( datapath + model + '-RCE_' + temp + '-3D_last25d.nc')
            var_full = nc.variables[var][tbk[t,0]:tbk[t,-1]+1,]
            nc.close()
            for i in range(ny):
                if i == ny-1: # check if it's the last block, if yes then use all remaining gridpoints
                    endy = y_orig[-1]+1
                else:
                    endy = ybk[i,-1]+1
                for j in range(nx): # ditto
                    if j == nx-1:
                        endx = x_orig[-1]+1
                    else:
                        endx = xbk[j,-1]+1
                    for k in range(levsize):
                        var_bk[t, k,i,j] = np.nanmean(var_full[:, k, ybk[i,0]:endy, xbk[j,0]: endx])



        var_bk.dump(dp + '/3D/' + tempsOut[temp_count] + var + '_profile_25d.npy')

    temp_count+=1


-----------large_295----------
ua
va
-----------large_300----------
ua
va
-----------large_305----------
ua
va


## Cloud fraction

This follows the procedure described in Wing et al., 2020

1. Read in clw, cli ta and pa for each day (four time steps)
2. Take the mean for that 24 hours
3. Calculate the saturation mixing ratio and the cloud condensate (= cli + clw)
4. for each 32 x 32 block, calculate the proportion of points where either the cloud condensate is grater than 0.00001, or greater than the 0.01 x the saturation mixing ratio

In [6]:
#time params for 3D data
ts = 4 # number of timesteps want to integrate over
nd = int(nodays*ts)
totalt = np.arange(nd)# total number of timesteps 
tbk = np.asarray(np.split(totalt, nodays))
print('takes the average over ' + str(nodays) + ' periods of ' + str(ts)+ ' hour averages')    

takes the average over 25 periods of 4 hour averages


In [15]:
#cloud fraction
dp = '/home/users/arm33/RCEMIP/'+ model + '/processed_new'  

nd = nodays*ts
tempCount = 0
for temp in tempsIn:
    print('-----------' + temp + '----------')
    #calc 24 hour means of cloud condensate and sat mix ratio
    satmixr =np.empty((nodays, levsize,len(y_orig), len(x_orig) ))
    cloudcon = np.empty((nodays, levsize,len(y_orig), len(x_orig) ))
    datapath = '/badc/rcemip/data/'+ model +'/RCE_' + temp + '/3D/'
    for t in range(nodays):       
        nc = Dataset( datapath + model + '-RCE_' + temp + '-3D_last25d.nc')
        clw = np.mean(nc.variables['clw'][tbk[t,0]:tbk[t,-1]+1, ], axis = 0)
        cli = np.mean(nc.variables['cli'][tbk[t,0]:tbk[t,-1]+1, ], axis = 0)
        pa = np.mean(nc.variables['pa'][tbk[t,0]:tbk[t,-1]+1,], axis = 0)
        pa = np.asarray(pa) * units.pascal
        ta = np.mean(nc.variables['ta'][tbk[t,0]:tbk[t,-1]+1,], axis =0)
        ta = np.asarray(ta) * units.K
        satmixr[t, ] = metpy.calc.saturation_mixing_ratio(pa, ta)
        cloudcon[t,]= clw + cli
        nc.close()

    #calc cloud frac
    cldfrac = np.zeros((nodays, levsize, ny, nx))
    for t in range(nodays):
        print(t)
        for k in range(levsize):
            for i in range(ny):
                if i == ny-1: # check if it's the last block, if yes then use all remaining gridpoints
                    endy = y_orig[-1]+1
                else:
                    endy = ybk[i,-1]+1
                for j in range(nx): # ditto
                    if j == nx-1:
                        endx = x_orig[-1]+1
                    else:
                        endx = xbk[j,-1]+1

                    cc = cloudcon[t,k, ybk[i,0]:endy, xbk[j,0]: endx].flatten()
                    sm = satmixr[t,k, ybk[i,0]:endy, xbk[j,0]: endx].flatten()
                    no_grid_points = len(sm)
                    count = 0
                    for r in range(no_grid_points):
                        if cc[r] > 0.00001:
                            count +=1
                        elif cc[r] > 0.01*sm[r]:
                            count +=1 

                    cldfrac[t,k, i , j ] = count/no_grid_points

    cldfrac.dump(dp + '/3D/'+ tempsOut[tempCount] + 'cldfrac_profile_25d.npy')
    tempCount +=1

-----------large_295----------
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
-----------large_300----------
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
-----------large_305----------
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
