# UKMO CASIM time and spatial averaging
Anna Mackie, 2022

Processing for long channel simulations from the UKMO CASIM model (UKMOi-vn11.0-CASIM). Three SSTs (295, 300 and 305 K). Please see Wing et al. (2018) for simulation and variable descriptions.

This script, for different data:
- reads in data from CEDA archive
- takes the average over 24 hours and 32 x 32 grid points (equivalent to 96 km x 96 km). NB the last blocks may have more grid points to ensure all grid points are used
- saves as npy files

This is done for a number of variables. The code is grouped for
1. 2D variables
2. 3D variables
3. Cloud fraction

Cloud fraction follows the method of Wing et al., (2020) which uses a threshold value for cloud condensate.

In [1]:
# Make the packages installed in the metpy virtual environment importable by
# appending that environment's site-packages directory to the module search path.
import sys
import pathlib
import platform
venv_path = '~/nb-venvs/metpy_venv'
# Major and minor version of the running interpreter, e.g. ('3', '10')
py_major, py_minor = platform.python_version_tuple()[:2]
site_packages = pathlib.Path(f'{venv_path}/lib/python{py_major}.{py_minor}/site-packages/').expanduser()
sys.path.append(str(site_packages))

import metpy
# Show which installation of metpy was picked up
print(metpy.__file__)

/home/users/arm33/nb-venvs/metpy_venv/lib/python3.10/site-packages/metpy/__init__.py


In [2]:
import numpy as np
import numpy.ma as ma
import matplotlib.pyplot as plt
from netCDF4 import Dataset
from metpy.calc import saturation_mixing_ratio
from metpy.units import units
import numpy.ma as ma
import os
import sys
sys.path.append('../../')
import funcs



## Model specific inputs
Different models have slightly different set ups (eg grid points, file names etc)

'Blocks' refer to the grid post-spatial averaging

In [3]:
# Model identifier, used to build the archive and output paths.
# In this model, time is in hours, with 24 hours to a file.
model = 'UKMOi-vn11.0-CASIM'

# Labels of the three simulations, one per SST (295, 300 and 305 K)
temps = ['large' + str(sst) for sst in (295, 300, 305)]

In [4]:
# Read the array dimensions from one sample 3D file.
# Only the shape metadata of 'ta' is queried, so the full temperature field
# is never loaded into memory, and the `with` block closes the file even if
# the lookup fails.
datapath3D = '/badc/rcemip/data/'+ model +'/RCE_' + temps[0]+ '/3D/'
sample_file = datapath3D + model + '_RCE_' + temps[0] + '_3D_10000330_00.nc'
with Dataset(sample_file) as nc_ta:
    # axis order of the 3D variables in this model: (time, level, y, x)
    tsize, levsize, ysize, xsize = nc_ta.variables['ta'].shape
print(tsize, levsize, ysize, xsize)


4 98 144 2016


In [4]:
# Grid dimensions, fixed explicitly so that the cells below can run without
# first opening a sample file.
tsize = 4       # time steps in one 3D file
levsize = 98    # vertical levels
ysize = 144     # grid points in y
xsize = 2016    # grid points in x

## Parameters for all models

In [5]:
# bk:     number of grid points along each side (x and y) of an averaging block
# nodays: number of days to process
bk, nodays = 32, 25


In [6]:
# Set up the spatial averaging: split each horizontal axis into blocks of bk
# consecutive grid-point indices.
x_orig = np.arange(xsize)  # every x index of the original grid
y_orig = np.arange(ysize)  # every y index of the original grid

# Number of whole blocks that fit along each axis. Grid points left over after
# the last whole block are not listed in xbk/ybk.
nx = int(len(x_orig) / bk)
ny = int(len(y_orig) / bk)

# Row n of xbk (ybk) holds the bk grid-point indices that make up block n.
x_new = np.arange(nx * bk)
y_new = np.arange(ny * bk)
xbk = np.stack(np.split(x_new, nx))
ybk = np.stack(np.split(y_new, ny))
print('no blocks in x direction: ', nx, '; no in y direction: ',ny)


no blocks in x direction:  63 ; no in y direction:  4


## 2D data

- lwcrf - longwave cloud radiative effect, calculated from rlut (outgoing longwave radiation, all sky) and rlutcs (clear sky)
- swcrf - as above, but for shortwave
- pr - surface precipitation rate
- tas - near surface air temperature (listed here, but the cell below does not currently read or save it)

In [18]:
# Time parameters for the 2D data
ts = 24  # number of hours averaged into one period
nd = int(ts * nodays)  # total number of hours over all periods
totalt = np.arange(nd)  # index of every hour
# Row p of tbk lists the hour indices that belong to period p
tbk = np.stack(np.split(totalt, nodays))
print(f'takes the average over {nodays} periods of {ts} hour averages')

takes the average over 25 periods of 24 hour averages


In [19]:
# Block- and day-average the 2D fields. For every SST, each of the last
# `nodays` daily files is reduced to one mean per spatial block, the days are
# stacked along a third axis, and the result is saved under <dp>/2D/.
dp = '/home/users/arm33/RCEMIP/'+ model + '/processed_new'


def _block_mean_2d(field):
    """Average a (time, y, x) field over all times and over each spatial block.

    Returns an (ny, nx) array. Blocks are bk x bk grid points, except that the
    last block along each axis is stretched to the edge of the domain so that
    every grid point is used.
    """
    means = np.empty((ny, nx))
    for i in range(ny):
        y0 = ybk[i, 0]
        # last block in y: run to the end of the grid
        y1 = y_orig[-1] + 1 if i == ny - 1 else ybk[i, -1] + 1
        for j in range(nx):
            x0 = xbk[j, 0]
            # last block in x: ditto
            x1 = x_orig[-1] + 1 if j == nx - 1 else xbk[j, -1] + 1
            means[i, j] = np.nanmean(field[:, y0:y1, x0:x1])
    return means


temps = ['large295','large300', 'large305']
for temp in temps:

    datapath ='/badc/rcemip/data/'+ model + '/RCE_' + temp + '/2D/'
    datalist = sorted(os.listdir(datapath))[-nodays:] # last 25 days
    d = len(datalist)
    if d == 0:
        # Without this check the arrays saved below would be undefined, or
        # would silently still hold the previous SST's data.
        raise FileNotFoundError('no 2D files found in ' + datapath)
    print('total number of blocks: ', nx*ny*d)

    # one (ny, nx) array of block means per day
    lwcrf_days, swcrf_days, pr_days = [], [], []
    for fname in datalist:
        # the context manager closes the file even if a variable is missing
        with Dataset(datapath + fname) as nc:
            # cloud radiative effect = clear-sky minus all-sky outgoing flux
            lwcrf = nc.variables['rlutcs'][:] - nc.variables['rlut'][:]
            swcrf = nc.variables['rsutcs'][:] - nc.variables['rsut'][:]
            pr = nc.variables['pr'][:]

        # take mean over that day and spatial blocks
        lwcrf_days.append(_block_mean_2d(lwcrf))
        swcrf_days.append(_block_mean_2d(swcrf))
        pr_days.append(_block_mean_2d(pr))

    # Stack the days along a third axis: shape (ny, nx, d).
    # funcs.sortOut is defined outside this notebook; its result is assumed to
    # be a NumPy array (it must provide .dump) - TODO confirm.
    lwcrf_mn = funcs.sortOut(np.dstack(lwcrf_days))
    swcrf_mn = funcs.sortOut(np.dstack(swcrf_days))
    pr_mn = funcs.sortOut(np.dstack(pr_days))

    # NOTE: `ts` is part of the file name and is expected to be 24 here (set
    # in the 2D time-parameter cell); the 3D cells rebind it to 4.
    # ndarray.dump writes a pickle, so read the files back with
    # np.load(path, allow_pickle=True).
    lwcrf_mn.dump(dp + '/2D/'+ temp + 'lwcrf' + str(ts) + 'hrs.npy')
    swcrf_mn.dump(dp + '/2D/'+ temp + 'swcrf' + str(ts) + 'hrs.npy')
    pr_mn.dump(dp + '/2D/'+ temp + 'pr' + str(ts) + 'hrs.npy')
    print(temp + 'done')

total number of blocks:  6300
large295done
total number of blocks:  6300
large300done
total number of blocks:  6300
large305done


## 3D data

Note that the 3D data is 6-hourly, so it requires different time parameters

- ua - eastward wind velocity
- va - northward wind velocity
- wa - vertical velocity
- pa - pressure
- cli - mass fraction of cloud ice
- clw - mass fraction of cloud liquid water
- hus - specific humidity
- hur - relative humidity
- tntr - tendency of air temperature due to radiative heating
- ta - atmospheric temperature

In [6]:
# Time parameters for the 3D data
ts = 4  # number of timesteps averaged into one period
nd = int(ts * nodays)  # total number of timesteps over all periods
totalt = np.arange(nd)  # index of every timestep
# Row p of tbk lists the timestep indices that belong to period p
tbk = np.stack(np.split(totalt, nodays))
# NOTE: the message says "hour", but here ts counts timesteps of the
# 6-hourly 3D output, not hours.
print(f'takes the average over {nodays} periods of {ts} hour averages')

takes the average over 25 periods of 4 hour averages


In [None]:
# Block- and day-average the 3D fields. For every SST and variable, each of
# the last `nodays` files is averaged over its time steps and over each
# spatial block, giving one vertical profile per day and block.
# (Named var_names rather than `vars` so the builtin vars() is not shadowed.)
var_names = ['ua', 'va', 'pa','cli', 'clw','wa','hus','hur', 'tntr', 'ta']

dp = '/home/users/arm33/RCEMIP/'+ model + '/processed_new'


for temp in temps:

    datapath ='/badc/rcemip/data/'+ model + '/RCE_' + temp + '/3D/'
    datalist = sorted(os.listdir(datapath))[-nodays:] # last 25 days
    for var in var_names:
        print(var, temp)
        # NaN-filled rather than np.empty: if fewer than `nodays` files are
        # present, the missing days are saved as NaN instead of as whatever
        # happened to be in the uninitialised memory.
        var_bk = np.full((nodays, levsize, ny, nx), np.nan)
        for t, fname in enumerate(datalist):
            # the context manager closes the file even if the read fails
            with Dataset(datapath + fname) as nc:
                # mean over the time steps held in this file -> (level, y, x)
                var_full = np.mean(nc.variables[var][:], axis = 0)

            for i in range(ny):
                y0 = ybk[i, 0]
                # last block in y: use all remaining grid points
                y1 = y_orig[-1] + 1 if i == ny - 1 else ybk[i, -1] + 1
                for j in range(nx):
                    x0 = xbk[j, 0]
                    # last block in x: ditto
                    x1 = x_orig[-1] + 1 if j == nx - 1 else xbk[j, -1] + 1
                    for k in range(levsize):
                        var_bk[t, k, i, j] = np.nanmean(var_full[k, y0:y1, x0:x1])

        # ndarray.dump writes a pickle, so read the file back with
        # np.load(path, allow_pickle=True).
        var_bk.dump(dp + '/3D/'+ temp + var + '_profile_25d.npy')
        



ua large295
va large295
pa large295
cli large295


## Cloud fraction

This follows the procedure described in Wing et al., 2020

1. Read in clw, cli, ta and pa for each day (four time steps)
2. Take the mean over those 24 hours
3. Calculate the saturation mixing ratio and the cloud condensate (= cli + clw)
4. For each 32 x 32 block, calculate the proportion of points where the cloud condensate is greater than either 0.00001 or 0.01 x the saturation mixing ratio

In [7]:
# Time parameters for the 3D data
ts = 4  # number of timesteps averaged into one period
nd = int(ts * nodays)  # total number of timesteps over all periods
totalt = np.arange(nd)  # index of every timestep
# Row p of tbk lists the timestep indices that belong to period p
tbk = np.stack(np.split(totalt, nodays))
# NOTE: the message says "hour", but here ts counts timesteps, not hours.
print(f'takes the average over {nodays} periods of {ts} hour averages')

takes the average over 25 periods of 4 hour averages


In [8]:
# Cloud fraction: for every SST, day, level and spatial block, the fraction of
# grid points whose daily-mean cloud condensate (clw + cli) exceeds either
# 0.00001 or 0.01 x the saturation mixing ratio.
dp = '/home/users/arm33/RCEMIP/'+ model + '/processed_new'  
nd = nodays*ts  # total number of timesteps (not used to select files)
for temp in temps:

    datapath = '/badc/rcemip/data/'+ model +'/RCE_' + temp + '/3D/'
    # Each 3D file holds one day (it is averaged over its time axis below), so
    # the last `nodays` files are the last 25 days. Slicing with -nd instead
    # would select up to nodays*ts files and overrun the arrays below, whose
    # first dimension is nodays.
    datalist = sorted(os.listdir(datapath))[-nodays:] # last 25 days
    d = len(datalist)

    # NOTE: these two arrays hold the full-resolution daily means for every
    # day at once, which needs a large amount of memory.
    satmixr =np.empty((nodays, levsize,len(y_orig), len(x_orig) ))
    cloudcon = np.empty((nodays, levsize,len(y_orig), len(x_orig) ))
    for day, fname in enumerate(datalist):
        # the context manager closes the file even if a read fails
        with Dataset(datapath + fname) as nc:
            # daily means: average over the time steps held in this file
            clwf = np.mean(nc.variables['clw'][:], axis = 0)
            clif = np.mean(nc.variables['cli'][:], axis = 0)
            paf = np.mean(nc.variables['pa'][:], axis = 0)
            taf = np.mean(nc.variables['ta'][:], axis = 0)

        # metpy needs unit-tagged inputs
        pa = np.asarray(paf) * units.pascal
        ta = np.asarray(taf) * units.K

        satmixr[day] = metpy.calc.saturation_mixing_ratio(pa, ta)
        cloudcon[day] = clwf + clif
    print(np.shape(satmixr), np.shape(cloudcon))


    # NaN-filled so that a day with no input file is saved as missing rather
    # than being computed from uninitialised memory.
    cldfrac = np.full((nodays, levsize, ny, nx), np.nan)
    for t in range(d):
        print(t)
        # A grid point is cloudy if its condensate exceeds either threshold.
        cloudy = (cloudcon[t] > 0.00001) | (cloudcon[t] > 0.01*satmixr[t])
        for i in range(ny):
            y0 = ybk[i, 0]
            # last block in y: use all remaining grid points
            y1 = y_orig[-1] + 1 if i == ny - 1 else ybk[i, -1] + 1
            for j in range(nx):
                x0 = xbk[j, 0]
                # last block in x: ditto
                x1 = x_orig[-1] + 1 if j == nx - 1 else xbk[j, -1] + 1

                block = cloudy[:, y0:y1, x0:x1]
                no_grid_points = block.shape[1] * block.shape[2]
                # cloudy points per level divided by points in the block;
                # the same count as a point-by-point loop, for all levels at once
                cldfrac[t, :, i, j] = np.count_nonzero(block, axis=(1, 2)) / no_grid_points

    # ndarray.dump writes a pickle, so read the file back with
    # np.load(path, allow_pickle=True).
    cldfrac.dump(dp + '/3D/'+ temp + 'cldfrac_profile_25d.npy')

(25, 98, 144, 2016) (25, 98, 144, 2016)
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
