# Converting Daily Data to Monthly

## Setting up Notebook

In [1]:
# imports
from __future__ import print_function  # For py2 compatibility
import cdms2, numpy, vcs, cdutil, MV2

In [2]:
# Create time axis: one value per day, counted from the reference date in `units`
ndays = 780
# Build the axis from ndays (not a repeated literal) so the axis length
# always matches the first dimension of the data created from ndays
tim = cdms2.createAxis(range(ndays))
tim.units = 'days since 2015-11-3'
tim.id = 'time'
tim.designateTime()
# Component-time view of the axis, kept to look up calendar dates by index
tim_ac = tim.asComponentTime()
# Create daily bounds
cdutil.times.setTimeBoundsDaily(tim)

In [3]:
# Build a Gaussian grid with 64 latitudes and pull out its two axes
grid = cdms2.createGaussianGrid(64)
lat, lon = grid.getAxisList()
# Axis lengths, used to size the data array
nlat, nlon = len(lat), len(lon)

In [4]:
# Random 3D daily field: 780 days on the T42 Gaussian grid
shape = (ndays, nlat, nlon)
# Uniform draw in [0, 1), rescaled to temperatures between -25 and 30C
uniform_draw = numpy.random.random(shape)
data = uniform_draw * 55. - 25.

In [5]:
# Mask every cell whose own random draw exceeds 0.85,
# so only about 15% of the data ends up masked
mask_draw = numpy.random.random(shape)
data = MV2.masked_where(mask_draw > .85, data)

In [6]:
# Name the variable, then attach the (time, latitude, longitude) axes
data.id = 'tas'
data.setAxisList((tim, lat, lon))

# Create Monthly Data

In [7]:
# The default grid has latitude reversed, so re-select it as -90..90 first
reoriented = data(latitude=(-90, 90))
monthly = cdutil.ANNUALCYCLE(reoriented)
print(monthly.shape)  # 26 months worth of data
tim_monthly = monthly.getTime()
tim_monthly_comp = tim_monthly.asComponentTime()  # for readability
first_month, last_month = tim_monthly_comp[0], tim_monthly_comp[-1]
print("First {}, and last: {}".format(first_month, last_month))

(26, 64, 128)
First 2015-11-16 0:0:0.0, and last: 2017-12-16 12:0:0.0


# How much data in each cell?
Technically, based on the time axis, we had data ***every*** day.

In practice, on a ***per cell basis***, the number of days having data varied. Let's retrieve which days were actually used for each month.

In [8]:
# One slicer result per calendar month (1 = January ... 12 = December),
# all computed against the daily time axis
selectors = [
    cdutil.ANNUALCYCLE.slicer(tim, [month_number])
    for month_number in range(1, 13)
]

We retrieve the criteria used for each individual month.

### indices
For each month we get back 3 components. The first component contains the indices used for each month of each year.

In [9]:
# Let's take a look at the Januaries, Februaries and Novembers
# (selectors is ordered by month, starting at position 0 for January)
jan, feb, nov = (selectors[position] for position in (0, 1, 10))

In [10]:
# The first component holds one entry per year:
# we had data for 2 Januaries and 3 Novembers
n_januaries, n_novembers = len(jan[0]), len(nov[0])
print(n_januaries, n_novembers)

2 3


In [11]:
# For example the indidces used for January 2018 were:
print("2018:", jan[0][0])
# while the indices used for jan 2019 were:
print("2019:", jan[0][1])

2018: [59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89]
2019: [425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455]


### Bounds
The second component contains the bounds of each index; these are used to **weight** each slice appropriately. In this case each day was full, hence every slice is weighted 1.

Again these are listed per year

In [12]:
print('2018 bounds:', jan[1][0])

2018 bounds: [[59.0, 60.0], [60.0, 61.0], [61.0, 62.0], [62.0, 63.0], [63.0, 64.0], [64.0, 65.0], [65.0, 66.0], [66.0, 67.0], [67.0, 68.0], [68.0, 69.0], [69.0, 70.0], [70.0, 71.0], [71.0, 72.0], [72.0, 73.0], [73.0, 74.0], [74.0, 75.0], [75.0, 76.0], [76.0, 77.0], [77.0, 78.0], [78.0, 79.0], [79.0, 80.0], [80.0, 81.0], [81.0, 82.0], [82.0, 83.0], [83.0, 84.0], [84.0, 85.0], [85.0, 86.0], [86.0, 87.0], [87.0, 88.0], [88.0, 89.0], [89.0, 90.0]]


### Summary

The last component gives, for each year, the total weight and the start of the first bounds. Note that our first February has 29 days.

In [13]:
# Third component of each selection: per year, [total weight, start of first bounds]
for label, selection in (('January', jan), ('February:', feb)):
    print(label, selection[2])

January [[31.0, 59.0], [31.0, 425.0]]
February: [[29.0, 90.0], [28.0, 456.0]]


Again, this is based ***solely*** on the time axis. In reality each cell had data on and off, so while the generated monthly data shows data everywhere, we want to know how many days were actually used in each cell.

In [14]:
# Boxfill graphics method, plus a canvas created with bg=True
gm = vcs.createboxfill()
x = vcs.init(bg=True)
x.plot(monthly, gm)  # data everywhere

<vcs.displayplot.Dp at 0x7f5a667c2ba8>

In [15]:
# For every (month, year) pair, count per cell how many daily values were
# not masked, and record the month's start time in common units.
out_data = []
out_time_values = []
for month_selection in selectors:  # one selection per calendar month
    for day_indices in month_selection[0]:  # one list of day indices per year
        first_day, last_day = day_indices[0], day_indices[-1]
        # The time axis is lost, so rebuild the time value from the first
        # day of the month, expressed in unified units
        month_start = tim_ac[first_day].torel("months since 2015")
        daily_chunk = data(time=slice(first_day, last_day + 1))
        # Per-cell count of the days that were available in this month
        out_data.append(MV2.count(daily_chunk, axis=0))
        out_time_values.append(month_start.value)

print(out_time_values)  # not ordered

x.clear()
x.plot(out_data[0])  # First jan

[12.0, 24.0, 13.0, 25.0, 14.0, 26.0, 15.0, 27.0, 16.0, 28.0, 17.0, 29.0, 18.0, 30.0, 19.0, 31.0, 20.0, 32.0, 21.0, 33.0, 10.0, 22.0, 34.0, 11.0, 23.0, 35.0]


<vcs.displayplot.Dp at 0x7f5a667c2d48>

In [16]:
# Permutation that puts the recorded month values into increasing order
indices = numpy.asarray(out_time_values).argsort()
print(indices)

[20 23  0  2  4  6  8 10 12 14 16 18 21 24  1  3  5  7  9 11 13 15 17 19
 22 25]


In [17]:
# `indices` is the argsort of out_time_values. The same permutation must be
# applied to BOTH the time values and the data, otherwise the time axis
# (unsorted) would not line up with the reordered counts.
sorted_time_values = numpy.take(out_time_values, indices)

# Reorder the per-month counts chronologically.
# Equivalent pure-Python form: [out_data[index] for index in indices]
new = MV2.array(numpy.take(out_data, indices, axis=0))

# Time axis in the same units used when the month starts were converted
sub_time = cdms2.createAxis(sorted_time_values)
sub_time.designateTime()
sub_time.units = "months since 2015"
new.setAxisList((sub_time, lat, lon))

# Open the file only once the data is ready, and always close it,
# even if the write fails
f = cdms2.open("monthly_number_of_data.nc", "w")
try:
    f.write(new, id=data.id)
finally:
    f.close()

You can query different values of compression using the functions:
cdms2.getNetcdfShuffleFlag() returning 1 if shuffling is enabled, 0 otherwise
cdms2.getNetcdfDeflateFlag() returning 1 if deflate is used, 0 otherwise
cdms2.getNetcdfDeflateLevelFlag() returning the level of compression for the deflate method

If you want to turn that off or set different values of compression use the functions:
value = 0
cdms2.setNetcdfShuffleFlag(value) ## where value is either 0 or 1
cdms2.setNetcdfDeflateFlag(value) ## where value is either 0 or 1
cdms2.setNetcdfDeflateLevelFlag(value) ## where value is a integer between 0 and 9 included

To produce NetCDF3 Classic files use:
cdms2.useNetCDF3()
To Force NetCDF4 output with classic format and no compressing use:
cdms2.setNetcdf4Flag(1)
NetCDF4 file with no shuffling or deflate and noclassic will be open for parallel i/o
