In [1]:
import numpy as np
import numpy.ma as ma
from netCDF4 import Dataset

In [None]:
class preprocess():
    def __init__(self, model, variable, timestep_loop, 
                 first_begin, first_end, last_begin, last_end, start_loop, stop_loop):
        self.model = model
        self.variable = variable
        self.timestep_loop = timestep_loop
        self.first_begin, self.first_end = first_begin, first_end
        self.last_begin, self.last_end = last_begin, last_end
        self.start_loop, self.stop_loop = start_loop, stop_loop

        self.ulim, self.llim = 30, -30 # upper and lower limits of latitude
        self.lt, self.ln = 120, 360 # grid number of latitude and longitude 
        self.upscale_rate = 5 # from 1x1 to 5x5
        self.data_num= 12*self.timestep_loop # number of data in one file
        self.first_data_num = 12*(self.first_end - self.first_begin + 1)
        self.last_data_num = 12*(self.last_end - self.last_begin + 1)
        self.loopyr = (self.stop_loop - self.start_loop)/self.timestep_loop
        self.tm = int(self.data_num*self.loopyr+self.first_data_num+self.last_data_num) # number of all data

        self.datadir = f"/work/kajiyama/cdo/cmip6/{self.model}/{self.variable}"
        self.first_file = f"{self.datadir}/{self.variable}_{self.first_begin}-{self.first_end}.nc"
        self.last_file = f"{self.datadir}/{self.variable}_{self.last_begin}-{self.last_end}.nc"
        self.save_file = f"/work/kajiyama/preprocessed/cmip6/{self.model}" \
                         f"/{self.variable}_{self.model}.pickle"
        
    def make_val(self, conc_flag, first_flag=True, loop_flag=True, last_flag=True):
        # making empty box for save file
        val = np.empty((self.tm, self.lt, self.ln))

        # first netCDF4 files
        if first_flag is True:
            val[:self.first_data_num, :, :] = self._fill(
                    self._load_val(self.first_file, self.first_data_num, conc_flag))

        # middle netcdf4 files
        # time augument count
        if loop_flag is True:
            ind = self.data_num 
            #
            for i in range(self.start_loop, self.stop_loop+1, self.timestep_loop):
                # 
                file = f"{self.datadir}/{self.variable}_{i}-{i+self.timestep_loop-1}.nc"
                val[(self.first_data_num + ind - self.data_num):(self.first_data_num + ind),:,:] = self._fill(
                        self._load_val(file, self.data_num, conc_flag))
                ind += self.data_num
                print(i, i+self.timestep_loop-1)

        # last netcdf4 files
        if last_flag is True:
            val[-self.last_data_num:, :, :] = self._fill(
                    self._load_val(self.last_file, self.last_data_num, conc_flag))
            val = self._mask(val)
            
        return val

    def _fill(self, x):
        f = ma.filled(x,fill_value=99999)
        return f

    def _mask(self, x):
        m = ma.masked_where(x>9999, x)
        return m

    def _conc(self, x):
        c = x.copy()
        c = self._fill(c)
        x1, x2 = c[:,:,-180:], c[:,:,:180]
        c = np.concatenate([x1,x2],2)
        c = self._mask(c)

        return c

    def _load_val(self, file, data_num, conc_flag=True):
        ds = Dataset(file, 'r')
        val = ds.variables[self.variable][:]
        val = val[:data_num, ::-1, :]
        if conc_flag is True:
            val = self._conc(val[:,self.ulim:self.llim,:])
        else:
            val = val[:, self.ulim:self.llim, :]
        return val

In [18]:
def main():
    """Load the first netCDF file of one model/variable and print its shape.

    Only the first file is read, so the "last" and "loop" settings below are
    placeholders that merely satisfy the ``preprocess`` constructor.
    """
    conc_flag = True
    model = 'BCC-CSM2-MR'
    variable = 'tos'
    first_begin, first_end = 1850, 2014
    last_begin, last_end = 99999, 99999
    # if no loop is required, edit like start_loop=stop_loop=1851 and timestep_loop > 0
    start_loop, stop_loop = 99999, 99999
    timestep_loop = 99999

    pre = preprocess(model, variable, timestep_loop, 
                     first_begin, first_end, last_begin, last_end, start_loop, stop_loop)

    # Request exactly the number of time steps of the first file.  The loop
    # count (pre.data_num) is derived from the placeholder timestep_loop and
    # would only work here because slicing clamps an oversized bound.
    tos = pre._load_val(pre.first_file, pre.first_data_num, conc_flag)
    print(tos.shape)

In [19]:
# Run the entry point, which prints the shape of the loaded array.
main()

(1980, 120, 360)
