In [1]:
# Stretch the notebook container to the full browser width.
# `display`/`HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>") )

In [10]:
import numpy as np
from netCDF4 import Dataset, num2date
import gdal
import pupygrib


import os

# File names of the model-field and rainfall NetCDF datasets.
fn_mf = "ana_input_1.nc"
fn_rain = "rr_ens_mean_0.1deg_reg_v20.0e_197901-201907_uk.nc"

# Directory that holds the rainfall file. Set the RAIN_DATA_DIR environment
# variable to run the notebook on another machine; the default keeps the
# location that was previously hard-coded here.
RAIN_DATA_DIR = os.environ.get("RAIN_DATA_DIR", "/home/u1862646/ATI/BNN/Data/Rain_Data_Nov19")

# Full path of the rainfall file, built from the directory and `fn_rain` so
# the file name is only written once.
fn = os.path.join(RAIN_DATA_DIR, fn_rain)

# Testing Sherman's new data extraction methods


## Rain 

In [None]:
# Open the rainfall NetCDF file read-only and keep a handle on its variables mapping.
dataset_rain = Dataset(fn_rain, "r", format="NETCDF4")
data_rain = dataset_rain.variables
# NOTE(review): `__doc__` is the docstring of the Dataset class, not a summary of
# this particular file — confirm whether print(dataset_rain) was intended.
print(dataset_rain.__doc__)

In [None]:
# Show the type of the object returned by Dataset(...).
type(dataset_rain)

In [None]:
# `data_rain` is a mapping from variable name to variable; iterating it
# yields the names, which are printed one per line.
print("The name of the variables are:")
for var_name in data_rain:
    print(var_name)
print()


In [None]:
# Print the full description of every variable, one block per variable.
print("For each of the variables, here are the properties:\n")
for var_name in data_rain:
    print("Variable: " + var_name)
    print(data_rain[var_name])
    print()

In [None]:
# Mask of the 'rr' field at index 1000 along its first axis.
a = data_rain['rr'][1000].mask
# Fraction of entries in that mask that are non-zero (i.e. masked).
np.count_nonzero(a)/np.size(a)

In [None]:
# Indices of the entries of the mask `a` that are False (not masked).
# `np.argwhere(a=False)` passed the scalar False as the input array (the
# function's parameter is itself called `a`) and so always returned an empty
# result; the mask has to be negated instead.
np.argwhere(np.logical_not(a))

In [None]:
# Shape of the rainfall variable. The variables mapping of the rain file is
# `data_rain` (`data` is not defined at this point in a fresh run), and the
# variable exposes `.shape` directly, so the whole array need not be loaded.
data_rain['rr'].shape

In [None]:
# First 20 entries of the time axis of the rain file. Reads from `data_rain`;
# `data` is not defined at this point in a fresh run.
data_rain['time'][:20]

## model fields

In [None]:
# Open the model-field NetCDF file read-only and keep a handle on its variables mapping.
mf_dataset = Dataset(fn_mf, "r", format="NETCDF4")
mf_data = mf_dataset.variables

In [None]:
# `mf_data` is a mapping from variable name to variable; iterating it
# yields the names, which are printed one per line.
print("The name of the variables are:")
for var_name in mf_data:
    print(var_name)
print()


In [None]:
# Print the full description of every model-field variable, one block per variable.
print("For each of the variables, here are the properties:\n")
for var_name in mf_data:
    print("Variable: " + var_name)
    print(mf_data[var_name])
    print()

In [None]:
# Shape of the air-temperature model field.
mf_data['air_temperature'].shape

# Testing Data Generators

In [7]:
import tensorflow as tf
import netCDF4
from netCDF4 import Dataset
import gdal
import numpy as np

#TODO(akanni-ade): add the ability to return the longitude/latitude variables (implement long/lat)

"""
    Example of how to use
    import Generators

    rr_ens file 
    _filename = "Data/Rain_Data/rr_ens_mean_0.1deg_reg_v20.0e_197901-201907_djf_uk.nc"
    rain_gen = Generator_rain(_filename, all_at_once=True)
    data = next(iter(rain_gen))

    Grib Files
    _filename = 'Data/Rain_Data/ana_coarse.grib'
    grib_gen = Generators.Generator_grib(fn=_filename, all_at_once=True)
    data = next(iter(grib_gen))

    Grib Files Location:
    _filename = 'Data/Rain_Data/ana_coarse.grib'
    grib_gen = Generators.Generator_grib(fn=_filename, all_at_once=True)
    arr_long, arr_lat = grib_gen.location()
    #now say you are investigating the datum x = data[15,125]
    #   to get the longitude and latitude you must do
    long, lat = arr_long(15,125), arr_lat(15,125)


"""
class Generator():
    """
        Base class for the data generators in this notebook.

        Subclasses override `yield_all` and `yield_iter`. Calling an instance
        dispatches to one of the two depending on `all_at_once`.

        :param fn: path of the file the generator reads from

        :param all_at_once: if True, `__call__` uses `yield_all`, otherwise
            it uses `yield_iter`

        :param train_size: stored on the instance; not used by this base class

        :param channel: stored on the instance; not used by this base class
    """

    def __init__(self, fn = "", all_at_once=False, train_size=0.75, channel=None ):
        self.generator = None
        self.all_at_once = all_at_once
        self.fn = fn
        # Previously accepted but silently dropped; kept so subclasses can read it.
        self.train_size = train_size
        self.channel = channel

    def yield_all(self, start_idx=0):
        """Placeholder: subclasses return everything from `start_idx` on in one go."""
        pass

    def yield_iter(self, start_idx=0):
        """
        Placeholder: subclasses return the data one element at a time.

        Accepts `start_idx` because `__call__` always passes its argument
        here; without the parameter the base class raised a TypeError.
        """
        pass

    def long_lat(self):
        """Placeholder for returning the longitude/latitude of the data."""
        pass

    def __call__(self,x=0):
        """
        Return the result of `yield_all()` or `yield_iter(x)`.

        :param x: start index forwarded to `yield_iter`; it is not forwarded
            to `yield_all`, which is called without arguments
        """
        if self.all_at_once:
            return self.yield_all()
        return self.yield_iter(x)

# class Generator_rain(Generator):
#     def __init__(self, **generator_params):
#         super(Generator_rain, self).__init__(**generator_params)

#     def yield_all(self):
#         with Dataset(self.fn, "r", format="NETCDF4") as f:
#             _data = f.variables['rr'][:]
#             yield np.ma.getdata(_data), np.ma.getmask(_data)
    
#     def yield_iter(self):
#         with Dataset(self.fn, "r", format="NETCDF4") as f:
#             for chunk in f.variables['rr']:
#                 yield np.ma.getdata(chunk), np.ma.getmask(chunk)
        

class Generator_mf(Generator):
    """
        Creates a generator for the model_fields_data

        :param fn: path of the NetCDF file holding the model fields

        :param all_at_once: whether to return all data, or return data in batches
            (returning everything at once is not implemented for this class)

        :param channel: the desired channel of information in the file.
            NOTE(review): stored by the base class but not applied here; all
            variables in `vars_for_feature` are always stacked together.
    """

    def __init__(self, **generator_params):

        super(Generator_mf, self).__init__(**generator_params)

        # Names of the NetCDF variables stacked, in this order, along the last axis.
        self.vars_for_feature = ['unknown_local_param_137_128', 'unknown_local_param_133_128', 'air_temperature', 'geopotential', 'x_wind', 'y_wind' ]
        self.channel_count = len(self.vars_for_feature)

    def yield_all(self, start_idx=0):
        raise NotImplementedError

    def yield_iter(self, start_idx=0):
        """
        Yield one `(data, mask)` pair per index along the first axis.

        Both arrays carry the feature variables stacked along a new last
        axis. `start_idx` is accepted because `Generator.__call__` always
        passes it (the old zero-argument signature raised a TypeError); it
        follows slice semantics, so negative values count from the end.

        :param start_idx: index along the first axis to start from
        """
        with Dataset(self.fn, "r", format="NETCDF4") as f:
            feature_vars = [ f.variables[var_name] for var_name in self.vars_for_feature ]

            # Stop at the shortest variable, as zipping the variables did.
            n_steps = min( var.shape[0] for var in feature_vars )

            for idx in range(n_steps)[start_idx:]:
                fields = [ var[idx] for var in feature_vars ]

                stacked_data = np.stack( [ np.ma.getdata(field) for field in fields ], axis=-1 )
                # getmaskarray always returns a full-shape boolean array, so
                # the masks stack to the same shape as the data even when a
                # field has no masked entries.
                stacked_masks = np.stack( [ np.ma.getmaskarray(field) for field in fields ], axis=-1 )

                yield stacked_data, stacked_masks

    def location(self):
        """
        Intended to return two 1D arrays (not implemented yet):
            arr_long: Longitudes
            arr_lat: Latitudes
        """
        raise NotImplementedError
        
class Generator_rain2(Generator):
    """
        Creates a generator for the rainfall variable 'rr' of a NetCDF file.

        Every yielded item is a `(data, mask)` pair of arrays with the same
        shape; `mask` is True where the corresponding value is masked.

        :param fn: path of the NetCDF file

        :param all_at_once: whether to return all data in a single item, or
            one item per index along the first axis
    """

    def __init__(self, **generator_params):
        super(Generator_rain2, self).__init__(**generator_params)

    def yield_all(self,start_idx=0):
        """Yield a single `(data, mask)` pair covering `rr[start_idx:]`."""
        with Dataset(self.fn, "r", format="NETCDF4") as f:
            _data = f.variables['rr'][start_idx:]
            # getmaskarray gives a full-shape mask even if nothing is masked.
            yield np.ma.getdata(_data), np.ma.getmaskarray(_data)

    def yield_iter(self,start_idx=0):
        """
        Yield one `(data, mask)` pair per index along the first axis of 'rr'.

        The file is opened in a `with` block so it is closed once the
        generator is exhausted or discarded (it used to stay open), and each
        step is read on demand instead of loading `rr[start_idx:]` up front.
        `start_idx` follows slice semantics, as before.
        """
        with Dataset(self.fn, "r", format="NETCDF4") as f:
            rain = f.variables['rr']
            for idx in range(rain.shape[0])[start_idx:]:
                chunk = rain[idx]
                yield np.ma.getdata(chunk), np.ma.getmaskarray(chunk)

## rain generator

In [12]:
# Build the step-by-step rainfall generator and take its first (data, mask) pair.
rain_gen2 = Generator_rain2(fn=fn, all_at_once=False)()
data = next(rain_gen2)
data

(array([[9.96921e+36, 9.96921e+36, 9.96921e+36, ..., 0.00000e+00,
         0.00000e+00, 0.00000e+00],
        [9.96921e+36, 9.96921e+36, 9.96921e+36, ..., 0.00000e+00,
         0.00000e+00, 0.00000e+00],
        [9.96921e+36, 9.96921e+36, 9.96921e+36, ..., 0.00000e+00,
         0.00000e+00, 0.00000e+00],
        ...,
        [9.96921e+36, 9.96921e+36, 9.96921e+36, ..., 9.96921e+36,
         9.96921e+36, 9.96921e+36],
        [9.96921e+36, 9.96921e+36, 9.96921e+36, ..., 9.96921e+36,
         9.96921e+36, 9.96921e+36],
        [9.96921e+36, 9.96921e+36, 9.96921e+36, ..., 9.96921e+36,
         9.96921e+36, 9.96921e+36]], dtype=float32),
 array([[ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True],
        [ True,  True,  True, ...,  True,  True,  True]]))

In [9]:
# Show the rainfall file name currently configured.
fn_rain

'rr_ens_mean_0.1deg_reg_v20.0e_197901-201907_uk.nc'

In [15]:
def _rain_samples():
    """Start a fresh step-by-step rainfall generator from index 0."""
    return Generator_rain2(fn=fn, all_at_once=False)(0)

# Wrap the generator in a tf.data pipeline whose elements are
# (float32 data, bool mask) pairs.
ds_tar = tf.data.Dataset.from_generator(
    _rain_samples,
    output_types=(tf.float32, tf.bool),
)

In [16]:
# Pull the first (data, mask) element out of the tf.data pipeline.
next(iter(ds_tar) )

(<tf.Tensor: id=95, shape=(100, 140), dtype=float32, numpy=
 array([[9.96921e+36, 9.96921e+36, 9.96921e+36, ..., 0.00000e+00,
         0.00000e+00, 0.00000e+00],
        [9.96921e+36, 9.96921e+36, 9.96921e+36, ..., 0.00000e+00,
         0.00000e+00, 0.00000e+00],
        [9.96921e+36, 9.96921e+36, 9.96921e+36, ..., 0.00000e+00,
         0.00000e+00, 0.00000e+00],
        ...,
        [9.96921e+36, 9.96921e+36, 9.96921e+36, ..., 9.96921e+36,
         9.96921e+36, 9.96921e+36],
        [9.96921e+36, 9.96921e+36, 9.96921e+36, ..., 9.96921e+36,
         9.96921e+36, 9.96921e+36],
        [9.96921e+36, 9.96921e+36, 9.96921e+36, ..., 9.96921e+36,
         9.96921e+36, 9.96921e+36]], dtype=float32)>,
 <tf.Tensor: id=96, shape=(100, 140), dtype=bool, numpy=
 array([[ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False],
        [ True,  True,  True, ..., False, False, False],
        ...,
        [ True,  True,  True, ...,  True,  True,  True],

## model field generator

In [None]:
# Build the step-by-step model-field generator and take its first (data, mask) pair.
mf_gen = Generator_mf(fn=fn_mf, all_at_once=False)()
data = next(mf_gen)

In [None]:
# Shape of the stacked data array (first element of the yielded pair).
data[0].shape

In [None]:
# Shape of the stacked mask array. The generator yields (data, mask) pairs and
# no variable called `mask` is defined, so the mask is read from `data[1]`.
data[1].shape

# Fixing mismatch in size of Feature and Target

In [None]:
# Coordinate axes of the rainfall (target) grid; trailing comments note their lengths.
rain_lat = data_rain['latitude'] #100
rain_lon = data_rain['longitude'] #140

# Coordinate axes of the model-field (feature) grid.
mf_latlon = mf_data['latitude_longitude']
mf_lat = mf_data['latitude'] #103
# The assignment target here was a pasted file name (a syntax error); the
# cells below read this value as `mf_lon`.
mf_lon = mf_data['longitude'] #144

In [None]:
# All latitude values of the rainfall grid.
rain_lat[:]

In [None]:
# Model-field latitudes in reversed order.
mf_lat[::-1]

In [None]:
#Latitude transformation 
# Reverse the model-field latitudes, then drop the first entry and the last two
# (103 -> 100 values, per the length notes where these axes are loaded) and
# subtract the rainfall latitudes to inspect how well the two grids line up.
mf_lat[::-1][1:-2] - rain_lat[:]

In [None]:
# Same selection as reversing and then taking [1:-2], written as one reversed slice.
mf_lat[-2:1:-1] - rain_lat[:]

In [None]:
# All longitude values of the rainfall grid.
rain_lon[:]

In [None]:
# All longitude values of the model-field grid.
mf_lon[:]

In [None]:
# Drop the first two and last two model-field longitudes (144 -> 140 values, per
# the length notes where these axes are loaded) and compare with the rainfall longitudes.
rain_lon[:] - mf_lon[2:-2]