# Adding channels to the data
We append encoded coordinate channels to our (time, latitude, longitude, channels) input. The values of the encoded latitude and longitude channels lie in the range [-1, 1].

In [12]:
import numpy as np
import xarray as xr

# Import only the encoders this notebook calls instead of `import *`, so it is
# obvious where `encode_lat` / `encode_lon` come from and nothing else leaks
# into the notebook namespace.
from lossycomp.encodings import encode_lat, encode_lon

# Open every file matching the 1980 temperature pattern as a single dataset.
# NOTE(review): absolute cluster path; consider moving it to a config constant.
test = xr.open_mfdataset('/lsdf/kit/scc/projects/abcde/1980/*/ERA5.pl.temperature.nc', combine='by_coords')

# Cut out a small window in time, level, longitude and latitude.
# The latitude slice runs from the larger to the smaller value -- presumably
# because the latitude coordinate is stored in descending order (TODO confirm).
test_data = test.sel(
    time=slice('1980-01-14T16:00:00', '1980-01-15T07:00:00'),
    level=slice(978, 1000),
    longitude=slice(6, 20.75),
    latitude=slice(55.5, 43.75),
)

# Reorder the dimensions to (time, latitude, longitude, level).
test_data = test_data.transpose('time', 'latitude', 'longitude', 'level')
test_data

Unnamed: 0,Array,Chunk
Bytes,184.32 kB,184.32 kB
Shape,"(16, 48, 60, 1)","(16, 48, 60, 1)"
Count,38 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 184.32 kB 184.32 kB Shape (16, 48, 60, 1) (16, 48, 60, 1) Count 38 Tasks 1 Chunks Type float32 numpy.ndarray",16  1  1  60  48,

Unnamed: 0,Array,Chunk
Bytes,184.32 kB,184.32 kB
Shape,"(16, 48, 60, 1)","(16, 48, 60, 1)"
Count,38 Tasks,1 Chunks
Type,float32,numpy.ndarray


In [13]:
# Pull the latitude and longitude coordinate values of variable 't' out as arrays.

lat, lon = (
    test_data['t'].coords[axis_name].values
    for axis_name in ('latitude', 'longitude')
)

In [14]:
# Encode every latitude and longitude value and stack the per-value results
# into one array per coordinate. Comment these lines out if you don't want to
# encode the coordinates.

lat_st = np.stack(list(map(encode_lat, lat)))
lon_st = np.stack(list(map(encode_lon, lon)))

In [15]:
# Split each encoded array column-wise into two equal parts; these are the
# components the coordinate grids are built from.
# (np.split along axis 1 matches np.hsplit for arrays with 2 or more dimensions.)

lat1, lat2 = np.split(lat_st, 2, axis=1)
lon1, lon2 = np.split(lon_st, 2, axis=1)

In [16]:
# Build one pair of 2-D grids per encoded component. With 'xy' indexing (the
# np.meshgrid default) the first output varies along the columns (longitude
# component) and the second along the rows (latitude component).

xx, yy = np.meshgrid(lon1.ravel(), lat1.ravel(), indexing='xy')
xx2, yy2 = np.meshgrid(lon2.ravel(), lat2.ravel(), indexing='xy')

# Without encoding, build a single pair from the raw values instead:
#   xx, yy = np.meshgrid(lon, lat)

In [17]:
# Repeat each 2-D grid once per time step so that the four grids become
# (time, lat, lon) arrays.
#
# NOTE(review): `xx` / `xx2` come from the first argument of np.meshgrid (the
# longitude components) and `yy` / `yy2` from the second (the latitude
# components), so `coords_lat*` hold longitude-derived values and
# `coords_lon*` latitude-derived ones. The names are kept because later cells
# reference them; consider renaming them together.

n_steps = len(test_data.time)

coords_lat = np.stack([xx] * n_steps, axis=0)
coords_lon = np.stack([yy] * n_steps, axis=0)
coords_lat1 = np.stack([xx2] * n_steps, axis=0)
coords_lon1 = np.stack([yy2] * n_steps, axis=0)

# Without encoding only the first two statements (coords_lat, coords_lon) are needed.

In [18]:
# The model input is laid out as (time, lat, lon, value), so each coordinate
# grid needs a trailing axis of length 1.
#
# `reshape(shape[:3] + (1,))` is used instead of `np.expand_dims(..., axis=3)`
# so that the cell is idempotent: on the first run a (time, lat, lon) array
# becomes (time, lat, lon, 1), exactly as before; on a re-run the shape stays
# (time, lat, lon, 1) instead of growing a fifth axis that would break the
# concatenation with the temperature channel.

coords_lat = coords_lat.reshape(coords_lat.shape[:3] + (1,))
coords_lon = coords_lon.reshape(coords_lon.shape[:3] + (1,))

coords_lat1 = coords_lat1.reshape(coords_lat1.shape[:3] + (1,))
coords_lon1 = coords_lon1.reshape(coords_lon1.shape[:3] + (1,))

# Without encoding only coords_lat and coords_lon exist and need the extra axis.

In [21]:
# Extract the values of variable 't' so they can be concatenated with the
# coordinate channels.
temp = test_data['t'].values

# Display the shape as the cell result. The recorded output of this cell is a
# shape tuple, which the bare assignment above cannot produce on a fresh run.
temp.shape

(16, 48, 60, 1)

In [20]:
# Join the temperature values and the four coordinate grids along the channel
# axis (axis 3); the tuple order below is the resulting channel order.

channels = (temp, coords_lat, coords_lon, coords_lat1, coords_lon1)
all_val = np.concatenate(channels, axis=3)

# Without encoding, concatenate only (temp, coords_lat, coords_lon) along axis 3.

In [22]:
np.shape(all_val)  # the last entry is the number of channels

(16, 48, 60, 5)

In [23]:
# Inspect every channel at the first time step and the first grid point.
all_val[0, 0, 0]

array([ 2.76354767e+02, -1.04528463e-01, -9.33580426e-01, -9.94521895e-01,
        3.58367950e-01])

In [24]:
# Values of 't' over all time steps at latitude index 3, longitude index 1.
test_data['t'].isel(latitude=3, longitude=1).values

array([[276.54477],
       [276.76416],
       [276.74606],
       [276.70084],
       [276.7958 ],
       [276.89758],
       [276.70987],
       [276.88855],
       [276.979  ],
       [276.98807],
       [276.979  ],
       [276.94736],
       [276.9383 ],
       [276.94736],
       [277.01068],
       [277.11926]], dtype=float32)

In [29]:
# Channel 0 of the stacked array at (latitude index 3, longitude index 1) must
# equal the values selected directly from the dataset at the same point.
expected = test_data.isel(latitude=3, longitude=1).t.values
input_data = all_val[:, 3, 1, 0][:, np.newaxis]
assert np.all(expected == input_data), "Data is not the same."

True

## Testing

In [30]:
import numpy as np
import dask
from collections import OrderedDict
from lossycomp.dataLoader import DataGenerator, data_preprocessing, split_data

In [31]:
dask.config.set(**{'array.slicing.split_large_chunks': False})

# Inputs handed to data_preprocessing: file pattern, variable mapping, region name.
file = '/lsdf/kit/scc/projects/abcde/1979/*/ERA5.pl.temperature.nc'
region = "europe"
var = OrderedDict([('t', 1000)])

# data_preprocessing returns three values, bound here to z, mean and std.
z, mean, std = data_preprocessing(file, var, region)

# Split z with a ratio argument of 0.7.
# Note: this rebinds `test`, which the first cell used for the opened 1980 dataset.
train, test = split_data(z, 0.7)

# Sizes per dimension passed to the data generators below.
leads = {'time': 16, 'longitude': 49, 'latitude': 49, 'level': 1}

In [32]:
# Generator over the training split with the coordinate channels switched on.
dg_train = DataGenerator(
    train,
    10,
    leads,
    batch_size=1,
    load=True,
    mean=mean,
    std=std,
    coords=True,
)

Loading data into RAM


In [33]:
# Shape of the first sample in the first element of batch 0.
dg_train[0][0][0].shape

(16, 49, 49, 5)

In [34]:
# Same generator settings, but with the coordinate channels switched off.
dg_train2 = DataGenerator(
    train,
    10,
    leads,
    batch_size=1,
    load=True,
    mean=mean,
    std=std,
    coords=False,
)

Loading data into RAM


In [35]:
# Shape of the first sample in the first element of batch 0 (no coordinate channels).
dg_train2[0][0][0].shape

(16, 49, 49, 1)

In [36]:
# Channel 0 must be identical whether or not the coordinate channels are added.
with_coords = dg_train[0][0][0][..., 0]
without_coords = dg_train2[0][0][0][..., 0]
(with_coords == without_coords).all()

True