### Notebook to explore use of xarray in RASCIL

xarray must be installed:

    pip install xarray

In [1]:
import xarray

import numpy

from astropy.coordinates import SkyCoord
import astropy.units as u
from astropy.time import Time

from rascil.data_models.memory_data_models import Visibility, QA
from rascil.data_models.polarisation import PolarisationFrame
from rascil.processing_components import create_named_configuration, \
    create_visibility

Create a standard RASCIL Visibility object

In [2]:
# Telescope layout: the named 'LOWBD2-CORE' configuration.
lowcore = create_named_configuration('LOWBD2-CORE')

# Sample points from -4 h up to (but excluding) +4 h in 1800 s steps,
# scaled by pi / 43200 so that 12 h worth of seconds maps onto pi.
# NOTE(review): presumably these are hour angles in radians - confirm
# against create_visibility.
times = numpy.arange(-4 * 3600, +4 * 3600.0, 1800) * (numpy.pi / 43200.0)

# Three channels evenly spaced between 1.0e8 and 1.1e8, each with a
# channel bandwidth of 1e7 (presumably Hz - confirm).
frequency = numpy.linspace(1.0e8, 1.1e8, num=3)
channel_bandwidth = numpy.full(3, 1e7)

# Define the component and give it some spectral behaviour: one row of
# four flux values per channel, scaled by 1.0, 0.8 and 0.6 respectively.
f = numpy.array([100.0, 20.0, -10.0, 1.0])
flux = numpy.array([scale * f for scale in (1.0, 0.8, 0.6)])

# Pointing direction of the observation.
phasecentre = SkyCoord(ra=+180.0 * u.deg, dec=-35.0 * u.deg,
                       frame='icrs', equinox='J2000')

# Assemble the Visibility for this layout, sampling and pointing, using the
# linear polarisation frame and unit weights.
vis = create_visibility(
    lowcore,
    times,
    frequency,
    channel_bandwidth=channel_bandwidth,
    phasecentre=phasecentre,
    integration_time=30.0,
    polarisation_frame=PolarisationFrame("linear"),
    weight=1.0,
)
print(vis)

Visibility:
	Source: unknown
	Number of visibilities: 657360
	Number of channels: 3
	Frequency: [1.00e+08 1.05e+08 1.10e+08]
	Channel bandwidth: [10000000.]
	Number of polarisations: 4
	Visibility shape: (657360, 4)
	Number flags: 0
	Polarisation Frame: linear
	Phasecentre: <SkyCoord (ICRS): (ra, dec) in deg
    (180., -35.)>
	Configuration: LOWBD2-CORE
	Metadata: None



First construct an xarray.DataArray for the visibility. We name the dimensions and give the coordinates.

In [6]:
# Wrap the raw visibility samples in a labelled array: the first axis is
# labelled by vis.time, the second by the polarisation frame's names.
xvis_array = xarray.DataArray(
    data=vis.vis,
    coords={
        "time": vis.time,
        "polarisation": vis.polarisation_frame.names,
    },
    dims=("time", "polarisation"),
)
print(xvis_array)

<xarray.DataArray (time: 657360, polarisation: 4)>
array([[0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
       [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
       [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
       ...,
       [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
       [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j],
       [0.+0.j, 0.+0.j, 0.+0.j, 0.+0.j]])
Coordinates:
  * time          (time) float64 5.085e+09 5.085e+09 ... 5.085e+09 5.085e+09
  * polarisation  (polarisation) <U2 'XX' 'XY' 'YX' 'YY'


Now we can create an xarray.Dataset, which is a dictionary-like container of DataArrays plus a dictionary of attributes.

In [8]:
# Build an xarray.Dataset holding every per-sample column of the Visibility.
# All variables share the "time" dimension; 2-D variables add either
# "polarisation" or "spatial" as their second dimension.
dims = ("time", "polarisation", "spatial")

# Coordinate labels for each dimension. The "spatial" axis only gets
# placeholder zeros as labels.
coords = {"time": vis.time,
          "polarisation": vis.polarisation_frame.names,
          "spatial": numpy.zeros([3])}

xvis_dict = {}
xvis_dict["visibility"] = xarray.DataArray(vis.vis, dims=["time", "polarisation"])
xvis_dict["uvw"] = xarray.DataArray(vis.uvw, dims=["time", "spatial"])
xvis_dict["uvwdist"] = xarray.DataArray(vis.uvwdist, dims=["time"])
# vis.time is divided by 86400 (seconds per day) and interpreted as a UTC MJD,
# then converted to numpy datetime64 so it can be compared against dates.
xvis_dict["datetime"] = \
    xarray.DataArray(Time(vis.time / 86400.0, format='mjd', scale='utc').datetime64, dims=["time"])
xvis_dict["weight"] = xarray.DataArray(vis.weight, dims=["time", "polarisation"])
xvis_dict["imaging_weight"] = xarray.DataArray(vis.imaging_weight,
                                               dims=["time", "polarisation"])
xvis_dict["flags"] = xarray.DataArray(vis.flags, dims=["time", "polarisation"])
xvis_dict["frequency"] = xarray.DataArray(vis.frequency, dims=["time"])
xvis_dict["channel_bandwidth"] = xarray.DataArray(vis.channel_bandwidth, dims=["time"])
xvis_dict["integration_time"] = xarray.DataArray(vis.integration_time, dims=["time"])
# antenna1 / antenna2 are required by the selection, sorting and indexing
# cells further down (xvis.antenna1, sortby("antenna2"), set_index). Without
# them those cells fail after a fresh Restart & Run All.
xvis_dict["antenna1"] = xarray.DataArray(vis.antenna1, dims=["time"])
xvis_dict["antenna2"] = xarray.DataArray(vis.antenna2, dims=["time"])

xvis = xarray.Dataset(xvis_dict, coords=coords)
# Carry the scalar metadata of the Visibility across as Dataset attributes.
xvis.attrs['source'] = vis.source
xvis.attrs['meta'] = vis.meta

xarray has excellent informative prints for most objects.

In [5]:
# Show the Dataset summary: dimensions, coordinates and data variables.
print(xvis)

<xarray.Dataset>
Dimensions:            (polarisation: 4, spatial: 3, time: 657360)
Coordinates:
  * time               (time) float64 5.085e+09 5.085e+09 ... 5.085e+09
  * polarisation       (polarisation) <U2 'XX' 'XY' 'YX' 'YY'
  * spatial            (spatial) float64 0.0 0.0 0.0
Data variables:
    visibility         (time, polarisation) complex128 0j 0j 0j 0j ... 0j 0j 0j
    uvw                (time, spatial) float64 10.69 12.45 16.41 ... 31.07 74.17
    uvwdist            (time) float64 16.41 17.23 18.05 ... 67.43 70.8 74.17
    datetime           (time) datetime64[ns] 2020-01-01T17:30:36.436666595 .....
    weight             (time, polarisation) float64 1.0 1.0 1.0 ... 1.0 1.0 1.0
    imaging_weight     (time, polarisation) float64 1.0 1.0 1.0 ... 1.0 1.0 1.0
    flags              (time, polarisation) int64 0 0 0 0 0 0 0 ... 0 0 0 0 0 0
    frequency          (time) float64 1e+08 1.05e+08 ... 1.05e+08 1.1e+08
    channel_bandwidth  (time) float64 1e+07 1e+07 1e+07 ... 1e+07 1

Take a slice in time and polarisation, by integer position

In [6]:
# Positional selection: samples 100..109 along "time" and only the first
# entry along "polarisation" (kept as a length-1 axis).
print(xvis["visibility"].isel(time=slice(100, 110), polarisation=slice(0, 1)))

<xarray.DataArray 'visibility' (time: 10, polarisation: 1)>
array([[0.+0.j],
       [0.+0.j],
       [0.+0.j],
       [0.+0.j],
       [0.+0.j],
       [0.+0.j],
       [0.+0.j],
       [0.+0.j],
       [0.+0.j],
       [0.+0.j]])
Coordinates:
  * time          (time) float64 5.085e+09 5.085e+09 ... 5.085e+09 5.085e+09
  * polarisation  (polarisation) <U2 'XX'


By label

In [7]:
# Label-based selection: keep only the "XX" and "YY" polarisations.
print(xvis.sel(polarisation=["XX", "YY"]))

<xarray.Dataset>
Dimensions:            (polarisation: 2, spatial: 3, time: 657360)
Coordinates:
  * time               (time) float64 5.085e+09 5.085e+09 ... 5.085e+09
  * polarisation       (polarisation) <U2 'XX' 'YY'
  * spatial            (spatial) float64 0.0 0.0 0.0
Data variables:
    visibility         (time, polarisation) complex128 0j 0j 0j 0j ... 0j 0j 0j
    uvw                (time, spatial) float64 10.69 12.45 16.41 ... 31.07 74.17
    uvwdist            (time) float64 16.41 17.23 18.05 ... 67.43 70.8 74.17
    datetime           (time) datetime64[ns] 2020-01-01T17:30:36.436666595 .....
    weight             (time, polarisation) float64 1.0 1.0 1.0 ... 1.0 1.0 1.0
    imaging_weight     (time, polarisation) float64 1.0 1.0 1.0 ... 1.0 1.0 1.0
    flags              (time, polarisation) int64 0 0 0 0 0 0 0 ... 0 0 0 0 0 0
    frequency          (time) float64 1e+08 1.05e+08 ... 1.05e+08 1.1e+08
    channel_bandwidth  (time) float64 1e+07 1e+07 1e+07 ... 1e+07 1e+07 1e+07

By antenna1

In [8]:
# Mask the Dataset wherever antenna1 is not below 10 (masked entries become
# NaN), then show the uvw variable.
antenna1_below_10 = xvis["antenna1"] < 10
print(xvis.where(antenna1_below_10)["uvw"])

<xarray.DataArray 'uvw' (time: 657360, spatial: 3)>
array([[10.69008852, 12.4457611 , 16.40655241],
       [11.22459295, 13.06804915, 17.22688003],
       [11.75909738, 13.69033721, 18.04720765],
       ...,
       [        nan,         nan,         nan],
       [        nan,         nan,         nan],
       [        nan,         nan,         nan]])
Coordinates:
  * time     (time) float64 5.085e+09 5.085e+09 ... 5.085e+09 5.085e+09
  * spatial  (spatial) float64 0.0 0.0 0.0


By uvw distance

In [9]:
# Mask the Dataset wherever uvwdist is not below 40.0 (masked entries become
# NaN), then show the uvw variable.
uvwdist_below_40 = xvis["uvwdist"] < 40.0
print(xvis.where(uvwdist_below_40)["uvw"])

<xarray.DataArray 'uvw' (time: 657360, spatial: 3)>
array([[10.69008852, 12.4457611 , 16.40655241],
       [11.22459295, 13.06804915, 17.22688003],
       [11.75909738, 13.69033721, 18.04720765],
       ...,
       [        nan,         nan,         nan],
       [        nan,         nan,         nan],
       [        nan,         nan,         nan]])
Coordinates:
  * time     (time) float64 5.085e+09 5.085e+09 ... 5.085e+09 5.085e+09
  * spatial  (spatial) float64 0.0 0.0 0.0


By time

In [10]:
# Keep only samples taken after the cut-off instant; earlier samples are
# masked and show up as NaT in the datetime variable.
after_cutoff = xvis["datetime"] > numpy.datetime64("2020-01-01T23:00:00")
print(xvis.where(after_cutoff)["datetime"])

<xarray.DataArray 'datetime' (time: 657360)>
array([                          'NaT',                           'NaT',
                                 'NaT', ...,
       '2020-01-02T00:59:22.717916630', '2020-01-02T00:59:22.717916630',
       '2020-01-02T00:59:22.717916630'], dtype='datetime64[ns]')
Coordinates:
  * time     (time) float64 5.085e+09 5.085e+09 ... 5.085e+09 5.085e+09


Sorting: the visibility is constructed so that antenna1 varies slowest. Let's try a sort by antenna2.

In [11]:
# Reorder every variable by ascending antenna2 and show the sorted column.
xvis_by_antenna2 = xvis.sortby("antenna2")
print(xvis_by_antenna2["antenna2"])

<xarray.DataArray 'antenna2' (time: 657360)>
array([  1,   1,   1, ..., 165, 165, 165])
Coordinates:
  * time     (time) float64 5.085e+09 5.085e+09 ... 5.085e+09 5.085e+09


Rebinning in one data coordinate - let's try bins in uvwdist. Only print out the uvwdist range and number of samples
for each bin.

In [12]:
# Split the Dataset into 25 bins of uvwdist and, for each bin, print its
# interval and the number of samples along "time". Timed once (-n 1 -r 1).
%timeit -n 1 -r 1 for result in xvis.groupby_bins("uvwdist", bins=25): print(result[0], result[1].dims['time'])


(7.509, 19.087] 17095
(19.087, 30.384] 28203
(30.384, 41.68] 39165
(41.68, 52.976] 46858
(52.976, 64.272] 52582
(64.272, 75.568] 55662
(75.568, 86.864] 56502
(86.864, 98.16] 55611
(98.16, 109.456] 52740
(109.456, 120.752] 48051
(120.752, 132.048] 42634
(132.048, 143.344] 36774
(143.344, 154.64] 30309
(154.64, 165.936] 24541
(165.936, 177.232] 19332
(177.232, 188.528] 15286
(188.528, 199.825] 11626
(199.825, 211.121] 8735
(211.121, 222.417] 6226
(222.417, 233.713] 4339
(233.713, 245.009] 2691
(245.009, 256.305] 1482
(256.305, 267.601] 639
(267.601, 278.897] 233
(278.897, 290.193] 44
4.47 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


Try to index by antenna1, antenna2

In [13]:
# Combine antenna1 and antenna2 into a MultiIndex coordinate called "index".
# This rebinds xvis, so every later cell sees the indexed Dataset.
# NOTE(review): the printed summary lists "index" as a separate dimension
# next to "time", and the data variables stay on "time" - confirm this is
# the intended layout.
xvis=xvis.set_index(index=("antenna1", "antenna2"))
print(xvis)

<xarray.Dataset>
Dimensions:            (index: 657360, polarisation: 4, spatial: 3, time: 657360)
Coordinates:
  * time               (time) float64 5.085e+09 5.085e+09 ... 5.085e+09
  * polarisation       (polarisation) <U2 'XX' 'XY' 'YX' 'YY'
  * spatial            (spatial) float64 0.0 0.0 0.0
  * index              (index) MultiIndex
  - antenna1           (index) int64 0 0 0 0 0 0 0 ... 163 163 163 164 164 164
  - antenna2           (index) int64 1 1 1 2 2 2 3 ... 165 165 165 165 165 165
Data variables:
    visibility         (time, polarisation) complex128 0j 0j 0j 0j ... 0j 0j 0j
    uvw                (time, spatial) float64 10.69 12.45 16.41 ... 31.07 74.17
    uvwdist            (time) float64 16.41 17.23 18.05 ... 67.43 70.8 74.17
    datetime           (time) datetime64[ns] 2020-01-01T17:30:36.436666595 .....
    weight             (time, polarisation) float64 1.0 1.0 1.0 ... 1.0 1.0 1.0
    imaging_weight     (time, polarisation) float64 1.0 1.0 1.0 ... 1.0 1.0 1.0
    fl

In [19]:
print(xvis)
import pandas
# Small stand-alone MultiIndex with the same level names; it is not used by
# the selection below.
midx = pandas.MultiIndex.from_arrays([numpy.arange(10), numpy.arange(1,11)],
                                     names=("antenna1", "antenna2"))
# Attempt a vectorised (list-valued) selection on both MultiIndex levels.
# xarray rejects this with a ValueError; report it instead of letting the
# cell abort, so the notebook still completes under Restart & Run All.
try:
    print(xvis.loc[dict(antenna1=[1,2,4], antenna2=[5,6])].visibility)
except ValueError as err:
    print("ValueError:", err)

<xarray.Dataset>
Dimensions:            (index: 657360, polarisation: 4, spatial: 3, time: 657360)
Coordinates:
  * time               (time) float64 5.085e+09 5.085e+09 ... 5.085e+09
  * polarisation       (polarisation) <U2 'XX' 'XY' 'YX' 'YY'
  * spatial            (spatial) float64 0.0 0.0 0.0
  * index              (index) MultiIndex
  - antenna1           (index) int64 0 0 0 0 0 0 0 ... 163 163 163 164 164 164
  - antenna2           (index) int64 1 1 1 2 2 2 3 ... 165 165 165 165 165 165
Data variables:
    visibility         (time, polarisation) complex128 0j 0j 0j 0j ... 0j 0j 0j
    uvw                (time, spatial) float64 10.69 12.45 16.41 ... 31.07 74.17
    uvwdist            (time) float64 16.41 17.23 18.05 ... 67.43 70.8 74.17
    datetime           (time) datetime64[ns] 2020-01-01T17:30:36.436666595 .....
    weight             (time, polarisation) float64 1.0 1.0 1.0 ... 1.0 1.0 1.0
    imaging_weight     (time, polarisation) float64 1.0 1.0 1.0 ... 1.0 1.0 1.0
    fl

ValueError: Vectorized selection is not available along level variable: antenna1