## Multidimensional dataset object

In [1]:
from spectrochempy.api import *


        SpectroChemPy's API
        Version   : 0.1a1.3
        Copyright : 2014-2017, LCS - Laboratory for Catalysis and Spectrochempy
            


### Create a ND-Dataset from scratch

Multidimensional arrays are defined in SpectroChemPy using the **NDDataset** object.

Below is an example of a 3D array with axes.

Let's first create the three one-dimensional axes, for which we can define labels, units, and masks.

In [2]:
# Axis 0: three temperature points, each carrying a text label.
axe0 = Axis(
    coords=np.linspace(200., 300., 3),
    labels=['cold', 'normal', 'hot'],
    mask=None,
    units="K",
    title='temperature',
)

# Axis 1: 100 evenly spaced time points between 0 and 60, without labels.
axe1 = Axis(
    coords=np.linspace(0., 60., 100),
    labels=None,
    mask=None,
    units="minutes",
    title='time-on-stream',
)

# Axis 2: 10 evenly spaced points running downwards from 4000 to 1000.
# NOTE(review): cm^-1 is a wavenumber unit while the title says
# 'wavelength' -- confirm which one is intended.
axe2 = Axis(
    coords=np.linspace(4000., 1000., 10),
    labels=None,
    mask=None,
    units="cm^-1",
    title='wavelength',
)

Here is the displayed info for axe1 for instance:

In [3]:
# Evaluating the bare name as the last expression of the cell displays the
# axis summary (title, coordinates and units, as shown in the output below).
axe1

0,1
title,Time-on-stream
coordinates,"[ 0 0.606 ..., 59.4 60]"
units,min


Now we create some 3D data:

In [4]:
# Synthetic 3D array built with the same nesting as the axes
# (axe0 -> axe1 -> axe2): a sine profile along axe2, multiplied by an
# exponential decay along axe1, scaled by each axe0 value, and finally
# squared element-wise.
nd_data = np.array([
    np.array([
        np.sin(axe2.data * 2. * np.pi / 4000.) * np.exp(-y / 60.)
        for y in axe1.data
    ]) * float(T)
    for T in axe0.data
]) ** 2

The dataset is now created with these data and the axes defined above:

In [5]:
# Assemble the dataset from the raw array and the three axes defined earlier.
mydataset = NDDataset(
    nd_data,
    axes=[axe0, axe1, axe2],
    title='Absorbance',
    units='absorbance',
)

# Free-text metadata attached after construction. The description is a
# two-line string; the space before the line break is part of the text.
mydataset.description = (
    "Dataset example created for this tutorial. \n"
    "It's a 3-D dataset (with dimensionless intensity)"
)
mydataset.author = 'Tintin and Milou'

We can get some information about this object:

In [6]:
# Display the dataset summary: metadata, data block and one entry per axis.
mydataset

0,1
author,Tintin and Milou
created,0001-01-01 00:00:00
last modified,0001-01-01 00:00:00
description,Dataset example created for this tutorial. It's a 3-D dataset (with  dimensionless intensity)
data,"title Absorbance shape 3 x 100 x 10 units AU values  [[[ 2.4e-27 1e+04 ..., 3e+04 4e+04]  [2.35e-27 9.8e+03 ..., 2.94e+04 3.92e+04]  ..., [3.31e-28 1.38e+03 ..., 4.14e+03 5.52e+03]  [3.25e-28 1.35e+03 ..., 4.06e+03 5.41e+03]]  [[3.75e-27 1.56e+04 ..., 4.69e+04 6.25e+04]  [3.67e-27 1.53e+04 ..., 4.59e+04 6.13e+04]  ..., [5.18e-28 2.16e+03 ..., 6.47e+03 8.63e+03]  [5.07e-28 2.11e+03 ..., 6.34e+03 8.46e+03]]  [[ 5.4e-27 2.25e+04 ..., 6.75e+04 9e+04]  [5.29e-27 2.21e+04 ..., 6.62e+04 8.82e+04]  ..., [7.46e-28 3.11e+03 ..., 9.32e+03 1.24e+04]  [7.31e-28 3.05e+03 ..., 9.14e+03 1.22e+04]]]"
axis 0,titleTemperature  coordinates[ 200 250 300]  unitsK  labels['cold' 'normal' 'hot']
axis 1,"titleTime-on-stream  coordinates[ 0 0.606 ..., 59.4 60]  unitsmin"
axis 2,"titleWavelength  coordinates[ 4e+03 3.67e+03 ..., 1.33e+03 1e+03]  unitscm-1"

0,1
title,Absorbance
shape,3 x 100 x 10
units,AU
values,"[[[ 2.4e-27 1e+04 ..., 3e+04 4e+04]  [2.35e-27 9.8e+03 ..., 2.94e+04 3.92e+04]  ..., [3.31e-28 1.38e+03 ..., 4.14e+03 5.52e+03]  [3.25e-28 1.35e+03 ..., 4.06e+03 5.41e+03]]  [[3.75e-27 1.56e+04 ..., 4.69e+04 6.25e+04]  [3.67e-27 1.53e+04 ..., 4.59e+04 6.13e+04]  ..., [5.18e-28 2.16e+03 ..., 6.47e+03 8.63e+03]  [5.07e-28 2.11e+03 ..., 6.34e+03 8.46e+03]]  [[ 5.4e-27 2.25e+04 ..., 6.75e+04 9e+04]  [5.29e-27 2.21e+04 ..., 6.62e+04 8.82e+04]  ..., [7.46e-28 3.11e+03 ..., 9.32e+03 1.24e+04]  [7.31e-28 3.05e+03 ..., 9.14e+03 1.22e+04]]]"

0,1
title,Temperature
coordinates,[ 200 250 300]
units,K
labels,['cold' 'normal' 'hot']

0,1
title,Time-on-stream
coordinates,"[ 0 0.606 ..., 59.4 60]"
units,min

0,1
title,Wavelength
coordinates,"[ 4e+03 3.67e+03 ..., 1.33e+03 1e+03]"
units,cm-1


An NDDataset can be sliced like a conventional NumPy array...

In [7]:
# The Ellipsis (...) spans all leading dimensions; 0 picks the first
# coordinate of the last axis. The output below reports a shape of
# 3 x 100 x 1, i.e. the indexed dimension is kept with length 1.
new = mydataset[..., 0]
new

0,1
author,christian@wifi12154personnel.ensicaen.fr
created,2017-07-12 18:08:29.924213
last modified,0001-01-01 00:00:00
description,Dataset example created for this tutorial. It's a 3-D dataset (with  dimensionless intensity)
data,"title Absorbance shape 3 x 100 x 1 units AU values  [[[ 2.4e-27]  [2.35e-27]  ..., [3.31e-28]  [3.25e-28]]  [[3.75e-27]  [3.67e-27]  ..., [5.18e-28]  [5.07e-28]]  [[ 5.4e-27]  [5.29e-27]  ..., [7.46e-28]  [7.31e-28]]]"
axis 0,titleTemperature  coordinates[ 200 250 300]  unitsK  labels['cold' 'normal' 'hot']
axis 1,"titleTime-on-stream  coordinates[ 0 0.606 ..., 59.4 60]  unitsmin"
axis 2,titleWavelength  coordinates[ 4e+03]  unitscm-1

0,1
title,Absorbance
shape,3 x 100 x 1
units,AU
values,"[[[ 2.4e-27]  [2.35e-27]  ..., [3.31e-28]  [3.25e-28]]  [[3.75e-27]  [3.67e-27]  ..., [5.18e-28]  [5.07e-28]]  [[ 5.4e-27]  [5.29e-27]  ..., [7.46e-28]  [7.31e-28]]]"

0,1
title,Temperature
coordinates,[ 200 250 300]
units,K
labels,['cold' 'normal' 'hot']

0,1
title,Time-on-stream
coordinates,"[ 0 0.606 ..., 59.4 60]"
units,min

0,1
title,Wavelength
coordinates,[ 4e+03]
units,cm-1


or using the axis labels:

In [8]:
# Index with an axis label instead of a position: 'hot' is the label given
# to the third coordinate (300 K) of axe0.
new = mydataset['hot']
new

0,1
author,christian@wifi12154personnel.ensicaen.fr
created,2017-07-12 18:08:29.950619
last modified,0001-01-01 00:00:00
description,Dataset example created for this tutorial. It's a 3-D dataset (with  dimensionless intensity)
data,"title Absorbance shape 1 x 100 x 10 units AU values  [[[ 5.4e-27 2.25e+04 ..., 6.75e+04 9e+04]  [5.29e-27 2.21e+04 ..., 6.62e+04 8.82e+04]  ..., [7.46e-28 3.11e+03 ..., 9.32e+03 1.24e+04]  [7.31e-28 3.05e+03 ..., 9.14e+03 1.22e+04]]]"
axis 0,titleTemperature  coordinates[ 300]  unitsK  labels['hot']
axis 1,"titleTime-on-stream  coordinates[ 0 0.606 ..., 59.4 60]  unitsmin"
axis 2,"titleWavelength  coordinates[ 4e+03 3.67e+03 ..., 1.33e+03 1e+03]  unitscm-1"

0,1
title,Absorbance
shape,1 x 100 x 10
units,AU
values,"[[[ 5.4e-27 2.25e+04 ..., 6.75e+04 9e+04]  [5.29e-27 2.21e+04 ..., 6.62e+04 8.82e+04]  ..., [7.46e-28 3.11e+03 ..., 9.32e+03 1.24e+04]  [7.31e-28 3.05e+03 ..., 9.14e+03 1.22e+04]]]"

0,1
title,Temperature
coordinates,[ 300]
units,K
labels,['hot']

0,1
title,Time-on-stream
coordinates,"[ 0 0.606 ..., 59.4 60]"
units,min

0,1
title,Wavelength
coordinates,"[ 4e+03 3.67e+03 ..., 1.33e+03 1e+03]"
units,cm-1


Single-element dimensions are kept but can also be squeezed easily:

In [9]:
# Drop the length-1 dimension left by the label selection; `new` is rebound
# to the squeezed dataset (shape 100 x 10 in the output below).
new = new.squeeze()
new

0,1
author,christian@wifi12154personnel.ensicaen.fr
created,2017-07-12 18:08:29.971767
last modified,0001-01-01 00:00:00
description,Dataset example created for this tutorial. It's a 3-D dataset (with  dimensionless intensity)
data,"title Absorbance shape 100 x 10 units AU values  [[ 5.4e-27 2.25e+04 ..., 6.75e+04 9e+04]  [5.29e-27 2.21e+04 ..., 6.62e+04 8.82e+04]  ..., [7.46e-28 3.11e+03 ..., 9.32e+03 1.24e+04]  [7.31e-28 3.05e+03 ..., 9.14e+03 1.22e+04]]"
axis 0,"titleTime-on-stream  coordinates[ 0 0.606 ..., 59.4 60]  unitsmin"
axis 1,"titleWavelength  coordinates[ 4e+03 3.67e+03 ..., 1.33e+03 1e+03]  unitscm-1"

0,1
title,Absorbance
shape,100 x 10
units,AU
values,"[[ 5.4e-27 2.25e+04 ..., 6.75e+04 9e+04]  [5.29e-27 2.21e+04 ..., 6.62e+04 8.82e+04]  ..., [7.46e-28 3.11e+03 ..., 9.32e+03 1.24e+04]  [7.31e-28 3.05e+03 ..., 9.14e+03 1.22e+04]]"

0,1
title,Time-on-stream
coordinates,"[ 0 0.606 ..., 59.4 60]"
units,min

0,1
title,Wavelength
coordinates,"[ 4e+03 3.67e+03 ..., 1.33e+03 1e+03]"
units,cm-1


To plot a dataset, use the `plot` command (generic plot). As the NDDataset is 2D, a contour plot is displayed by default.

In [10]:
# Generic plot of the squeezed 2D dataset.
new.plot()

<IPython.core.display.Javascript object>

We can change or add labels to the axes after the dataset has been created:

In [11]:
# NOTE(review): `datetime` is imported but not used in this cell.
from datetime import datetime, timedelta
# Attach one label per coordinate of axe1, each built by passing the
# coordinate value to timedelta as a number of minutes.
axe1.labels = [timedelta(minutes=t) for t in axe1.data]
axe1

0,1
title,Time-on-stream
coordinates,"[ 0 0.606 ..., 59.4 60]"
units,min
labels,"[datetime.timedelta(0) datetime.timedelta(0, 36, 363636) ...,  datetime.timedelta(0, 3563, 636364) datetime.timedelta(0, 3600)]"


In [12]:
axe1[20].labels
# TODO: this should be a single object, not a zero-dimensional (0-D) array

array(datetime.timedelta(0, 727, 272727), dtype=object)

Datasets can be transposed:

In [13]:
# `.T` gives the transposed dataset: in the output below the shape becomes
# 10 x 100 and the two axes appear in swapped order.
newT = new.T
newT

0,1
author,christian@wifi12154personnel.ensicaen.fr
created,2017-07-12 18:08:30.189686
last modified,0001-01-01 00:00:00
description,Dataset example created for this tutorial. It's a 3-D dataset (with  dimensionless intensity)
data,"title Absorbance shape 10 x 100 units AU values  [[ 5.4e-27 5.29e-27 ..., 7.46e-28 7.31e-28]  [2.25e+04 2.21e+04 ..., 3.11e+03 3.05e+03]  ..., [6.75e+04 6.62e+04 ..., 9.32e+03 9.14e+03]  [ 9e+04 8.82e+04 ..., 1.24e+04 1.22e+04]]"
axis 0,"titleWavelength  coordinates[ 4e+03 3.67e+03 ..., 1.33e+03 1e+03]  unitscm-1"
axis 1,"titleTime-on-stream  coordinates[ 0 0.606 ..., 59.4 60]  unitsmin"

0,1
title,Absorbance
shape,10 x 100
units,AU
values,"[[ 5.4e-27 5.29e-27 ..., 7.46e-28 7.31e-28]  [2.25e+04 2.21e+04 ..., 3.11e+03 3.05e+03]  ..., [6.75e+04 6.62e+04 ..., 9.32e+03 9.14e+03]  [ 9e+04 8.82e+04 ..., 1.24e+04 1.22e+04]]"

0,1
title,Wavelength
coordinates,"[ 4e+03 3.67e+03 ..., 1.33e+03 1e+03]"
units,cm-1

0,1
title,Time-on-stream
coordinates,"[ 0 0.606 ..., 59.4 60]"
units,min


In [14]:
# Plot the transposed dataset.
newT.plot()

<IPython.core.display.Javascript object>

### Loading of experimental data

Now, let's load an NMR dataset (in the Bruker format).

The built-in **data_dir** variable contains the path to our *test* data:

In [15]:
# Let's check that this directory exists and display its current content:
import os, glob
  
def listdir(initial, esp=""):
    """Recursively print the content of the directory `initial` as a tree.

    Parameters
    ----------
    initial : str
        Path of the directory to list.
    esp : str, optional
        Prefix printed in front of each entry name. Every nesting level
        appends ``'| '`` to it, which draws the tree indentation.

    Notes
    -----
    Entries whose name starts with ``'acqu'`` or ``'pulse'`` are not
    printed; a directory with such a name is still traversed.
    Entries are visited in sorted order so that the listing is identical
    on every platform and file system (``glob.glob`` alone returns them in
    arbitrary order).
    """
    # glob.escape protects directory names that contain glob metacharacters
    # ('[', '*', '?'); the '*' pattern itself does not match hidden entries.
    pattern = os.path.join(glob.escape(initial), '*')
    for f in sorted(glob.glob(pattern)):
        fb = os.path.basename(f)
        if not fb.startswith(('acqu', 'pulse')):
            print(esp, fb)
        if os.path.isdir(f):
            listdir(f, esp + '| ')

# List the data directory only if it exists; otherwise nothing is printed.
if os.path.exists(data_dir):
    listdir(data_dir)

 irdata
|  NH4Y-activation.SPG
 nmrdata
|  bruker
| |  tests
| | |  nmr
| | | |  bruker_1d
| | | | |  1
| | | | | |  fid
| | | |  bruker_2d
| | | | |  1
| | | | | |  ser
| | | | |  2
| | | | | |  ser
| | | |  bruker_3d
| | | | |  1
| | | | | |  ser
| | | | |  2
| | | | | |  ser
|  simpson
| |  simpson_1d
| | |  rr.in
| |  simpson_2d
| | |  2d.in


In [16]:
# Path to the 1D Bruker example inside the test data directory.
path = os.path.join(
    data_dir, 'nmrdata', 'bruker', 'tests', 'nmr', 'bruker_1d')

# Start from an empty dataset and read `expno=1` into it.
ndd = NDDataset()
ndd.read_bruker_nmr(path, expno=1, remove_digital_filter=True)

# Keep what plot() returns and show it as the cell output.
fig1 = ndd.plot()
fig1

<IPython.core.display.Javascript object>

In [17]:
# Path to the 2D Bruker example inside the test data directory.
path = os.path.join(
    data_dir, 'nmrdata', 'bruker', 'tests', 'nmr', 'bruker_2d')

# Start from an empty dataset and read `expno=1` into it.
ndd2 = NDDataset()
ndd2.read_bruker_nmr(path, expno=1, remove_digital_filter=True)

# Request milliseconds on both axes before plotting.
# NOTE(review): the return values of .to() are discarded, so these two
# calls only have an effect if .to() converts in place -- confirm.
ndd2.x.to('ms')
ndd2.y.to('ms')

# Keep what plot() returns and show it as the cell output.
fig2 = ndd2.plot()
fig2

<IPython.core.display.Javascript object>