In [1]:
import h5py
import os
import numpy as np

from ramandecompy import dataprep

In [2]:
# Start from a clean slate: drop any copy of the test file left by an earlier run.
# The existence check keeps this cell runnable on a fresh checkout, where a bare
# os.remove would raise FileNotFoundError because the file has not been created yet.
if os.path.exists('string_test.hdf5'):
    os.remove('string_test.hdf5')
dataprep.new_hdf5('string_test')

In [3]:
hdf5 = h5py.File('string_test.hdf5', 'r+')

In [4]:
hdf5['dataset'] = [1, 2, 3, 4, 5, 6, 7]
hdf5.close()

In [5]:
dataprep.view_hdf5('string_test.hdf5')

**** string_test.hdf5 ****
dataset


In [6]:
def create_dataset(h5py_file):
    """
    Create the 'label_added' dataset and write one example peak record to it.

    The record is a single compound element made of seven float fields
    followed by a variable-length string field.

    Args:
        h5py_file: open, writable object exposing `create_dataset`
            (for example an h5py.File opened in 'r+' mode).

    Returns:
        None
    """
    data = (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 'label')
    # the builtin float is what the np.float alias pointed to; the alias itself
    # was removed in NumPy 1.24
    my_datatype = np.dtype([('fraction', float),
                            ('center', float),
                            ('sigma', float),
                            ('amplitude', float),
                            ('fwhm', float),
                            ('height', float),
                            ('area under the curve', float),
                            ('label', h5py.special_dtype(vlen=str))])
    # the whole tuple is ONE compound record, so the dataset holds a single
    # element rather than one element per tuple entry
    dataset = h5py_file.create_dataset('label_added', (1,), dtype=my_datatype)
    data_array = np.array(data, dtype=my_datatype)
    # assign through the dataset so the values are actually written; rebinding
    # the local name would leave the dataset filled with default values
    dataset[...] = data_array

In [7]:
# the file is left open on purpose: the following cells keep using `hdf5` and `dataset`
hdf5 = h5py.File('string_test.hdf5', 'r+')
data = (1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 'label')
# compound dtype: seven float fields followed by a variable-length string.
# The builtin float replaces the np.float alias, which was removed in NumPy 1.24.
my_datatype = np.dtype([('fraction', float),
                        ('center', float),
                        ('sigma', float),
                        ('amplitude', float),
                        ('fwhm', float),
                        ('height', float),
                        ('area under the curve', float),
                        ('label', h5py.special_dtype(vlen=str))])
# a single compound record holds the whole tuple, hence the (1,) shape
dataset = hdf5.create_dataset('label_added', (1,), dtype=my_datatype)
data_array = np.array(data, dtype=my_datatype)

In [8]:
data_array

array((1., 2., 3., 4., 5., 6., 7., 'label'),
      dtype=[('fraction', '<f8'), ('center', '<f8'), ('sigma', '<f8'), ('amplitude', '<f8'), ('fwhm', '<f8'), ('height', '<f8'), ('area under the curve', '<f8'), ('label', 'O')])

In [9]:
dataprep.view_hdf5('string_test.hdf5')

**** string_test.hdf5 ****
dataset
label_added


In [10]:
list(hdf5['label_added'])

[(0., 0., 0., 0., 0., 0., 0., '')]

In [11]:
dataset[...] = data_array

In [12]:
list(hdf5['label_added'])

[(1., 2., 3., 4., 5., 6., 7., 'label')]

In [13]:
foo = list(hdf5['label_added'])
foo[0][7]

'label'

In order to save strings and floats to the same dataset it was necessary to create a custom dtype which could be applied to a tuple of a known shape, with the last element being a string. Therefore we need to update our existing code so that it expects each peak dataset to contain only a single tuple.

In [14]:
# this can be achieved with this line of code, which reads the whole dataset
foo2 = hdf5['label_added'][:]
# the first (and only) element of the dataset is the tuple
foo2[0]

(1., 2., 3., 4., 5., 6., 7., 'label')

In [15]:
type(foo2[0][0])

numpy.float64

In [16]:
# seven example peak parameters, deliberately mixing ints and floats
peak_param = [1, 2, 3, 4.0, 5, 6.0, 7]
print(peak_param, type(peak_param), sep='\n')

# the same values as an immutable tuple
peak_tuple = tuple(peak_param)
print(peak_tuple, type(peak_tuple), sep='\n')


[1, 2, 3, 4.0, 5, 6.0, 7]
<class 'list'>
(1, 2, 3, 4.0, 5, 6.0, 7)
<class 'tuple'>


In [17]:
fraction, sigma, center, amplitude = peak_tuple[0:4]

In [18]:
fraction

1

In [19]:
sigma

2

In [20]:
center

3

In [21]:
amplitude

4.0

Everything works as expected: the peak parameters are easy to store and unpack as a tuple.

In [22]:
test = [1,2,3,4,5,6,7,'test']
test[:7]

[1, 2, 3, 4, 5, 6, 7]

Next, a function that adds a label to an existing `peak_dataset`.
# Brandon --------v  

In [48]:
def add_label(hdf5_filename, temp, time, peak, label):
    """
    Add (or replace) the text label of a peak dataset in an hdf5 file.

    The dataset at '{temp}C/{time}s/{peak}' is read, deleted, and recreated as
    a single compound record: the first seven values of the existing record
    followed by `label`.

    Args:
        hdf5_filename (str): path of an existing hdf5 file, opened read/write.
        temp: value formatted into the '{temp}C' group name.
        time: value formatted into the '{time}s' group name.
        peak (str): name of the peak dataset, e.g. 'Peak_01'.
        label (str): text stored as the last element of the record.

    Returns:
        None
    """
    dataset_path = '{}C/{}s/{}'.format(temp, time, peak)
    # custom datatype that allows a string as the last tuple element; the
    # builtin float replaces the np.float alias removed in NumPy 1.24
    # NOTE(review): the example record shown in this notebook has fwhm equal to
    # twice its second value, which suggests position 1 holds sigma and
    # position 2 holds center - confirm the 'center'/'sigma' names are not swapped.
    my_datatype = np.dtype([('fraction', float),
                            ('center', float),
                            ('sigma', float),
                            ('amplitude', float),
                            ('fwhm', float),
                            ('height', float),
                            ('area under the curve', float),
                            ('label', h5py.special_dtype(vlen=str))])
    # the context manager closes the file even when a step below raises
    with h5py.File(hdf5_filename, 'r+') as hdf5:
        # the first (and only) record of the peak dataset
        peak_data = tuple(hdf5[dataset_path][0])
        # keep only the seven numeric parameters so that a peak which already
        # carries a label is relabelled instead of growing a ninth element
        data = peak_data[:7] + (label,)
        # build the new record BEFORE deleting anything, so a conversion error
        # cannot leave the file without the original dataset
        data_array = np.array(data, dtype=my_datatype)
        # delete the old dataset so the new one can be saved under the same name
        del hdf5[dataset_path]
        dataset = hdf5.create_dataset(dataset_path, (1,), dtype=my_datatype)
        # write the new values to the blank dataset
        dataset[...] = data_array
    return

In [50]:
hdf5_filename = 'spectrafit_dev3.hdf5'
temp = 300
time = 25
peak = 'Peak_01'
label = '[Hydrogen]'

add_label(hdf5_filename, temp, time, peak, label)

In [51]:
# read-only access is enough to inspect the record, and the context manager
# closes the file even if the lookup fails
with h5py.File(hdf5_filename, 'r') as labeled_file:
    foo = labeled_file['{}C/{}s/{}'.format(temp, time, peak)][0]
foo
# to remove the dataset instead, reopen the file in 'r+' mode and run:
# del labeled_file['{}C/{}s/{}'.format(temp, time, peak)]

(2.66633437e-06, 9.47784158, 314.77, 251.85266517, 18.95568315, 12.48172261, 251.85253919, '[Hydrogen]')

In [34]:
hdf5.close()

In [27]:
result = [1,2,3,4,5,6,7,8]
foo = [tuple(result[:7]),]
foo[0]

(1, 2, 3, 4, 5, 6, 7)

In [28]:
data = tuple(result[:7])
data_array = np.array(data, dtype='<f8')

In [29]:
data_array

array([1., 2., 3., 4., 5., 6., 7.])

In [30]:
# import pandas as pd
# from ramandecompy import spectrafit

# hdf5_filename = 'spectrafit_dev3.hdf5'
# exp_filename = '../ramandecompy/tests/test_files/FA_3.6wt%_300C_55s.csv'


# # r+ is read/write mode and will fail if the file does not exist
# exp_file = h5py.File(hdf5_filename, 'r+')
# if exp_filename.split('.')[-1] == 'xlsx':
#     data = pd.read_excel(exp_filename, header=None, names=('wavenumber', 'counts'))
# elif exp_filename.split('.')[-1] == 'csv':
#     data = pd.read_csv(exp_filename, header=None, names=('wavenumber', 'counts'))
# else:
#     print('data file type not recognized')
# # ensure that the data is listed from smallest wavenumber first
# if data['wavenumber'][:1].values > data['wavenumber'][-1:].values:
#     data = data.iloc[::-1]
#     data.reset_index(inplace=True, drop=True)
# else:
#     pass
# # peak detection and data fitting
# fit_result, residuals = spectrafit.fit_data(data['wavenumber'].values, data['counts'].values)
# # extract experimental parameters from filename
# specs = exp_filename.split('/')[-1].split('.')[:-1]
# if len(specs) > 1:
#     spec = ''
#     for _, element in enumerate(specs):
#         spec = str(spec+element)
#     specs = spec
# specs = specs.split('_')
# time = specs[-1]
# temp = specs[-2]
# # write data to .hdf5
# exp_file['{}/{}/wavenumber'.format(temp, time)] = data['wavenumber']
# exp_file['{}/{}/counts'.format(temp, time)] = data['counts']
# exp_file['{}/{}/residuals'.format(temp, time)] = residuals
# for i, result in enumerate(fit_result):
#     my_datatype = np.dtype([('fraction', np.float),
#                     ('center', np.float),
#                     ('sigma', np.float),
#                     ('amplitude', np.float),
#                     ('fwhm', np.float),
#                     ('height', np.float),
#                     ('area under the curve', np.float)])
#     if i < 9:
#         dataset = exp_file.create_dataset('{}/{}/Peak_0{}'.format(temp, time, i+1), (1,), dtype=my_datatype)
#     else:
#         dataset = exp_file.create_dataset('{}/{}/Peak_{}'.format(temp, time, i+1), (1,), dtype=my_datatype)
#     # apply data to tuple
#     data = tuple(result[:7])
#     data_array = np.array(data, dtype=my_datatype)
#     # write new values to the blank dataset
#     dataset[...] = data_array
# print('Data from {} fit with compound pseudo-Voigt model. Results saved to {}.'.format(exp_filename, hdf5_filename))
# exp_file.close()

In [31]:
data_array

array([1., 2., 3., 4., 5., 6., 7.])