## Introduction to visualizing data in the eeghdf files

In [None]:
# %load explore-eeghdf-files-basics.py
# This is an example of basic exploration of an eeghdf file: how to discover the fields in the file and how to plot them.
# 
# The stacklineplot module is copied from my python-edf/examples code to help with display. I may later turn it into a helper or release it as a utility package so that it is easier to install.

from __future__ import print_function, division, unicode_literals
# %matplotlib inline
%matplotlib notebook

import matplotlib
import matplotlib.pyplot as plt
#import seaborn
import pandas as pd
import numpy as np
import h5py
from pprint import pprint

import stacklineplot


# Default figure size as (width, height) in inches; the larger size below is
# kept as a commented-out alternative.
# matplotlib.rcParams['figure.figsize'] = (18.0, 12.0)
matplotlib.rcParams['figure.figsize'] = (12.0, 8.0)

In [2]:
# Open read-only: this notebook only inspects the file. Without an explicit
# mode, older h5py releases open in append mode, which can create or modify
# the file on disk.
hdf = h5py.File('./archive/DA05505C_1-1+.eeghdf', 'r')

In [3]:
# Show the top-level members of the file, then the metadata attributes
# attached to the 'patient' group.
pprint(list(hdf.items()))
pprint(list(hdf['patient'].attrs.items()))

[('patient', <HDF5 group "/patient" (0 members)>),
 ('record-0', <HDF5 group "/record-0" (10 members)>)]
[('patient_name', '77, Subject'),
 ('patientcode', '383f6cc99c7652bf96d9be9ea44606a8'),
 ('gender', 'Male'),
 ('birthdate', '1990-01-01'),
 ('patient_additional', ''),
 ('gestatational_age_at_birth_days', -1.0),
 ('born_premature', 'unknown')]


In [4]:
# Inspect the first record: its members first, then its attributes.
rec = hdf['record-0']
pprint(list(rec.items()))
pprint(list(rec.attrs.items()))
# Approximate age in years (uses a 365-day year, so leap days are ignored).
years_old = rec.attrs['patient_age_days']/365
# Use plain print here: pprint would emit the string's repr, wrapped in quotes.
print("age in years: %s" % years_old)

[('edf_annotations', <HDF5 group "/record-0/edf_annotations" (3 members)>),
 ('physical_dimensions',
  <HDF5 dataset "physical_dimensions": shape (36,), type "|O">),
 ('prefilters', <HDF5 dataset "prefilters": shape (36,), type "|O">),
 ('signal_digital_maxs',
  <HDF5 dataset "signal_digital_maxs": shape (36,), type "<i4">),
 ('signal_digital_mins',
  <HDF5 dataset "signal_digital_mins": shape (36,), type "<i4">),
 ('signal_labels', <HDF5 dataset "signal_labels": shape (36,), type "|O">),
 ('signal_physical_maxs',
  <HDF5 dataset "signal_physical_maxs": shape (36,), type "<f8">),
 ('signal_physical_mins',
  <HDF5 dataset "signal_physical_mins": shape (36,), type "<f8">),
 ('signals', <HDF5 dataset "signals": shape (36, 609200), type "<i2">),
 ('transducers', <HDF5 dataset "transducers": shape (36,), type "|O">)]
[('start_isodatetime', '2006-08-10 18:50:55'),
 ('end_isodatetime', '2006-08-10 19:41:41'),
 ('number_channels', 36),
 ('number_samples_per_channel', 609200),
 ('sample_frequen

In [5]:
# Handle to the signal dataset, plus a readable text label for each channel.
signals = rec['signals']
labels = rec['signal_labels']
electrode_labels = [str(raw, 'ascii') for raw in labels]
# The same labels prefixed with their channel index, formatted as "index:label".
numbered_electrode_labels = ["%d:%s" % (idx, name) for idx, name in enumerate(electrode_labels)]

#### Simple visualization of EEG (left anterior temporal seizure pattern)

In [21]:
# Draw DE consecutive 10 s epochs, beginning at epoch number `epoch`.
ch0, ch1 = 0, 19  # channels ch0 up to (not including) ch1 are plotted
DE = 3  # how many 10s epochs to display
epoch = 53
# Samples per 10 s epoch (assumes sample_frequency is in Hz -- TODO confirm).
ptepoch = 10 * int(rec.attrs['sample_frequency'])
stacklineplot.stackplot(
    signals[ch0:ch1, epoch * ptepoch:(epoch + DE) * ptepoch],
    seconds=DE * 10.0,
    ylabels=electrode_labels[ch0:ch1],
    yscale=0.3,
)
print("epoch:", epoch)


<IPython.core.display.Javascript object>

epoch: 53


In [7]:
# Group holding the EDF annotations stored with this record.
annot = rec['edf_annotations']
# Uncomment to list the group's members or to dump the raw annotation texts:
#print(list(annot.items()))
#annot['texts'][:]

In [8]:
# Decode each annotation text from UTF-8 bytes and collect the matching start
# times (dataset name suggests units of 100 ns), then confirm both lists have
# the same length.
antext = [raw.decode('utf-8') for raw in annot['texts'][:]]
starts100ns = list(annot['starts_100ns'][:])
len(starts100ns), len(antext)

(38, 38)

In [10]:
import pandas as pd

In [11]:
# One row per annotation: its text and its start time in 100 ns ticks,
# plus the start time converted to seconds (1 s = 10**7 ticks of 100 ns).
df = pd.DataFrame({'text': antext, 'starts100ns': starts100ns})
df['starts_sec'] = df['starts100ns'] / 10**7

In [22]:
df  # look at the annotations: text, start in 100 ns ticks, start in seconds

Unnamed: 0,text,starts100ns,starts_sec
0,REC START A_DB EEG,0,0.0
1,A1+A2 OFF,0,0.0
2,texting,5197550000,519.755
3,*****SEIZURE 1,5332390000,533.239
4,first change,5415300000,541.53
5,stare vs watching tv,5437730000,543.773
6,turns and looks to r,5497230000,549.723
7,playing with phone ?,5626540000,562.654
8,clear staring off,5791480000,579.148
9,turns around all the,5947660000,594.766


In [13]:
# Case-insensitive search of the annotation text for the abbreviation "sz".
df.loc[df['text'].str.contains('sz', case=False)]

Unnamed: 0,text,starts100ns,starts_sec


In [23]:
# Find the seizure: case-insensitive search of the annotation text for "seizure".
df.loc[df['text'].str.contains('seizure', case=False)]

Unnamed: 0,text,starts100ns,starts_sec
3,*****SEIZURE 1,5332390000,533.239


In [15]:
# List the members stored inside the annotations group.
list(annot.items())

[('durations_char16',
  <HDF5 dataset "durations_char16": shape (38,), type "|S16">),
 ('starts_100ns', <HDF5 dataset "starts_100ns": shape (38,), type "<i8">),
 ('texts', <HDF5 dataset "texts": shape (38,), type "|O">)]