In [1]:
import glob
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from scipy.io import loadmat
%matplotlib inline

print("import done")

import done


In [49]:
def mat2tfr(p_file, dst_dir="./dataset"):
    """Convert one ``.mat`` recording into a TFRecord file holding one Example.

    The base name of ``p_file`` must have the form
    ``<patient>_<segment>_<label>.mat``. The output file is named after the
    same three fields with a ``.tfr`` extension.

    Args:
        p_file: Path of the ``.mat`` file. It is read with ``loadmat`` and must
            expose ``mat['dataStruct']['data'][0][0]``.
        dst_dir: Directory that receives the ``.tfr`` file. It is created when
            it does not exist yet.

    Returns:
        The path of the TFRecord file that was written.

    Raises:
        ValueError: If the file name does not split into exactly three
            ``_``-separated fields, or if the label field is not an integer.
    """
    # Validate the file name *before* the expensive load and before creating
    # any output file, so a badly named input never leaves an empty .tfr behind.
    stem = os.path.basename(p_file).split('.')[0]
    fields = stem.split("_")
    if len(fields) != 3:
        raise ValueError(
            "Expected a file name like <patient>_<segment>_<label>.mat, got: "
            + p_file)
    pat, seg, label = fields
    label_id = int(label)  # ValueError if the label field is not numeric

    # converting mat file as numpy
    mat = loadmat(p_file)
    data = mat['dataStruct']['data'][0][0]

    if not os.path.isdir(dst_dir):
        os.makedirs(dst_dir)
    dstfilename = dst_dir + "/" + pat + "_" + seg + "_" + label + ".tfr"
    print("Converting " + p_file + " ----> " + dstfilename)

    # The whole recording is flattened into one float list; the reader has to
    # reshape it back.
    protobuf = tf.train.Example(features=tf.train.Features(feature={
        'data': tf.train.Feature(
            float_list=tf.train.FloatList(value=data.flatten().tolist())),
        'label': tf.train.Feature(
            int64_list=tf.train.Int64List(value=[label_id])),
    }))

    # TensorFlow Records writer
    with tf.python_io.TFRecordWriter(dstfilename) as tfrwriter:
        tfrwriter.write(protobuf.SerializeToString())

    return dstfilename
        
        
def dataset(folder, num_files=None, pattern="*1.mat"):
    """Convert the ``.mat`` files found in ``folder`` with ``mat2tfr``.

    Matching files are shuffled in place with ``np.random.shuffle`` and
    optionally truncated, so a limited run converts a random subset.

    Args:
        folder: Directory that is searched (non-recursively) for input files.
        num_files: Maximum number of files to convert; ``None`` converts every
            match.
        pattern: Glob pattern appended to ``folder``. The default keeps the
            original behaviour of selecting only names ending in ``1.mat``.
    """
    # get files
    filenames = glob.glob(folder + "/" + pattern)
    # randomize read files
    np.random.shuffle(filenames)
    # truncate reading
    if num_files is not None:
        filenames = filenames[:num_files]
    print("Converting #%d files." % len(filenames))

    for mat_path in filenames:
        started = time.time()
        mat2tfr(mat_path)
        print("elapsed: %.3fs" % (time.time() - started))
        
# Convert ten randomly chosen recordings from the ./data/train_3 folder.
dataset(folder="./data/train_3", num_files=10)

print('finished')

Converting #10 files.
Converting ./data/train_3/3_40_1.mat ----> ./dataset/3_40_1.tfr
elapsed: 3.026s
Converting ./data/train_3/3_121_1.mat ----> ./dataset/3_121_1.tfr
elapsed: 2.900s
Converting ./data/train_3/3_116_1.mat ----> ./dataset/3_116_1.tfr
elapsed: 2.689s
Converting ./data/train_3/3_131_1.mat ----> ./dataset/3_131_1.tfr
elapsed: 2.693s
Converting ./data/train_3/3_58_1.mat ----> ./dataset/3_58_1.tfr
elapsed: 2.647s
Converting ./data/train_3/3_3_1.mat ----> ./dataset/3_3_1.tfr
elapsed: 2.689s
Converting ./data/train_3/3_32_1.mat ----> ./dataset/3_32_1.tfr
elapsed: 2.682s
Converting ./data/train_3/3_137_1.mat ----> ./dataset/3_137_1.tfr
elapsed: 2.651s
Converting ./data/train_3/3_46_1.mat ----> ./dataset/3_46_1.tfr
elapsed: 2.666s
Converting ./data/train_3/3_70_1.mat ----> ./dataset/3_70_1.tfr
elapsed: 2.693s
finished


In [90]:
# Layout of one stored recording: the flat 'data' feature holds
# _SAMPLE_POINTS * _CHANNELS floats and is reshaped to
# [_SAMPLE_POINTS, _CHANNELS] by read_dataset.
_CHANNELS = 16
_SAMPLE_POINTS = 240000

# Depth of the one-hot label vector.
_NUM_LABELS = 2

# Number of examples dequeued per batch.
_BATCH_SIZE = 8

# Short alias for the TF-Slim contrib package.
slim = tf.contrib.slim

def read_dataset(folder, num_batches=5, num_segments=100):
    """Build a TF-Slim input pipeline over TFRecord files and print batches.

    Every record is reshaped to ``[_SAMPLE_POINTS, _CHANNELS]`` and split
    along the sample axis into ``num_segments`` equal pieces. Each piece is
    enqueued as its own example together with a one-hot copy of the record's
    label and the variance of that piece. For each dequeued batch the shapes
    of data and labels and the per-example variances are printed.

    Args:
        folder: Glob pattern (despite the name) selecting the TFRecord files,
            e.g. ``'./dataset/*.tfr'``.
        num_batches: Number of batches to dequeue and print.
        num_segments: Number of equal pieces each recording is split into;
            must divide ``_SAMPLE_POINTS``.

    Raises:
        ValueError: If no file matches ``folder`` or ``num_segments`` does not
            divide ``_SAMPLE_POINTS``.
    """
    filenames = glob.glob(folder)
    print("Loading #%d files."%len(filenames))

    # Fail fast with a clear message instead of building a graph whose input
    # queue has nothing to read.
    if not filenames:
        raise ValueError("No TFRecord files match %r." % folder)
    if _SAMPLE_POINTS % num_segments != 0:
        raise ValueError(
            "num_segments=%d does not divide the %d sample points."
            % (num_segments, _SAMPLE_POINTS))

    with tf.Graph().as_default():
        keys_to_features = {
            'data': tf.FixedLenFeature([_SAMPLE_POINTS*_CHANNELS], tf.float32),
            'label': tf.FixedLenFeature([], tf.int64),
        }
        items_to_handlers = {
            'data': slim.tfexample_decoder.Tensor('data'),
            'label': slim.tfexample_decoder.Tensor('label'),
        }
        decoder = slim.tfexample_decoder.TFExampleDecoder(
            keys_to_features, items_to_handlers)

        items_to_descriptions = {
            'data': '240000 sample points of iEEG.',
            'label': 'Label 0 indicates interictal and 1 preictal.',
        }

        # num_samples is the number of matched files (previously hard-coded
        # to 1); this assumes one Example per file - confirm against the
        # writer.
        records = slim.dataset.Dataset(
            data_sources=filenames,
            reader=tf.TFRecordReader,
            decoder=decoder,
            num_samples=len(filenames),
            items_to_descriptions=items_to_descriptions)

        data_provider = slim.dataset_data_provider.DatasetDataProvider(
            records, shuffle=True, num_epochs=None,
            common_queue_capacity=16, common_queue_min=1)

        data, label = data_provider.get(['data', 'label'])

        # Preprocess data: [samples, channels] -> [segments, samples/segment, channels]
        data = tf.reshape(data, shape=[_SAMPLE_POINTS, _CHANNELS])
        data = tf.pack(tf.split(0, num_segments, data), axis=0)
        # Repeat the one-hot label once per segment.
        label = tf.one_hot(label, _NUM_LABELS, dtype=tf.int32)
        label = tf.reshape(tf.tile(label, [num_segments]),
                           shape=[num_segments, _NUM_LABELS])

        # Per-segment variance over samples and channels. It is only
        # computed and reported; no segment is filtered out here.
        _, var = tf.nn.moments(data, axes=[1, 2])

        # Batch it up. enqueue_many=True treats the leading (segment) axis as
        # separate examples.
        data, label, var = tf.train.shuffle_batch([data, label, var],
                                                  batch_size=_BATCH_SIZE,
                                                  num_threads=1,
                                                  capacity=20*_BATCH_SIZE,
                                                  min_after_dequeue=6*_BATCH_SIZE,
                                                  enqueue_many=True)

        with tf.Session() as sess:
            with slim.queues.QueueRunners(sess):
                for _ in range(num_batches):
                    dt, lbl, variances = sess.run([data, label, var])
                    print(dt.shape[:], lbl.shape[:])
                    print(variances)

# Print a few shuffled batches drawn from every .tfr file under ./dataset.
read_dataset(folder='./dataset/*.tfr')

print('done')
    

Loading #20 files.
((8, 2400, 16), (8, 2))
[ 4299.83642578  1075.15820312  1465.13842773  3586.09326172  2526.08447266
  3635.19360352  1207.40075684  1794.80554199]
((8, 2400, 16), (8, 2))
[ 2272.60229492  3196.44238281  3242.44946289  4264.23828125  1604.73095703
  3269.10717773  2624.44750977  3160.12890625]
((8, 2400, 16), (8, 2))
[ 1348.60388184  1553.0970459   1224.58764648  2488.91552734  1563.36645508
  2760.24243164  5094.52294922  5588.71289062]
((8, 2400, 16), (8, 2))
[ 6174.93505859  1880.13635254  1577.26269531  1872.98999023  1629.27124023
  1372.38769531  2908.1965332   5466.98388672]
((8, 2400, 16), (8, 2))
[ 1776.24829102  1904.90185547  1457.45373535  1528.82873535  3815.08642578
  3056.20263672  4095.33422852  2270.77758789]
done


In [58]:
def plot_eeg(data, save_path=None):
    """Plot every channel of a recording in its own subplot.

    Channels are laid out in a two-column grid on a single 10x20 inch figure;
    for 16 channels this is the 8x2 layout.

    Args:
        data: 2-D array-like indexed as ``[sample, channel]``.
        save_path: Optional file path. When given, the figure is also saved
            there with ``bbox_inches='tight'``.

    Raises:
        ValueError: If ``data`` is not two-dimensional.
    """
    samples = np.asarray(data)
    if samples.ndim != 2:
        raise ValueError(
            "plot_eeg expects a 2-D [sample, channel] array, got %d dimension(s)."
            % samples.ndim)

    num_channels = samples.shape[1]
    num_cols = 2
    # Ceiling division so an odd channel count still gets a slot.
    num_rows = (num_channels + num_cols - 1) // num_cols

    plt.figure(figsize=(10, 20))
    for channel in range(num_channels):
        plt.subplot(num_rows, num_cols, channel + 1)
        plt.plot(samples[:, channel])

    if save_path is not None:
        plt.savefig(save_path, bbox_inches='tight')