In [5]:
import sys
#sys.path = [p for p in sys.path if p.find('/opt/apps/software/') == -1]
from glob import glob
from IPython.display import display, HTML

import tensorflow as tf2
import numpy as np
npdt = np.float32

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"
import re


# import keras
# keras.backend.tensorflow_backend._get_available_gpus()

In [6]:

def shifted_zscore_cf(x, inverse=False):
    """Apply, or undo, a z-score built from four fixed (mean, std) pairs.

    The statistics are hard-coded four-element lists that are combined
    element-wise with ``x``; for NumPy inputs this broadcasts against the
    last axis, which therefore has to be of length 4.
    NOTE(review): presumably one pair per label column -- confirm the order.

    Args:
        x: values to transform (e.g. a NumPy array whose last axis is 4).
        inverse: when True compute ``x * std + mean`` (undo the scaling),
            otherwise compute ``(x - mean) / std``.

    Returns:
        The transformed values.
    """
    # Original author's note: computed without zeros, shifted by 10 std to
    # avoid negative values.
    center = [-10410.677490234375, -10414.565185546875, 6.00097793340683, -10413.194091796875]
    scale = [1266.3062, 1266.7249, 0.13912384, 1266.6195]

    if not inverse:
        return (x - center) / scale
    return (x * scale) + center

def preprocess(serialized_example):
    """Decode a batch of serialized examples into float32 tensors.

    Each example is parsed for two scalar string features, ``'gram'`` and
    ``'label'``. Their raw bytes are reinterpreted as float32 and the
    results are given the static shapes ``(None, 999)`` and ``(None, 4)``.

    Args:
        serialized_example: batch of serialized examples, as accepted by
            ``tf2.io.parse_example``.

    Returns:
        Tuple ``(data, label)`` of the decoded tensors.
    """
    spec = {
        key: tf2.io.FixedLenFeature([], tf2.string)
        for key in ('gram', 'label')
    }
    parsed = tf2.io.parse_example(serialized_example, features=spec)

    # Raw byte strings -> float32 values, then pin the per-example width.
    data = tf2.io.decode_raw(parsed['gram'], tf2.float32)
    data.set_shape((None, 999))

    label = tf2.io.decode_raw(parsed['label'], tf2.float32)
    label.set_shape((None, 4))

    return data, label

def merge_datasets(vfiles, batch_size, prep):
    """Load TFRecord files into two vertically stacked NumPy arrays.

    Every file is read in batches of ``batch_size`` records, decoded with
    ``preprocess`` and cast to ``npdt``. ``prep`` is applied to each label
    row before the batches are stacked.

    Args:
        vfiles: iterable of TFRecord file paths, read in the given order.
        batch_size: number of records decoded per batch.
        prep: callable applied to every 1-D label row
            (through ``np.apply_along_axis`` with ``axis=1``).

    Returns:
        Tuple ``(x, y)``: the stacked inputs and the stacked, transformed
        labels.

    Raises:
        ValueError: if ``vfiles`` is empty or the files contain no records.
    """
    input_batches = []
    label_batches = []
    for filename in vfiles:
        ds = tf2.data.TFRecordDataset(filename)
        ds = ds.batch(batch_size=batch_size)
        ds = ds.map(map_func=preprocess)
        for batch_x, batch_y in ds:
            input_batches.append(np.array(batch_x).astype(npdt))
            batch_y = np.array(batch_y).astype(npdt)
            label_batches.append(np.apply_along_axis(prep, axis=1, arr=batch_y))

    # np.vstack([]) fails with an unhelpful "need at least one array to
    # concatenate"; report the actual problem instead.
    if not input_batches:
        raise ValueError('merge_datasets: no records found in the given TFRecord files')

    x = np.vstack(input_batches)
    y = np.vstack(label_batches)

    return x, y


In [8]:
# Analyte names (presumably the order of the four label columns -- confirm).
names = ['DA', '5HT', 'pH', 'NE']
speed = 'slow'
data_prefix = os.path.join('/mnt/nfs/proj/in-vitro/Mark/four_analyte/', speed, 'allin')
output_prefix = '/mnt/nfs/proj/in-vitro/Leonardo/cf_data'
# exist_ok=True already makes this a no-op for an existing directory, so a
# separate (and racy) os.path.exists() check is unnecessary. It also fails
# fast if the path exists but is not a directory.
os.makedirs(output_prefix, exist_ok=True)

# Probes to convert; each one is looked up as a sub-directory of data_prefix.
probes = [
    'CF025', 'CF027', 'CF057', 'CF064', 'CF066', 'CF078', 'CF081', 'CF082'
]

print('Converting data to numpy')
print()

# Identity transforms: labels are stored without normalization.
normalize_data = lambda x: x
revert_data = lambda x: x

# Number of records decoded per batch while reading the TFRecord files.
batch_size = 2048

def atoi(text):
    """Return ``int(text)`` when ``text`` is made only of decimal digits.

    ``str.isdecimal`` is used instead of ``str.isdigit`` because the latter
    is also true for characters such as superscript digits (e.g. '²'),
    which ``int()`` rejects with ValueError.

    Args:
        text: string chunk to convert.

    Returns:
        The integer value for an all-decimal chunk, otherwise ``text``
        unchanged (including the empty string).
    """
    return int(text) if text.isdecimal() else text

def natural_keys(text):
    """Build a sort key that compares embedded numbers by value.

    The text is split on runs of digits (kept in the result by the capturing
    group) and every piece goes through ``atoi``, e.g.
    ``'f10'`` -> ``['f', 10, '']``.

    Args:
        text: string to derive the key from.

    Returns:
        List of alternating non-digit strings and converted digit runs.
    """
    pieces = re.split(r'(\d+)', text)
    return list(map(atoi, pieces))

for probe in probes:
    print(f'converting probe {probe} to numpy file...')

    # All record files of this probe, in natural (number-aware) order.
    record_pattern = os.path.join(data_prefix, probe, 'total_records', '*')
    probe_list = sorted(tf2.io.gfile.glob(record_pattern), key=natural_keys)
    if not probe_list:
        # Fail here with the offending pattern rather than deep inside the
        # array stacking of merge_datasets.
        raise FileNotFoundError(f'no record files match {record_pattern!r}')

    x_probe, y_probe = merge_datasets(probe_list, batch_size, normalize_data)

    # One compressed-free .npz archive per probe with arrays 'x' and 'y'.
    probe_file = os.path.join(output_prefix, f'{probe}.npz')
    np.savez(probe_file, x=x_probe, y=y_probe)

print(' done.')


Converting data to numpy

converting probe CF025 to numpy file...
converting probe CF027 to numpy file...
converting probe CF057 to numpy file...
converting probe CF064 to numpy file...
converting probe CF066 to numpy file...
converting probe CF078 to numpy file...
converting probe CF081 to numpy file...
converting probe CF082 to numpy file...
 done.


In [4]:
# Print the shape of each split array; these arrays are not defined in the
# cells shown here, so the defining cell has to run first.
for x in (x_train, y_train, x_val, y_val, x_test, y_test):
    print(x.shape)

(369900, 999, 1)
(369900, 4)
(61650, 999, 1)
(61650, 4)
(61650, 999, 1)
(61650, 4)
