In [1]:
%matplotlib inline

In [2]:
import datajoint as dj
from scipy.io import loadmat
import h5py
import numpy as np
import matplotlib.pyplot as plt
import os, cv2
import pandas as pd
import seaborn as sns


In [3]:
def save_dict_to_hdf5(dic, filename):
    """Write a (possibly nested) dictionary to a new HDF5 file.

    The file is opened in ``'w'`` mode and the whole dictionary is stored
    starting at the root group ``'/'``.

    Parameters
    ----------
    dic : dict
        Dictionary to store; nested dictionaries become HDF5 groups.
    filename : str
        Path of the HDF5 file to create.
    """
    root_path = '/'
    with h5py.File(filename, 'w') as out_file:
        recursively_save_dict_contents_to_group(out_file, root_path, dic)

def load_dict_from_hdf5(filename):
    """Read an HDF5 file into a (possibly nested) dictionary.

    Parameters
    ----------
    filename : str
        Path of the HDF5 file to read (opened read-only).

    Returns
    -------
    dict
        Contents of the file starting at the root group ``'/'``; groups are
        returned as nested dictionaries.
    """
    with h5py.File(filename, 'r') as in_file:
        contents = recursively_load_dict_contents_from_group(in_file, '/')
    return contents



def recursively_save_dict_contents_to_group(h5file, path, dic):
    """Store every entry of ``dic`` below ``path`` in an open HDF5 file.

    Keys are converted with ``str`` and appended to ``path`` to form the
    dataset/group name. Lists are converted to numpy arrays first. Scalars
    and arrays are written as datasets and read back to verify the stored
    value; nested dictionaries are written recursively as sub-groups.

    Parameters
    ----------
    h5file : h5py.File
        Open, writable HDF5 file.
    path : str
        Group path (ending in ``'/'``) under which the entries are stored.
    dic : dict
        Entries to store.

    Raises
    ------
    ValueError
        If an argument has the wrong type, if a value has an unsupported
        type, or if the value read back differs from the one written.
    """
    # argument type checking
    if not isinstance(dic, dict):
        raise ValueError("must provide a dictionary")
    if not isinstance(path, str):
        raise ValueError("path must be a string")
    if not isinstance(h5file, h5py.File):
        raise ValueError("must be an open h5py file")

    mismatch_msg = 'The data representation in the HDF5 file does not match the original dict.'
    # Abstract numpy scalar bases cover np.int64 / np.float32 / np.float64
    # without relying on the removed ``np.float`` alias.
    scalar_types = (str, int, float, np.integer, np.floating)

    # save items to the hdf5 file
    for key, item in dic.items():
        name = path + str(key)
        if isinstance(item, list):
            item = np.array(item)

        # save strings and numeric scalars
        if isinstance(item, scalar_types):
            h5file[name] = item
            # ``[()]`` reads the full dataset; ``Dataset.value`` no longer exists in h5py 3.
            stored = h5file[name][()]
            if isinstance(item, str) and isinstance(stored, bytes):
                # Strings may come back as bytes; compare on decoded text.
                stored = stored.decode('utf-8')
            if not stored == item:
                raise ValueError(mismatch_msg)
        # save numpy arrays
        elif isinstance(item, np.ndarray):
            try:
                h5file[name] = item
            except (TypeError, ValueError):
                # dtype has no direct HDF5 equivalent (e.g. unicode/object):
                # fall back to byte strings. 'S' lets numpy choose an item
                # size that fits the data instead of truncating to 9 bytes.
                item = item.astype('S')
                h5file[name] = item
            if not np.array_equal(h5file[name][()], item):
                raise ValueError(mismatch_msg)
        # save dictionaries
        elif isinstance(item, dict):
            recursively_save_dict_contents_to_group(h5file, name + '/', item)
        # other types cannot be saved and will result in an error
        else:
            raise ValueError('Cannot save %s type.' % type(item))

def recursively_load_dict_contents_from_group(h5file, path):
    """Load the HDF5 group at ``path`` into a dictionary.

    Datasets are read completely into memory; sub-groups are loaded
    recursively into nested dictionaries. Members that are neither a
    dataset nor a group are skipped.

    Parameters
    ----------
    h5file : h5py.File
        Open HDF5 file.
    path : str
        Path of the group to load, ending in ``'/'``.

    Returns
    -------
    dict
        Maps member names to dataset contents or nested dictionaries.
    """
    ans = {}
    for key, item in h5file[path].items():
        if isinstance(item, h5py.Dataset):
            # ``[()]`` reads the full dataset; ``Dataset.value`` no longer exists in h5py 3.
            ans[key] = item[()]
        elif isinstance(item, h5py.Group):
            ans[key] = recursively_load_dict_contents_from_group(h5file, path + key + '/')
    return ans
def recursively_make_fake_dict(dic, temp_dict=None, long_threshold=10, long_size=700):
    """Build a zero-filled dictionary that mirrors the structure of ``dic``.

    Nested dictionaries are mirrored recursively. Every other value is
    replaced as follows:

    * it has a length greater than ``long_threshold``: ``np.zeros(long_size)``
    * it has a length up to ``long_threshold``: ``np.zeros(len(value))``
    * it has no length (single value): ``0.0``

    Parameters
    ----------
    dic : dict
        Template dictionary.
    temp_dict : dict, optional
        Dictionary to fill in. A fresh one is created for each call when
        omitted, so results of separate calls never share state.
    long_threshold : int, optional
        Length above which an entry is replaced by a ``long_size`` array.
    long_size : int, optional
        Length of the zero array used for long entries.

    Returns
    -------
    dict
        ``temp_dict`` with one entry per key of ``dic``.
    """
    if temp_dict is None:
        temp_dict = {}

    for key, item in dic.items():
        if isinstance(item, dict):
            temp_dict[key] = recursively_make_fake_dict(item, {}, long_threshold, long_size)
            continue

        try:
            item_length = len(item)
        except TypeError:
            # single value
            temp_dict[key] = 0.0
            continue

        temp_dict[key] = np.zeros(long_size if item_length > long_threshold else item_length)

    return temp_dict

In [4]:
# Load an existing dataset file into a nested dict; its 'statistics' entry is
# used further down as the template for the fake statistics.
orig = load_dict_from_hdf5("/mnt/scratch07/synicix_dev/datasets/static22845-10-5-preproc0.h5")



In [5]:
# Top-level entries of the loaded reference file.
orig.keys()

dict_keys(['behavior', 'condition_hashes', 'images', 'item_info', 'neurons', 'pupil_center', 'responses', 'statistics', 'tiers', 'trial_idx', 'types'])

In [6]:
# Virtual DataJoint modules for the 'pipeline_stimulus' and 'pipeline_imagenet' schemas.
# `stimulus` is queried below for image ids; `imagenet` is not referenced in the visible cells.
stimulus = dj.create_virtual_module('stimulus','pipeline_stimulus')
imagenet = dj.create_virtual_module('imagenet','pipeline_imagenet')

Connecting donnie@10.28.0.34:3306


In [7]:
# The four simulated cases, paired position-by-position with an image class.
image_cases = [
    'biased_correlated',
    'biased_uncorrelated',
    'unbiased_correlated',
    'unbiased_uncorrelated',
]
classes = [
    'imagenet_v2_rgb',
    'imagenet_v2_rgb_g_b_no_normalization',
    'imagenet_v2_rgb_range_30_225_mean127',
    'imagenet_v2_rgb_g_b_channels_separately_joined',
]

# One entry per case; 'img_ids' is a placeholder that is filled in later.
img_dict = {
    case_name: dict(img_class=class_name, img_ids=None)
    for case_name, class_name in zip(image_cases, classes)
}

In [8]:
# Fill in 'img_ids' for every case by querying stimulus.StaticImage.Image.
for _key, _val in img_dict.items():

    if _key in ('unbiased_correlated', 'unbiased_uncorrelated'):
        # this works for now because there is only 1 album; later we might need more restriction
        _query_class = _val['img_class']
    elif _key == 'biased_correlated':
        # NOTE(review): the biased cases query a different class than the
        # 'img_class' recorded for them in img_dict -- confirm this is intended.
        _query_class = 'imagenet_v2_rgb_range_30_225_mean127'
    elif _key == 'biased_uncorrelated':
        _query_class = 'imagenet_v2_rgb_g_b_channels_separately_joined'
    else:
        # Unknown cases are left untouched.
        continue

    _val['img_ids'] = (stimulus.StaticImage.Image & 'image_class = "{}"'.format(_query_class)).fetch('image_id')


In [9]:
# condition_hashes: 5000 distinct fake hashes (ids 0..4999) followed by
# 100 further hashes (ids 5000..5099) that are each repeated 10 times in a row.
condition_hashes = np.array(
    ['fake_condition_{}'.format(single_id).encode() for single_id in range(5000)]
    + [
        'fake_condition_{}'.format(repeat_id + 5000).encode()
        for repeat_id in range(100)
        for _ in range(10)
    ]
)

In [10]:
# Show the per-case image class and the image ids stored in img_dict.
img_dict

{'biased_correlated': {'img_class': 'imagenet_v2_rgb',
  'img_ids': array([    62,    121,    151, ..., 124635, 124642, 124645])},
 'biased_uncorrelated': {'img_class': 'imagenet_v2_rgb_g_b_no_normalization',
  'img_ids': array([   0,    1,    2, ..., 5097, 5098, 5099])},
 'unbiased_correlated': {'img_class': 'imagenet_v2_rgb_range_30_225_mean127',
  'img_ids': array([    62,    121,    151, ..., 124635, 124642, 124645])},
 'unbiased_uncorrelated': {'img_class': 'imagenet_v2_rgb_g_b_channels_separately_joined',
  'img_ids': array([   0,    1,    2, ..., 5097, 5098, 5099])}}

In [11]:
# generate item_info
def generate_item_info(img_dict):
    """Attach a fake ``item_info`` dictionary to every entry of ``img_dict``.

    All fields except the image class and image ids are identical placeholder
    arrays of length 6000 that are shared between the cases. The condition
    hashes are taken from the notebook-level ``condition_hashes`` array.

    Parameters
    ----------
    img_dict : dict
        Maps a case name to a dict providing ``'img_class'`` (str) and
        ``'img_ids'``. Each inner dict gains an ``'item_info'`` entry.

    Returns
    -------
    dict
        The same ``img_dict`` object, modified in place.
    """
    n_trials = 6000

    def repeated(text):
        # Byte-string array holding ``text`` once per trial.
        return np.array([text.encode()] * n_trials)

    def zero_ids():
        # Fresh all-zero uint32 id column.
        return np.zeros(n_trials, dtype=np.uint32)

    # Placeholder columns that do not depend on the case.
    channel_1 = repeated('2')
    channel_2 = repeated('3')
    channel_3 = repeated('None')
    last_flip = np.arange(n_trials)
    pre_blank_period = np.full(n_trials, 0.3)
    presentation_time = np.full(n_trials, 0.5)
    projector_config_id = np.full(n_trials, 6.0)
    projector_id = np.ones(n_trials, dtype=np.int64)
    trial_ts = repeated("Timestamp('2020-03-04 13:54:51')")
    animal_ids = zero_ids()
    scan_ids = zero_ids()
    session_ids = zero_ids()
    trial_numbers = np.arange(n_trials, dtype=np.uint32)

    for case_entry in img_dict.values():
        case_entry['item_info'] = dict(
            animal_id=animal_ids,
            colorframeprojector_channel_1=channel_1,
            colorframeprojector_channel_2=channel_2,
            colorframeprojector_channel_3=channel_3,
            colorframeprojector_last_flip=last_flip,
            colorframeprojector_pre_blank_period=pre_blank_period,
            colorframeprojector_presentation_time=presentation_time,
            colorframeprojector_projector_config_id=projector_config_id,
            colorframeprojector_projector_id=projector_id,
            colorframeprojector_trial_ts=trial_ts,
            condition_hash=condition_hashes,
            colorframeprojector_image_class=repeated(case_entry['img_class']),
            # Stored as fetched; its length is not checked against n_trials.
            colorframeprojector_image_id=case_entry['img_ids'],
            scan_idx=scan_ids,
            session=session_ids,
            trial_idx=trial_numbers,
        )

    return img_dict

In [12]:
# generate_item_info adds 'item_info' to each case in place and returns the same dict.
img_dict = generate_item_info(img_dict)

In [13]:
# generate statistics: zero-filled placeholders mirroring the nested layout
# of the reference file's 'statistics' entry
statistics = recursively_make_fake_dict(orig['statistics'])

In [14]:
# generate types: the same stimulus type label for each of the 6000 trials
types = np.array(['stimulus.ColorFrameProjector'.encode()] * 6000)

In [15]:
# generate neurons: placeholder per-neuron metadata for 700 units
unit_ids = np.arange(700, dtype=np.uint16)
area = np.array([str.encode('V1')]*700)
layer = np.array([str.encode('L2/3')]*700)
# Per-trial index; also stored at the top level of each output file further down.
trial_idx = np.arange(6000, dtype=np.uint32)

neurons = dict(area = area,
               layer = layer,
               # unit_ids was computed but never stored in the dict.
               unit_ids = unit_ids,
               # Sized per neuron (700) to match area/layer; these were
               # previously sized by the trial count (6000).
               animal_id = np.zeros(700, dtype=np.uint32),
               scan_idx = np.zeros(700, dtype=np.uint32),
               session = np.zeros(700, dtype=np.uint32),
               # NOTE(review): a per-trial array inside the per-neuron group;
               # kept so existing readers of this key keep working -- confirm it is needed.
               trial_idx = trial_idx
              )

In [16]:
# tiers: trials 0-4949 are "train", 4950-4999 "validation", 5000-5999 "test"
tiers = np.array(
    ["train".encode()] * 4950
    + ["validation".encode()] * 50
    + ["test".encode()] * 1000
)

In [21]:
# For every case, combine the arrays stored in the existing file with the
# fake metadata built above and write "<case>_gabor_poisson_new.h5"
# (relative path, i.e. into the current working directory).
for _key in img_dict.keys():

    with h5py.File("/mnt/scratch07/synicix_dev/datasets/"+_key+"_gabor_poisson.h5","r") as old_fh:
        # Copy the stored arrays into memory while the source file is open.
        complete_dict = {
            _name: old_fh[_name][:]
            for _name in ('images', 'true_rate', 'responses', 'behavior', 'pupil_center')
        }

    # Metadata shared by all cases, plus the case-specific item_info.
    complete_dict.update(
        tiers=tiers,
        item_info=img_dict[_key]['item_info'],
        statistics=statistics,
        neurons=neurons,
        trial_idx=trial_idx,
        types=types,
        condition_hashes=condition_hashes,
    )

    save_dict_to_hdf5(complete_dict,"{}_gabor_poisson_new.h5".format(_key))

In [22]:
# Sanity check: print the shape of the datasets stored in every newly written file.
for _key in img_dict.keys():
    with h5py.File(_key+"_gabor_poisson_new.h5","r") as old_fh:

        print(_key)
        print("---------------------------")
        # Dataset.shape is read from metadata, so the arrays are not loaded.
        for _name in ('images', 'true_rate', 'responses', 'behavior', 'pupil_center'):
            print(old_fh[_name].shape)
        # Leftover separator marker; kept so the printed output is unchanged.
        print('hjhh')
        # condition_hashes is read from the file (not from the in-memory
        # array), so this actually checks what was written.
        for _name in ('tiers', 'trial_idx', 'types', 'condition_hashes'):
            print(old_fh[_name].shape)

biased_correlated
---------------------------
(6000, 2, 36, 64)
(6000, 700)
(6000, 700)
(6000, 3)
(6000, 3)
hjhh
(6000,)
(6000,)
(6000,)
(6000,)
biased_uncorrelated
---------------------------
(6000, 2, 36, 64)
(6000, 700)
(6000, 700)
(6000, 3)
(6000, 3)
hjhh
(6000,)
(6000,)
(6000,)
(6000,)
unbiased_correlated
---------------------------
(6000, 2, 36, 64)
(6000, 700)
(6000, 700)
(6000, 3)
(6000, 3)
hjhh
(6000,)
(6000,)
(6000,)
(6000,)
unbiased_uncorrelated
---------------------------
(6000, 2, 36, 64)
(6000, 700)
(6000, 700)
(6000, 3)
(6000, 3)
hjhh
(6000,)
(6000,)
(6000,)
(6000,)


In [31]:
# Number of trials assigned to the "test" tier.
(tiers == 'test'.encode()).sum()

1000

In [17]:
# Shape of the 'tiers' dataset stored in each of the existing source files.
for _key in img_dict:
    _old_path = "/mnt/scratch07/synicix_dev/datasets/"+_key+"_gabor_poisson.h5"
    with h5py.File(_old_path,"r") as old_fh:
        _old_tiers = old_fh['tiers'][:]
        print(_old_tiers.shape)

(12000,)
(12000,)
(12000,)
(12000,)


In [16]:
# Open one of the newly written files read-only for inspection. The relative
# path is resolved against the kernel's working directory (the recorded run
# below did not find the file there).
# NOTE(review): the handle is not closed in the visible cells.
fh_new = h5py.File('unbiased_uncorrelated_gabor_poisson_new.h5', 'r')

OSError: Unable to open file (unable to open file: name = 'unbiased_uncorrelated_gabor_poisson_new.h5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [42]:
# Open another newly written file read-only for inspection (relative path).
# NOTE(review): the handle is not closed in the visible cells.
fh_new2 = h5py.File('biased_correlated_gabor_poisson_new.h5', 'r')