# Create Data Files

In [1]:
import sys
import os

myhost = os.uname()[1]
print("Running on Computer: [%s]" %myhost)

sys.path.insert(0, '/home/ifernand/Code/') 
dirname = '/home/ifernand/Cloud_SynthData_Proj'

import numpy as np
from copy import deepcopy
import matplotlib.pyplot as plt
from scipy import io as sio
import seaborn as sns
import torch
import time
import h5py

# NDN tools
import NDNT
import NDNT.utils as utils
from NDNT.modules.layers import *
from NDNT.networks import *
import NDNT.NDN as NDN
from NTdatasets.conway.synthcloud_datasets import SimCloudData, OLD_SimCloudData
from NTdatasets.generic import GenericDataset
from ColorDataUtils.multidata_utils import MultiExperiment
import ColorDataUtils.ConwayUtils as CU
from ColorDataUtils import readout_fit
from ColorDataUtils.simproj_utils import *
from NDNT.utils import fit_lbfgs, fit_lbfgs_batch
from NDNT.utils import imagesc   
from NDNT.utils import ss
from NDNT.utils import subplot_setup
from NDNT.utils import figure_export

%load_ext autoreload
%autoreload 2

Running on Computer: [sc]
Invoking __init__.py for NDNT.utils


## Pull Data

In [2]:
# Run the spike-time data pull and report how long the call took.
# NOTE(review): pull_data2 presumably writes the spike-time HDF5 file that is
# loaded under "Look at Data File" -- confirm in utils/create_data_func.
from utils.create_data_func import pull_data2

start = time.time()
pull_data2(stim_dim=120, cell_sq_rad=0.3)
end = time.time()
# time.time() is wall-clock, so the value labelled "CPU Time" is elapsed wall time.
print(f"CPU Time {end - start} sec")

++INFO: stimuli of shape (510000, 120, 120)
++INFO: Cell type order ['X_ON', 'X_OFF', 'V1_Exc_L4', 'V1_Inh_L4', 'V1_Exc_L2/3', 'V1_Inh_L2/3']
++INFO: 105 X_ON chosen
++INFO: 109 X_OFF chosen
++INFO: 607 V1_Exc_L4 chosen
++INFO: 187 V1_Inh_L4 chosen
++INFO: 535 V1_Exc_L2/3 chosen
++INFO: 162 V1_Inh_L2/3 chosen
++INFO: File 0 spike time added
++INFO: File 1 spike time added
++INFO: File 2 spike time added
++INFO: File 3 spike time added
++INFO: File 4 spike time added
++INFO: File 5 spike time added
++INFO: File 6 spike time added
++INFO: File 7 spike time added
++INFO: File 8 spike time added
++INFO: File 9 spike time added
++INFO: File 10 spike time added
++INFO: File 11 spike time added
++INFO: File 12 spike time added
++INFO: File 13 spike time added
++INFO: File 14 spike time added
++INFO: File 15 spike time added
++INFO: File 16 spike time added
++INFO: File 17 spike time added
++INFO: File 18 spike time added
++INFO: File 19 spike time added
++INFO: File 20 spike time added
++INFO

In [2]:
# Run the binned-response (robs) data pull and report how long the call took.
# NOTE(review): pull_data presumably writes the robs HDF5 file that is loaded
# further down for comparison -- confirm in utils/create_data_func.
from utils.create_data_func import pull_data

start = time.time()
pull_data(stim_dim=120, robs_sq_rad=0.3)
end = time.time()
# time.time() is wall-clock, so the value labelled "CPU Time" is elapsed wall time.
print(f"CPU Time {end - start} sec")

++INFO: stimuli of shape (510000, 120, 120)
++INFO: Cell type order ['X_ON', 'X_OFF', 'V1_Exc_L4', 'V1_Inh_L4', 'V1_Exc_L2/3', 'V1_Inh_L2/3']
++INFO: 105 X_ON chosen
++INFO: 109 X_OFF chosen
++INFO: 607 V1_Exc_L4 chosen
++INFO: 187 V1_Inh_L4 chosen
++INFO: 535 V1_Exc_L2/3 chosen
++INFO: 162 V1_Inh_L2/3 chosen
++INFO: File 0 robs added
++INFO: File 1 robs added
++INFO: File 2 robs added
++INFO: File 3 robs added
++INFO: File 4 robs added
++INFO: File 5 robs added
++INFO: File 6 robs added
++INFO: File 7 robs added
++INFO: File 8 robs added
++INFO: File 9 robs added
++INFO: File 10 robs added
++INFO: File 11 robs added
++INFO: File 12 robs added
++INFO: File 13 robs added
++INFO: File 14 robs added
++INFO: File 15 robs added
++INFO: File 16 robs added
++INFO: File 17 robs added
++INFO: File 18 robs added
++INFO: File 19 robs added
++INFO: File 20 robs added
++INFO: File 21 robs added
++INFO: File 22 robs added
++INFO: File 23 robs added
++INFO: File 24 robs added
++INFO: File 25 robs add

In [3]:
# Run the robs data pull with scale=3 and report how long the call took.
# The recorded run logged creation of
# data/cloud_data_stim_dim120_robs_sqrad_0.3_sca=3.hdf5.
from utils.create_data_func import pull_data

start = time.time()
pull_data(scale=3, stim_dim=120, robs_sq_rad=0.3)
end = time.time()
# time.time() is wall-clock, so the value labelled "CPU Time" is elapsed wall time.
print(f"CPU Time {end - start} sec")

++INFO: stimuli of shape (170000, 120, 120)
++INFO: Cell type order ['X_ON', 'X_OFF', 'V1_Exc_L4', 'V1_Inh_L4', 'V1_Exc_L2/3', 'V1_Inh_L2/3']
++INFO: 105 X_ON chosen
++INFO: 109 X_OFF chosen
++INFO: 607 V1_Exc_L4 chosen
++INFO: 187 V1_Inh_L4 chosen
++INFO: 535 V1_Exc_L2/3 chosen
++INFO: 162 V1_Inh_L2/3 chosen
++INFO: File 0 robs added
++INFO: File 1 robs added
++INFO: File 2 robs added
++INFO: File 3 robs added
++INFO: File 4 robs added
++INFO: File 5 robs added
++INFO: File 6 robs added
++INFO: File 7 robs added
++INFO: File 8 robs added
++INFO: Data file created: data/cloud_data_stim_dim120_robs_sqrad_0.3_sca=3.hdf5
CPU Time 90.5033872127533 sec


In [4]:
# Run the robs data pull with scale=6 and report how long the call took.
# The recorded run logged creation of
# data/cloud_data_stim_dim120_robs_sqrad_0.3_sca=6.hdf5.
from utils.create_data_func import pull_data

start = time.time()
pull_data(scale=6, stim_dim=120, robs_sq_rad=0.3)
end = time.time()
# time.time() is wall-clock, so the value labelled "CPU Time" is elapsed wall time.
print(f"CPU Time {end - start} sec")

++INFO: stimuli of shape (170000, 120, 120)
++INFO: Cell type order ['X_ON', 'X_OFF', 'V1_Exc_L4', 'V1_Inh_L4', 'V1_Exc_L2/3', 'V1_Inh_L2/3']
++INFO: 105 X_ON chosen
++INFO: 109 X_OFF chosen
++INFO: 607 V1_Exc_L4 chosen
++INFO: 187 V1_Inh_L4 chosen
++INFO: 535 V1_Exc_L2/3 chosen
++INFO: 162 V1_Inh_L2/3 chosen
++INFO: File 0 robs added
++INFO: File 1 robs added
++INFO: File 2 robs added
++INFO: File 3 robs added
++INFO: File 4 robs added
++INFO: File 5 robs added
++INFO: File 6 robs added
++INFO: File 7 robs added
++INFO: File 8 robs added
++INFO: Data file created: data/cloud_data_stim_dim120_robs_sqrad_0.3_sca=6.hdf5
CPU Time 93.17553687095642 sec


In [5]:
# Run the robs data pull with scale=9 and report how long the call took.
# The recorded run logged creation of
# data/cloud_data_stim_dim120_robs_sqrad_0.3_sca=9.hdf5.
from utils.create_data_func import pull_data

start = time.time()
pull_data(scale=9, stim_dim=120, robs_sq_rad=0.3)
end = time.time()
# time.time() is wall-clock, so the value labelled "CPU Time" is elapsed wall time.
print(f"CPU Time {end - start} sec")

++INFO: stimuli of shape (170000, 120, 120)
++INFO: Cell type order ['X_ON', 'X_OFF', 'V1_Exc_L4', 'V1_Inh_L4', 'V1_Exc_L2/3', 'V1_Inh_L2/3']
++INFO: 105 X_ON chosen
++INFO: 109 X_OFF chosen
++INFO: 607 V1_Exc_L4 chosen
++INFO: 187 V1_Inh_L4 chosen
++INFO: 535 V1_Exc_L2/3 chosen
++INFO: 162 V1_Inh_L2/3 chosen
++INFO: File 0 robs added
++INFO: File 1 robs added
++INFO: File 2 robs added
++INFO: File 3 robs added
++INFO: File 4 robs added
++INFO: File 5 robs added
++INFO: File 6 robs added
++INFO: File 7 robs added
++INFO: File 8 robs added
++INFO: Data file created: data/cloud_data_stim_dim120_robs_sqrad_0.3_sca=9.hdf5
CPU Time 89.61282753944397 sec


## Look at Data File

In [3]:
# Load the spike-time data file: stimulus, cell positions, cell-type keys,
# per-file start offsets, and one spike-time array per cell.
out_file = 'data/cloud_data_stim_dim120_spike_time_sqrad_0.3.hdf5'
with h5py.File(out_file, 'r') as f:
    stim = f['stim'][:]
    x_pos = f['x_pos'][:]
    y_pos = f['y_pos'][:]
    # Read the key dataset from disk once; slicing an h5py dataset inside the
    # comprehension would re-read the whole dataset for every cell.
    cell_key_raw = f['cell_key'][:]
    # Keys are stored as bytes; decode each to a Python str.
    cell_key = [str(cell_key_raw[i], encoding='utf-8') for i in range(x_pos.shape[0])]
    file_start_pos = list(f['file_start_pos'][:])
    # Each cell has its own dataset named 'spike_time_cell_<index>'.
    spike_times = []
    for i in range(x_pos.shape[0]):
        spike_times.append(f['spike_time_cell_'+str(i)][:])

In [3]:
len(spike_times)

1705

In [4]:
np.where(spike_times[0] == -1)[0].shape

(27,)

In [5]:
stim.shape

(510000, 14400)

In [6]:
file_start_pos

[0,
 20000,
 40000,
 60000,
 80000,
 100000,
 120000,
 140000,
 160000,
 180000,
 200000,
 220000,
 240000,
 260000,
 270000,
 290000,
 310000,
 330000,
 350000,
 360000,
 370000,
 390000,
 410000,
 430000,
 450000,
 470000,
 490000]

In [14]:
# Compute robs from spike times
#
# For every cell, the spike-time array is split into trials at the -1 entries,
# each trial is histogrammed into bins of width dt, and the per-trial counts are
# written back-to-back into robs (rows = time bins, columns = cells).
# NOTE(review): 16 looks like the stimulus frame duration in the same units as
# the spike times (so 16/dt bins per stimulus frame) -- confirm.
dt = 16
NC = x_pos.shape[0]
T_full = int((16/dt)*stim.shape[0])
# Allocate as uint8 directly: np.zeros(...).astype(np.uint8) would first build a
# float64 array of the same shape (8x the memory) only to throw it away.
robs = np.zeros((T_full, NC), dtype=np.uint8)
for i in range(NC):
    cell_spike_times = spike_times[i]
    # Positions of the -1 separators that terminate each trial.
    trial_idx = list(np.where(cell_spike_times == -1)[0])
    start = 0
    for j in range(len(trial_idx)):
        # A trial's spikes lie between the previous separator and this one.
        first = 0 if j == 0 else trial_idx[j-1] + 1
        trial_spike_times = cell_spike_times[first:trial_idx[j]]
        # The last trial runs to the end of the stimulus; every other trial runs
        # to the next file start. Checking "last" first also handles a single
        # trial, where file_start_pos[j+1] would not describe the trial's end.
        if j == len(trial_idx) - 1:
            trial_end = stim.shape[0]
        else:
            trial_end = file_start_pos[j+1]
        NT = int((16/dt)*(trial_end - file_start_pos[j]))
        # NOTE(review): counts are cast to uint8, so more than 255 spikes in a
        # bin would wrap -- presumably impossible at this bin width; confirm.
        spikes = np.histogram(trial_spike_times, bins=NT, range=(0,int(dt*NT)))[0].astype(np.uint8)
        robs[start:start+NT,i] = spikes
        start += NT

In [15]:
robs.shape

(510000, 1705)

In [4]:
# Load the robs data file so its contents can be compared against the arrays
# derived from the spike-time file.
out_file2 = 'data/cloud_data_stim_dim120_robs_sqrad_0.3.hdf5'
with h5py.File(out_file2, mode='r') as f:
    # The three reads are independent of one another.
    robs2 = f['robs'][:]
    stim2 = f['stim'][:]
    file_start_pos2 = [pos for pos in f['file_start_pos'][:]]

In [5]:
np.sum(stim!=stim2)

0

In [6]:
file_start_pos == file_start_pos2

True

In [8]:
file_start_pos

[0,
 20000,
 40000,
 60000,
 80000,
 100000,
 120000,
 140000,
 160000,
 180000,
 200000,
 220000,
 240000,
 260000,
 270000,
 290000,
 310000,
 330000,
 350000,
 360000,
 370000,
 390000,
 410000,
 430000,
 450000,
 470000,
 490000]

In [12]:
robs2.shape

(510000, 1705)

In [16]:
np.sum(robs!=robs2)

0

In [18]:
# Partition the time axis into NB consecutive blocks of block_len samples.
NT = T_full
block_len = 1000
NB = NT//block_len # number of complete blocks
# Build indices for complete blocks only: reshaping np.arange(NT) raises
# ValueError whenever NT is not an exact multiple of block_len. When it is a
# multiple (as in the recorded run), the result is unchanged.
block_inds = np.arange(NB*block_len, dtype=np.int64).reshape(NB,block_len) # block indices

In [19]:
NB

510

In [21]:
block_inds

array([[     0,      1,      2, ...,    997,    998,    999],
       [  1000,   1001,   1002, ...,   1997,   1998,   1999],
       [  2000,   2001,   2002, ...,   2997,   2998,   2999],
       ...,
       [507000, 507001, 507002, ..., 507997, 507998, 507999],
       [508000, 508001, 508002, ..., 508997, 508998, 508999],
       [509000, 509001, 509002, ..., 509997, 509998, 509999]])

In [None]:
def low_to_high_index(low_index, T, dt):
    """Convert a low-resolution time index to the matching high-resolution index.

    The high-resolution axis has ``16 // dt`` samples for every low-resolution
    sample, so the low-resolution range ``0..T`` maps onto ``0..(16 // dt) * T``
    (``0..2T`` for ``dt=8``).

    Args:
        low_index: Index on the low-resolution axis.
        T: Length of the low-resolution axis. It cancels out of the conversion
            and is kept only so existing calls keep working.
        dt: Bin width of the high-resolution axis; ``16 // dt`` is the
            upsampling factor.

    Returns:
        int: Index of the first high-resolution sample covering ``low_index``.
    """
    # The earlier expression applied the factor twice
    # ((16//dt) * low_index * ((16//dt) * T) // T), giving 4x instead of 2x for dt=8.
    upsample = 16 // dt
    return int(low_index * upsample)

def high_to_low_index(high_index, T, dt=None):
    """Convert a high-resolution time index to the low-resolution index containing it.

    The high-resolution axis has ``16 // dt`` samples for every low-resolution
    sample, so the range ``0..(16 // dt) * T`` maps back onto ``0..T``
    (``0..2T`` onto ``0..T`` for ``dt=8``).

    Args:
        high_index: Index on the high-resolution axis.
        T: Length of the low-resolution axis. It cancels out of the conversion
            and is kept only so existing calls keep working.
        dt: Bin width of the high-resolution axis. When omitted, the
            module-level ``dt`` is used, which is what this function always
            read before the parameter existed.

    Returns:
        int: ``high_index`` floor-divided by the upsampling factor ``16 // dt``.

    Raises:
        NameError: If ``dt`` is omitted and no module-level ``dt`` is defined.
        ZeroDivisionError: If ``dt`` is 0 or greater than 16.
    """
    if dt is None:
        # Backward compatibility with callers that relied on the notebook global.
        dt = globals().get('dt')
        if dt is None:
            raise NameError("name 'dt' is not defined; pass dt explicitly")
    upsample = 16 // dt
    # (high_index * T) // (upsample * T) reduces to this; dropping T also avoids
    # a spurious ZeroDivisionError when T == 0.
    return int(high_index // upsample)

In [34]:
# Example: keep only the even high-resolution indices and halve them to get the
# corresponding low-resolution indices.
high_idx = np.array([*range(0, 10), *range(20, 30)])
high_idx[high_idx % 2 == 0] // 2

array([ 0,  1,  2,  3,  4, 10, 11, 12, 13, 14])

In [37]:
sub_stim = stim[:100,:10]
sub_stim.shape

(100, 10)

In [39]:
np.repeat(sub_stim,2,axis=0)

array([[163, 163, 162, ..., 160, 162, 165],
       [163, 163, 162, ..., 160, 162, 165],
       [ 87,  98, 108, ..., 123, 120, 115],
       ...,
       [158, 163, 167, ..., 159, 151, 142],
       [135, 134, 132, ..., 111, 105, 101],
       [135, 134, 132, ..., 111, 105, 101]], dtype=uint8)

## Generic Data Module

In [18]:
# Build the SimCloudData dataset for the four V1 cell types at dt=8 with
# down_sample=2, and report how long construction took.
start = time.time()
data_ds = SimCloudData(
    cell_type_list=[
        'V1_Exc_L4',
        'V1_Inh_L4',
        'V1_Exc_L2/3',
        'V1_Inh_L2/3',
    ],
    dt=8,
    down_sample=2,
)
end = time.time()
# time.time() is wall-clock, so the value labelled "CPU Time" is elapsed wall time.
print(f"CPU Time {end - start} sec")

CPU Time 65.52293825149536 sec


In [19]:
data_ds[:]['stim'].shape

torch.Size([1020000, 3600])

In [20]:
data_ds[:]['robs'].shape

torch.Size([1020000, 1491])

In [21]:
510000*2

1020000

In [7]:
# Build the same four-cell-type dataset with the older OLD_SimCloudData loader
# (down_sample=2) so it can be compared against data_ds, and report how long
# construction took.
start = time.time()
data_ds2 = OLD_SimCloudData(
    cell_type_list=[
        'V1_Exc_L4',
        'V1_Inh_L4',
        'V1_Exc_L2/3',
        'V1_Inh_L2/3',
    ],
    down_sample=2,
)
end = time.time()
# time.time() is wall-clock, so the value labelled "CPU Time" is elapsed wall time.
print(f"CPU Time {end - start} sec")

CPU Time 49.08924102783203 sec


In [8]:
data_ds[:]['stim'].shape

1


torch.Size([510000, 3600])

In [9]:
data_ds2[:]['stim'].shape

torch.Size([510000, 3600])

In [10]:
data_ds[:]['robs'].shape

1


torch.Size([510000, 1491])

In [11]:
data_ds2[:]['robs'].shape

torch.Size([510000, 1491])

In [12]:
torch.sum(data_ds[:]['robs'] != data_ds2[:]['robs'])

1


tensor(0)

In [13]:
torch.sum(data_ds[:]['stim'] != data_ds2[:]['stim'])

1


tensor(0)

In [3]:
data_ds[:]['stim'].shape

torch.Size([510000, 3600])

In [4]:
data_ds[:]['robs'].shape

torch.Size([510000, 1496])

In [5]:
data_ds[:]['dfs'].shape

torch.Size([510000, 1496])

In [13]:
data_ds[:]['dfs'][2000:2020,1]

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1.,
        1., 1.])

In [10]:
data_ds.block_len

1000

In [6]:
data_ds.NT

510000

In [7]:
data_ds.train_inds

array([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,
        13,  14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,
        26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,
        39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  62,  63,  64,
        65,  66,  67,  68,  69,  70,  71,  72,  73,  74,  75,  76,  77,
        78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
        91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
       104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116,
       117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
       130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
       143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155,
       156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168,
       169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 18

In [8]:
data_ds.val_inds

array([408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420,
       421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433,
       434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446,
       447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459,
       460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472,
       473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485,
       486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498,
       499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509])