In [5]:

import numpy
import h5py
import random
import copy

from collections import defaultdict
from skimage import filters
from tqdm.auto import tqdm

MAX_PU = 8
PATH = "../data/80-20_calcium_dataset.h5"
numpy.random.seed(42)
random.seed(42)

POSITIVE_SAMPLES = {}
NEGATIVE_SAMPLES = defaultdict(list)
with h5py.File(PATH, "r") as file:
    for fold, fold_values in tqdm(file.items(), leave=False):
        print('fold', fold, len(fold_values.keys()))
        positive_samples = []
        for neuron, neuron_values in fold_values.items():
            for idx, event in enumerate(neuron_values["events"]):
                positive_samples.append({
                    "neuron" : neuron,
                    "event-id" : idx
                })
        POSITIVE_SAMPLES[fold] = positive_samples
        print('positive samples',len(POSITIVE_SAMPLES[fold]))
        
        if fold == "train":
            for neuron, neuron_values in tqdm(fold_values.items(), leave=False):
                num_samples = int(len(neuron_values["events"]) * MAX_PU)
                max_proj = neuron_values["input"][:25].mean(axis=0) # keeps first N frames to compute foreground
                foreground = max_proj > filters.threshold_triangle(max_proj)
                mask = numpy.zeros_like(foreground, dtype=int)
                mask[64 : -64, 64 : -64] = 1
                foreground = foreground.astype(int) * mask.astype(int)
                indices = numpy.argwhere(foreground)
                choices = numpy.random.choice(len(indices), size=num_samples, replace=False)
                yx = indices[choices]
                t = numpy.random.randint(64, neuron_values["input"].shape[0]-64, size=len(choices))
                coords = numpy.concatenate((t[:, numpy.newaxis], yx), axis=-1)
                negative_samples = []
                for coord in coords:
                    negative_samples.append({
                        "neuron" : neuron,
                        "coord" : coord.tolist()
                    })
                NEGATIVE_SAMPLES[fold].append(negative_samples)
            print('negative_samples',len(NEGATIVE_SAMPLES[fold]))

numpy.random.seed(42)
random.seed(42)

MAX_PU_64 = 64
NEGATIVE_SAMPLES_64 = defaultdict(list)
with h5py.File(PATH, "r") as file:
    for fold, fold_values in tqdm(file.items(), leave=False):
        if fold == "train":
            for i, (neuron, neuron_values) in enumerate(tqdm(fold_values.items(), leave=False)):
                num_samples = int(len(neuron_values["events"]) * MAX_PU_64) - len(NEGATIVE_SAMPLES[fold][i])
#                 print(num_samples)
                max_proj = neuron_values["input"][:25].mean(axis=0) # keeps first N frames to compute foreground
                foreground = max_proj > filters.threshold_triangle(max_proj)
                mask = numpy.zeros_like(foreground, dtype=int)
                mask[64 : -64, 64 : -64] = 1
                foreground = foreground.astype(int) * mask.astype(int)
                indices = numpy.argwhere(foreground)
                choices = numpy.random.choice(len(indices), size=num_samples, replace=num_samples > len(indices))
                yx = indices[choices]
                t = numpy.random.randint(64, neuron_values["input"].shape[0]-64, size=len(choices))
                coords = numpy.concatenate((t[:, numpy.newaxis], yx), axis=-1)
                negative_samples = NEGATIVE_SAMPLES[fold][i]
                for coord in coords:
                    negative_samples.append({
                        "neuron" : neuron,
                        "coord" : coord.tolist()
                    })
                NEGATIVE_SAMPLES_64[fold].append(negative_samples)
            print('negative_samples',len(NEGATIVE_SAMPLES_64[fold]))
            
MAX_PU_256 = 256
NEGATIVE_SAMPLES_256 = defaultdict(list)
with h5py.File(PATH, "r") as file:
    for fold, fold_values in tqdm(file.items(), leave=False):
        if fold == "train":
            for i, (neuron, neuron_values) in enumerate(tqdm(fold_values.items(), leave=False)):
                num_samples = int(len(neuron_values["events"]) * MAX_PU_256) - len(NEGATIVE_SAMPLES_64[fold][i])
#                 print(num_samples)
                max_proj = neuron_values["input"][:25].mean(axis=0) # keeps first N frames to compute foreground
                foreground = max_proj > filters.threshold_triangle(max_proj)
                mask = numpy.zeros_like(foreground, dtype=int)
                mask[64 : -64, 64 : -64] = 1
                foreground = foreground.astype(int) * mask.astype(int)
                indices = numpy.argwhere(foreground)
                choices = numpy.random.choice(len(indices), size=num_samples, replace=num_samples > len(indices))
                yx = indices[choices]
                t = numpy.random.randint(64, neuron_values["input"].shape[0]-64, size=len(choices))
                coords = numpy.concatenate((t[:, numpy.newaxis], yx), axis=-1)
                negative_samples = NEGATIVE_SAMPLES_64[fold][i]
                for coord in coords:
                    negative_samples.append({
                        "neuron" : neuron,
                        "coord" : coord.tolist()
                    })
                NEGATIVE_SAMPLES_256[fold].append(negative_samples)
            print('negative_samples',len(NEGATIVE_SAMPLES_256[fold]))            

for key, values in NEGATIVE_SAMPLES.items():
    print(key, sum([len(value) for value in values]))
    print(key, len(flatten(values)))
    
for key, values in NEGATIVE_SAMPLES_64.items():
    print(key, sum([len(value) for value in values]))
    print(key, len(flatten(values)))    
    
for key, values in NEGATIVE_SAMPLES_256.items():
    print(key, sum([len(value) for value in values]))
    print(key, len(flatten(values)))        

  0%|          | 0/2 [00:00<?, ?it/s]

fold train 58
positive samples 9532


  0%|          | 0/58 [00:00<?, ?it/s]

negative_samples 58
fold valid 15
positive samples 2639


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

negative_samples 58


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

negative_samples 58
train 2440192
train 2440192
train 2440192
train 2440192
train 2440192
train 2440192


In [6]:
def flatten(items):
    """
    Recursively flattens a list of items
    """
    flattened = []
    if isinstance(items, (dict, float, int)):
        return [items]
    else:
        for item in items:
            flattened.extend(flatten(item))
    return flattened


In [7]:
numpy.random.seed(42)
random.seed(42)

# Keeps 7800 positive crops
tmp_updated_samples = {}
for fold, values in POSITIVE_SAMPLES.items():
    tmp_updated_samples[fold] = values
updated_samples = copy.deepcopy(tmp_updated_samples)

SAMPLES_PU = {
    "complete-1:0" : {
        "positive" : updated_samples,
        "negative" : {"train" : [], "valid" : []}
    }
}

tmp_updated_samples = {}
subset = []
for neuron_subset in NEGATIVE_SAMPLES["train"]:
    subset.append(neuron_subset)
for i, ratio in enumerate([8, 4, 2, 1]):
    tmp_subset = []
    for s in subset:
        choices = numpy.random.choice(s, size=len(s) if ratio == 8 else len(s) // 2, replace=False)
        tmp_subset.append(choices.tolist())
    tmp_updated_samples[ratio] = flatten(tmp_subset)
    subset = copy.deepcopy(tmp_subset)

for key, values in tmp_updated_samples.items():
    SAMPLES_PU[f"complete-1:{key}"] = {
        "positive" : updated_samples,
        "negative" : {
            "train" : tmp_updated_samples[key],
            "valid" : [],
        }
    }

print(len(SAMPLES_PU["complete-1:0"]["positive"]["train"]))    
print(len(SAMPLES_PU["complete-1:0"]["negative"]["train"]))

print(len(SAMPLES_PU["complete-1:4"]["positive"]["train"]))
print(len(SAMPLES_PU["complete-1:4"]["positive"]["valid"]))

print(len(SAMPLES_PU["complete-1:8"]["negative"]["train"]))
print(len(SAMPLES_PU["complete-1:8"]["negative"]["valid"]))

9532
0
9532
2639
2440192
0


In [84]:
import json
json.dump(SAMPLES_PU, open("../configs/training-samples_complete.json", "w"))

In [53]:
numpy.random.seed(42)
random.seed(42)

# Keeps 7800 positive crops
tmp_updated_samples = {}
for fold, values in POSITIVE_SAMPLES.items():
    tmp_updated_samples[fold] = values
updated_samples = copy.deepcopy(tmp_updated_samples)

SAMPLES_PU = {
    "complete-1:0" : {
        "positive" : updated_samples,
        "negative" : {"train" : [], "valid" : []}
    }
}

tmp_updated_samples = {}
subset = []
for neuron_subset in NEGATIVE_SAMPLES_64["train"]:
    subset.append(neuron_subset)
for i, ratio in enumerate([64, 32, 16]):
    tmp_subset = []
    for s in subset:
        choices = numpy.random.choice(s, size=len(s) if ratio == 64 else len(s) // 2, replace=False)
        tmp_subset.append(choices.tolist())
    tmp_updated_samples[ratio] = flatten(tmp_subset)
    subset = copy.deepcopy(tmp_subset)

for key, values in tmp_updated_samples.items():
    SAMPLES_PU[f"complete-1:{key}"] = {
        "positive" : updated_samples,
        "negative" : {
            "train" : tmp_updated_samples[key],
            "valid" : [],
        }
    }

# print(len(SAMPLES_PU["complete-1:4"]["positive"]["train"]))
# print(len(SAMPLES_PU["complete-1:4"]["positive"]["valid"]))

# print(len(SAMPLES_PU["complete-1:8"]["negative"]["train"]))
# print(len(SAMPLES_PU["complete-1:8"]["negative"]["valid"]))

print(len(SAMPLES_PU["complete-1:16"]["negative"]["train"]))
print(len(SAMPLES_PU["complete-1:16"]["negative"]["valid"]))

print(len(SAMPLES_PU["complete-1:32"]["negative"]["train"]))
print(len(SAMPLES_PU["complete-1:32"]["negative"]["valid"]))

print(len(SAMPLES_PU["complete-1:64"]["negative"]["train"]))
print(len(SAMPLES_PU["complete-1:64"]["negative"]["valid"]))


152512
0
305024
0
610048
0


In [54]:
import json
json.dump(SAMPLES_PU, open("../configs/training-samples_complete_64.json", "w"))

In [8]:
numpy.random.seed(42)
random.seed(42)

# Keeps 7800 positive crops
tmp_updated_samples = {}
for fold, values in POSITIVE_SAMPLES.items():
    tmp_updated_samples[fold] = values
updated_samples = copy.deepcopy(tmp_updated_samples)

SAMPLES_PU = {
    "complete-1:0" : {
        "positive" : updated_samples,
        "negative" : {"train" : [], "valid" : []}
    }
}

tmp_updated_samples = {}
subset = []
for neuron_subset in NEGATIVE_SAMPLES_256["train"]:
    subset.append(neuron_subset)
for i, ratio in enumerate([256, 128]):
    tmp_subset = []
    for s in subset:
        choices = numpy.random.choice(s, size=len(s) if ratio == 256 else len(s) // 2, replace=False)
        tmp_subset.append(choices.tolist())
    tmp_updated_samples[ratio] = flatten(tmp_subset)
    subset = copy.deepcopy(tmp_subset)

for key, values in tmp_updated_samples.items():
    SAMPLES_PU[f"complete-1:{key}"] = {
        "positive" : updated_samples,
        "negative" : {
            "train" : tmp_updated_samples[key],
            "valid" : [],
        }
    }
    
print(len(SAMPLES_PU["complete-1:128"]["negative"]["train"]))
print(len(SAMPLES_PU["complete-1:128"]["negative"]["valid"]))

print(len(SAMPLES_PU["complete-1:256"]["negative"]["train"]))
print(len(SAMPLES_PU["complete-1:256"]["negative"]["valid"]))

1220096
0
2440192
0


In [9]:
import json
json.dump(SAMPLES_PU, open("../configs/training-samples_complete_256.json", "w"))

# Subset PU

In [9]:
import json
import copy
SAMPLES_PU = json.load(open("../configs/training-samples_complete.json", "r"))


In [10]:
NEURONS = list(set([sample["neuron"] for sample in SAMPLES_PU["complete-1:0"]["positive"]["train"]]))

In [11]:
numpy.random.seed(42)
random.seed(42)

def filter_neurons(samples_pu, subset):
    samples_pu = copy.deepcopy(samples_pu)
    
    out_pu = {}
    for key_pu, values_pu in samples_pu.items():
        key_pu = key_pu.replace("complete", "subset")
        out_pu[key_pu] = {}
        for key_type, values_type in values_pu.items():
            out_pu[key_pu][key_type] = {}
            for key_dataset, values_dataset in values_type.items():
                if key_dataset == "train":
                    out_pu[key_pu][key_type][key_dataset] = [value for value in values_dataset if value["neuron"] in subset]
                else:
                    out_pu[key_pu][key_type][key_dataset] = values_dataset
    return out_pu

SUBSET_SAMPLES_PU = {}
for subset_factor in [0.25, 0.5, 0.75]:
    for repetition in range(5):
        choices = numpy.random.choice(NEURONS, size=int(len(NEURONS) * subset_factor), replace=False)
        filtered = filter_neurons(SAMPLES_PU, choices)
        
        for key, values in filtered.items():
            key = key.replace("subset", f"subset-{subset_factor}-{repetition}")
            SUBSET_SAMPLES_PU[key] = values

In [177]:
json.dump(SUBSET_SAMPLES_PU, open("../configs/training-samples_subset.json", "w"))

In [12]:
numpy.random.seed(42)
random.seed(42)

complete_sums = {}
for key, values in SAMPLES_PU.items():
    running_sum = 0
    for key_type, values_type in values.items():
        running_sum += len(values_type["train"])
    complete_sums[key] = running_sum

subset_sums = {}
for key, values in SUBSET_SAMPLES_PU.items():
    running_sum = []
    for key_type, values_type in values.items():
        running_sum.append(len(values_type["train"]))
    subset_sums[key] = running_sum

for subset_factor in [0.25, 0.5, 0.75]:
    for repetition in range(5):
        for ratio in [0, 1, 2, 4, 8]:
            if sum(subset_sums[f"subset-{subset_factor}-{repetition}-1:{ratio}"]) > complete_sums["complete-1:0"]:
                break
                
        # total positive - num positive 
        sample_num = complete_sums["complete-1:0"] - subset_sums[f"subset-{subset_factor}-{repetition}-1:{ratio}"][0]

        SUBSET_SAMPLES_PU[f"subset-{subset_factor}-{repetition}-1:c"] = {
            "positive" : {
                "train" : SUBSET_SAMPLES_PU[f"subset-{subset_factor}-{repetition}-1:{ratio}"]["positive"]["train"],
                "valid" : SUBSET_SAMPLES_PU[f"subset-{subset_factor}-{repetition}-1:{ratio}"]["positive"]["valid"],
            },
            "negative" : {
                "train" : numpy.random.choice(
                    SUBSET_SAMPLES_PU[f"subset-{subset_factor}-{repetition}-1:{ratio}"]["negative"]["train"],
                    size = min(sample_num, len(SUBSET_SAMPLES_PU[f"subset-{subset_factor}-{repetition}-1:{ratio}"]["negative"]["train"])),
                    replace = False
                ).tolist(),
                "valid" : SUBSET_SAMPLES_PU[f"subset-{subset_factor}-{repetition}-1:{ratio}"]["negative"]["valid"],
            }
        }

In [179]:
subset_sums = {}
for key, values in SUBSET_SAMPLES_PU.items():
    running_sum = []
    for key_type, values_type in values.items():
        running_sum.append(len(values_type["train"]))
    subset_sums[key] = running_sum
for key, values in subset_sums.items():
    if "1:c" in key:
        print(key, sum(values))

subset-0.25-0-1:c 9532
subset-0.25-1-1:c 9522
subset-0.25-2-1:c 9532
subset-0.25-3-1:c 9532
subset-0.25-4-1:c 9532
subset-0.5-0-1:c 9532
subset-0.5-1-1:c 9532
subset-0.5-2-1:c 9532
subset-0.5-3-1:c 9532
subset-0.5-4-1:c 9532
subset-0.75-0-1:c 9532
subset-0.75-1-1:c 9532
subset-0.75-2-1:c 9532
subset-0.75-3-1:c 9532
subset-0.75-4-1:c 9532


In [180]:
json.dump(SUBSET_SAMPLES_PU, open("../configs/training-samples_subset.json", "w"))

In [186]:
for key, values in SUBSET_SAMPLES_PU.items():
    print(key, len(values["positive"]["train"]), len(values["positive"]["valid"]), len(values["negative"]["train"]), len(values["negative"]["valid"]))

subset-0.25-0-1:0 3319 2639 0 0
subset-0.25-0-1:8 3319 2639 26552 0
subset-0.25-0-1:4 3319 2639 13276 0
subset-0.25-0-1:2 3319 2639 6638 0
subset-0.25-0-1:1 3319 2639 3319 0
subset-0.25-1-1:0 1058 2639 0 0
subset-0.25-1-1:8 1058 2639 8464 0
subset-0.25-1-1:4 1058 2639 4232 0
subset-0.25-1-1:2 1058 2639 2116 0
subset-0.25-1-1:1 1058 2639 1058 0
subset-0.25-2-1:0 1359 2639 0 0
subset-0.25-2-1:8 1359 2639 10872 0
subset-0.25-2-1:4 1359 2639 5436 0
subset-0.25-2-1:2 1359 2639 2718 0
subset-0.25-2-1:1 1359 2639 1359 0
subset-0.25-3-1:0 1427 2639 0 0
subset-0.25-3-1:8 1427 2639 11416 0
subset-0.25-3-1:4 1427 2639 5708 0
subset-0.25-3-1:2 1427 2639 2854 0
subset-0.25-3-1:1 1427 2639 1427 0
subset-0.25-4-1:0 3066 2639 0 0
subset-0.25-4-1:8 3066 2639 24528 0
subset-0.25-4-1:4 3066 2639 12264 0
subset-0.25-4-1:2 3066 2639 6132 0
subset-0.25-4-1:1 3066 2639 3066 0
subset-0.5-0-1:0 4094 2639 0 0
subset-0.5-0-1:8 4094 2639 32752 0
subset-0.5-0-1:4 4094 2639 16376 0
subset-0.5-0-1:2 4094 2639 8188 0

## Subset-0.25

In [55]:
import json
import copy
SAMPLES_PU = json.load(open("../configs/training-samples_complete_64.json", "r"))
NEURONS = list(set([sample["neuron"] for sample in SAMPLES_PU["complete-1:0"]["positive"]["train"]]))

In [56]:
numpy.random.seed(42)
random.seed(42)

def filter_neurons(samples_pu, subset):
    samples_pu = copy.deepcopy(samples_pu)
    
    out_pu = {}
    for key_pu, values_pu in samples_pu.items():
        if "1:0" in key_pu:
            continue
        key_pu = key_pu.replace("complete", "subset")
        out_pu[key_pu] = {}
        for key_type, values_type in values_pu.items():
            out_pu[key_pu][key_type] = {}
            for key_dataset, values_dataset in values_type.items():
                if key_dataset == "train":
                    out_pu[key_pu][key_type][key_dataset] = [value for value in values_dataset if value["neuron"] in subset]
                else:
                    out_pu[key_pu][key_type][key_dataset] = values_dataset
    return out_pu

SUBSET_SAMPLES_PU = {}
for subset_factor in [0.25]:
    for repetition in range(5):
        choices = numpy.random.choice(NEURONS, size=int(len(NEURONS) * subset_factor), replace=False)
        filtered = filter_neurons(SAMPLES_PU, choices)
        
        for key, values in filtered.items():
            key = key.replace("subset", f"subset-{subset_factor}-{repetition}")
            SUBSET_SAMPLES_PU[key] = values


In [57]:
print(SUBSET_SAMPLES_PU.keys())

dict_keys(['subset-0.25-0-1:64', 'subset-0.25-0-1:32', 'subset-0.25-0-1:16', 'subset-0.25-1-1:64', 'subset-0.25-1-1:32', 'subset-0.25-1-1:16', 'subset-0.25-2-1:64', 'subset-0.25-2-1:32', 'subset-0.25-2-1:16', 'subset-0.25-3-1:64', 'subset-0.25-3-1:32', 'subset-0.25-3-1:16', 'subset-0.25-4-1:64', 'subset-0.25-4-1:32', 'subset-0.25-4-1:16'])


In [58]:
json.dump(SUBSET_SAMPLES_PU, open("../configs/training-samples_subset_64.json", "w"))

In [10]:
import json
import copy
SAMPLES_PU = json.load(open("../configs/training-samples_complete_256.json", "r"))
NEURONS = list(set([sample["neuron"] for sample in SAMPLES_PU["complete-1:0"]["positive"]["train"]]))

In [11]:
numpy.random.seed(42)
random.seed(42)

def filter_neurons(samples_pu, subset):
    samples_pu = copy.deepcopy(samples_pu)
    
    out_pu = {}
    for key_pu, values_pu in samples_pu.items():
        if "1:0" in key_pu:
            continue
        key_pu = key_pu.replace("complete", "subset")
        out_pu[key_pu] = {}
        for key_type, values_type in values_pu.items():
            out_pu[key_pu][key_type] = {}
            for key_dataset, values_dataset in values_type.items():
                if key_dataset == "train":
                    out_pu[key_pu][key_type][key_dataset] = [value for value in values_dataset if value["neuron"] in subset]
                else:
                    out_pu[key_pu][key_type][key_dataset] = values_dataset
    return out_pu

SUBSET_SAMPLES_PU = {}
for subset_factor in [0.25]:
    for repetition in range(5):
        choices = numpy.random.choice(NEURONS, size=int(len(NEURONS) * subset_factor), replace=False)
        filtered = filter_neurons(SAMPLES_PU, choices)
        
        for key, values in filtered.items():
            key = key.replace("subset", f"subset-{subset_factor}-{repetition}")
            SUBSET_SAMPLES_PU[key] = values

In [12]:
json.dump(SUBSET_SAMPLES_PU, open("../configs/training-samples_subset_256.json", "w"))

# Repair HDF5

In [14]:
import h5py
from tqdm.auto import tqdm

PATH = "../data/80-20_calcium_dataset.h5"
with h5py.File(PATH, "r+") as file:
    for fold, fold_values in tqdm(file.items(), leave=False):
        for neuron, neuron_values in tqdm(fold_values.items(), leave=False):
            print(neuron)
            for cached in ['cache-input', 'cache-label', 'cache-unlabeled-input', 'cache-unlabeled-label']:
                if cached in neuron_values:
                    print(neuron_values.keys())
#                     del neuron_values[cached]
#             if "cache-unlabeled-input" in neuron_values:
#                 print(neuron_values["cache-unlabeled-input"].keys())
#                 del neuron_values["cache-unlabeled-input"]
#             print(neuron_values.keys())

  0%|          | 0/2 [00:00<?, ?it/s]

1
10
11
12
14
16
17
18
2
21
22
23
24
25
26
27
28
29
3
30
35
36
37
39
4
41
42
43
45
48
49
5
6
63
64
66
67
68
7
70
71
72
73
74
76
77
81
82
84
85
86
87
88
89
9
90
91
92


                                     

0
15
20
31
32
33
40
44
46
47
65
75
79
8
83




# Extract small crops only

In [3]:

import numpy
import h5py
import random
import copy
import json

from collections import defaultdict
from skimage import filters
from tqdm.auto import tqdm

MAX_PU = 8
PATH = "../data/80-20_calcium_dataset.h5"
numpy.random.seed(42)
random.seed(42)

SAMPLES_PU = json.load(open("../configs/training-samples_complete.json", "r"))
print(SAMPLES_PU.keys())

print(SAMPLES_PU["complete-1:8"]["positive"]["train"][0])

DATASET = "complete-1:8"

PATH = "../data/80-20_calcium_dataset.h5"
OUTPATH = f"../data/calcium_dataset_crops_{DATASET}_tmp.h5"
with h5py.File(OUTPATH, "w") as outfile:
    with h5py.File(PATH, "r") as file:
        for fold, fold_values in tqdm(file.items(), leave=False):
            out_fold = outfile.create_group(fold)
            for neuron, neuron_values in tqdm(fold_values.items(), leave=False):
                out_neuron = out_fold.create_group(neuron)
                out_neuron.create_group("input")
                out_neuron.create_group("label")                

            # Positive
            for event in tqdm(SAMPLES_PU[DATASET]["positive"][fold]):
                out_neuron = out_fold[str(event["neuron"])]
                out_neuron["input"].create_dataset(str(event["event-id"]), data=fold_values[str(event["neuron"])]["cache-input"][str(event["event-id"])], compression="gzip", compression_opts=4)
                out_neuron["label"].create_dataset(str(event["event-id"]), data=fold_values[str(event["neuron"])]["cache-label"][str(event["event-id"])], compression="gzip", compression_opts=4, dtype=numpy.uint8)                
                
            # Negative
            for event in tqdm(SAMPLES_PU[DATASET]["negative"][fold]):
                out_neuron = out_fold[str(event["neuron"])]
                event_id = str(event["coord"])
                out_neuron["input"].create_dataset(event_id, data=fold_values[str(event["neuron"])]["cache-unlabeled-input"][event_id], compression="gzip", compression_opts=4)
                out_neuron["label"].create_dataset(event_id, data=fold_values[str(event["neuron"])]["cache-unlabeled-label"][event_id], compression="gzip", compression_opts=4, dtype=numpy.uint8)                
                    
#                 for cached in ['cache-input', 'cache-label']:
#                     out_cached = out_neuron.create_group(cached.split("cache-")[-1])
                    
#                     for event_idx, event in neuron_values[cached].items():
#                         out_cached.create_dataset(event_idx, data=event, compression="gzip", compression_opts=4)

dict_keys(['complete-1:0', 'complete-1:8', 'complete-1:4', 'complete-1:2', 'complete-1:1'])
{'neuron': '1', 'event-id': 0}


  0%|          | 0/2 [00:00<?, ?it/s]

  0%|          | 0/58 [00:00<?, ?it/s]

  0%|          | 0/9532 [00:00<?, ?it/s]

  0%|          | 0/76256 [00:00<?, ?it/s]

  0%|          | 0/15 [00:00<?, ?it/s]

  0%|          | 0/2639 [00:00<?, ?it/s]

0it [00:00, ?it/s]

In [69]:
# OUTPATH = f"../data/calcium_dataset_crops_{DATASET}.h5"
with h5py.File("../data/database.hdf5", "w") as file:
    for fold in ["train", "valid"]:
        fold_group = file.create_group(fold)
        for neuron_id in range(5):
            neuron_group = fold_group.create_group(str(neuron_id))
            events = []
            for event_id in range(10):
                t, y, x = numpy.random.randint(10, size=3)
                events.append([event_id, t, t + 64, y, y + 64, x, x + 64])

            events = numpy.array(events, dtype=int)
            input = numpy.random.rand(128, 96, 96).astype(numpy.float32)
            label = (numpy.random.rand(128, 96, 96) > 0.9).astype(numpy.uint8)

            neuron_group.create_dataset("events", data=events)
            neuron_group.create_dataset("input", data=input)
            neuron_group.create_dataset("label", data=label)