Commit e768f28 (1 parent: 64fea6e)
Showing 4 changed files with 369 additions and 0 deletions.
@@ -0,0 +1,98 @@
import numpy as np
import json


def flatten(output, item):
    if 'path' in item:
        return output + [item]
    if not isinstance(item, dict):
        return output
    return reduce(flatten, item.values(), output)


def values(res, key, nest=[], **keywords):
    name = '{}_{}_{}'.format(*key)
    # Format the nested paths
    paths = '/'.join(nest)
    # add values to dictionary
    if 'path' in keywords:
        fmt = keywords['path']
        # Add the full formatted path
        full = fmt.format(paths, *key)
        keywords['path'] = full
        # Add the position list
        keywords['nest'] = nest + [name]
    # add to parent dictionary
    res[name] = keywords


def add(res, keys, nest=[], **keywords):
    targets = res.keys()
    if not targets:
        for key in keys:
            # add values to this level
            values(res, key, nest, **keywords)
    else:
        for target in targets:
            # Add to the list of nest levels
            nested = nest + [target]
            # add next levels to this level
            add(res[target], keys, nested, **keywords)


def add_all(result, shape, **keywords):
    # Add all possible keys at this level
    all_keys = np.where(np.ones(shape))
    add(result, zip(*all_keys), **keywords)
    # Get the result
    return result


def nester(tile_shape, group_shapes):
    result = {}
    file_fmt = 'data/{0}/raw_{1:04d}_{2:04d}_{3:04d}.h5'

    # Add all the folder levels
    for shape in group_shapes:
        result = add_all(result, shape)
    # Add all the tile endpoints
    result = add_all(result, tile_shape, path=file_fmt)

    # Get full result
    return result


class Simplifier():
    def __init__(self, nesting):
        self.nesting = nesting

    def simplify(self, item):
        nests = [n.split('_') for n in item['nest']]
        offset = (self.nesting * np.uint32(nests)).sum(0)
        return {
            'path': item['path'],
            'offset': list(offset),
        }


def main(out_file, tile_shape, group_shapes):
    # Get file path structures
    nested = nester(tile_shape, group_shapes)
    listed = reduce(flatten, nested.values(), [])

    # Get the sizes for each nesting
    levels = [tile_shape] + group_shapes[:-1]
    # Get the cumulative effect of each layer
    nesting = np.cumprod(levels, axis=0)
    nesting = np.r_[nesting[::-1], [[1,1,1]]]
    # Simplify
    simp = Simplifier(nesting)
    output = map(simp.simplify, listed)

    # Document the model
    with open(out_file, 'w') as fd:
        json.dump(output, fd, indent=4)


if __name__ == '__main__':
    out_file = 'dataset.json'
    tile_shape = [10,10,10]
    group_shapes = [
        [2,2,2],
    ]
    main(out_file, tile_shape, group_shapes)
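For orientation, here is a minimal sketch of how to inspect the output of the script above, assuming it was run with its __main__ defaults (tile_shape=[10,10,10], group_shapes=[[2,2,2]]); each record pairs an HDF5 path with a global tile offset. This snippet is an illustration, not part of the commit.

import json

# Inspect the records written by the nesting script (illustrative only).
with open('dataset.json', 'r') as fd:
    entries = json.load(fd)

# 2*2*2 folders times 10*10*10 tiles per folder
print(len(entries))  # 8000
# e.g. {'path': 'data/0_0_0/raw_0000_0000_0000.h5', 'offset': [0, 0, 0]}
print(entries[0])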
@@ -0,0 +1,85 @@
import make_nested
import test_nested
import numpy as np
import shutil
import json
import os


def get_filename(tile_shape, group_shape):
    t_n = np.prod(tile_shape)
    g_n = np.prod(group_shape)
    return '{}_in_{}.json'.format(t_n, g_n)


def make_one(shapes):
    tile_shape, group_shape = shapes
    out_file = get_filename(tile_shape, group_shape)
    return make_nested.main(out_file, tile_shape, [group_shape])


def test_one(shapes, trials, full_shape, file_block):
    if os.path.exists('data'):
        shutil.rmtree('data')
    tile_shape, group_shape = shapes
    in_file = get_filename(tile_shape, group_shape)
    return test_nested.main(in_file, trials, full_shape, file_block)


if __name__ == '__main__':
    tile_group_shapes = [
        # [ [1,1,1], [2,2,2] ],
        # [ [2,2,2], [2,2,2] ],
        # [ [4,4,4], [2,2,2] ],
        [ [1,1,1], [1,1,1] ],
        [ [1,1,2], [1,1,1] ],
        [ [1,2,2], [1,1,1] ],
        [ [1,2,4], [1,1,1] ],
        [ [1,4,4], [1,1,1] ],
        # [ [8,8,8], [1,1,1] ],
    ]

    # Make a json file for each tile/group shape pair
    map(make_one, tile_group_shapes)

    trials = 2
    full_shape = [8, 8192, 8192]
    file_blocks = [
        [1, 1, 1],
        [1, 1, 2],
        [1, 2, 2],
        [1, 2, 4],
        [1, 4, 4],
        [1, 4, 8],
        [1, 8, 8],
        [1, 16, 8],
        [1, 16, 16],
        [1, 32, 16],
        [1, 32, 32],
    ]
    speeds = []
    # Make and test hdf5 files
    for t_g in tile_group_shapes:
        for f_b in file_blocks:
            # Set a max cutoff at 1024 blocks
            n_blocks = np.prod(f_b * np.multiply(*t_g))
            if n_blocks > 1024:
                continue
            # Get the speed for the requested sizes
            speed = test_one(t_g, trials, full_shape, f_b)
            if not speed:
                continue
            speed.update({
                'n_folders': t_g[1],
                'n_trials': trials,
            })
            speeds.append(speed)
            msg = """
            mean of {n_trials} trials: {mean_time} for
            {n_tiles_kji} tiles from {n_files_zyx} files
            over {n_folders} folders.""".format(**speed)
            print(msg)

    # Document the model
    speed_fmt = 'speed_{}_{}_{}.json'
    speed_out = speed_fmt.format(*full_shape)
    # Write the model to json
    with open(speed_out, 'w') as fd:
        json.dump(speeds, fd, indent=4)
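To make the loop's cutoff concrete, here is a small sketch of how the file naming and the 1024-block cutoff work out for one combination; the shape values are picked from the lists above, and the snippet only mirrors the arithmetic of the driver, it is not part of the commit.

import numpy as np

# Illustrative only: mirror the naming and cutoff arithmetic used above.
tile_shape, group_shape = [1, 4, 4], [1, 1, 1]
file_block = [1, 32, 32]

# get_filename would produce '16_in_1.json' for this pair
name = '{}_in_{}.json'.format(np.prod(tile_shape), np.prod(group_shape))

# blocks per h5 file (file_block) times h5 files per axis (tile * group)
n_blocks = np.prod(file_block * np.multiply(tile_shape, group_shape))
print(name, n_blocks)  # 16_in_1.json 16384 -> skipped, over the 1024 cutoff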
@@ -0,0 +1,53 @@
import matplotlib.pyplot as plt
import numpy as np
import json


def n_key_filter(n, key):
    def trial_filter(trial):
        return n == np.prod(trial[key])
    return trial_filter


if __name__ == '__main__':

    full_shape = [8, 8192, 8192]
    in_fmt = 'speed_{}_{}_{}.json'
    in_file = in_fmt.format(*full_shape)

    # Load the model
    with open(in_file, 'r') as fd:
        trials = json.load(fd)

    #####
    # TEST 1
    # Plot time by total number of tiles
    # Plot lines for each number of h5 files used
    #####
    fig, ax = plt.subplots()
    # Filter only for trials with files in one folder
    in_one_folder = n_key_filter(1, 'n_folders')
    one_folder = filter(in_one_folder, trials)
    # Filter for trials with 1, 2, 4, 8, or 16 files
    for n_files in [1, 2, 4, 8, 16]:
        has_n_files = n_key_filter(n_files, 'n_files_zyx')
        n_file_dicts = filter(has_n_files, one_folder)
        # Now, get number of tiles and time
        def get_tile_times(d):
            n_tiles = np.prod(d['n_tiles_kji'])
            mean_time = d['mean_time']
            return [n_tiles, mean_time]
        tile_times = map(get_tile_times, n_file_dicts)
        cause, result = zip(*tile_times)
        ax.plot(cause, result, label='{} files'.format(n_files))

    legend = ax.legend(loc='upper left')
    # Set the fontsize
    for label in legend.get_texts():
        label.set_fontsize('large')

    # Save to file
    plt.ylabel('time (seconds)')
    plt.xlabel('total number of tiles')
    title = 'time to load {}x{}x{} hdf5'.format(*full_shape)
    plt.title(title)
    plt.xscale('log', basex=2)
    plt.savefig('out.png')
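For clarity, n_key_filter builds a closure that keeps only trials whose named field has a given total size. A self-contained sketch follows; the record in it is made up purely for illustration and is not data from the commit.

import numpy as np

def n_key_filter(n, key):
    def trial_filter(trial):
        return n == np.prod(trial[key])
    return trial_filter

# Made-up record with the fields the plot script reads
fake_trial = {'n_folders': [1, 1, 1], 'n_files_zyx': [1, 2, 2], 'mean_time': 0.5}
print(n_key_filter(1, 'n_folders')(fake_trial))    # True: 1*1*1 == 1
print(n_key_filter(4, 'n_files_zyx')(fake_trial))  # True: 1*2*2 == 4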
@@ -0,0 +1,133 @@
from numpy.random import randint
import numpy as np
import json
import h5py
import time
import os


class Maker():
    def __init__(self, size, dtype):
        self.size = size
        self.dtype = dtype

    def make(self, filedata):
        filename = filedata['path']
        # Make the parent directory
        directory = os.path.dirname(filename)
        if not os.path.exists(directory):
            os.makedirs(directory)
        # Get the datatype, noise range, and size
        dtype = getattr(np, 'uint{}'.format(self.dtype))
        dmax = 2 ** self.dtype
        dsize = self.size
        # Create the file from a path
        with h5py.File(filename, 'w') as fd:
            # Make a random uint array
            pattern = randint(dmax, size=dsize, dtype=dtype)
            fd.create_dataset('stack', data=pattern)


class Hasher():
    def __init__(self):
        self.offsets = []

    @property
    def shape(self):
        max_offset = np.max(self.offsets, 0)
        return (max_offset + 1)

    def hash(self, hashed, filedata):
        offset = filedata['offset']
        # Track all the offsets
        self.offsets.append(offset)
        # Map the offset to the path
        hashed[str(offset)] = filedata['path']
        return hashed


class Tester():
    def __init__(self, files, zyx_shape, kji_shape):
        self.zyx_shape = np.array(zyx_shape)
        self.kji_shape = np.array(kji_shape)
        self.int_shape = np.uint32(kji_shape)
        self.files = files

    def load(self, path, start):
        with h5py.File(path, 'r') as fd:
            dataset = fd[fd.keys()[0]]
            k0,j0,i0 = start
            k1,j1,i1 = start + np.uint32(self.int_shape)
            v = dataset[k0:k1,j0:j1,i0:i1]

    def test(self, kji):
        # Get xyz tile file
        kji_full = kji * self.kji_shape
        zyx = np.floor(kji_full / self.zyx_shape)
        zyx_full = np.uint32(zyx * self.zyx_shape)
        # Get file path and file start
        file_key = str(list(np.uint32(zyx)))
        file_path = self.files.get(file_key)
        if not file_path:
            raise AttributeError
        file_start = np.uint32(kji_full - zyx_full)
        # Load file from path and start
        self.load(file_path, file_start)


def main(in_file, trials, full_shape, block_shape):
    # Load the model
    with open(in_file, 'r') as fd:
        listed = json.load(fd)

    # Hash the files
    hasher = Hasher()
    model = reduce(hasher.hash, listed, {})
    tile_shape = np.uint32(hasher.shape)

    # Get the voxel shape of each h5 file
    zyx_shape = full_shape / tile_shape
    # Get the voxel shape of each loaded block
    kji_shape = zyx_shape / block_shape
    dtype = 8

    # Make the files
    make = Maker(zyx_shape, dtype)
    map(make.make, listed)

    # Get all kji blocks
    kji_range = full_shape // kji_shape
    kji_count = np.prod(tile_shape * block_shape)
    # Ensure consistency
    if np.prod(kji_range) != kji_count:
        return {}
    # Get all the block indexes
    all_kji = zip(*np.where(np.ones(kji_range)))

    # Test the files
    time_results = []
    test = Tester(model, zyx_shape, kji_shape)
    # Run the testing
    for t in range(trials):
        time_start = time.time()
        map(test.test, all_kji)
        time_end = time.time()
        # Record time difference
        time_diff = time_end - time_start
        time_results.append(time_diff)

    return {
        'time': time_results,
        'mean_time': float(np.mean(time_results)),
        'zyx_shape': map(int, zyx_shape),
        'kji_shape': map(int, kji_shape),
        'n_files_zyx': map(int, tile_shape),
        'n_tiles_kji': map(int, kji_range),
        'in_file': str(in_file),
    }
if __name__ == '__main__':

    block_shape = [1, 2, 2]
    in_file = 'dataset.json'
    full_shape = np.uint32([20, 10000, 10000])
    # Number of timing trials
    trials = 2

    main(in_file, trials, full_shape, block_shape)
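To spell out the index arithmetic in Tester.test, here is a worked sketch using the shapes implied by the __main__ block above together with the dataset.json defaults from the first file (a 20x20x20 grid of 1x500x500-voxel files, read in 1x250x250-voxel blocks). The numbers are illustrative, not output from the commit.

import numpy as np

# Illustrative only: reproduce the block-to-file arithmetic for one block.
zyx_shape = np.array([1, 500, 500])   # voxels per h5 file
kji_shape = np.array([1, 250, 250])   # voxels per requested block
kji = np.array([0, 3, 1])             # index of the block to load

kji_full = kji * kji_shape                   # [0, 750, 250]: voxel origin of the block
zyx = np.floor(kji_full / zyx_shape)         # [0., 1., 0.]: which file holds that origin
zyx_full = np.uint32(zyx * zyx_shape)        # [0, 500, 0]: voxel origin of that file
file_start = np.uint32(kji_full - zyx_full)  # [0, 250, 250]: block origin inside the file

file_key = str([int(x) for x in zyx])        # '[0, 1, 0]': key into the offset-to-path hash
print(file_key, file_start)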