In [1]:
import os
import sys

import numpy as np
import matplotlib
%matplotlib inline
import matplotlib.pyplot as plt
from matplotlib import colors
from hydra import initialize, compose
import pickle
from glob import glob
from pprint import pprint
from tqdm import tqdm
import h5py
import csv

# Load the hydra configuration; its `machine` section carries the directory
# locations for whichever machine this notebook is executed on.
with initialize(version_base=None, config_path='config'):
    config = compose(config_name='config.yaml')  # overrides=['machine=uzay']

# Pull each machine-specific directory out of the config under a short name.
array_dir = config.machine.array_dir
data_dir = config.machine.data_dir
figure_dir = config.machine.figure_dir
pickle_dir = config.machine.pickle_dir
repo_dir = config.machine.repo_dir

# Make the repository importable so the local `mejiro` modules resolve;
# the membership check keeps repeated runs of this cell from adding duplicates.
if repo_dir not in sys.path:
    sys.path.append(repo_dir)

# Apply the matplotlib style sheet shipped with the repository.
plt.style.use(f'{repo_dir}/mejiro/mplstyle/science.mplstyle')

from mejiro.lenses.test import SampleStrongLens
from mejiro.lenses.strong_lens import StrongLens
from mejiro.plots import diagnostic_plot, plot, plot_util, overplot
from mejiro.analysis import stats
from mejiro.utils import util
from mejiro.helpers import gs

# generate `.hdf5` file

In [None]:
# Destination path of the HDF5 export, placed directly under `repo_dir`.
output_file = os.path.join(repo_dir, 'data.hdf5')

In [26]:
# Collect the input files in sorted (lexicographic) path order so that the
# resulting lists are deterministic across runs.
image_paths = sorted(glob(f'{config.machine.dir_05}/*.npy'))
subhalo_paths = sorted(glob(f'{config.machine.dir_02}/subhalos/*'))

# Read every file into memory: one array per `.npy` file and one unpickled
# object per subhalo file.
# NOTE(review): the two lists are only index-aligned if both directories use
# matching, consistently sortable file names — confirm before pairing them.
images = [np.load(image_path) for image_path in image_paths]
subhalos = [util.unpickle(subhalo_path) for subhalo_path in subhalo_paths]

In [29]:
# Create (or overwrite) the HDF5 file, store the loaded images as a single
# dataset, and attach the run metadata as file-level attributes.
with h5py.File(output_file, 'w') as hf:
    image_dataset = hf.create_dataset('images', data=images)
    # TODO: subhalos are not exported yet; the dataset creation is disabled.
    # subhalo_dataset = hf.create_dataset('subhalos', data=subhalos)

    # Record how many images were written.
    hf.attrs['n_images'] = len(images)

    # Copy every entry of the pipeline configuration onto the file. This runs
    # after 'n_images' is set, so a pipeline key of the same name would win.
    pipeline_settings = util.hydra_to_dict(config.pipeline)
    for setting_name, setting_value in pipeline_settings.items():
        hf.attrs[setting_name] = setting_value

In [32]:
# Reopen the file read-only and list what was written: first the top-level
# dataset/group names, then the names of the file-level attributes.
with h5py.File(output_file, 'r') as hf:
    top_level_names = list(hf.keys())
    attribute_names = list(hf.attrs)
    print(top_level_names)
    print(attribute_names)

['images']
['band', 'exposure_time', 'final_pixel_side', 'grid_oversample', 'los_normalization', 'max_scene_size', 'n_images', 'num_pix', 'num_samples', 'seed', 'side', 'subhalo_cone', 'suppress_output']


# generate `.csv` file

In [2]:
# Destination path of the CSV export, placed directly under `repo_dir`.
output_csv = os.path.join(repo_dir, 'data.csv')

In [3]:
# Gather every `lens_*` entry under dir_03 in sorted (lexicographic) path
# order, which fixes the row order of the CSV written from this list.
lens_paths = sorted(glob(f'{config.machine.dir_03}/lens_*'))

In [4]:
# Write the CSV export: a header row followed by one row per pickled lens.
# `newline=''` is what the csv module requires for files handed to
# csv.writer; without it, platforms that translate '\n' on write end up with
# '\r\r\n' line endings, which show up as a blank line after every row.
with open(output_csv, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(StrongLens.get_csv_headers())

    # NOTE(review): util.unpickle presumably wraps pickle.load — only point
    # this at lens files from a trusted source.
    for lens_path in tqdm(lens_paths):
        lens = util.unpickle(lens_path)
        writer.writerow(lens.csv_row())

  0%|          | 0/1019 [00:00<?, ?it/s]

100%|██████████| 1019/1019 [00:10<00:00, 97.07it/s]
