In [1]:
from __future__ import print_function
from os.path import join
import time

import numpy as np
import pandas as pd
import pyxis as px
from astropy.io import fits

In [2]:
def normalize(samples, ranges=None):
    """Rescale known parameter columns of ``samples`` to the unit interval.

    Every column whose name has a ``[low, high]`` entry in ``ranges`` is
    replaced by ``(values - low) / (high - low)``. Columns without an entry
    are left untouched. ``samples`` is modified in place.

    Parameters
    ----------
    samples : pandas.DataFrame or mapping of name -> array
        Container holding one column per parameter. Columns must support
        NumPy-style arithmetic.
    ranges : mapping of name -> (low, high), optional
        Bounds used for the rescaling. Defaults to the module-level
        ``par_ranges``.

    Returns
    -------
    None
    """
    if ranges is None:
        ranges = par_ranges

    for par, (low, high) in ranges.items():
        if par in samples:
            # Assign the result back explicitly instead of relying on in-place
            # operators on the column object: under pandas Copy-on-Write an
            # in-place update of the yielded Series never reaches the frame,
            # and in-place true division fails on integer columns.
            samples[par] = (samples[par] - low) / (high - low)

In [3]:
# [low, high] bound for each parameter name. normalize() subtracts the low
# bound and divides by the width (high - low) for every matching column.
par_ranges = dict(
    g1=[-0.5, 0.5],
    g2=[-0.5, 0.5],
    sin_theta=[-1, 1],
    cos_theta=[-1, 1],
    sini=[0, 1],
    v0=[-30, 30],
    vcirc=[60, 540],
    rscale=[0.1, 10],
    hlr=[0.1, 5],
)

In [5]:
# Re-encode the angle column theta_int as a (sin_theta, cos_theta) pair and
# save the resulting table under a new file name.
n = 4000
data_dir = '/xdisk/timeifler/wxs0703/kl_nn/train_data_massive/'
samp_dir = '/xdisk/timeifler/wxs0703/kl_nn/samples/samples_massive.csv'
save_dir = '/xdisk/timeifler/wxs0703/kl_nn/train_data_massive/train_database_sincos'

samples = pd.read_csv(samp_dir)

# The new columns go to fixed positions 3 and 4; theta_int is dropped only
# after both have been derived from it.
samples.insert(loc=3, column='sin_theta', value=np.sin(samples['theta_int']))
samples.insert(loc=4, column='cos_theta', value=np.cos(samples['theta_int']))
samples = samples.drop(columns='theta_int')

samples.to_csv(
    '/xdisk/timeifler/wxs0703/kl_nn/samples/samples_massive_sincos.csv',
    index=False,
)

In [7]:
# Pack the training FITS files into a pyxis database. Each of the 250 folders
# contributes one batch of n samples, written with a single put_samples call.
n = 4000
data_dir = '/xdisk/timeifler/wxs0703/kl_nn/train_data_massive/'
samp_dir = '/xdisk/timeifler/wxs0703/kl_nn/samples/samples_massive_sincos.csv'
save_dir = '/xdisk/timeifler/wxs0703/kl_nn/train_data_massive/train_database_sincos'
samples = pd.read_csv(samp_dir)
normalize(samples)

with px.Writer(dirpath=save_dir, map_size_limit=200000, ram_gb_limit=8) as db:

    for index in range(250):
        start = time.time()
        folder = index+1
        start_id = index*n
        ids = np.arange(start_id, start_id+n, dtype=np.uint64)

        img_stack = np.zeros((n, 1, 48, 48))
        spec_stack = np.zeros((n, 1, 3, 64))

        # Pull the whole batch of fiducial parameters with one slice instead
        # of building a Series per sample inside the inner loop. The first
        # column is skipped (presumably an index/id column - TODO confirm).
        batch_pars = samples.iloc[start_id:start_id+n, 1:].to_numpy()
        if batch_pars.shape != (n, 9):
            raise ValueError(
                f'expected {(n, 9)} fiducial parameters for folder {folder}, '
                f'got {batch_pars.shape}')
        fids = np.zeros((n, 9))
        fids[:] = batch_pars

        for i in range(n):

            ID = start_id + i

            with fits.open(join(data_dir, f'temp_{folder}/training_{ID}.fits')) as hdu:

                # HDU 7 is used as the image; scale it so its maximum is 1.
                # Dividing into a new array leaves the HDU buffer untouched
                # and also works if the stored image has an integer dtype.
                img = hdu[7].data
                img_stack[i, 0] = img / np.max(img)

                # HDUs 1, 3 and 5 are used as spectra. Each is copied into a
                # zero-padded row of length 64, then all three are scaled
                # together by their joint maximum.
                specs = np.zeros((3, 64))
                for j in range(3):
                    spec = hdu[2*j+1].data
                    specs[j, :spec.shape[0]] = spec
                specs /= np.max(specs)
                spec_stack[i, 0] = specs

        db.put_samples({'img': img_stack,
                        'spec': spec_stack,
                        'fid_pars': fids,
                        'id': ids})
        t = round(time.time() - start, 2)

        print(f'folder {folder} complete, {t} seconds')

folder 1 complete, 21.74 seconds
folder 2 complete, 21.85 seconds
folder 3 complete, 22.05 seconds
folder 4 complete, 23.62 seconds
folder 5 complete, 22.82 seconds
folder 6 complete, 29.29 seconds
folder 7 complete, 22.49 seconds
folder 8 complete, 22.73 seconds
folder 9 complete, 22.13 seconds
folder 10 complete, 23.61 seconds
folder 11 complete, 23.71 seconds
folder 12 complete, 22.06 seconds
folder 13 complete, 21.36 seconds
folder 14 complete, 22.22 seconds
folder 15 complete, 21.89 seconds
folder 16 complete, 31.22 seconds
folder 17 complete, 23.0 seconds
folder 18 complete, 22.47 seconds
folder 19 complete, 22.71 seconds
folder 20 complete, 21.85 seconds
folder 21 complete, 21.41 seconds
folder 22 complete, 20.02 seconds
folder 23 complete, 20.71 seconds
folder 24 complete, 21.21 seconds
folder 25 complete, 22.51 seconds
folder 26 complete, 22.69 seconds
folder 27 complete, 22.76 seconds
folder 28 complete, 21.66 seconds
folder 29 complete, 22.0 seconds
folder 30 complete, 22.49

In [8]:
# Pack the test FITS files into a pyxis database. Each of the 25 folders
# contributes one batch of n samples, written with a single put_samples call.
n = 4000
data_dir = '/xdisk/timeifler/wxs0703/kl_nn/test_data/'
samp_dir = '/xdisk/timeifler/wxs0703/kl_nn/samples/samples_test_sincos.csv'
save_dir = '/xdisk/timeifler/wxs0703/kl_nn/test_data/test_database_sincos'
samples = pd.read_csv(samp_dir)
normalize(samples)
with px.Writer(dirpath=save_dir, map_size_limit=200000, ram_gb_limit=8) as db:

    for index in range(25):
        start = time.time()
        folder = index+1
        start_id = index*n
        ids = np.arange(start_id, start_id+n, dtype=np.uint64)

        img_stack = np.zeros((n, 1, 48, 48))
        spec_stack = np.zeros((n, 1, 3, 64))

        # Pull the whole batch of fiducial parameters with one slice instead
        # of building a Series per sample inside the inner loop. The first
        # column is skipped (presumably an index/id column - TODO confirm).
        batch_pars = samples.iloc[start_id:start_id+n, 1:].to_numpy()
        if batch_pars.shape != (n, 9):
            raise ValueError(
                f'expected {(n, 9)} fiducial parameters for folder {folder}, '
                f'got {batch_pars.shape}')
        fids = np.zeros((n, 9))
        fids[:] = batch_pars

        for i in range(n):

            ID = start_id + i

            with fits.open(join(data_dir, f'temp_{folder}/testing_{ID}.fits')) as hdu:

                # HDU 7 is used as the image; scale it so its maximum is 1.
                # Dividing into a new array leaves the HDU buffer untouched
                # and also works if the stored image has an integer dtype.
                img = hdu[7].data
                img_stack[i, 0] = img / np.max(img)

                # HDUs 1, 3 and 5 are used as spectra. Each is copied into a
                # zero-padded row of length 64, then all three are scaled
                # together by their joint maximum.
                specs = np.zeros((3, 64))
                for j in range(3):
                    spec = hdu[2*j+1].data
                    specs[j, :spec.shape[0]] = spec
                specs /= np.max(specs)
                spec_stack[i, 0] = specs

        db.put_samples({'img': img_stack,
                        'spec': spec_stack,
                        'fid_pars': fids,
                        'id': ids})
        t = round(time.time() - start, 2)

        print(f'folder {folder} complete, {t} seconds')

folder 1 complete, 19.41 seconds
folder 2 complete, 20.16 seconds
folder 3 complete, 20.19 seconds
folder 4 complete, 20.88 seconds
folder 5 complete, 20.11 seconds
folder 6 complete, 20.17 seconds
folder 7 complete, 20.04 seconds
folder 8 complete, 20.1 seconds
folder 9 complete, 20.0 seconds
folder 10 complete, 20.01 seconds
folder 11 complete, 20.42 seconds
folder 12 complete, 29.41 seconds
folder 13 complete, 20.07 seconds
folder 14 complete, 20.13 seconds
folder 15 complete, 20.47 seconds
folder 16 complete, 19.91 seconds
folder 17 complete, 28.56 seconds
folder 18 complete, 20.36 seconds
folder 19 complete, 19.87 seconds
folder 20 complete, 20.28 seconds
folder 21 complete, 20.22 seconds
folder 22 complete, 20.02 seconds
folder 23 complete, 20.05 seconds
folder 24 complete, 20.18 seconds
folder 25 complete, 20.14 seconds


In [11]:
# Select the training database directory (the path the training writer cell saves to).
save_dir = '/xdisk/timeifler/wxs0703/kl_nn/train_data_massive/train_database_sincos'

In [2]:
# Select the test database directory (the path the test writer cell saves to).
# NOTE(review): this overrides any earlier save_dir; the execution counter
# restarts here, so this cell appears to have been run in a fresh kernel.
save_dir = '/xdisk/timeifler/wxs0703/kl_nn/test_data/test_database_sincos'

In [3]:
# Sanity check: reopen the database at save_dir and print its first record.
with px.Reader(save_dir) as reader:
    first_record = reader[0]
    print(first_record)

{'img': array([[[0.00021841, 0.00024102, 0.00025512, ..., 0.00027297,
         0.00025589, 0.00023666],
        [0.00024109, 0.00025562, 0.00026219, ..., 0.00029171,
         0.0002751 , 0.00025696],
        [0.00025535, 0.00026285, 0.00027575, ..., 0.00031473,
         0.00029351, 0.00027566],
        ...,
        [0.00027566, 0.00029351, 0.00031473, ..., 0.00027575,
         0.00026285, 0.00025535],
        [0.00025696, 0.0002751 , 0.00029171, ..., 0.00026219,
         0.00025562, 0.00024109],
        [0.00023666, 0.00025589, 0.00027297, ..., 0.00025512,
         0.00024102, 0.00021841]]]), 'spec': array([[[0.04684712, 0.06184555, 0.06375093, 0.06383767, 0.06386585,
         0.06389358, 0.06392134, 0.06394916, 0.06397707, 0.06400508,
         0.06403322, 0.06406151, 0.06408998, 0.06411864, 0.06414747,
         0.06417646, 0.06420556, 0.06423473, 0.06426393, 0.06429311,
         0.06432225, 0.06435129, 0.06438028, 0.06441475, 0.06463894,
         0.06817626, 0.10113642, 0.26119026, 0.