In [1]:
import os
import glob
import argparse
import datetime
import numpy as np

from precovery.config import DefaultConfig
from precovery.precovery_db import PrecoveryDatabase
import precovery

ModuleNotFoundError: No module named 'regex'

In [2]:
# --- Dataset metadata (handed to the loader together with each observation file) ---
dataset_id = "NSC_DR2"
dataset_name = "NOIRLab Source Catalog (DR2)"
reference_doi = "https://doi.org/10.3847/1538-3881/abd6e1"
documentation_url = "https://datalab.noirlab.edu/nscdr2/index.php"
sia_url = "https://datalab.noirlab.edu/sia/nsc_dr2"

# --- Filesystem locations ---
# Folder to which the database will be written.
out_dir = "/mnt/DE384D6B384D43AD/code/precovery_db/nsc_test_new_indexing_mk2"
# Folder that is searched for the input observation files (*.h5).
data_dir = "/mnt/DE384D6B384D43AD/code/precovery_db/nsc_datafiles/shorter"

# --- Indexing parameters ---
# HEALPix nside tessellation; see https://healpy.readthedocs.io/en/latest/
# A HEALPix map has 12 * nside**2 pixels. Operationally we use an nside of at
# least 256 (786,432 pixels on the sky, i.e. roughly 800,000), but 16 is faster.
nside = 16
# Maximum size of a single data file, in bytes (about 1 MB here). Deliberately
# set low so that the file splitting gets exercised.
data_file_max_size = 1_073_741

In [3]:
# Index every HDF5 observation file found in `data_dir` into a precovery
# database located in `out_dir`. A plain-text status file (one path per line)
# records which files have already been loaded, so re-running this cell skips
# them instead of loading them a second time.

# Sorted so that the processing order is stable between runs.
files = sorted(glob.glob(os.path.join(data_dir, "*.h5")))

print(f"Found {len(files)} observation files in {data_dir}:")
for f in files:
    print(f"\t{os.path.basename(f)}")

# for connecting to an existing database
# db = PrecoveryDatabase.from_dir(out_dir)


db = PrecoveryDatabase.create(
    out_dir,
    nside=nside,
    data_file_max_size=data_file_max_size
)

# The status file is read and written with plain text I/O rather than
# np.loadtxt / np.savetxt: recent NumPy releases reject a newline as the
# loadtxt delimiter, and loadtxt would also treat '#' inside a path as the
# start of a comment. The on-disk format (one path per line) is unchanged.
status_file = os.path.join(out_dir, "files_indexed.txt")
if os.path.exists(status_file):
    with open(status_file, "r") as status_fh:
        # Drop the line terminator and ignore blank lines.
        read_files = [line.rstrip("\r\n") for line in status_fh if line.strip()]
else:
    read_files = []
    # Create the (empty) status file up front.
    with open(status_file, "w"):
        pass

# Built once so the membership test inside the loop is O(1) per file.
indexed_files = set(read_files)

time_start = datetime.datetime.now()
for i, observations_file in enumerate(files):
    print(f"Processing ({i + 1}/{len(files)}): {observations_file}")
    if observations_file not in indexed_files:
        db.frames.load_hdf5(
            observations_file,
            dataset_id,
            name=dataset_name,
            reference_doi=reference_doi,
            documentation_url=documentation_url,
            sia_url=sia_url
        )
        read_files.append(observations_file)
        indexed_files.add(observations_file)
        # Persist progress after every file so an interrupted run can resume.
        with open(status_file, "w") as status_fh:
            status_fh.writelines(f"{path}\n" for path in read_files)
    else:
        print("File has been indexed previously.")

time_end = datetime.datetime.now()
duration = (time_end - time_start)
print(f"All files indexed in {duration}.")

Found 2 observation files in /mnt/DE384D6B384D43AD/code/precovery_db/nsc_datafiles/shorter:
	nsc_dr2_observations_2018-06-01_2018-07-02.h5
	nsc_dr2_observations_2018-07-02_2018-08-02.h5
Processing (1/2): /mnt/DE384D6B384D43AD/code/precovery_db/nsc_datafiles/shorter/nsc_dr2_observations_2018-06-01_2018-07-02.h5


Output()

Processing (2/2): /mnt/DE384D6B384D43AD/code/precovery_db/nsc_datafiles/shorter/nsc_dr2_observations_2018-07-02_2018-08-02.h5


Output()

All files indexed in 0:05:48.730835.


In [5]:
# Display the `data_files` attribute of `db.frames` as the cell output.
# NOTE(review): the output recorded for this run is an empty dict (`{}`) even
# though two observation files were processed above — confirm this is expected.
db.frames.data_files

{}