# Overview of database files

Since we do not store the data files on GitHub it can be difficult to verify mirrors. He we will add a summary of all data files.

In [None]:
from herschelhelp_internal import git_version
print("This notebook was run with herschelhelp_internal version: \n{}".format(git_version()))
import datetime
print("This notebook was executed on: \n{}".format(datetime.datetime.now()))

In [35]:
from astropy.table import Table, Column
from astropy import units as u
import numpy as np

import glob
import hashlib

import os
import time

import yaml

In [27]:
SUFFIX = os.environ.get('SUFFIX', time.strftime("_%Y%m%d"))
MAKE_HASHES = False

## list of fits files

The information about the product is stored in a yml file which points to the various actual data files. Here we use a template.

In [2]:
fits_files = glob.glob('../dmu*/*/data/*.fits')
data_files = glob.glob('../dmu*/*/data/**/*.fits', recursive=True)

In [3]:
data_files

['../dmu31/dmu31_Examples/data/irac_i1_deeper_than_24_MOC.fits',
 '../dmu31/dmu31_Examples/data/irac_i1_less_deep_than_24_MOC.fits',
 '../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_combo_r_O16_MOC.fits',
 '../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_video_k_O16_MOC.fits',
 '../dmu4/dmu4_sm_CDFS-SWIRE/data/SERVS_SWIRE_COVERAGE_irac1_CDFS-SWIRE_MOC.fits',
 '../dmu4/dmu4_sm_CDFS-SWIRE/data/10_arcsec_holes_CDFS-SWIRE_O16_MOC.fits',
 '../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_gpc1_r_O16_MOC.fits',
 '../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_atlas_r_O16_MOC.fits',
 '../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_vhs_k_O16_MOC.fits',
 '../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_irac1_O16_MOC.fits',
 '../dmu4/dmu4_sm_Bootes/data/holes_Bootes_O16_MOC.fits',
 '../dmu4/dmu4_sm_ELAIS-S1/data/holes_ELAIS-S1_irac1_O16_20180122_MOC.fits',
 '../dmu4/dmu4_sm_ELAIS-S1/data/holes_ELAIS-S1_O16_MOC.fits',
 '../dmu4/dmu4_sm_ELAIS-S1/data/10_arcsec_holes_ELAIS-S1_O16_MOC.fits',


In [4]:
file_overview = Table()
file_overview.add_column(Column(data=data_files, name='data_filenames'))

In [5]:
file_overview

data_filenames
str113
../dmu31/dmu31_Examples/data/irac_i1_deeper_than_24_MOC.fits
../dmu31/dmu31_Examples/data/irac_i1_less_deep_than_24_MOC.fits
../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_combo_r_O16_MOC.fits
../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_video_k_O16_MOC.fits
../dmu4/dmu4_sm_CDFS-SWIRE/data/SERVS_SWIRE_COVERAGE_irac1_CDFS-SWIRE_MOC.fits
../dmu4/dmu4_sm_CDFS-SWIRE/data/10_arcsec_holes_CDFS-SWIRE_O16_MOC.fits
../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_gpc1_r_O16_MOC.fits
../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_atlas_r_O16_MOC.fits
../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_vhs_k_O16_MOC.fits
../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_irac1_O16_MOC.fits


In [6]:


def hash_bytestr_iter(bytesiter, hasher, ashexstr=False):
    for block in bytesiter:
        hasher.update(block)
    return (hasher.hexdigest() if ashexstr else hasher.digest())

def file_as_blockiter(afile, blocksize=65536):
    with afile:
        block = afile.read(blocksize)
        while len(block) > 0:
            yield block
            block = afile.read(blocksize)
            

In [7]:
if MAKE_HASHES:
    hashes = [(fname, hash_bytestr_iter(file_as_blockiter(open(fname, 'rb')), hashlib.sha256()))
        for fname in file_overview['data_filenames']]
    file_overview.add_column(Column(data=[i[1] for i in hashes], name='hashes'))

In [25]:
file_overview


data_filenames,hashes
str113,bytes32
../dmu31/dmu31_Examples/data/irac_i1_deeper_than_24_MOC.fits,"�\N}��k������� ���	��R�ҡ�,��"
../dmu31/dmu31_Examples/data/irac_i1_less_deep_than_24_MOC.fits,�!Oq_t�� \/�CS'�	~��.(��2��
../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_combo_r_O16_MOC.fits,��@���q!��e����t>��|Te
../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_video_k_O16_MOC.fits,��D f��!�4;���S�@̷k�s�I�FZ�
../dmu4/dmu4_sm_CDFS-SWIRE/data/SERVS_SWIRE_COVERAGE_irac1_CDFS-SWIRE_MOC.fits,^ы3��Z �#�H���K�fr�ړ�;B5.oZ�
../dmu4/dmu4_sm_CDFS-SWIRE/data/10_arcsec_holes_CDFS-SWIRE_O16_MOC.fits,��� �z�b�!籠@��p�av�#po�cPc�
../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_gpc1_r_O16_MOC.fits,"������""�:7�q�ԝLE���@��cI�Aȋ�"
../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_atlas_r_O16_MOC.fits,e�nG[Na�2@��ŭv�!��������.�
../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_vhs_k_O16_MOC.fits,�ڪ�*L8Zd}c�#�F=1��W���9�
../dmu4/dmu4_sm_CDFS-SWIRE/data/holes_CDFS-SWIRE_irac1_O16_MOC.fits,�D.�Ko.@��Q9��B]�kɚ2e�a#�6�&g�


In [29]:
file_overview.write('file_overview{}.fits'.format(SUFFIX), overwrite=True)
file_overview.write('file_overview{}.csv'.format(SUFFIX), overwrite=True)

In [30]:
yml_files = glob.glob('../**/*.yml', recursive=True)

In [31]:
yml_files

['../dmu2/meta_main.yml',
 '../dmu5/dmu5_gaia_flagging_script/meta_main.yml',
 '../meta/meta_main.yml',
 '../dmu14/dmu14_GALEX-GCAT/meta_survey.yml',
 '../dmu14/dmu14_GALEX-GCAT/meta_main.yml',
 '../dmu13/dmu13-jvla-cosmos-3-ghz-cat-ids-smolcic-2017/meta_main.yml',
 '../dmu13/dmu13-lockman-hole-project-1.4-ghz-cat-ids/meta_main.yml',
 '../dmu22/meta_main.yml',
 '../dmu24/dmu24_filters/meta_main.yml',
 '../dmu12/dmu12-lofar-bootes-150-mhz-cat-ids-2017/meta_survey.yml',
 '../dmu12/dmu12-lofar-bootes-150-mhz-cat-ids-2017/meta_main.yml',
 '../dmu32/columns.yml',
 '../dmu32/dmu32_COSMOS/meta_main.yml',
 '../dmu32/dmu32_ELAIS-S1/meta_main.yml',
 '../dmu32/dmu32_GAMA-09/meta_main.yml',
 '../dmu32/dmu32_Lockman-SWIRE/meta_main.yml',
 '../dmu32/dmu32_ELAIS-N2/meta_main.yml',
 '../dmu32/dmu32_Herschel-Stripe-82/meta_main.yml',
 '../dmu32/dmu32_SA13/meta_main.yml',
 '../dmu32/dmu32_GAMA-12/meta_main.yml',
 '../dmu32/dmu32_GAMA-15/meta_main.yml',
 '../dmu32/dmu32_AKARI-SEP/meta_main.yml',
 '../dmu

In [32]:
file_overview[0]

data_filenames,hashes
str113,bytes32
../dmu31/dmu31_Examples/data/irac_i1_deeper_than_24_MOC.fits,"�\N}��k������� ���	��R�ҡ�,��"
