## With this notebook you can create a dataset similar to the cta_1dc inside $GAMMAPY_EXTRA/test_datasets/cta_1dc.

**NOTE**: The notebook uses the simulated data from the [simulation_disk.ipynb](simulation_disk.ipynb) notebook. REMEMBER to simulate the data using that notebook before using this one.


This notebook just uses to simulated data to create the proper fits files (obs-index.fits.gz and hdu-index.fits.gz).

The default name for the simulated data is sim\_events\_######.fits, where # is a number. So **PLEASE** use this naming convention, otherwise this notebook won't work.

In [1]:
!mkdir dataset_test
!mkdir dataset_test/data
!cp -r $CALDB dataset_test/
!ls -ltr dataset_test/

mkdir: cannot create directory ‘dataset_test’: File exists
mkdir: cannot create directory ‘dataset_test/data’: File exists
total 8
drwxr-xr-x. 3 fermi-cta fermi-cta   18 25 lug 12.40 caldb
-rw-rw-r--. 1 fermi-cta fermi-cta  913 26 lug 16.04 hdu-index.fits.gz
-rw-rw-r--. 1 fermi-cta fermi-cta 1052 26 lug 16.04 obs-index.fits.gz
drwxrwxr-x. 2 fermi-cta fermi-cta  276 26 lug 19.49 data


#### Copy simulated data with the [notebook](simulation_disk.ipynb) into the folder dataset_test/data

In [2]:
!cp sim_events_00000* dataset_test/data

We just rename some files so that they can be used with the gammapy notebook.

In [3]:
%%bash
cp dataset_test/caldb/data/cta/prod2/bcf/North_0.5h/irf_file.fits.gz dataset_test/caldb/data/cta/prod2/bcf/North_0.5h/irf_file.fits 
cp dataset_test/caldb/data/cta/prod2/bcf/North_5h/irf_file.fits.gz dataset_test/caldb/data/cta/prod2/bcf/North_5h/irf_file.fits 
cp dataset_test/caldb/data/cta/prod2/bcf/North_50h/irf_file.fits.gz dataset_test/caldb/data/cta/prod2/bcf/North_50h/irf_file.fits 
cp dataset_test/caldb/data/cta/prod2/bcf/South_0.5h/irf_file.fits.gz dataset_test/caldb/data/cta/prod2/bcf/South_0.5h/irf_file.fits 
cp dataset_test/caldb/data/cta/prod2/bcf/South_5h/irf_file.fits.gz dataset_test/caldb/data/cta/prod2/bcf/South_5h/irf_file.fits 
cp dataset_test/caldb/data/cta/prod2/bcf/South_50h/irf_file.fits.gz dataset_test/caldb/data/cta/prod2/bcf/South_50h/irf_file.fits 


In [4]:
!ls -l dataset_test/caldb/data/cta/prod2/bcf/*

dataset_test/caldb/data/cta/prod2/bcf/North_0.5h:
total 800
-rw-r--r--. 1 fermi-cta fermi-cta 406879 27 lug 00.14 irf_file.fits
-rw-r--r--. 1 fermi-cta fermi-cta 406879 27 lug 00.13 irf_file.fits.gz

dataset_test/caldb/data/cta/prod2/bcf/North_50h:
total 784
-rw-r--r--. 1 fermi-cta fermi-cta 401149 27 lug 00.14 irf_file.fits
-rw-r--r--. 1 fermi-cta fermi-cta 401149 27 lug 00.13 irf_file.fits.gz

dataset_test/caldb/data/cta/prod2/bcf/North_5h:
total 792
-rw-r--r--. 1 fermi-cta fermi-cta 404611 27 lug 00.14 irf_file.fits
-rw-r--r--. 1 fermi-cta fermi-cta 404611 27 lug 00.13 irf_file.fits.gz

dataset_test/caldb/data/cta/prod2/bcf/South_0.5h:
total 800
-rw-r--r--. 1 fermi-cta fermi-cta 405893 27 lug 00.14 irf_file.fits
-rw-r--r--. 1 fermi-cta fermi-cta 405893 27 lug 00.13 irf_file.fits.gz

dataset_test/caldb/data/cta/prod2/bcf/South_50h:
total 784
-rw-r--r--. 1 fermi-cta fermi-cta 399254 27 lug 00.14 irf_file.fits
-rw-r--r--. 1 fermi-cta fermi-cta 399254 27 lug 00.13

In [5]:
!ls -l dataset_test

total 8
drwxr-xr-x. 3 fermi-cta fermi-cta   18 25 lug 12.40 caldb
drwxrwxr-x. 2 fermi-cta fermi-cta  276 26 lug 19.49 data
-rw-rw-r--. 1 fermi-cta fermi-cta  913 26 lug 16.04 hdu-index.fits.gz
-rw-rw-r--. 1 fermi-cta fermi-cta 1052 26 lug 16.04 obs-index.fits.gz


In [6]:
import gammalib
import ctools
import cscripts 

import numpy as np
import glob

from astropy.io import fits 
from astropy import units as u
from astropy.coordinates import SkyCoord

#### In order to create the hdu-index.fits.gz, we use as an example the one from the $GAMMAPY_EXTRA folder.

In [7]:
!cp -r $GAMMAPY_EXTRA/test_datasets/cta_1dc .

In [8]:
obs_ind = 'cta_1dc/hdu-index.fits.gz'
hdulist_obs = fits.open(obs_ind)
paste_header_hdu = hdulist_obs[1].header

paste_header_hdu['DATASET'] = "Dataset for the Sexten School"
#create columns

# count the number of sim_events_000*'fits files
obs_number = len(glob.glob('dataset_test/data/sim_events_*'))

data_1_hdu = np.array([[i]*6 for i in range(1,obs_number+1)]).reshape(6*obs_number)

temp_2 = ['events','gti','aeff','edisp','psf','bkg']
data_2_hdu = np.array([temp_2 for i in range(1,obs_number+1)]).reshape(6*obs_number)

temp_3 = ['events','gti','aeff_2d','edisp_2d','psf_3gauss','bkg_3d']
data_3_hdu = np.array([temp_3 for i in range(1,obs_number+1)]).reshape(6*obs_number)

temp_4 = ['data']*2
temp_4.extend(["caldb/data/cta/prod2/bcf/South_50h"]*4)
data_4_hdu = np.array([temp_4 for i in range(1,obs_number+1)]).reshape(6*obs_number)


temp_5 = ['irf_file.fits']*4
data_5_hdu = np.array([["sim_events_"+str(i).rjust(6,'0') +".fits"] *2 + temp_5 for i in range(1,obs_number+1)]).reshape(6*obs_number)

temp_6 = ['EVENTS','EVENTS','EFFECTIVE AREA','ENERGY DISPERSION','POINT SPREAD FUNCTION','BACKGROUND']
data_6_hdu = np.array([temp_6 for i in range(1,obs_number+1)]).reshape(6*obs_number)


col1_hdu = fits.Column(name='OBS_ID', format='K', array=data_1_hdu)
col2_hdu = fits.Column(name='HDU_TYPE', format='6A', array=data_2_hdu)
col3_hdu = fits.Column(name='HDU_CLASS', format='10A', array=data_3_hdu)
col4_hdu = fits.Column(name='FILE_DIR', format='39A', array=data_4_hdu)
col5_hdu = fits.Column(name='FILE_NAME', format='26A', array=data_5_hdu)
col6_hdu = fits.Column(name='HDU_NAME', format='21A', array=data_6_hdu)

cols_hdu = fits.ColDefs([col1_hdu, col2_hdu, col3_hdu, col4_hdu, col5_hdu, col6_hdu])
tbhdu_hdu = fits.BinTableHDU.from_columns(cols_hdu, header = paste_header_hdu)

prihdr_hdu = fits.Header()
prihdu_hdu = fits.PrimaryHDU(header=prihdr_hdu)

thdulist_hdu = fits.HDUList([prihdu_hdu, tbhdu_hdu])

fitsname_hdu = "dataset_test/hdu-index.fits.gz"

thdulist_hdu.writeto(fitsname_hdu , overwrite=True)

In [9]:
# Print additional informations regarding the fits file
# Just for testing purposes

obs_ind = 'cta_1dc/obs-index.fits.gz'
hdulist_obs = fits.open(obs_ind)
paste_header_hdu = hdulist_obs[1].header

print hdulist_obs[1].columns.names

['OBS_ID', 'RA_PNT', 'DEC_PNT', 'GLON_PNT', 'GLAT_PNT', 'ZEN_PNT', 'ALT_PNT', 'AZ_PNT', 'ONTIME', 'LIVETIME', 'DEADC', 'TSTART', 'TSTOP', 'DATE_OBS', 'TIME_OBS', 'DATE_END', 'TIME_END', 'EVENTS_FILENAME', 'EVENT_COUNT', 'EVENT_TIME_MIN', 'EVENT_TIME_MAX', 'EVENT_ENERGY_MIN', 'EVENT_ENERGY_MAX']


## CREATE DATASET
### After haveing created and moved the data to the proper folder, we inspect the informations from the obs-index.fits.gz and the hdu-index.fita.gz files from the dataset taken from the gammapy-extra test-dataset/cta_1dc folder.


In [10]:
data, hdr = fits.getdata('cta_1dc/obs-index.fits.gz', 1, header=True)
test_row = data[0:1]
out_file = "dataset_test/obs-index.fits.gz"

#fits.writeto(out_file, test_row, hdr, overwrite = True)

#get number of file fits from simulations
obs_number = len(glob.glob('dataset_test/data/sim_events_*'))

# these are constant over all the data
test_row['ZEN_PNT']  = 0.0
test_row['ALT_PNT']  = 90.0
test_row['AZ_PNT']   = 0.0
out_list = []

#fits.writeto(out_file, data[1:1],  hdr, overwrite = True)
for i in range(1,obs_number+1):
    
    obs_name = 'data/sim_events_'+str(i).rjust(6,'0')+'.fits'
    obs_ind = 'dataset_test/'+obs_name
    hdulist_obs = fits.open(obs_ind)
    paste_header_hdu = hdulist_obs[1].header

    test_row['OBS_ID']           = i
    test_row['RA_PNT']           = paste_header_hdu["RA_PNT"]
    test_row['DEC_PNT']          = paste_header_hdu["DEC_PNT"]
    c_icrs = SkyCoord(ra=float(test_row['RA_PNT'])*u.degree, dec=float(test_row['DEC_PNT'])*u.degree, frame='icrs')
    
    test_row['GLON_PNT']         = c_icrs.galactic.l.value
    test_row['GLAT_PNT']         = c_icrs.galactic.b.value
    test_row['ONTIME']           = paste_header_hdu["ONTIME"]
    test_row['LIVETIME']         = paste_header_hdu["LIVETIME"]
    test_row['DEADC']            = paste_header_hdu["DEADC"]
    test_row['TSTART']           = paste_header_hdu["TSTART"]
    test_row['TSTOP']            = paste_header_hdu["TSTOP"]
    test_row['DATE_OBS']         = paste_header_hdu["DATE_OBS"]
    test_row['TIME_OBS']         = paste_header_hdu["TIME_OBS"]
    test_row['DATE_END']         = paste_header_hdu["DATE_END"]
    test_row['TIME_END']         = paste_header_hdu["TIME_END"]
    test_row['EVENTS_FILENAME']  = obs_name
    test_row['EVENT_COUNT']      = hdulist_obs[1].header["NAXIS2"]
    test_row['EVENT_TIME_MIN']   = test_row['TSTART']
    test_row['EVENT_TIME_MAX']   = test_row['TSTOP']
    test_row['EVENT_ENERGY_MIN'] = np.min(hdulist_obs[1].data['ENERGY'])
    test_row['EVENT_ENERGY_MIN'] = np.max(hdulist_obs[1].data['ENERGY'])
    out_list.append(test_row.tolist())

test_numpy = np.array(out_list, dtype = test_row.dtype)
fits.writeto(out_file, test_numpy, hdr, overwrite = True)

In [11]:
!ls dataset_test/

caldb  data  hdu-index.fits.gz	obs-index.fits.gz


In [12]:
!ls dataset_test/data

sim_events_000001.fits	sim_events_000004.fits	sim_events_000007.fits
sim_events_000002.fits	sim_events_000005.fits	sim_events_000008.fits
sim_events_000003.fits	sim_events_000006.fits	sim_events_000009.fits
