This notebook presents procedures to download the spCFrame files of given fibers and create the BOSS Feature Table.

### Initialization

In [1]:
import operator as op

#### Bossdata

In [2]:
import bossdata
print(bossdata.__version__)

0.3.1


In [3]:
import bossdata.path
import bossdata.remote
import bossdata.meta as meta
import bossdata.spec as spec
import bossdata.plate as plate

#### Astropy

In [4]:
import astropy.io.fits as fits
from astropy.table import unique,Table

#### Pandas

In [5]:
import pandas as pd

#### Numpy

In [6]:
import numpy as np

#### HDF5
Hierarchical Data Format

In [7]:
import h5py

#### Environment

In [8]:
import os
# Scratch area used as the working directory; SCRATCH must already be set in the environment.
workdir = os.environ['SCRATCH']

# BOSS data location settings (remote URL, local mirror root, SAS path, reduction version),
# exported as environment variables before the Finder/Manager objects are created.
os.environ.update({
    "BOSS_DATA_URL": 'http://dr12.sdss3.org',
    "BOSS_LOCAL_ROOT": os.path.join(workdir, 'sdss'),
    "BOSS_SAS_PATH": '/sas/dr12/boss',
    "BOSS_REDUX_VERSION": 'v5_7_0',
})

In [9]:
finder = bossdata.path.Finder()
mirror = bossdata.remote.Manager()

In [10]:
os.chdir(workdir)

In [11]:
os.getcwd()

'/scratch/kunjias'

In [12]:
workdir

'/scratch/kunjias'

### spAll

In [14]:
spAll_path = finder.get_sp_all_path(lite=False)

In [15]:
spAll_path

'/sas/dr12/boss/spectro/redux/v5_7_0/spAll-v5_7_0.fits'

In [14]:
spAll_file = mirror.get(spAll_path, progress_min_size=2, auto_download=True, local_paths=None)

In [15]:
spAll_file

'/scratch/kunjias/sdss/sas/dr12/boss/spectro/redux/v5_7_0/spAll-v5_7_0.fits'

In [16]:
# NOTE(review): this path is relative (no leading '/'), so it is resolved against the
# current working directory, which was changed to `workdir` earlier. The `home` entry in
# the final `ls` listing suggests it ended up under the working directory -- confirm the
# intended location before reusing this cell.
local_path = 'home/kunjias/scratch'

In [17]:
meta.create_meta_full(catalog_path = spAll_file,db_path= local_path,verbose=True, primary_key='(PLATE,MJD,FIBER)')

  elif np.issubdtype(dtype, np.float):
  elif np.issubdtype(dtype, np.str) or np.issubdtype(dtype, np.bytes_):
Writing 100%|#################################################################|


### Bossdata.meta Database
Initialize a searchable database of BOSS observation metadata.

In [13]:
spAll= bossdata.meta.Database(finder=finder, mirror=mirror, lite=False, quasar_catalog=False, quasar_catalog_name=None, platelist=False, verbose=False)

In [14]:
columns = spAll.prepare_columns('*')

In [18]:
#columns

Select out all plates from the database.

In [16]:
all_plate = spAll.select_all(what='PLATE', max_rows=None)

In [20]:
#all_plate

In [21]:
unique_plate = unique(all_plate, keys='PLATE')

In [22]:
plate_list = np.array(unique_plate['PLATE'])

In [23]:
len(plate_list)

2407

There are 2407 unique plates in total; let's randomly sample 1000 of them. NumPy's global random seed is set first for reproducibility.

In [52]:
np.random.seed(5)
kilo_plate = np.random.choice(plate_list, size=1000,replace = False)

In [55]:
print(kilo_plate)

[6162 4713 3983 7132 3658 6271 6380 6521 4614 3878 4492 4067 5961 4688
 6005 6117 5414 5122 4178 6382 4734 5449 6200 4046 7261 6502 5284 5064
 5211 4549 5147 4501 4541 7083 5860 3968 7256 4446 4276 6497 3828 6018
 5161 6147 6881 4961 3754 7239 4975 4761 6199 7446 6974 5882 6195 4608
 5028 7097 3924 3818 7284 4565 7140 3698 4665 5424 4836 4377 5198 5854
 6394 5959 6830 5716 5159 5030 6120 4760 4473 6975 6270 4267 6803 6119
 4791 6805 3760 6509 6438 5207 6794 5040 6806 6426 7124 3836 5050 6790
 6259 4381 4768 6511 3931 5442 3775 4366 5041 4790 6623 6036 6286 4273
 5468 4269 4616 6787 3871 5875 5904 5851 4859 4457 5978 4988 7294 4323
 5988 3588 7257 6388 5786 4872 3971 4721 5052 5729 7104 4490 5797 4383
 4287 3834 6713 3655 6822 6692 6676 5330 3691 3873 6273 6308 6417 6295
 3872 6309 3868 3851 6030 4684 5381 6291 6134 4550 5156 4621 4733 6832
 5059 4695 3820 6179 6062 6317 4661 6470 3866 6464 5167 4264 5896 7141
 4481 6643 6505 3675 7339 4449 6167 3830 6465 7108 4275 6661 6375 3676
 4508 

List of plates previously randomly sampled.

In [71]:
print(kilo_plate[0:10155])

[4506 5020 5399 6694 3663 6466 3671 5060 3843 6479 4666 6179 4034 4501
 6282 6975 6029 5959 4087 3859 5131 5779 6681 7384 6673 6206 5294 7329
 3778 6252 4544 5039 4462 4806 4296 4885 6317 6750 4360 5141 4237 4218
 5801 7277 6983 4025 4618 5425 5419 5487 5324 4550 4011 6748 4788 5132
 5126 6429 5777 4552 5955 4301 4979 4576 4993 6500 4196 7044 5486 6521
 4229 5795 7150 4571 5342 4960 7446 4031 5782 5109 7148 7280 5806 5314
 6297 3841 5106 4753 4978 4505 6300 6792 6810 6030 6025 3807 3872 6816
 7054 5189 4317 3980 7055 5902 5946 4488 5743 6318 5459 6874 4467 5724
 4881 5190 4787 6256 5323 6369 3830 4560 4312 4478 4066 5483 5813 5950
 6931 6202 4463 6276 3812 6114 4849 4995 5771 3934 5369 6825 4175 5334
 5158 4798 4441 5773 7132 5134 4869 5454 7128 4242 5321 4010 5210 6178
 4808 5780 6379 5007 6692 7089 5064 5186 5120 3820 5494 7085 6716 5285
 7336 5985 4053 4875 4643 6686 4090 6418 7136 5470 4320 5408 5434 4012
 5144 7168 5110 7161 4546 7082 3963 5886 6002 6195 4743 4866 6656 5304
 4872 

In [57]:
kilo_plate_table = pd.DataFrame(kilo_plate)

Fetch results of feature information using SQL Select Query.

| name  | description | file source |
|------ |-----------| -----------|
| XFOCAL | Hole x-axis position in focal plane (mm) | plPlugMap |
| YFOCAL | Hole y-axis position in focal plane (mm) | plPlugMap |
| PLATE | Plate ID | spZbest |
| MJD   | Modified Julian date of observation | spZbest |
| FIBER | Fiber ID | spZbest |
| RA | Right ascension of telescope boresights(deg) | photoObj |
| DEC | Declination of telescope boresight (deg) | photoObj |
| OBJTYPE | Why this object was targeted (Note that if this field says QSO, it could be the case that this object would have been targeted as a GALAXY or any number of other categories as well.) | spZbest |
| AIRMASS_0 | Airmass at time of observation | photoObj |
| AIRMASS_1 |-- | --|
| AIRMASS_2 | --| --|
| AIRMASS_3 |-- |-- |
| AIRMASS_4 |-- |-- |

| SEEING 20 |                                   |
| SEEING 50 |                                     |
| SEEING 80 |                                      |

In [59]:
columns_names ='PLATE,MJD, XFOCAL,YFOCAL,FIBER,RA,DEC,OBJTYPE,AIRMASS_0,AIRMASS_1,AIRMASS_2,AIRMASS_3,AIRMASS_4'

In [60]:
columns_names

'PLATE,MJD, XFOCAL,YFOCAL,FIBER,RA,DEC,OBJTYPE,AIRMASS_0,AIRMASS_1,AIRMASS_2,AIRMASS_3,AIRMASS_4'

In [62]:
features_spAll = spAll.select_all(what = columns_names,where=None,sort=None,max_rows=None)

In [64]:
# Turn Astropy Table into Pandas
features_data = features_spAll.to_pandas()

In [65]:
features_data.head()

Unnamed: 0,PLATE,MJD,XFOCAL,YFOCAL,FIBER,RA,DEC,OBJTYPE,AIRMASS_0,AIRMASS_1,AIRMASS_2,AIRMASS_3,AIRMASS_4
0,3586,55181,290.408325,-100.942619,1,9.331912,-0.462955,GALAXY,1.393166,1.402038,1.384556,1.388828,1.397564
1,3586,55181,290.107849,-136.131714,2,9.330078,-0.624116,GALAXY,1.396166,1.405074,1.387519,1.391808,1.400582
2,3586,55181,312.33844,-53.472435,3,9.432106,-0.245182,GALAXY,1.195513,1.196438,1.19472,1.1951,1.195959
3,3586,55181,311.341522,-74.466339,4,9.427406,-0.341407,SPECTROPHOTO_STD,1.196825,1.197752,1.19603,1.196411,1.197272
4,3586,55181,298.426758,-73.963768,5,9.368675,-0.339224,GALAXY,1.196795,1.197722,1.196,1.196381,1.197242


In [66]:
# Object types of all fibers
features_data['OBJTYPE'].unique()

array(['GALAXY', 'SPECTROPHOTO_STD', 'SKY', 'QSO', 'NA'], dtype=object)

Select out the sky fibers

In [67]:
sky_fibers = features_data[(features_data['OBJTYPE'] == 'SKY')]

In [68]:
#sky_fibers

In [69]:
sky_fibers.head()

Unnamed: 0,PLATE,MJD,XFOCAL,YFOCAL,FIBER,RA,DEC,OBJTYPE,AIRMASS_0,AIRMASS_1,AIRMASS_2,AIRMASS_3,AIRMASS_4
11,3586,55181,305.141815,-30.619471,12,9.399483,-0.140439,SKY,0.0,0.0,0.0,0.0,0.0
13,3586,55181,311.175537,-9.341264,14,9.426984,-0.042847,SKY,0.0,0.0,0.0,0.0,0.0
25,3586,55181,267.770142,-53.615555,26,9.228573,-0.246457,SKY,1.19553,1.196456,1.194738,1.195118,1.195977
45,3586,55181,230.368332,-218.913437,46,9.056336,-1.00375,SKY,0.0,0.0,0.0,0.0,0.0
47,3586,55181,225.774734,-207.749695,48,9.035569,-0.952847,SKY,0.0,0.0,0.0,0.0,0.0


Randomly select 1000 sky fibers to retrieve their exposure information.

The random state is set to 5 for reproducibility.

In [107]:
kilo_sky_r5 = sky_fibers.sample(n=1000,replace=False,weights=None, random_state=5, axis=0)

In [127]:
kilo_sky_r5.head()

Unnamed: 0,PLATE,MJD,XFOCAL,YFOCAL,FIBER,RA,DEC,OBJTYPE,AIRMASS_0,AIRMASS_1,AIRMASS_2,AIRMASS_3,AIRMASS_4
169565,3844,55321,-93.327362,140.945984,566,180.40334,0.647407,SKY,0.0,0.0,0.0,0.0,0.0
1293639,5399,55956,-119.386322,150.76239,640,185.17342,12.159917,SKY,0.0,0.0,0.0,0.0,0.0
1758203,6190,56210,60.237095,-182.178345,204,6.769615,11.811333,SKY,0.0,0.0,0.0,0.0,0.0
2208918,6822,56711,148.396866,174.016495,919,204.21051,64.690726,SKY,0.0,0.0,0.0,0.0,0.0
2461169,7451,56739,86.115021,-190.730453,170,120.39544,-0.875818,SKY,0.0,0.0,0.0,0.0,0.0


Initialize an empty exposure masked array of shape (1,4128) using the first row of the sky-fiber table [plate 3586, mjd 55181, exposure 0, fiber 12].

In [72]:
# Locate and fetch the combined plan file of plate 3586 observed on MJD 55181.
plate3586_combined_plan_path = finder.get_plate_plan_path(plate=3586, mjd =55181, combined=True)
plate3586_combined_plan_file = mirror.get(plate3586_combined_plan_path, progress_min_size=2,auto_download=True, local_paths=None)
plate3586_combined_plan = plate.Plan(plate3586_combined_plan_file)

# Number of science exposures listed in the plan, and the spectrograph index for fiber 12.
plate3586_exposures = plate3586_combined_plan.num_science_exposures
plate3586_fiber12_spectrograph_index = plate3586_combined_plan.get_spectrograph_index(12)

# Red-band spCFrame file of the first exposure (sequence number 0) for fiber 12.
plate3586_exp0_spCFrame_path = plate3586_combined_plan.get_exposure_name(sequence_number=0, band='red',fiber=12,ftype='spCFrame')
plate3586_exp0_spCFrame_file = mirror.get(plate3586_exp0_spCFrame_path)

# Valid data of fiber 12 only, with the sky field included and no pixel-quality mask applied.
plate3586_exp0_frame = plate.FrameFile(plate3586_exp0_spCFrame_file,index=plate3586_fiber12_spectrograph_index,calibrated=True)
plate3586_valid_spec = plate3586_exp0_frame.get_valid_data(fibers=[12],pixel_quality_mask=None, include_wdisp=False, include_sky=True, use_ivar=True, use_loglam=False)

# One-row template with the same shape and dtype as a single exposure.
# np.ma.empty_like does not initialize the values, so this row is a placeholder, not data.
empty_data_plate3586 = np.ma.empty_like(plate3586_valid_spec)
empty_data_plate3586.shape

(1, 4128)

In [73]:
plate3586_exposures

7

In [74]:
plate3586_fiber12_spectrograph_index

1

In [75]:
# Start from the one-row template and append one row per science exposure.
# The template row holds uninitialized values, so row 0 of `exposure` is a
# placeholder and the printed shape has one more row than there are exposures.
exposure = empty_data_plate3586

for n in range(plate3586_exposures):
    try:
        exp_spCFrame_path = plate3586_combined_plan.get_exposure_name(sequence_number = n,band='red',fiber=12,ftype='spCFrame')
        exp_spCFrame_file = mirror.get(exp_spCFrame_path)
        exp_frame = plate.FrameFile(exp_spCFrame_file, index =plate3586_fiber12_spectrograph_index, calibrated = True)
        valid_spec = exp_frame.get_valid_data(fibers = [12],pixel_quality_mask=None, include_wdisp=False, include_sky=True, use_ivar=True, use_loglam=False)
        exposure = np.ma.append(exposure,valid_spec,0)
    except Exception as err:
        # Keep going when one exposure cannot be retrieved, but report it
        # instead of hiding the failure.
        print("Skipping exposure", n, "of plate 3586 fiber 12:", repr(err))

print(exposure.shape)

(8, 4128)


Retrieve the exposure data for each of the 1000 randomly selected sky fibers. The first row, which holds the empty sample data, will be removed.

In [111]:
def get_valid_data(plate_number, mjd, fiber_number):
    '''Collect the red-band spCFrame valid data of one fiber for every science exposure of a plate.

    Parameters
    -----------
    plate_number: plate number of the observation.

    mjd: modified julian date of the observation associated with the input plate.

    fiber_number: fiber number to identify which spectrograph to retrieve the exposure data from,
    1-500 for spectrograph 1, and 501-1000 for spectrograph 2.

    Output
    -------
    A tuple (exposure_data, exposures).

    exposure_data: masked array of valid data with one row per exposure that could be retrieved.

    exposures: number of science exposures listed in the combined plan. An exposure that cannot
    be retrieved is skipped with a warning, so exposure_data may have fewer rows than this number.

    Raises
    -------
    RuntimeError: if none of the exposures could be retrieved.
    '''
    import warnings

    # Get local path to plan file
    combined_plan_path = finder.get_plate_plan_path(plate_number, mjd, combined=True)
    combined_plan_file = mirror.get(combined_plan_path, progress_min_size=2, auto_download=True, local_paths=None)
    # Plan describing how the exposures of a single plate are combined
    combined_plan = plate.Plan(combined_plan_file)
    # Number of science exposures
    exposures = combined_plan.num_science_exposures
    # Spectrograph index
    spectrograph_index = combined_plan.get_spectrograph_index(fiber_number)

    # Gather the per-exposure rows in a list and join them once at the end. This avoids
    # seeding the result with a placeholder row taken from a notebook-level variable and
    # then stripping it with np.delete, which is not a masked-array routine.
    rows = []
    for n in range(exposures):
        try:
            exp_spCFrame_path = combined_plan.get_exposure_name(sequence_number=n, band='red', fiber=fiber_number, ftype='spCFrame')
            exp_spCFrame_file = mirror.get(exp_spCFrame_path)
            exp_frame = plate.FrameFile(exp_spCFrame_file, index=spectrograph_index, calibrated=True)
            valid_spec = exp_frame.get_valid_data(fibers=[fiber_number], pixel_quality_mask=None, include_wdisp=False, include_sky=True, use_ivar=True, use_loglam=False)
        except Exception as err:
            # Best effort: skip this exposure, but make the gap visible to the caller.
            warnings.warn("Plate {} fiber {}: skipping exposure {} ({!r})".format(
                plate_number, fiber_number, n, err))
            continue
        rows.append(valid_spec)

    print("Plate", plate_number, "fiber", fiber_number,"has", exposures, "exposures")

    if not rows:
        raise RuntimeError("No exposure could be retrieved for plate {} mjd {} fiber {}".format(
            plate_number, mjd, fiber_number))

    return np.ma.concatenate(rows, axis=0), exposures

In [112]:
plate3586_fiber12 = get_valid_data(plate_number=3586, mjd=55181, fiber_number=12)

Plate 3586 fiber 12 has 7 exposures


In [113]:
plate3586_fiber12[0].shape

(7, 4128)

In [114]:
plate3586_fiber12[1]

7

In [115]:
type(plate3586_fiber12)

tuple

First row in the kilo-sky table. (Plate 3844, fiber 566)

In [116]:
# Exposure Data
plate3844_fiber566 = get_valid_data(plate_number=3844, mjd=55321, fiber_number=566)

Plate 3844 fiber 566 has 4 exposures


In [117]:
plate3844_fiber566[0].shape

(4, 4128)

There are 4 available exposures from the red band of plate 3844, spectrograph 2 (fiber 566). All 4 exposures are successfully looped through, giving a valid data matrix of shape (4,4128). 

Similar procedures will be applied to all sky fibers in the sampled kilo-sky table to retrieve their exposure information.

The corresponding exposure index can be added as a 14th feature to the table (after the 13 selected features), and the exposure valid data columns can be added as an additional 4128 features per field.

The valid data masked array contains 4 fields: 'wavelength', 'flux', 'ivar', and 'sky' (the subtracted sky). The total flux is the sum of 'flux' and 'sky'.

In [118]:
def get_field(valid_data):
    """Yield the wavelength, flux, sky, total flux and ivar arrays of a valid-data masked array.

    valid_data: object indexable by the field names 'wavelength', 'flux', 'sky' and 'ivar',
    each giving a masked array.

    Yields, in this order: wavelength, flux, sky, total flux (flux + sky), ivar.
    """
    wavelength = valid_data['wavelength']
    sdss_flux = valid_data['flux']
    sdss_sky = valid_data['sky']
    ivar = valid_data['ivar']

    # getmaskarray always returns a full boolean array, even when a field carries
    # no mask at all, so the element-wise AND below cannot fail on a scalar mask.
    # NOTE(review): with AND a total-flux pixel is masked only when BOTH flux and sky
    # are masked -- kept as in the original; confirm this is the intended rule.
    both_masked = np.logical_and(np.ma.getmaskarray(sdss_flux), np.ma.getmaskarray(sdss_sky))
    total_flux = np.ma.array(sdss_flux.data + sdss_sky.data, mask=both_masked)

    yield wavelength
    yield sdss_flux
    yield sdss_sky
    yield total_flux
    yield ivar

A function to create a list of 4128 column names.

In [119]:
def column_index(column_name,n):
    """Return the n labels column_name1, column_name2, ..., column_name<n> as a list."""
    return ['{}{}'.format(column_name, position) for position in range(1, n + 1)]

Above is the exposure information table for plate 3844, fiber 566 including 13 features and all exposure valid data across 4 exposures.

Such information is to be collected for all sky fibers in the sampled kilo-sky.

In [120]:
def masked_dataframe(masked_array, name):
    ''' Build a pandas table from a 2-D masked array, indexed by exposure.

    masked_array: 2-D masked array with one row per exposure.

    name: string. Name of column index

    Return: pandas table of masked_array with masked entries replaced by NaN and
    an index named 'exposure_index' running from 0 to (number of rows - 1).
    '''
    # Take both dimensions from the data instead of assuming 4 exposures x 4128 pixels.
    n_rows, n_columns = masked_array.shape
    column_name = column_index(name, n_columns)

    # getmaskarray gives a full boolean array even when the input carries no mask.
    pandas_table = pd.DataFrame(masked_array, columns = column_name).mask(np.ma.getmaskarray(masked_array))
    pandas_table.insert(n_columns, 'exposure_index', np.arange(n_rows))
    pandas_table = pandas_table.set_index('exposure_index')

    return pandas_table

In [122]:
def make_dataframe(masked_array, name):
    ''' Build a pandas table from a 2-D masked array.

    masked_array: 2-D masked array with one row per exposure.

    name: string. Name of column index

    Return: pandas table of masked_array with masked entries replaced by NaN and
    columns labelled name1 ... name<number of columns>.
    '''
    # Take the column count from the data instead of assuming 4128 pixels.
    column_name = column_index(name, masked_array.shape[1])

    # getmaskarray gives a full boolean array even when the input carries no mask.
    pandas_table = pd.DataFrame(masked_array, columns = column_name).mask(np.ma.getmaskarray(masked_array))

    return pandas_table

In [123]:
def make_featureframe(feature_row, exposures):
    ''' Repeat a one-row feature table once per exposure.

    feature_row: pandas table holding the features of a single fiber.

    exposures: number of copies to make (one per exposure).

    Return: pandas table with `exposures` copies of feature_row, indexed by
    'exposure_index' running from 0 to exposures - 1. The input is not modified.
    With exposures <= 0 an empty table with the same columns is returned.
    '''
    exposures = max(int(exposures), 0)

    if exposures > 0:
        feature_table = pd.concat([feature_row] * exposures)
    else:
        # pd.concat refuses an empty list, so build the empty result directly.
        feature_table = feature_row.iloc[:0].copy()

    # Set the index directly rather than inserting a helper column at a fixed
    # position, which only works when the table has at least that many columns.
    feature_table.index = pd.Index(np.arange(exposures), name='exposure_index')

    return feature_table

In [124]:
def _fiber_exposure_table(fiber_feature):
    # Build the per-exposure table (features + spectra) of a one-row feature frame;
    # returns None when no exposure row is available for that fiber.
    plate_number = fiber_feature['PLATE'].iloc[0]
    mjd = fiber_feature['MJD'].iloc[0]
    fiber_number = fiber_feature['FIBER'].iloc[0]

    valid_info = get_valid_data(plate_number, mjd, fiber_number)
    valid_data = valid_info[0]

    # Replicate the feature row once per row actually present in the spectra, so the
    # feature columns and the spectra columns always line up row by row.
    n_rows = valid_data.shape[0]
    if n_rows == 0:
        return None

    wavelength, sdss_flux, sdss_sky, total_flux, inverse_variance = get_field(valid_data)

    frames = [
        make_featureframe(fiber_feature, n_rows),
        make_dataframe(wavelength, 'wavelength'),
        make_dataframe(sdss_flux, 'sdss_flux'),
        make_dataframe(sdss_sky, 'sdss_sky'),
        make_dataframe(total_flux, 'total_flux'),
        make_dataframe(inverse_variance, 'inverse_variance'),
    ]
    return pd.concat(frames, axis=1)


def data_collection(kilo_sky):
    ''' Build the feature table of every fiber listed in kilo_sky.

    kilo_sky: pandas table with one row per fiber; it must contain the columns
    'PLATE', 'MJD' and 'FIBER'.

    Return: pandas table with one row per (fiber, exposure). Each row holds the
    fiber features followed by the wavelength, sdss_flux, sdss_sky, total_flux and
    inverse_variance columns of that exposure. The row index is reset, so the
    per-fiber exposure counter becomes an ordinary column. An empty table is
    returned when kilo_sky has no rows.

    NOTE: the exposure counter numbers the rows that were retrieved (0, 1, ...);
    if an exposure was skipped it is not the plan's sequence number.
    '''
    import warnings

    # Collect the per-fiber tables and concatenate once at the end; growing a
    # DataFrame with pd.concat inside the loop copies all previous rows each time.
    fiber_tables = []
    for n in range(kilo_sky.shape[0]):
        fiber_feature = kilo_sky.iloc[[n]]
        fiber_table = _fiber_exposure_table(fiber_feature)
        if fiber_table is None:
            warnings.warn("No exposure data for row {} of the input table; row skipped".format(n))
            continue
        fiber_tables.append(fiber_table)

    if not fiber_tables:
        return pd.DataFrame()

    return pd.concat(fiber_tables, axis=0).reset_index()

In [125]:
kilo_sky_r5_data = data_collection(kilo_sky_r5)

Plate 3844 fiber 566 has 4 exposures
Plate 5399 fiber 640 has 8 exposures
Plate 6190 fiber 204 has 4 exposures
Plate 6822 fiber 919 has 11 exposures
Plate 7451 fiber 170 has 13 exposures
Plate 7315 fiber 34 has 6 exposures
Plate 5384 fiber 264 has 6 exposures
Plate 7316 fiber 757 has 5 exposures
Plate 5956 fiber 878 has 7 exposures
Plate 4734 fiber 592 has 4 exposures
Plate 5119 fiber 176 has 4 exposures
Plate 4241 fiber 240 has 10 exposures
Plate 3873 fiber 668 has 3 exposures
Plate 6386 fiber 246 has 6 exposures
Plate 3794 fiber 544 has 18 exposures
Plate 4983 fiber 82 has 6 exposures
Plate 4184 fiber 828 has 4 exposures
Plate 3833 fiber 122 has 8 exposures
Plate 4901 fiber 495 has 7 exposures
Plate 4480 fiber 304 has 6 exposures
Plate 5880 fiber 432 has 4 exposures
Plate 3688 fiber 62 has 5 exposures
Plate 7417 fiber 986 has 5 exposures
Plate 7258 fiber 880 has 6 exposures
Plate 4977 fiber 164 has 3 exposures
Plate 6032 fiber 78 has 3 exposures
Plate 5785 fiber 148 has 5 exposures
P

In [128]:
kilo_sky_r5_data.head()

Unnamed: 0,index,PLATE,MJD,XFOCAL,YFOCAL,FIBER,RA,DEC,OBJTYPE,AIRMASS_0,...,inverse_variance4119,inverse_variance4120,inverse_variance4121,inverse_variance4122,inverse_variance4123,inverse_variance4124,inverse_variance4125,inverse_variance4126,inverse_variance4127,inverse_variance4128
0,0,3844,55321,-93.327362,140.945984,566,180.40334,0.647407,SKY,0.0,...,4611232.0,3.3382400000000004e-33,0.0,0.0,0.0,-2.216037e-35,3.064008e-06,0.0,0.0,-0.029867
1,1,3844,55321,-93.327362,140.945984,566,180.40334,0.647407,SKY,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,3844,55321,-93.327362,140.945984,566,180.40334,0.647407,SKY,0.0,...,4611232.0,3.3382400000000004e-33,0.0,0.0,0.0,-2.216037e-35,3.064008e-06,0.0,0.0,-0.029867
3,3,3844,55321,-93.327362,140.945984,566,180.40334,0.647407,SKY,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,5399,55956,-119.386322,150.76239,640,185.17342,12.159917,SKY,0.0,...,7.174648e-43,-3.249857e+18,28537474.0,-1.079683e-15,12.765937,-1.816097e-37,9.403955e-38,0.0,1.3563159999999999e-19,0.0


In [133]:
kilo_sky_r5_data.rename(columns={'index':'exposure_index'}, inplace=True)

In [134]:
kilo_sky_r5_data.head()

Unnamed: 0,exposure_index,PLATE,MJD,XFOCAL,YFOCAL,FIBER,RA,DEC,OBJTYPE,AIRMASS_0,...,inverse_variance4119,inverse_variance4120,inverse_variance4121,inverse_variance4122,inverse_variance4123,inverse_variance4124,inverse_variance4125,inverse_variance4126,inverse_variance4127,inverse_variance4128
0,0,3844,55321,-93.327362,140.945984,566,180.40334,0.647407,SKY,0.0,...,4611232.0,3.3382400000000004e-33,0.0,0.0,0.0,-2.216037e-35,3.064008e-06,0.0,0.0,-0.029867
1,1,3844,55321,-93.327362,140.945984,566,180.40334,0.647407,SKY,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2,3844,55321,-93.327362,140.945984,566,180.40334,0.647407,SKY,0.0,...,4611232.0,3.3382400000000004e-33,0.0,0.0,0.0,-2.216037e-35,3.064008e-06,0.0,0.0,-0.029867
3,3,3844,55321,-93.327362,140.945984,566,180.40334,0.647407,SKY,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0,5399,55956,-119.386322,150.76239,640,185.17342,12.159917,SKY,0.0,...,7.174648e-43,-3.249857e+18,28537474.0,-1.079683e-15,12.765937,-1.816097e-37,9.403955e-38,0.0,1.3563159999999999e-19,0.0


Save as HDF5 File.

In [145]:
kilo_sky_r5_data.to_hdf('kilo_sky_r5_features.hdf5', key='df', mode='w')

In [146]:
! ls

home			kilo_sky_r5_features.hdf5  sdss
kilo_sky_features.hdf5	qso.dat			   sdss.building
