<!--BOOK_INFORMATION-->
<img align="left" style="padding-right:10px;" src="figures/xveganx_icon.png">
*This notebook contains ongoing research material from [gully](https://twitter.com/gully_); the content is available [on GitHub](https://github.com/BrownDwarf/xveganx).*


<!--NAVIGATION-->
< [Metadata](01.00-Metadata.ipynb) | [Contents](Index.ipynb) | [Period, Spectral Type, and multiplicity](01.02-Grankin2008_period_info.ipynb) >

# IGRINS FITS files

In this notebook we build a table of all the local FITS files and their header information.  This metadata makes it easier to tell which FITS file is which, and to search for trends across FITS headers.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from astropy.io import fits

In [2]:
import astropy
import os
import pandas as pd
from astropy.utils.console import ProgressBar

In [3]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

### Read all the fits header keywords into a big dataframe

In [4]:
# Directory containing the IGRINS FITS files, relative to this notebook.
# The trailing slash matters: later cells build file paths by plain string
# concatenation (fits_path + f_name).
fits_path = '../data/IGRINS/MGS_data_request/'

In [5]:
# os.listdir returns entries in arbitrary order, so sort the names.  Later cells
# index this list by position (c_files[targ]) and pair the SDCH / SDCK rows by
# position after splitting, so the order must be reproducible across machines
# and re-runs.
all_files = sorted(os.listdir(fits_path))
# Keep only the FITS files (any name containing '.fits').
c_files = [file for file in all_files if '.fits' in file]

In [6]:
# Placeholder object for header cards that have no value; the header loop
# below compares against it to leave such keywords out of the table.
undef = fits.card.UNDEFINED

In [7]:
# Number of FITS files found; also used as the progress-bar total below.
N_files = len(c_files)
N_files

44

In [8]:
# Open a single example file to inspect its structure before looping over all files.
targ = 42  # position of the example file within c_files
f_name = c_files[targ]
# The handle is intentionally left open: the next cell calls hdu_full.info().
hdu_full = fits.open(fits_path + f_name)
hdu0 = hdu_full[1]
# Header keywords of that extension, plus an extra 'filename' entry.
hdr_keys = list(hdu0.header.keys())
hdr_keys.append('filename')

In [9]:
hdu_full.info()

Filename: ../data/IGRINS/MGS_data_request/SDCK_20150106_0089.spec_a0v.fits
No.    Name      Ver    Type      Cards   Dimensions   Format
  0  SPEC_DIVIDE_A0V    1 PrimaryHDU     276   (2048, 26)   float64   
  1  WAVELENGTH    1 ImageHDU       275   (2048, 26)   float64   
  2  TGT_SPEC      1 ImageHDU       275   (2048, 26)   float32   
  3  A0V_SPEC      1 ImageHDU       275   (2048, 26)   float32   
  4  VEGA_SPEC     1 ImageHDU       275   (2048, 26)   float64   
  5  SPEC_DIVIDE_CONT    1 ImageHDU       275   (2048, 26)   float64   


Takes ~ 1 second

In [10]:
%%time
big_list = []
with ProgressBar(N_files, ipython_widget=True) as bar:
    for i, f_name in enumerate(c_files):
        bar.update()
        try:
            hdu_full = fits.open(fits_path+f_name, memmap=True)
        except OSError:
            print("{:06.0f} had a problem".format(i))
        hdu0 = hdu_full[2]
        dict_out = {key:hdu0.header[key] for key in hdu0.header.keys() if hdu0.header[key] != undef}
        dict_out['filename'] = f_name
        big_list.append(dict_out)
        hdu_full.close()


CPU times: user 1.05 s, sys: 67.1 ms, total: 1.11 s
Wall time: 1.17 s


In [11]:
# One row per file, one column per header keyword seen in any file;
# keywords missing from a given file are left empty (NaN) in that row.
df_out = pd.DataFrame(big_list)

In [12]:
df_out.tail(2)

Unnamed: 0,ACQTIME,ACQTIME1,ACQTYPE,AIRTEMP,ALT,AMEND,AMPINPUT,AMPRESET,AMSTART,ASICGAIN,...,WAT2_063,WAT2_064,WAT2_065,WAT2_066,WAT3_001,WCSDIM,XTENSION,ZDEND,ZDSTART,filename
42,2457030.0,2015-01-07-03:22:21.640,5,,29.1,1.0559,1,0,1.0559,8,...,,,,,wtype=linear,3,IMAGE,,,SDCK_20150106_0089.spec_a0v.fits
43,2457345.0,2015-11-18-09:02:30.876,5,,21.8,1.0735,1,0,1.0735,8,...,,,,,wtype=linear,3,IMAGE,,,SDCK_20151117_0205.spec_a0v.fits


### Decide which of the extraneous columns to trim off

In [13]:
# Columns whose name contains 'WAT' or 'COMMENT' are excluded from the table;
# every other column passes.
fail_cols = df_out.columns.str.contains('WAT|COMMENT')
pass_cols = ~fail_cols

In [14]:
df_out.columns[pass_cols].values

array(['ACQTIME', 'ACQTIME1', 'ACQTYPE', 'AIRTEMP', 'ALT', 'AMEND',
       'AMPINPUT', 'AMPRESET', 'AMSTART', 'ASICGAIN', 'ASIC_NUM', 'AZI',
       'BAND', 'BANDID1', 'BARPRESS', 'BITPIX', 'CD1_1', 'CD2_2', 'CD3_3',
       'CLKOFF', 'CLOCKING', 'CTYPE1', 'CTYPE2', 'CTYPE3', 'DATAMODE',
       'DATE', 'DATE-END', 'DATE-OBS', 'DATLEVEL', 'DECGS', 'DECTEL',
       'DECUSER', 'DETECTOR', 'DEWPOINT', 'DOMETEMP', 'EPOCH', 'EXPMODE',
       'EXPTIME', 'EXTNAME', 'FILTER', 'FITSFILE', 'FOCUS', 'FRMNAME',
       'FRMODE', 'FRMTIME', 'GAIN', 'GCOUNT', 'GLBRESET', 'HAEND',
       'HASTART', 'HUMIDITY', 'HXRGVER', 'INSTRUME', 'KTCREMOV',
       'LONGSTRN', 'LTM1_1', 'LTM2_2', 'LTM3_3', 'MCLK', 'MJD-OBS',
       'MUXTYPE', 'NADCS', 'NAXIS', 'NAXIS1', 'NAXIS2', 'NEXTRAL',
       'NEXTRAP', 'NOMGAIN', 'NOTES', 'NOUTPUTS', 'NRESETS', 'OBJECT',
       'OBJTYPE', 'OBSERVAT', 'OBSERVER', 'OBSGROUP', 'OBSID', 'OBSTOT',
       'PCOUNT', 'PDDECTOR', 'PIXSCALE', 'RAGS', 'RATEL', 'RAUSER',
       'RDNOISE', '

In [15]:
# Number of distinct values in each retained column (column name -> count).
count_dict = dict(
    (name, len(df_out[name].unique()))
    for name in df_out.columns[pass_cols]
)

In [16]:
# Columns ranked from most to fewest distinct values.
# NOTE(review): columns with equal counts may come out in any relative order
# (default sort is presumably not stable), which matters for the positional
# cut applied below -- confirm.
uniq_entries = pd.Series(count_dict).sort_values(ascending=False)

Let's look at the "boilerplate" columns and sparse columns.  We want to get rid of the boilerplate columns that are the same for each observation -- *e.g.* we know these were all observed with IGRINS!

In [17]:
#uniq_entries

In [18]:
#for col in uniq_entries[:-87].index.values:
#    print("{:.<20} : {}".format(col, df_out[col].unique()))

In [19]:
uniq_entries[:-87].index.values

array(['filename', 'FITSFILE', 'AMEND', 'AMSTART', 'ACQTIME1', 'DATE-END',
       'DATE-OBS', 'ACQTIME', 'ALT', 'OBJECT', 'FOCUS', 'OBSERVER',
       'UTDATE', 'DATE', 'RAUSER', 'EXPTIME', 'RAGS', 'AZI', 'RATEL',
       'DECUSER', 'DECTEL', 'DECGS', 'NAXIS2'], dtype=object)

In [20]:
# Keep every column except the 87 with the fewest distinct values.
# NOTE(review): -87 is a positional cut that appears hand-tuned to this
# dataset; it will select different columns if the number of header
# keywords changes.
good_cols = uniq_entries.index.values[:-87]

## Combine H and K band info when not unique.

In [21]:
# Restrict the table to the informative columns selected above.
# Note that this rebinds df_out, so the full header table is no longer
# available under that name after this cell runs.
df_out = df_out.loc[:, good_cols]

In [22]:
df_out.shape

(44, 23)

In [23]:
# Split the table by band using the file-name prefix ('SDCH' vs 'SDCK').
# Each half is re-indexed from 0 because the following cells compare and merge
# the two halves row-by-row on that index; this assumes both halves list the
# observations in the same order.
is_h_band = df_out['filename'].str.contains('SDCH')
is_k_band = df_out['filename'].str.contains('SDCK')
Hdf = df_out.loc[is_h_band].reset_index(drop=True)
Kdf = df_out.loc[is_k_band].reset_index(drop=True)

In [24]:
# For every column, count the rows in which the K-band and H-band values disagree.
band_differences = dict(
    (name, (Kdf[name] != Hdf[name]).sum())
    for name in df_out.columns
)
# Rank columns from most to fewest disagreements; a count of 0 means the
# column is identical in both bands.
band_unique = pd.Series(band_differences)
band_unique = band_unique.sort_values(ascending=False)
band_unique

filename    22
NAXIS2      22
FITSFILE    16
AMEND       16
AMSTART     16
ACQTIME1    15
DATE-END    15
DATE-OBS    15
ACQTIME     15
FOCUS        0
EXPTIME      0
UTDATE       0
DECTEL       0
DECGS        0
OBJECT       0
OBSERVER     0
DATE         0
AZI          0
RAGS         0
RATEL        0
ALT          0
RAUSER       0
DECUSER      0
dtype: int64

In [25]:
band_unique[-14:].index

Index(['FOCUS', 'EXPTIME', 'UTDATE', 'DECTEL', 'DECGS', 'OBJECT', 'OBSERVER',
       'DATE', 'AZI', 'RAGS', 'RATEL', 'ALT', 'RAUSER', 'DECUSER'],
      dtype='object')

In [26]:
# Columns with zero H/K disagreements are already carried by Hdf, so drop the
# K-band copies to avoid duplicated columns in the merge.  Selecting on the
# count (== 0) instead of the last 14 positions keeps this correct if the
# number of header columns changes.
shared_cols = list(band_unique[band_unique == 0].index)
# These columns do differ between bands in some rows (see the counts above),
# but only the H-band values are kept in the combined table.
h_only_cols = ['DATE-OBS', 'DATE-END', 'ACQTIME', 'ACQTIME1', 'AMEND', 'AMSTART']
# Reassign rather than mutate in place, and ignore labels that are already
# gone, so this cell can be re-run without raising KeyError.
Kdf = Kdf.drop(columns=shared_cols + h_only_cols, errors='ignore')

In [27]:
# Join the H and K tables side by side on their row index.  Column names that
# occur in both tables get a '_H' / '_K' suffix.
combined_df = Hdf.merge(
    Kdf,
    how='outer',
    left_index=True,
    right_index=True,
    suffixes=('_H', '_K'),
)

In [28]:
combined_df.columns

Index(['filename_H', 'FITSFILE_H', 'AMEND', 'AMSTART', 'ACQTIME1', 'DATE-END',
       'DATE-OBS', 'ACQTIME', 'ALT', 'OBJECT', 'FOCUS', 'OBSERVER', 'UTDATE',
       'DATE', 'RAUSER', 'EXPTIME', 'RAGS', 'AZI', 'RATEL', 'DECUSER',
       'DECTEL', 'DECGS', 'NAXIS2_H', 'filename_K', 'FITSFILE_K', 'NAXIS2_K'],
      dtype='object')

In [29]:
# Save the combined H+K header table; index=False leaves the positional row
# index out of the file.
combined_df.to_csv('../data/metadata/IGRINS_hdr_table.csv', index=False)

The end.

<!--NAVIGATION-->
< [Metadata](01.00-Metadata.ipynb) | [Contents](Index.ipynb) | [Period, Spectral Type, and multiplicity](01.02-Grankin2008_period_info.ipynb) >