In [0]:
# Root directory of the X-Shooter catalog data and the CSV listing of the
# downloadable spectra that lives inside it.
DATA_DIR = '/datascope/subaru/data/catalogs/xshooter/'
LIST_FILE = f'{DATA_DIR}_list.csv'

# Colon-separated list of source checkouts that are made importable below.
PROJECT_PATH = ':'.join([
    '/home/dobos/project/ga_isochrones/python',
    '/home/dobos/project/ga_pfsspec_all/python',
    '/home/dobos/project/pysynphot',
])

# Location of the PHOENIX HiRes stellar model grid.
GRID_PATH = '/datascope/subaru/data/pfsspec/models/stellar/grid/phoenix/phoenix_HiRes'

# XShooter DR3

Verro et al.
* Paper in A&A: https://www.aanda.org/articles/aa/pdf/2022/04/aa42388-21.pdf
* Data in Vizier: http://cdsarc.u-strasbg.fr/viz-bin/cat/J/A+A/660/A34

Atmospheric parameters in Vizier:
* Arentsen+, 2019
  * http://vizier.cds.unistra.fr/viz-bin/VizieR-3?-source=J/A%2bA/627/A138/tablea1
  * https://cdsarc.cds.unistra.fr/viz-bin/cat/J/A+A/627/A138#/browse

This moderate-to-high resolution, near-ultraviolet-to-near-infrared (350–2480 nm, R ∼ 10 000) spectral library is composed of 830 stellar spectra of 683 stars. 

CSV file downloaded from Vizier using the SQL search tool with the query:

```sql
SELECT [default] FROM obscore
WHERE obs_collection='J/A+A/660/A34'
```

In [0]:
import os, sys, re, glob
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import notebook as tqdm
from astropy.io import fits
from glob import glob

In [0]:
# Make the project checkouts importable as modules: prepend every entry of
# PROJECT_PATH to sys.path, keeping their relative order.
project_dirs = PROJECT_PATH.split(':')
sys.path[:0] = project_dirs

# Only the first entry is exported through the environment.
# NOTE(review): confirm that child processes do not need the other entries.
os.environ['PYTHONPATH'] = project_dirs[0]

In [0]:
# Sanity check: display the PYTHONPATH value stored in the environment.
os.environ['PYTHONPATH']

In [0]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

# Load xshooter object list

In [0]:
from astropy import units as u
from astropy.coordinates import SkyCoord, Angle

# Fixed-width layout of the observation table:
#   column name -> (start, end, dtype), with [start, end) character offsets.
# Builtin `str` / `float` are used instead of the `np.str` / `np.float`
# aliases, which were removed from NumPy and raise AttributeError there.
fn = os.path.join(DATA_DIR, 'table.dat')
cols = {
    'obj_id':       (0, 25,     str),
    'uncert':       (25, 26,    str),
    'ra':           (27, 40,    str),
    'dec':          (40, 51,    str),
    'xsl_id':       (53, 58,    str),
    'mjd':          (59, 69,    float),
    'arm_missing':  (70, 77,    str),
    'file_uvb':     (78, 118,   str),
    'file_vis':     (119, 159,  str),
    'file_nir':     (160, 200,  str),
    'comments':     (201, 401,  str)}
table = pd.read_fwf(fn, header=None,
    names=list(cols),
    dtype={ name: spec[2] for name, spec in cols.items() },
    colspecs=[ (spec[0], spec[1]) for spec in cols.values() ])

# Drop the one-character prefix of the identifier and keep the number.
table['xsl_id'] = table['xsl_id'].map(lambda x: int(x[1:]))
# Coordinates are read as strings; RA is parsed as an hour angle and Dec as
# degrees, and both are stored as decimal degrees.
table['ra'] = table['ra'].map(lambda ra: Angle(ra, unit=u.hourangle).deg)
table['dec'] = table['dec'].map(lambda dec: Angle(dec, unit=u.degree).deg)

In [0]:
# Display the parsed observation table.
table

# Load Arentsen+19 data

In [0]:
# Fixed-width layout of the Arentsen+19 parameter table:
#   column name -> (start, end, dtype), with [start, end) character offsets.
# Kept as a single mapping so that name, position and type of a column cannot
# drift apart. Builtin `str` / `int` / `float` replace the `np.str` /
# `np.int` / `np.float` aliases, which were removed from NumPy.
fn = os.path.join(DATA_DIR, 'Arentsen+19/tablea1.dat')
tablea1_cols = {
    'HNAME':        (0, 24,     str),
    'xsl_id':       (26, 29,    int),
    'Teffuvb':      (30, 35,    float),
    'logguvb':      (36, 41,    float),
    '[Fe/H]uvb':    (42, 47,    float),
    'Teffvis':      (48, 53,    float),
    'loggvis':      (54, 59,    float),
    '[Fe/H]vis':    (60, 65,    float),
    'Teff':         (66, 71,    float),
    'e_Teff':       (72, 76,    float),
    'logg':         (77, 82,    float),
    'e_logg':       (83, 87,    float),
    '[Fe/H]':       (88, 93,    float),
    'e_[Fe/H]':     (94, 98,    float),
    # single-character flag columns
    'f_Teff':       (99, 100,   str),
    'f_logg':       (101, 102,  str),
    'f_[Fe/H]':     (103, 104,  str),
    'Cflag':        (105, 106,  str)}
tablea1 = pd.read_fwf(fn, header=None,
    names=list(tablea1_cols),
    dtype={ name: spec[2] for name, spec in tablea1_cols.items() },
    colspecs=[ (spec[0], spec[1]) for spec in tablea1_cols.values() ])

tablea1.shape

In [0]:
# Inspect the last rows of the parameter table to check the parsing.
#tablea1.head(3)
tablea1.tail(5)

# Load file list

In [0]:
# List the FITS files available locally, by base name only.
ff = glob(os.path.join(DATA_DIR, 'fits/*.fits'))
ff = [ os.path.basename(f) for f in ff ]

# One row per file; the numeric id is the first run of digits in the file name.
# The pattern is a raw string so that `\d` is not treated as a (deprecated)
# string escape. Columns are given explicitly so an empty directory still
# yields a frame with the expected schema.
files = pd.DataFrame(
    [ { 'xsl_id': int(re.search(r'(\d+)', f).group(0)), 'file_merged': f } for f in ff ],
    columns=['xsl_id', 'file_merged'])
files

# Read all FITS headers and write to file

In [0]:
# Collect the primary-HDU header of every FITS file into one table and cache
# it as CSV; later runs read the cache instead of re-opening every file.
fn = os.path.join(DATA_DIR, '_headers.csv')
if not os.path.isfile(fn):
    ff = glob(os.path.join(DATA_DIR, 'fits/*.fits'))
    if not ff:
        # Fail early with a clear message instead of caching an empty table.
        raise FileNotFoundError('No FITS files found in ' + os.path.join(DATA_DIR, 'fits'))

    # Gather one dict per file and build the frame once: DataFrame.append was
    # removed in pandas 2.0 and growing a frame row by row is quadratic.
    records = []
    for f in ff:
        # The context manager closes the file handle once the header is copied.
        with fits.open(f, memmap=False) as hdul:
            primary = hdul[0].header
            headers = { k: primary[k] for k in primary.keys() }
        headers['filename'] = os.path.basename(f)
        records.append(headers)

    fitsdata = pd.DataFrame(records)
    # The index is written too, so a frame read back from the cache carries an
    # extra unnamed index column.
    fitsdata.to_csv(fn)
else:
    fitsdata = pd.read_csv(fn)

# Numeric id = first run of digits in the XSL_ID header value.
fitsdata['xsl_id'] = fitsdata['XSL_ID'].map(lambda x: int(re.search(r'(\d+)', x).group(0)))
fitsdata.shape

In [0]:
# List the header keywords available as columns.
fitsdata.columns

In [0]:
# Show all header values of one example row (positional index 13).
list(fitsdata.iloc[13])

# Join tables to get file names and physical parameters

In [0]:
# Observation table joined with the atmospheric parameters on the numeric id.
obs_params = table.merge(tablea1, left_on='xsl_id', right_on='xsl_id')

# Local files joined with their FITS headers on the file name. Both inputs
# carry an `xsl_id` column, so pandas renames them `xsl_id_x` / `xsl_id_y`.
spectra = files.merge(fitsdata, left_on='file_merged', right_on='filename')

# Final catalog: one row per spectrum that has parameters, restricted to the
# identifiers, the UVB-arm parameters and the signal-to-noise ratio.
xsl_columns = ['xsl_id', 'obj_id', 'filename', '[Fe/H]uvb', 'Teffuvb', 'logguvb', 'SNR']
xsl = obs_params.merge(spectra, left_on='xsl_id', right_on='xsl_id_x')[xsl_columns]

In [0]:
# Display the joined catalog.
xsl

In [0]:
# Scatter plot of the UVB-arm parameters: Teff against log g, colored by [Fe/H].
# ScalarFormatter is only referenced by the commented-out formatter lines below.
from matplotlib.ticker import ScalarFormatter

f, ax = plt.subplots(1, 1, figsize=(3.4, 3.4), dpi=240)

l = ax.scatter(xsl['Teffuvb'], xsl['logguvb'], c=xsl['[Fe/H]uvb'], s=1, cmap='jet')
# Both axes are flipped so that values decrease to the right / upwards.
ax.invert_xaxis()
ax.invert_yaxis()
ax.set_xscale('log')
# Only the left limit is fixed; the right limit is left as it was.
ax.set_xlim(11000, None)
#ax.xaxis.set_major_formatter(ScalarFormatter())
#ax.xaxis.set_minor_formatter(ScalarFormatter())
ax.set_xlabel(r'$T_\mathrm{eff}$')
ax.set_ylabel(r'$\log\,g$')

# Color scale for the [Fe/H] values passed as `c` above.
f.colorbar(l)

# Read FITS and plot

In [0]:
# Pick the rows with [Fe/H] < -1, log g < 3 and Teff < 6000 (UVB-arm values).
mask = xsl['[Fe/H]uvb'].lt(-1) & xsl['logguvb'].lt(3) & xsl['Teffuvb'].lt(6000)
xsl.loc[mask]

In [0]:
# Open one example spectrum and print each HDU it contains.
# `hdus` is left open on purpose: the following cells read from it.
fn = os.path.join(DATA_DIR, 'fits/xsl_spectrum_X0442_merged.fits')
hdus = fits.open(fn, memmap=False)
for h in hdus:
    print(h)

In [0]:
# Display the full primary header of the example spectrum.
hdus[0].header

In [0]:
# Selected primary-header values: XSL_ID, OBJECT, SNR and SPEC_RES.
hdus[0].header['XSL_ID'], hdus[0].header['OBJECT'], hdus[0].header['SNR'], hdus[0].header['SPEC_RES']


# Other header keywords of interest:
# BARY_COR      / Barycentric radial velocity correction value 
# REST_VIS      / VIS cz value in km/s 
# REST_UVB      / UVB cz value in km/s 
# AV_VAL        / Total extinction in V

In [0]:
# Column names of the table stored in the first extension HDU.
hdus[1].data.names

In [0]:
# Pull the spectrum columns out of the first extension's table.
spec = hdus[1].data
wave, flux, flux_err = spec['WAVE'], spec['FLUX'], spec['ERR']

# FLUX_DR is not present in every file; None marks its absence.
# (dereddened flux?)
flux_dr = spec['FLUX_DR'] if 'FLUX_DR' in spec.names else None

In [0]:
# Compare the two flux arrays (flux_dr is None when the column is missing).
flux, flux_dr

In [0]:
# Plot the example spectrum: FLUX_DR (when available), flux and flux error.
f, ax = plt.subplots(1, 1, figsize=(4, 3), dpi=240)

# Select every pixel; swap in the commented range to zoom on a feature.
# Builtin `bool` replaces the `np.bool` alias, which was removed from NumPy.
#mask = (790 <= wave) & (wave <= 796)
mask = np.ones_like(wave, dtype=bool)
print(mask.sum())

# flux_dr is None for files without a FLUX_DR column; skip that curve then.
if flux_dr is not None:
    ax.plot(wave[mask], flux_dr[mask], lw=0.3)
ax.plot(wave[mask], flux[mask], lw=0.3)
ax.plot(wave[mask], flux_err[mask], lw=0.3)

# Clip the y range at 110% of the 99th flux percentile so that a few outlying
# pixels do not set the scale; fall back to `flux` when FLUX_DR is missing.
flux_ref = flux_dr if flux_dr is not None else flux
ax.set_ylim(0, 1.1 * np.quantile(flux_ref[mask], 0.99))

ax.set_xlabel('wavelength [nm]')
ax.set_ylabel('flux [erg s-1 cm-2 A-1]')

ax.set_title('{}, SNR={:.2f}'.format(hdus[0].header['HNAME'], hdus[0].header['SNR']))

ax.grid()

# Spectral resolution

In [0]:
# Pixel width and the midpoint wavelength of every pair of neighboring pixels.
dwave = np.diff(wave)
wref = 0.5 * (wave[:-1] + wave[1:])

# Wavelength over pixel width, evaluated at the midpoints.
R = wref / dwave

# Mean and scatter of that ratio between 700 and 900 (wavelength units).
mask = (wref >= 700) & (wref <= 900)
R[mask].mean(), R[mask].std()

In [0]:
# Wavelength over pixel width as a function of wavelength.
f, ax = plt.subplots(1, 1, figsize=(4, 3), dpi=240)

ax.plot(wref, R, lw=0.5)

ax.set_ylim(0, 50000)
# Axis labels so that the figure can be read on its own.
ax.set_xlabel('wavelength [nm]')
ax.set_ylabel(r'$\lambda / \Delta\lambda$ per pixel')
ax.grid()

In [0]:
# Pixel width (difference of consecutive wavelengths) as a function of wavelength.
f, ax = plt.subplots(1, 1, figsize=(4, 3), dpi=240)

ax.plot(wref, np.diff(wave), lw=0.5)

# Axis labels so that the figure can be read on its own.
ax.set_xlabel('wavelength [nm]')
ax.set_ylabel(r'$\Delta\lambda$ [nm]')
ax.grid()

# Read file list

In [0]:
# Read the listing of downloadable spectra.
df = pd.read_csv(LIST_FILE)
# Numeric id = first run of digits in obs_id. The pattern is a raw string so
# that `\d` is not treated as a (deprecated) string escape.
df['xsl_id'] = df['obs_id'].map(lambda x: int(re.search(r'(\d+)', x).group(0)))
df.shape

In [0]:
# Columns available in the download listing.
df.columns

In [0]:
# Number of distinct obs_id values in the listing.
len(df['obs_id'].unique())

In [0]:
# Preview the first rows of the listing.
df.head(3)

# Download data

In [0]:
# Download every listed spectrum that is not on disk yet.
DOWNLOAD_TIMEOUT = 60       # seconds, so a stalled connection cannot hang the loop

fits_dir = os.path.join(DATA_DIR, 'fits')
os.makedirs(fits_dir, exist_ok=True)

# The context manager closes the progress bar even if a download fails.
with tqdm.tqdm(total=df.shape[0]) as t:
    for i, r in df.iterrows():
        url = r['access_url']
        fn = os.path.join(fits_dir, r['obs_id'])
        if not os.path.isfile(fn):
            res = requests.get(url, timeout=DOWNLOAD_TIMEOUT)
            # Stop on HTTP errors instead of saving an error page as a FITS file.
            res.raise_for_status()
            # Write to a temporary name and rename once complete, so that an
            # interrupted run never leaves a truncated file that the
            # `isfile` check above would later accept as downloaded.
            fn_part = fn + '.part'
            with open(fn_part, 'wb') as f:
                f.write(res.content)
            os.replace(fn_part, fn)
        t.update(1)

In [0]:
# List the target name of every entry in the listing.
list(df['target_name'])