# Convert original unredshifted Fors2 spectra

- author : Sylvie Dagoret-Campagne
- affiliation : IJCLab/IN2P3/CNRS
- creation date : 2023/05/23
- last update : 2023/05/23
- FORS2 : https://github.com/ericnuss/fors2


- The notebook *ExploreFors2inOriginalFrame.ipynb* MUST be run before this one


## Import

In [1]:
import os,sys,re
import h5py
from astropy.table import Table
import random

In [2]:
# Import some generally useful packages

import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cmx

from matplotlib.backends.backend_pdf import PdfPages

%matplotlib inline
import pandas as pd
from itertools import cycle, islice


In [3]:
from scipy import ndimage

#from scipy import interp as scinterp
from scipy.interpolate import interp1d

import glob
import collections
#from def_raw_seds import *

In [4]:
# Notebook-wide matplotlib defaults: small square figures, a large legend
# font, and the largest font size for axis labels, titles and tick labels.
params = {
    "legend.fontsize": "large",
    "figure.figsize": (4, 4),
    **{
        rc_key: "xx-large"
        for rc_key in (
            "axes.labelsize",
            "axes.titlesize",
            "xtick.labelsize",
            "ytick.labelsize",
        )
    },
}
plt.rcParams.update(params)

In [5]:
import matplotlib.offsetbox
# Keyword set describing a rounded, white, half-transparent box.
props = {
    "boxstyle": "round",
    "edgecolor": "w",
    "facecolor": "w",
    "alpha": 0.5,
}

In [6]:
from def_raw_seds import *

In [7]:
from raw_data_analysis import *

In [8]:
# Directory holding the raw spectrum text files, and the names of all its
# entries (os.listdir returns them in arbitrary order; they are sorted later).
path_out_sdc          = "./raw/"
all_specfilenames = os.listdir(path_out_sdc)

# Start

In [9]:
# Absolute path of the directory the notebook is executed from; it is kept
# under two names (`cwd` and `main_dir`).
main_dir = cwd = os.path.abspath("")
print(f"notebook current executing path : {cwd}")

notebook current executing path : /Users/dagoret/MacOSX/GitHub/LSST/PhotoZ_PhD/StudyFors2SED


# Read catalog

In [10]:
# Display the catalogue path. The name is not assigned in any cell of this
# notebook, so it presumably comes from one of the star imports — TODO confirm.
filename_fits_catalog

'/Users/dagoret/MacOSX/GitHub/LSST/fors2/fors2/data/fors2_catalogue.fits'

In [11]:
# Load the catalogue file into an astropy Table; `t` is the lookup table used
# by the cells below to retrieve per-object quantities by "ID".
t = Table.read(filename_fits_catalog)

  mask = data[col.name] == col.null


In [12]:
# Boolean row mask selecting the catalogue entry whose ID is 3, then display
# the matching row(s).
cut = t["ID"] == 3
t[cut]

ID,m_ID,RAJ2000,DEJ2000,z,u_z,Rmag,RV,e_RV,RT,Nsp,Lines
Unnamed: 0_level_1,Unnamed: 1_level_1,deg,deg,Unnamed: 4_level_1,Unnamed: 5_level_1,mag,km / s,km / s,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
int16,bytes1,float64,float64,float64,bytes1,float64,int32,int16,float64,int16,bytes77
3,,13.52552,-28.4208,0.6866,,21.45,210642,48,11.6,4,"[OII],H9,K,H,H{beta},[OIII],H8"


In [13]:
# Boolean row mask selecting the catalogue entry whose ID is 738, then display
# the matching row(s).
cut = t["ID"] == 738
t[cut]

ID,m_ID,RAJ2000,DEJ2000,z,u_z,Rmag,RV,e_RV,RT,Nsp,Lines
Unnamed: 0_level_1,Unnamed: 1_level_1,deg,deg,Unnamed: 4_level_1,Unnamed: 5_level_1,mag,km / s,km / s,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
int16,bytes1,float64,float64,float64,bytes1,float64,int32,int16,float64,int16,bytes77
738,,13.6437,-28.22527,0.56044,,19.73,168132,126,7.1,1,"K,H,4000{AA}-break,G"


# Read spectra

In [14]:
# Sort the file names as strings. This is a lexicographic order, not a numeric
# one (e.g. "SPEC102..." comes before "SPEC13...").
all_specfilenames = sorted(all_specfilenames)

### Make list of files to read

In [15]:
# Extract the integer spectrum number embedded in each file name
# ("SPEC<number>_raw_sdc.txt"). Directory entries that do not follow this
# naming scheme (hidden files such as ".DS_Store", other outputs, ...) are
# skipped: indexing the result of re.findall with [0] raised IndexError on the
# first such entry.
_spec_name_pattern = re.compile(r'^SPEC(.*)_raw_sdc[.]txt$')
all_numbers = [
    int(found.group(1))
    for found in map(_spec_name_pattern.match, all_specfilenames)
    if found is not None
]

In [16]:
# This numeric sort is required: the file names were sorted as strings, and
# lexicographic order differs from numeric order (e.g. "SPEC102" sorts before
# "SPEC13"), so the extracted numbers are not yet in increasing order.
all_numbers = sorted(all_numbers)

In [17]:
# Display the spectrum numbers extracted from the file names.
print(all_numbers)

[2, 3, 9, 13, 19, 24, 25, 30, 31, 32, 33, 34, 35, 36, 37, 45, 47, 49, 51, 55, 57, 58, 59, 61, 62, 66, 67, 68, 69, 70, 71, 72, 73, 77, 79, 80, 83, 84, 85, 86, 87, 89, 91, 93, 96, 97, 102, 106, 107, 109, 110, 111, 112, 113, 114, 115, 117, 118, 120, 121, 123, 127, 128, 132, 134, 135, 137, 138, 141, 149, 151, 152, 156, 160, 161, 164, 171, 178, 179, 181, 182, 183, 184, 185, 186, 187, 188, 189, 191, 192, 193, 194, 196, 197, 198, 204, 205, 210, 214, 218, 221, 222, 223, 226, 227, 231, 233, 234, 235, 236, 237, 238, 240, 242, 243, 244, 245, 246, 248, 249, 250, 252, 253, 258, 259, 260, 261, 262, 264, 265, 266, 267, 268, 271, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 287, 288, 291, 292, 294, 295, 296, 297, 298, 301, 302, 303, 304, 305, 306, 307, 308, 309, 313, 315, 317, 318, 319, 321, 322, 323, 324, 325, 326, 327, 328, 329, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 343, 344, 345, 346, 348, 349, 350, 351, 352, 353, 354, 355, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367

In [18]:
# First pass over the raw spectra. Every list receives one entry per spectrum
# number, appended in the order of `all_numbers`.
all_spectra, all_wl, all_tag, all_lines = [], [], [], []

for the_num in all_numbers:
    # Location of the text file holding this spectrum.
    filename = f"SPEC{the_num}_raw_sdc.txt"
    fullfilename = os.path.join(path_out_sdc, filename)

    # Catalogue quantities of this object, used to build its plot label.
    tcut = t["ID"] == the_num
    redshift = t[tcut]["z"].value[0]
    lines = t[tcut]["Lines"].value[0]
    tag = f"SPEC{the_num} , z={redshift:.2f}, lines={lines}"

    # Column 0 is stored as the wavelength array, column 1 as the spectrum.
    arr = np.loadtxt(fullfilename)
    all_wl.append(arr[:, 0])
    all_spectra.append(arr[:, 1])
    all_tag.append(tag)
    all_lines.append(lines)
 

In [19]:
# One RGBA colour per spectrum: the spectrum index (0 .. NSED-1) is mapped
# onto the 'jet' colormap through a linear normalisation over [0, NSED].
NSED = len(all_numbers)
jet = plt.get_cmap('jet')
cNorm = colors.Normalize(vmin=0, vmax=NSED)
scalarMap = cmx.ScalarMappable(cmap=jet, norm=cNorm)
all_colors = scalarMap.to_rgba(np.arange(0, NSED, 1), alpha=1)

# Read all SEDs, normalise them in the range 0 - 10000 nm and interpolate

In [20]:
# Second pass over the raw spectra: rebuild the per-spectrum containers from
# scratch and also collect the catalogue quantities of each object.
# Every list receives one entry per spectrum, in the order of `all_numbers`.
all_spectra = []

all_tag = []
all_wl = []
# `all_lines` is appended to in the loop below, so it must be reset like the
# other containers; otherwise it keeps the entries of any earlier pass and
# ends up with more elements than there are spectra.
all_lines = []

all_redshifts = []
all_num = []
all_ra = []
all_dec = []
all_Rmag = []
all_RV = []
all_eRV = []
all_RT = []
all_Nsp = []


for the_num in all_numbers:

    # Select the catalogue row(s) of this spectrum once, instead of
    # re-evaluating the same boolean selection for every column.
    tcut     = t["ID"] == the_num
    entry    = t[tcut]
    redshift = entry["z"].value[0]
    lines    = entry["Lines"].value[0]
    ra       = entry["RAJ2000"].value[0]
    dec      = entry["DEJ2000"].value[0]
    Rmag     = entry["Rmag"].value[0]
    RV       = entry["RV"].value[0]
    e_RV     = entry["e_RV"].value[0]
    RT       = entry["RT"].value[0]
    Nsp      = entry["Nsp"].value[0]


    filename = f"SPEC{the_num}_raw_sdc.txt"
    tag = f"SPEC{the_num} , z={redshift:.2f}, lines={lines}"
    fullfilename = os.path.join(path_out_sdc,filename)

    # Column 0 is stored as the wavelength array, column 1 as the spectrum.
    arr = np.loadtxt(fullfilename)
    all_wl.append(arr[:,0])
    all_spectra.append(arr[:,1])
    all_tag.append(tag)
    all_lines.append(lines)


    all_redshifts.append(redshift)
    all_num.append(the_num)
    all_ra.append(ra)
    all_dec.append(dec)
    all_Rmag.append(Rmag)
    all_RV.append(RV)
    all_eRV.append(e_RV)
    all_RT.append(RT)
    all_Nsp.append(Nsp)
    

In [21]:
# Turn each per-spectrum list of catalogue values into a NumPy array so that
# it can be indexed with an array of selected positions later on.
(all_redshifts, all_num, all_ra, all_dec, all_Rmag,
 all_RV, all_eRV, all_RT, all_Nsp) = [
    np.array(values)
    for values in (all_redshifts, all_num, all_ra, all_dec, all_Rmag,
                   all_RV, all_eRV, all_RT, all_Nsp)
]

# Interpolate

In [23]:
# Display the list of per-spectrum wavelength arrays.
all_wl

[array([4670.08, 4673.44, 4676.8 , ..., 9243.04, 9246.4 , 9249.76]),
 array([4545.76, 4549.12, 4552.48, ..., 9243.04, 9246.4 , 9249.76]),
 array([4495.36, 4498.72, 4502.08, ..., 9243.04, 9246.4 , 9249.76]),
 array([4508.8 , 4512.16, 4515.52, ..., 8285.44, 8288.8 , 8292.16]),
 array([4539.04, 4542.4 , 4545.76, ..., 9243.04, 9246.4 , 9249.76]),
 array([4471.84, 4475.2 , 4478.56, ..., 8409.76, 8413.12, 8416.48]),
 array([4572.64, 4576.  , 4579.36, ..., 9243.04, 9246.4 , 9249.76]),
 array([4542.4 , 4545.76, 4549.12, ..., 9243.04, 9246.4 , 9249.76]),
 array([4485.28, 4488.64, 4492.  , ..., 8476.96, 8480.32, 8483.68]),
 array([4535.68, 4539.04, 4542.4 , ..., 9243.04, 9246.4 , 9249.76]),
 array([4532.32, 4535.68, 4539.04, ..., 9243.04, 9246.4 , 9249.76]),
 array([4616.32, 4619.68, 4623.04, ..., 9243.04, 9246.4 , 9249.76]),
 array([4525.6 , 4528.96, 4532.32, ..., 9243.04, 9246.4 , 9249.76]),
 array([4539.04, 4542.4 , 4545.76, ..., 9243.04, 9246.4 , 9249.76]),
 array([4495.36, 4498.72, 4502.08,

In [22]:
# Unconditionally raises AssertionError. Presumably a deliberate stop so that
# running the whole notebook halts before the cells below — TODO confirm.
# NOTE(review): the following cells use names (flux_out, WL, digitized, ...)
# that are not assigned in any cell visible above.
assert False

AssertionError: 

## Remove RMag = 0

In [None]:
# When the flag is set, keep the positions of the objects whose R magnitude
# is different from 0.
if FLAG_REMOVE_RMAG0:
    idx_selected = np.nonzero(all_Rmag != 0)[0]

In [None]:
# When the flag is set, restrict every catalogue array to the selected
# positions so that all of them stay aligned with each other.
if FLAG_REMOVE_RMAG0:
    (all_redshifts, all_num, all_ra, all_dec, all_Rmag,
     all_RV, all_eRV, all_RT, all_Nsp) = [
        values[idx_selected]
        for values in (all_redshifts, all_num, all_ra, all_dec, all_Rmag,
                       all_RV, all_eRV, all_RT, all_Nsp)
    ]

In [None]:
# When the flag is set, keep only the rows of the flux array that belong to
# the selected objects and update the number of samples accordingly.
if FLAG_REMOVE_RMAG0:
    NSAMPLES = len(idx_selected)

    # Index array on the first axis: the result is a new array holding the
    # selected rows, so no pre-allocated buffer is needed beforehand.
    flux_out_sel = flux_out[idx_selected,:]
    flux_out     = flux_out_sel

# Save in a file 

In [None]:
# Write the spectra and the associated per-object quantities to
# 'FORS2spectra.hdf5' (the file is created or overwritten, mode 'w').
with h5py.File('FORS2spectra.hdf5', 'w') as hf:
    # (dataset name, values, on-disk dtype), created in this order.
    _datasets = (
        ("flambda", flux_out, "float32"),
        ("wl", WL, "float32"),
        ("redshift", all_redshifts, "float32"),
        ("quantile", digitized, "int"),
        ("fracinfbalmer", flux_integ[:,3], "float32"),
        ("num", all_num, "int"),
        ("ra", all_ra, "float32"),
        ("dec", all_dec, "float32"),
        ("Rmag", all_Rmag, "float32"),
        ("RV", all_RV, "float32"),
        ("eRV", all_eRV, "float32"),
        ("RT", all_RT, "float32"),
        ("Nsp", all_Nsp, "float32"),
    )
    for _name, _values, _dtype in _datasets:
        hf.create_dataset(_name, data=_values, dtype=_dtype)

In [None]:
# NOTE(review): `hf` was opened by a `with` statement, which already closes
# the file when its block exits, so this explicit close looks redundant.
hf.close()

# Plots

In [None]:
# 'jet' colormap resampled to NQUANTILES discrete colours.
# `plt.get_cmap` is used (as elsewhere in this notebook) because
# `matplotlib.cm.get_cmap` is deprecated and absent from recent matplotlib.
cmap = plt.get_cmap('jet', NQUANTILES)

In [None]:
# Display the colormap object.
cmap

In [None]:
# Display the colour returned by the colormap for integer index 0.
cmap(0)

In [None]:
# Plot every spectrum of `flux_out` against `WL`, coloured by its quantile
# number, with a vertical line at WL_BALMER_BREAK.
fig = plt.figure(figsize=(8,6))
ax=fig.add_subplot(1,1,1)
for idx in range(NSAMPLES):

    # NOTE(review): if `digitized` holds 1-based bin numbers, NQUANTILES-nq
    # ranges over 1..NQUANTILES; confirm this is the intended colormap index.
    nq = digitized[idx]-1
    col =cmap(NQUANTILES-nq)
    ax.plot(WL,flux_out[idx,:], color=col)


ax.axvline(WL_BALMER_BREAK,color="k")
# NOTE(review): no plotted artist carries a label, so this legend has nothing
# to show.
ax.legend(loc="upper right")
ax.grid()
# Raw string: "\l" and "\A" are not valid Python escape sequences, and a
# non-raw literal triggers an invalid-escape warning. The text is unchanged.
ax.set_xlabel(r"wavelength $\lambda   (\AA)$")
ax.set_title("Spectral Energy Distribution of FOR2/SL synthetised Templates")

In [None]:
# Same figure as the full plot, but each spectrum is drawn only with a
# probability of 5 % (one random draw per spectrum) to keep it readable.
fig = plt.figure(figsize=(8,6))
ax=fig.add_subplot(1,1,1)
for idx in range(NSAMPLES):

    # NOTE(review): if `digitized` holds 1-based bin numbers, NQUANTILES-nq
    # ranges over 1..NQUANTILES; confirm this is the intended colormap index.
    nq = digitized[idx]-1
    col =cmap(NQUANTILES-nq)

    u = random.random()
    if u < 0.05:
        ax.plot(WL,flux_out[idx,:], color=col)

ax.set_xlim(0,10000)
ax.axvline(WL_BALMER_BREAK,color="k")
# NOTE(review): no plotted artist carries a label, so this legend has nothing
# to show.
ax.legend(loc="upper right")
ax.grid()
# Raw string: "\l" and "\A" are not valid Python escape sequences, and a
# non-raw literal triggers an invalid-escape warning. The text is unchanged.
ax.set_xlabel(r"wavelength $\lambda   (\AA)$")
ax.set_title("Spectral Energy Distribution of FOR2/SL synthetised Templates")

In [None]:
# Disabled cell (the condition is always false): one wide, flat figure per
# spectrum, coloured by quantile and labelled with the spectrum tag.
if False:
    for idx in range(NSAMPLES):
        plt.figure(figsize=(16, 2))

        nq = digitized[idx] - 1
        col = cmap(NQUANTILES - nq)

        plt.plot(WL, flux_out[idx, :], label=all_tag[idx], color=col)
        plt.legend(loc="upper right")
        plt.grid()

    plt.show()

# Read again the data

In [None]:
# Read the datasets back from 'FORS2spectra.hdf5'. Each `[:]` copies the
# dataset into memory, so the arrays remain usable after the file is closed.
with h5py.File('FORS2spectra.hdf5', 'r') as hf:
    # spectra, wavelength grid and quantile number
    data_in, wl_in, target = [
        hf[key][:] for key in ('flambda', 'wl', 'quantile')
    ]

    # other per-object data
    redshift, Rmag, RT, RV, ra, dec, fracinfbalmer = [
        hf[key][:]
        for key in ('redshift', 'Rmag', 'RT', 'RV', 'ra', 'dec', 'fracinfbalmer')
    ]


In [None]:
# Shorter aliases for the arrays read back from the file (same objects).
spectra, wavelengths = data_in, wl_in

In [None]:
# Display the shape of the spectra array read back from the HDF5 file.
spectra.shape

In [None]:
# Disabled cell (the condition is always false): one wide, flat figure per
# spectrum read back from the file, with its colour and tag.
if False:
    for idx in range(NSED):
        plt.figure(figsize=(16, 2))
        plt.plot(wl_in, data_in[idx, :], label=all_tag[idx], color=all_colors[idx])
        plt.legend(loc="upper right")
        plt.grid()
    plt.show()

In [None]:
# One RGBA colour per object: the value 1 - fracinfbalmer is mapped onto the
# 'jet' colormap through a linear normalisation over [0, 1].
jet = plt.get_cmap('jet')
cNorm = colors.Normalize(vmin=0, vmax=1)
scalarMap = cmx.ScalarMappable(cmap=jet, norm=cNorm)
all_colors = scalarMap.to_rgba(1 - fracinfbalmer, alpha=1)

In [None]:
# Scatter plot of the quantile number against fracinfbalmer, one point per
# object, using the precomputed per-object colours.
fig = plt.figure(figsize=(5,4))
ax=fig.add_subplot(1,1,1)
# Explicit colours are given through `color`, so no `cmap` is passed: a
# colormap is only used when scalar data are supplied for colour mapping and
# is otherwise ignored (recent matplotlib versions warn about it).
ax.scatter(fracinfbalmer,target,color=all_colors)
ax.set_xlabel("fracinfbalmer")
ax.set_ylabel("quantile number (target)")
ax.grid()

In [None]:
# Number of distinct quantile values present in `target`.
NQ = np.unique(target).size

In [None]:
# One RGBA colour per quantile: the values NQ, NQ-1, ..., 1 are mapped onto
# the 'jet' colormap through a linear normalisation over [1, NQ], so entry 0
# gets the colour of the top of the range.
jet10 = plt.get_cmap('jet')
cNorm10 = colors.Normalize(vmin=1, vmax=NQ)
scalarMap10 = cmx.ScalarMappable(norm=cNorm10, cmap=jet10)
# Use NQ rather than a literal 10 so that the number of colours always matches
# the number of quantiles used by the normalisation (identical when NQ == 10).
all_colors10 = scalarMap10.to_rgba(np.arange(NQ,0,-1), alpha=1)

In [None]:
# Layout of the subplot grid: two columns and enough rows to hold one panel
# per quantile.
ncols=2
# Ceiling division: with floor division an odd NQ would leave the last
# quantile without a panel. The result is unchanged when NQ is a multiple of
# ncols.
nrows= -(-NQ//ncols)

In [None]:
# Display the number of subplot rows.
nrows

In [None]:
# Grid of panels, one per quantile (panel `idx` shows quantile idx+1). In each
# panel a random ~7 % of the spectra of that quantile is drawn, all with the
# colour assigned to the quantile.
fig, axes = plt.subplots(ncols=ncols,nrows=nrows, sharex=True, sharey=False,figsize=(10,12))

# loop on quantiles
for idx, ax in enumerate(axes.flat):

    selected_indexes = np.where(target==idx+1)[0]

    for idx_sel in selected_indexes:
        u = random.random()
        if u< 0.07:
            ax.plot(wavelengths,spectra[idx_sel,:],c=all_colors10[idx] )
    ax.set_xlim(0,10000)

    # Label the x axis on the bottom row only. The row is derived from the
    # grid shape instead of the literal panel numbers 8 and 9, which are only
    # right for a 5 x 2 grid.
    if idx >= (nrows-1)*ncols:
        # Raw strings: "\l" and "\A" are not valid Python escape sequences.
        ax.set_xlabel(r"$\lambda  (\AA)$")

    # Label the y axis on the first column only.
    if idx%ncols ==0:
        ax.set_ylabel(r"flux $F_\lambda$")

plt.tight_layout()