# Prepare SL Data for Deep Learning analysis

- **Part 1**
- author : Sylvie Dagoret-Campagne
- affiliation : IJCLab/IN2P3/CNRS
- creation date : 2023/03/23
- last update : 2023/04/24
- FORS2 : https://github.com/ericnuss/fors2
- Result of StarLight : /sps/lsst/groups/photoz/fors2/FORS2/ResStarlight



          /Users/dagoret/MacOSX/GitHub/LSST/FORS2>ls -l
              total 0
              drwxrwsr-x  3 dagoret  staff   96 Aug 21  2018 ResStarlight
              drwxr-xr-x  9 dagoret  staff  288 Dec 19 18:01 fors2

## Import

In [None]:
import os,sys,re
import h5py
from astropy.table import Table

In [None]:
# Import some generally useful packages

import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cmx

from matplotlib.backends.backend_pdf import PdfPages

%matplotlib inline
import pandas as pd
from itertools import cycle, islice


In [None]:
from scipy import ndimage

#from scipy import interp as scinterp
from scipy.interpolate import interp1d

import glob
import collections
#from def_raw_seds import *

In [None]:
# Global matplotlib defaults: small square figures and enlarged fonts.
params = {'legend.fontsize': 'large',
          'figure.figsize': (4, 4)}
# Axis labels, titles and tick labels all share the largest font size.
params.update({key: 'xx-large' for key in ('axes.labelsize',
                                           'axes.titlesize',
                                           'xtick.labelsize',
                                           'ytick.labelsize')})
plt.rcParams.update(params)

In [None]:
import matplotlib.offsetbox
props = dict(boxstyle='round',edgecolor="w",facecolor="w", alpha=0.5)

In [None]:
from def_raw_seds import *

In [None]:
from raw_data_analysis import *

In [None]:
#from spectroscopy import *

In [None]:
# Import the primary photometry classes from rubin_sim.photUtils

#import rubin_sim.phot_Utils.Bandpass as Bandpass
#import rubin_sim.phot_Utils.Sed as Sed
#from rubin_sim.data import get_data_dir

In [None]:
#pysynphot_root_path=os.environ['PYSYN_CDBS']
#path_sed_calspec=os.path.join(pysynphot_root_path,'calspec')
# pysynphot
#import pysynphot as S
#S.refs.setref(area=1)
#S.refs.getref()

In [None]:
# Wavelength separating the "blue" and "red" parts of each spectrum
# (same unit as the WL grid used for the spectra).
WL_BALMER_BREAK = 3646.0
# Number of quantile classes; NQUANTILES + 1 evenly spaced levels from 0 to 1.
NQUANTILES = 10
quantiles = np.linspace(0.0, 1.0, num=NQUANTILES + 1)
quantiles

In [None]:
FLAG_REMOVE_RMAG0 = True

## Start

In [None]:
# Absolute path of the directory the notebook is executed from;
# main_dir is an alias of the same path.
main_dir = cwd = os.path.abspath("")
print(f"notebook current executing path : {cwd}")

## Configuration

### SL

## Start

### Catalog

In [None]:
t = Table.read(filename_fits_catalog)

In [None]:
t

### List

In [None]:
lst=fors2_jct_SL() # puts the spectra back in the rest frame (i.e. at z=0)

In [None]:
lst = sorted(lst)
print(lst)

In [None]:
all_specfilenames = os.listdir(path_BC)

In [None]:
all_specfilenames = sorted(all_specfilenames)

In [None]:
# Extract the integer spectrum number from file names such as
# "SPEC123_HZ4_BC.txt". Directory entries that do not follow this naming
# scheme (hidden files, sub-directories, ...) are skipped rather than
# raising an IndexError on the first mismatch.
_spec_name_pattern = re.compile(r'^SPEC(\d+)_HZ4_BC[.]txt$')
all_numbers = [int(m.group(1))
               for m in map(_spec_name_pattern.match, all_specfilenames)
               if m is not None]

In [None]:
all_numbers = sorted(all_numbers)

In [None]:
# One RGBA colour per SED, sampled along the 'jet' colormap with the
# SED index normalised to the range [0, NSED].
NSED = len(all_numbers)
jet = plt.get_cmap('jet')
cNorm = colors.Normalize(vmin=0, vmax=NSED)
scalarMap = cmx.ScalarMappable(norm=cNorm, cmap=jet)
_sed_indices = np.arange(NSED)
all_colors = scalarMap.to_rgba(_sed_indices, alpha=1)

In [None]:
cut = t["ID"] == 3
t[cut]

# Read all SEDs, interpolate them on a common 0 - 10000 Å grid and normalise them by their integral

In [None]:
# Common wavelength grid (integer steps of 1, from 0 to 9999) and the
# output matrix holding one normalised spectrum per row.
WL = np.arange(10000)
NWL = WL.size
NSAMPLES = NSED
flux_out = np.zeros((NSAMPLES, NWL))

In [None]:
# Per-spectrum products, filled in the order of all_numbers.
all_normspectra = []
all_integ = []
all_tag = []
all_sed = []

# Catalog quantities attached to each spectrum.
all_redshifts = []
all_num = []
all_ra = []
all_dec = []
all_Rmag = []
all_RV = []
all_eRV = []
all_RT = []
all_Nsp = []


for idx,the_num in enumerate(all_numbers):

    # Select the catalog entry of this spectrum once and read every
    # column from that single selection.
    tcut     = t["ID"] == the_num
    row      = t[tcut]
    redshift = row["z"].value[0]
    lines    = row["Lines"].value[0]
    ra       = row["RAJ2000"].value[0]
    dec      = row["DEJ2000"].value[0]
    Rmag     = row["Rmag"].value[0]
    RV       = row["RV"].value[0]
    e_RV     = row["e_RV"].value[0]
    RT       = row["RT"].value[0]
    Nsp      = row["Nsp"].value[0]

    filename = f"SPEC{the_num}_HZ4_BC.txt"
    tag = f"SPEC{the_num} , z={redshift:.2f}, lines={lines}"
    fullfilename = os.path.join(path_BC,filename)
    the_sed=SED(fullfilename,label=tag)
    all_sed.append(the_sed)
    arr = np.loadtxt(fullfilename)

    # select wavelength in an appropriate range
    # NOTE(review): the mask is computed without the last row of the file,
    # so that row is never selected - confirm this is intended.
    wl=arr[:-1,0]
    wlrange = np.where(np.logical_and(wl>=0,wl<=10000))[0]

    wl=arr[wlrange,0]
    flambda=arr[wlrange,1]

    # resample the spectrum on the common wavelength grid
    x = WL
    y = np.interp(x, wl, flambda)

    # normalize spectrum wrt its integral between 0-10000 angstrom
    integ = np.trapz(y,x)
    the_normspectra = y/integ
    flux_out[idx,:]= the_normspectra

    all_normspectra.append(the_normspectra)
    # keep the normalisation integral so that all_integ is actually filled
    all_integ.append(integ)

    all_tag.append(tag)

    # RAJ2000	DEJ2000	z	u_z	Rmag	RV	e_RV	RT	Nsp
    all_redshifts.append(redshift)
    all_num.append(the_num)
    all_ra.append(ra)
    all_dec.append(dec)
    all_Rmag.append(Rmag)
    all_RV.append(RV)
    all_eRV.append(e_RV)
    all_RT.append(RT)
    all_Nsp.append(Nsp)
    

In [None]:
# Turn the per-spectrum Python lists into numpy arrays so that they can
# be indexed with an array of selected positions.
(all_redshifts, all_num, all_ra, all_dec, all_Rmag,
 all_RV, all_eRV, all_RT, all_Nsp) = map(
    np.array,
    (all_redshifts, all_num, all_ra, all_dec, all_Rmag,
     all_RV, all_eRV, all_RT, all_Nsp),
)

## Remove RMag = 0

In [None]:
if FLAG_REMOVE_RMAG0:
    # positions (in catalog-loop order) of the entries whose Rmag is not exactly 0
    idx_selected = np.where(all_Rmag != 0)[0]

In [None]:
if FLAG_REMOVE_RMAG0:
    # Apply the same row selection to every catalog column.
    (all_redshifts, all_num, all_ra, all_dec, all_Rmag,
     all_RV, all_eRV, all_RT, all_Nsp) = (
        column[idx_selected]
        for column in (all_redshifts, all_num, all_ra, all_dec, all_Rmag,
                       all_RV, all_eRV, all_RT, all_Nsp)
    )

In [None]:
if FLAG_REMOVE_RMAG0:
    NSAMPLES = len(idx_selected)

    # Keep only the rows of the selected spectra. Indexing with an index
    # array returns a new array, so no pre-allocation is needed.
    flux_out_sel = flux_out[idx_selected,:]
    flux_out     = flux_out_sel

    # Keep the plot labels aligned with the rows of flux_out: they are
    # later looked up with the same row index (all_tag[idx]).
    all_tag = [all_tag[i] for i in idx_selected]

# Blue part / red part 

In [None]:
# Split the wavelength grid at the Balmer break: strictly below the
# break is "blue", everything else is "red".
_is_blue = WL < WL_BALMER_BREAK
idx_blue = np.where(_is_blue)[0]
idx_red = np.where(~_is_blue)[0]
wl_blue, wl_red = WL[idx_blue], WL[idx_red]

In [None]:
# Columns of flux_integ (NSAMPLES rows, one per spectrum):
# idx=0 flux before Balmer Break
# idx=1 flux after Balmer Break
# idx=2 flux sum  before and after Balmer Break
# idx=3 flux fraction before Balmer break
# idx=4 flux fraction after Balmer break
# idx=5 flux ratio  before / after Balmer break
# idx=6 flux ratio  after / before Balmer break


flux_integ=np.zeros((NSAMPLES,7))

In [None]:
# Integrate every spectrum on each side of the Balmer break and store
# the integrals, their sum, the two fractions and the two ratios.
for idx in range(NSAMPLES):

    flux_blue = flux_out[idx,idx_blue]
    flux_red  = flux_out[idx,idx_red]

    integ_blue = np.trapz(flux_blue, wl_blue)
    integ_red = np.trapz(flux_red, wl_red)
    integ_sum = integ_blue + integ_red

    flux_integ[idx,0] = integ_blue
    flux_integ[idx,1] = integ_red
    flux_integ[idx,2] = integ_sum

    # fractions of the total
    flux_integ[idx,3] = integ_blue / integ_sum
    flux_integ[idx,4] = integ_red / integ_sum

    # ratios in both directions
    flux_integ[idx,5] = integ_blue / integ_red
    flux_integ[idx,6] = integ_red / integ_blue
    

In [None]:
# Histogram of the flux fraction emitted before the Balmer break, with
# the quantile boundaries overlaid, and quantile class of each spectrum.
plt.figure(figsize=(10,6))
plt.hist(flux_integ[:,3],bins=50 );
quantiles_thr = np.quantile(flux_integ[:,3], quantiles)
# Digitize against the interior thresholds only, so that the classes run
# from 1 to NQUANTILES. np.digitize bins exclude their right edge, so
# using every threshold (minimum and maximum included) would put the
# maximum value alone in an extra class NQUANTILES+1.
digitized = np.digitize(flux_integ[:,3],quantiles_thr[1:-1]) + 1

for thr in quantiles_thr:
    plt.axvline(thr,color='k')

plt.title("fraction of energy before Balmer break")

In [None]:
digitized

In [None]:
plt.hist(flux_integ[:,4],bins=50,facecolor="b" );

In [None]:
all_ra.shape

In [None]:
# One RGBA colour per galaxy, mapping redshift from 0 to the largest
# redshift onto the 'jet' colormap. The colormap object created in this
# cell (jet2) is the one passed to the mappable, so the cell does not
# depend on a variable defined in an earlier cell.
jet2 = plt.get_cmap('jet')
cNorm2 = colors.Normalize(vmin=0, vmax=np.max(all_redshifts))
scalarMap2 = cmx.ScalarMappable(norm=cNorm2, cmap=jet2)
all_colors2 = scalarMap2.to_rgba(all_redshifts, alpha=1)

## Position of SL galaxies vs redshifts

In [None]:
# Sky map of the galaxies, colour-coded by redshift.
fig, ax = plt.subplots(figsize=(5,4))
im = ax.scatter(all_ra, all_dec, c=all_redshifts,
                s=10, marker="o", alpha=0.5, cmap="jet")
ax.grid()
ax.set_xlabel("ra (deg)")
ax.set_ylabel("dec (deg)")
ax.set_title("Sky location of FORS2 galaxies")
cbar = fig.colorbar(im, ax=ax)
cbar.set_label('redshift')

## Position vs Redshift

In [None]:
# Left panel: redshift histogram. Right panel: sky map coloured by redshift.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
ax_hist, ax_sky = axes

ax_hist.hist(all_redshifts, bins=50, facecolor="b")
ax_hist.set_xlabel("redshift")

im = ax_sky.scatter(all_ra, all_dec, c=all_redshifts,
                    s=10, marker="o", alpha=0.5, cmap="jet")
cbar = fig.colorbar(im, ax=ax_sky)
cbar.set_label('redshift')
ax_sky.grid()
ax_sky.set_xlabel("ra (deg)")
ax_sky.set_ylabel("dec (deg)")
ax_sky.set_title("Sky location of FORS2 galaxies")

plt.tight_layout()
plt.suptitle("redshift",fontsize=20)

## Position of galaxies vs Rmag

In [None]:
# Left panel: Rmag histogram. Right panel: sky map coloured by Rmag.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
ax_hist, ax_sky = axes

ax_hist.hist(all_Rmag, bins=50, facecolor="b")
ax_hist.set_xlabel("Rmag")

im = ax_sky.scatter(all_ra, all_dec, c=all_Rmag,
                    s=10, marker="o", alpha=0.5, cmap="jet")
cbar = fig.colorbar(im, ax=ax_sky)
cbar.set_label('Rmag')
ax_sky.grid()
ax_sky.set_xlabel("ra (deg)")
ax_sky.set_ylabel("dec (deg)")
ax_sky.set_title("Sky location of FORS2 galaxies")

plt.tight_layout()
plt.suptitle("Rmag",fontsize=20)

## Position of galaxies vs RT

In [None]:
# Left panel: RT histogram. Right panel: sky map coloured by RT.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
ax_hist, ax_sky = axes

ax_hist.hist(all_RT, bins=50, facecolor="b")
ax_hist.set_xlabel("RT")

im = ax_sky.scatter(all_ra, all_dec, c=all_RT,
                    s=10, marker="o", alpha=0.5, cmap="jet")
cbar = fig.colorbar(im, ax=ax_sky)
cbar.set_label('RT')
ax_sky.grid()
ax_sky.set_xlabel("ra (deg)")
ax_sky.set_ylabel("dec (deg)")
ax_sky.set_title("Sky location of FORS2 galaxies")

plt.tight_layout()
plt.suptitle("RT",fontsize=20)

## Position of galaxies vs RV

In [None]:
# Left panel: RV histogram. Right panel: sky map coloured by RV.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
ax_hist, ax_sky = axes

ax_hist.hist(all_RV, bins=50, facecolor="b")
ax_hist.set_xlabel("RV")

im = ax_sky.scatter(all_ra, all_dec, c=all_RV,
                    s=10, marker="o", alpha=0.5, cmap="jet")
cbar = fig.colorbar(im, ax=ax_sky)
cbar.set_label('RV')
ax_sky.grid()
ax_sky.set_xlabel("ra (deg)")
ax_sky.set_ylabel("dec (deg)")
ax_sky.set_title("Sky location of FORS2 galaxies")

plt.tight_layout()
plt.suptitle("RV",fontsize=20)

## Position vs Nsp

In [None]:
# Left panel: Nsp histogram. Right panel: sky map coloured by Nsp.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
ax_hist, ax_sky = axes

ax_hist.hist(all_Nsp, bins=50, facecolor="b")
ax_hist.set_xlabel("Nsp")

im = ax_sky.scatter(all_ra, all_dec, c=all_Nsp,
                    s=10, marker="o", alpha=0.5, cmap="jet")
cbar = fig.colorbar(im, ax=ax_sky)
cbar.set_label('Nsp')
ax_sky.grid()
ax_sky.set_xlabel("ra (deg)")
ax_sky.set_ylabel("dec (deg)")
ax_sky.set_title("Sky location of FORS2 galaxies")

plt.tight_layout()
plt.suptitle("Nsp",fontsize=20)

# Save in a file 

In [None]:
# (dataset name, values, on-disk dtype), written in this order.
_h5_datasets = (
    ("flambda", flux_out, "float32"),
    ("wl", WL, "float32"),
    ("redshift", all_redshifts, "float32"),
    ("quantile", digitized, "int"),
    ("num", all_num, "int"),
    ("fracinfbalmer", flux_integ[:,3], "float32"),
    ("ra", all_ra, "float32"),
    ("dec", all_dec, "float32"),
    ("Rmag", all_Rmag, "float32"),
    ("RV", all_RV, "float32"),
    ("eRV", all_eRV, "float32"),
    ("RT", all_RT, "float32"),
    ("Nsp", all_Nsp, "float32"),
)

# Opening in 'w' mode creates the file or truncates an existing one.
with h5py.File('SLspectra.hdf5', 'w') as hf:
    for ds_name, ds_values, ds_dtype in _h5_datasets:
        hf.create_dataset(ds_name, data=ds_values, dtype=ds_dtype)

In [None]:
# NOTE(review): hf was opened in a `with` block, which already closes the
# file on exit, so this call looks redundant - confirm it can be dropped.
hf.close()

# Plots

In [None]:
# 'jet' colormap resampled to NQUANTILES discrete colours. plt.get_cmap
# is used (as elsewhere in this notebook) because matplotlib.cm.get_cmap
# is deprecated and no longer available in recent Matplotlib releases.
cmap = plt.get_cmap('jet', NQUANTILES)

In [None]:
cmap

In [None]:
cmap(0)

In [None]:
# Overlay all normalised spectra, coloured by their quantile class.
fig = plt.figure(figsize=(16,10))
ax=fig.add_subplot(1,1,1)
for idx in range(NSAMPLES):

    nq = digitized[idx]-1
    # cmap holds NQUANTILES colours indexed 0..NQUANTILES-1: reverse the
    # class order and clamp so that no class points outside the table.
    col = cmap(max(0, min(NQUANTILES-1, NQUANTILES-1-nq)))
    ax.plot(WL,flux_out[idx,:], color=col)


ax.axvline(WL_BALMER_BREAK,color="k")
# No legend is drawn here: none of the artists above carries a label.
ax.grid()
# raw string: \l and \A are LaTeX commands, not Python escape sequences
ax.set_xlabel(r"wavelength $\lambda   (\AA)$")
ax.set_title("Spectra Energy distribution of SL synthetised Templates (SED)")

In [None]:
# One thin figure per spectrum, coloured by quantile class and labelled
# with the spectrum tag.
if 1:
    #plt.figure(figsize=(16,20))
    for idx in range(NSAMPLES):
        plt.figure(figsize=(16,2))

        nq = digitized[idx]-1
        # cmap holds NQUANTILES colours indexed 0..NQUANTILES-1: reverse
        # the class order and clamp so that no class points outside it.
        col = cmap(max(0, min(NQUANTILES-1, NQUANTILES-1-nq)))

        plt.plot(WL,flux_out[idx,:], color=col,label=all_tag[idx])
        plt.legend(loc="upper right")
        plt.grid()
        #plt.axvline(4150,color="k")
        #plt.axvline(4250,color="k")
        #plt.ylim(-0.02,0.4)
        #plt.xlim(0.,20000.)

    plt.show()

# Read the data back from the file

In [None]:
# Re-open the file read-only and load the spectra matrix and the
# wavelength grid fully into memory.
with h5py.File('SLspectra.hdf5', 'r') as hf:
    data_in, wl_in = hf['flambda'][:], hf['wl'][:]


In [None]:
data_in.shape

In [None]:
wl_in.shape

In [None]:
#plt.figure(figsize=(16,20))
# Disabled check plot: one thin figure per spectrum read back from the file.
if 0:
    # Iterate over the rows actually stored in the file: after the Rmag
    # selection there can be fewer rows than NSED.
    for idx in range(data_in.shape[0]):
        plt.figure(figsize=(16,2))
        #plt.plot(all_wl[idx],all_normspectra[idx], color=all_colors[idx],label=all_tag[idx])
        plt.plot(wl_in,data_in[idx,:], color=all_colors[idx],label=all_tag[idx])
        plt.legend(loc="upper right")
        plt.grid()
        #plt.axvline(4150,color="k")
        #plt.axvline(4250,color="k")
        #plt.ylim(-0.02,0.4)
        #plt.xlim(0.,20000.)
    plt.show()