# The Executor for WR-Blue
In this notebook, we train neural networks that focus on the 3000-5200 Angstrom wavelength range. 
Firstly, we configure the GPU settings.  

In [4]:
import keras.backend.tensorflow_backend as KTF
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Configure TensorFlow so that GPU memory is allowed to grow instead of being
# fully reserved at start-up (gpu_options.allow_growth).
config=tf.ConfigProto()
config.gpu_options.allow_growth=True
# Create exactly ONE session and register it with Keras.  The previous version
# built a second, anonymous session first, registered it, and then immediately
# replaced it, leaving an unused session open.
sess=tf.Session(config=config)
KTF.set_session(sess)

Using TensorFlow backend.


In [5]:
import os
import glob
import yaml
import keras
import astropy
import numpy as np
import pandas as pd
import keras.backend as K
import astropy.units as u
import astropy.constants as c
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from scipy.interpolate import interp1d
from scipy.signal import savgol_filter
from keras.models import Sequential,Model
from dust_extinction.averages import GCC09_MWAvg
from sklearn.model_selection import train_test_split
from dust_extinction.parameter_averages import CCM89,F99
from keras.layers import Dense,Dropout,LocallyConnected1D,AveragePooling1D
from keras.layers import Input,Conv1D,MaxPooling1D,BatchNormalization,Activation,Add,UpSampling1D

## Normalize the Spectra
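Unlike the 2000-10000 Angstrom spectra, this time the spectra are normalized by their average flux in the 3000-5000 Angstrom window (call `Normalizer` with `shortwave=3000, longwave=5000`; its default window is 6500-7500 Angstrom). 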

In [3]:
def Normalizer(spec,shortwave=6500,longwave=7500):
    """Scale the flux column of a spectrum by its mean flux inside a window.

    Parameters
    ----------
    spec : 2-D numpy array; column 0 is wavelength, column 1 is flux.
    shortwave, longwave : window edges, in the units of column 0.  Each edge
        is snapped to the row whose wavelength is closest to it.

    Returns
    -------
    The same array object that was passed in.  Column 1 is overwritten in
    place.  If both edges snap to the same row, nothing is changed.
    """
    wavelengths=spec[:,0]
    idx_short=np.argmin(np.abs(wavelengths-shortwave))
    idx_long=np.argmin(np.abs(wavelengths-longwave))
    # Degenerate window (e.g. both edges outside the covered range): leave as is.
    if idx_short==idx_long:
        return spec
    # The window is half-open [lo, hi), whichever order the edges came in.
    lo,hi=min(idx_short,idx_long),max(idx_short,idx_long)
    spec[:,1]=spec[:,1]/np.average(spec[lo:hi,1])
    return spec

## The Data Augmentation
It's hard to say... but in my previous deep learning data augmentation, I used a naive method.  
Here, the DataAugmenter function is the correct data augmentation function, which adds Poisson-like noise to the spectra (the noise amplitude scales with the square root of the flux).  
In my previous data augmentation, it seems I just set the "S" parameter to about 10.  
The original function is kept as "NaiveDataAugmenter".  

Both data augmentation functions make two extra copies of the original data, add noise to one copy, and apply a Savitzky-Golay filter to the other copy.  
After the augmentation, the data set is 3 times its original size.  

In [4]:
def DataAugmenter(Xoriginal,Yoriginal,noisparam1=50,noisparam2=5):
    """Triple a data set: original + smoothed copy + noisy copy.

    Parameters
    ----------
    Xoriginal : numpy array indexed [sample, pixel, 1].  Pixels 400..419 are
        averaged to obtain a per-sample reference flux, so at least 420
        pixels are needed.  The array is NOT modified.
    Yoriginal : pandas object with one row per sample; it is repeated
        unchanged for the two augmented copies.
    noisparam1, noisparam2 : the per-sample noise parameter S is drawn
        uniformly from [noisparam2, noisparam2+noisparam1); a larger S means
        less noise.  Defaults reproduce the former hard-coded 50 and 5.

    Returns
    -------
    (Xaugment, Yaugment) : the rows of [original, smoothed, noisy] stacked in
        that order, three times as many rows as the input.

    Notes
    -----
    Earlier versions set ``X2=Xoriginal`` without copying, so the smoothing
    loop overwrote the caller's array: the "original" block of the output was
    a second smoothed copy and the input (e.g. the test set) was altered.
    """
    # Independent copy for the smoothed branch; see Notes above.
    X2=Xoriginal.copy()
    Y2=Yoriginal
    X3=Xoriginal
    # Reference flux per sample.  NOTE(review): the name suggests flux near
    # 5500 Angstrom; whether pixels 400:420 match that on the current
    # wavelength grid cannot be verified from this function.
    F5500=X3[:,400:420].mean(axis=1)
    F5500=F5500.reshape([X3.shape[0],1,1])
    Slist=np.random.random(X3.shape[0])*noisparam1+noisparam2
    Slist=Slist.reshape([X3.shape[0],1,1])
    # Standard-normal draws scaled by sqrt(flux).
    Noiselist=np.random.randn(X3.shape[0]*X3.shape[1]).reshape([X3.shape[0],X3.shape[1],1])*X3**0.5
    # Multiplicative form: builds a new array, Xoriginal stays untouched.
    X3=X3*(1+Noiselist/F5500**0.5/Slist)
    Y3=Yoriginal
    # Savitzky-Golay smoothing with a random odd window (7..25) and a random
    # polynomial order (2..6), applied along the pixel axis of each sample.
    for i in range(len(X2)):
        X2[i]=savgol_filter(X2[i].T,
                            np.random.choice([7,9,11,13,15,17,19,21,23,25]),
                            np.random.choice([2,3,4,5,6])).T
    Xaugment=np.concatenate([Xoriginal,X2,X3])
    Yaugment=pd.concat([Yoriginal,Y2,Y3])
    return Xaugment,Yaugment

In [5]:
def NaiveDataAugmenter(Xoriginal,Yoriginal):
    """Older augmentation: original + smoothed copy + log-normal-noise copy.

    Parameters
    ----------
    Xoriginal : numpy array whose first axis indexes samples; each sample is
        transposed before filtering, so it is expected to be 2-D
        (pixel, channel).  The array is NOT modified.
    Yoriginal : pandas object with one row per sample; it is repeated
        unchanged for the two augmented copies.

    Returns
    -------
    (Xaugment, Yaugment) : the rows of [original, smoothed, noisy] stacked in
        that order, three times as many rows as the input.

    Notes
    -----
    Earlier versions set ``X2=Xoriginal`` without copying, so the smoothing
    loop overwrote the caller's array and the "original" block of the output
    was in fact a second smoothed copy.
    """
    # Independent copy for the smoothed branch; see Notes above.
    X2=Xoriginal.copy()
    Y2=Yoriginal
    # Every pixel is multiplied by 1.1**g with g drawn from a standard normal.
    X3=Xoriginal*1.1**(np.random.randn(np.array(Xoriginal.shape).prod()).reshape(Xoriginal.shape))
    Y3=Yoriginal
    # Savitzky-Golay smoothing with a random odd window (7..25) and a random
    # polynomial order (2..6), applied along the pixel axis of each sample.
    for i in range(len(X2)):
        X2[i]=savgol_filter(X2[i].T,
                            np.random.choice([7,9,11,13,15,17,19,21,23,25]),
                            np.random.choice([2,3,4,5,6])).T
    Xaugment=np.concatenate([Xoriginal,X2,X3])
    Yaugment=pd.concat([Yoriginal,Y2,Y3])
    return Xaugment,Yaugment

## Read the Data
Next, we read the data from the pre-saved data set, "X.npy" and "Y.csv". "X.npy" stores the fluxes, and "Y.csv" stores the element abundances.  
Note that the wavelength grid is different here. Previously it was 2000 pixels between 2000 and 10000 Angstrom; now it is 719 pixels between 3000 and 5200 Angstrom, i.e. indices 448 to 1167 of the original grid.  

In [6]:
# Hard-coded offsets subtracted from Y before the tanh squashing.  There are 92
# entries, one per "<i>_<j>" column (i in 6..28, j in 1..4).
# NOTE(review): presumably the per-column mean of the raw Y table -- the
# provenance of these numbers is not visible in this notebook; confirm.
Yaverage=np.array([1.49088116, 1.49097508, 1.50731088, 1.50455924, 1.51035671,
       1.50782288, 1.51765324, 1.49176123, 1.49786083, 1.49164187,
       1.51008039, 1.50649329, 1.49706889, 1.48256907, 1.50291872,
       1.49671495, 1.48266993, 1.49328473, 1.49776985, 1.48788236,
       1.50800197, 1.49640525, 1.49671263, 1.48329884, 1.50654272,
       1.51546595, 1.50350355, 1.51359888, 1.50366677, 1.51347009,
       1.49226848, 1.49165777, 1.49447523, 1.50564056, 1.50430572,
       1.4965303 , 1.48774539, 1.50293614, 1.50234283, 1.49455285,
       1.49912642, 1.49035065, 1.49374915, 1.48790414, 1.49469701,
       1.51057406, 1.49123219, 1.49597135, 1.50784911, 1.49772004,
       1.5341486 , 1.5098752 , 1.5031406 , 1.48253015, 1.48185943,
       1.5142945 , 1.51335541, 1.49587574, 1.5064204 , 1.4859998 ,
       1.49735422, 1.49443917, 1.49713147, 1.50904882, 1.50430961,
       1.48899186, 1.4843948 , 1.50350289, 1.51124936, 1.50585086,
       1.51427044, 1.49639282, 1.50156886, 1.50700966, 1.49711655,
       1.50462769, 1.48889458, 1.50254653, 1.48807825, 1.49757542,
       1.20827367, 1.48634746, 1.49944391, 1.51260678, 1.19763427,
       1.51552949, 1.50722289, 1.49200582, 1.20885273, 1.48434282,
       1.49319914, 1.49298437])
# Hard-coded scales that (Y - Yaverage) is divided by; same length and column
# order as Yaverage.
# NOTE(review): presumably the per-column standard deviation -- confirm.
Ystd=np.array([0.85998959, 0.86814719, 0.8687621 , 0.87050864, 0.86075304,
       0.86440532, 0.86272026, 0.86967863, 0.8626639 , 0.86734201,
       0.86057179, 0.87010535, 0.87411794, 0.8643628 , 0.86209518,
       0.86797349, 0.86725306, 0.85800528, 0.86381744, 0.86611775,
       0.8669094 , 0.86762243, 0.8675576 , 0.86361385, 0.86510062,
       0.8652972 , 0.86980369, 0.86254431, 0.87056344, 0.86437873,
       0.86389094, 0.87339658, 0.86688045, 0.86508027, 0.87134972,
       0.86946127, 0.86630519, 0.8612666 , 0.86535052, 0.86876499,
       0.85862661, 0.8706371 , 0.86687728, 0.86952742, 0.8607824 ,
       0.86498409, 0.85954263, 0.85826469, 0.86454108, 0.86876391,
       0.86799523, 0.86188565, 0.86329134, 0.86601738, 0.87293888,
       0.86968641, 0.86252809, 0.87444087, 0.86285946, 0.8640128 ,
       0.86677515, 0.87035309, 0.86068416, 0.86235551, 0.86720743,
       0.86189559, 0.86178644, 0.86734348, 0.86053826, 0.86848727,
       0.87080408, 0.86002404, 0.86218669, 0.85973503, 0.85875187,
       0.87255914, 0.86074365, 0.86338974, 0.85837912, 0.86682464,
       0.73932632, 0.85765432, 0.86773346, 0.87239269, 0.73557205,
       0.86274201, 0.87080225, 0.86799464, 0.73390453, 0.86346123,
       0.87181945, 0.87096847])

In [27]:
# Load the pre-saved flux array and keep indices 448..1166 of axis 1
# (719 pixels) for every sample.
X=np.load('DataSet/X.npy')
X=X[:,448:1167,:]
# Target table; the first CSV column becomes the index and is therefore not
# part of the resulting value array.
Y=np.array(pd.read_csv('DataSet/Y.csv',index_col=0))
# Wavelength grid, cut to the same 719 pixels as X.
wave=np.load('DataSet/wave.npy')
wave2=wave[448:1167]
# Divide ALL fluxes by one scalar: the mean over samples 52..717 (axis 0) and
# pixels 167..268 (axis 1).
# NOTE(review): 52:718 is applied to the sample axis, not the pixel axis; if a
# per-spectrum or pixel-window normalisation was intended this slice is wrong
# -- confirm.
X=X/np.average(X[52:718,167:269])
# Standardise each target column, then squash it into (0, 1) with tanh.
Y=(Y-Yaverage)/Ystd
Y=np.tanh(Y)/2+0.5
# Column names are "<i>_<j>" for i in 6..28 and j in 1..4 (92 columns).
Y=pd.DataFrame(Y,columns=[str(i)+'_'+str(j) for i in range(6,29) for j in range(1,5)])

## Read the data part 2
This part is the original part to read the data into the python program.  

In [None]:
# Read one table per id in the list below ('csvs/<id>.csv'); only id 1 is
# listed.  data[n] is later paired with specs[n] / speccounts[n] by position.
data=[]
for i in [1]:
    data.append(pd.read_csv('csvs/'+str(i)+'.csv'))

In [24]:
# One glob pattern per spectrum directory ('Spec/<id>/*'); only id 1 is listed.
specdirs=['Spec/'+str(i)+'/*' for i in [1]]
# specs[n][m]      : array loaded from the m-th file matched in directory n
# speccounts[n][m] : integer parsed from that file's name ('<number>.txt')
specs=[]
speccounts=[]
for i in specdirs:
    specsmall=[]
    speccountsmall=[]
    for j in glob.glob(i):
        k=np.genfromtxt(j)
        specsmall.append(k)
        # Take the number from the file's base name.  The previous
        # j.split('/')[2] depended on the exact directory depth and on '/'
        # being the path separator returned by glob.
        speccountsmall.append(int(os.path.basename(j).split('.txt')[0]))
    print(i)
    specs.append(specsmall)
    speccounts.append(speccountsmall)

Spec/1/*


In [None]:
# Build X / Y / Yaux from the raw spectra (specs, speccounts) and the parameter
# tables (data).  Requires `wave` to have been loaded by an earlier cell.
wave2=wave[448:1167]
X=[]
Y=[]
Yaux=[]
for i in range(len(specs)):
    for j in range(len(specs[i])):
        spdata=specs[i][j]
        # Normalise by the mean flux between 3000 and 5000; Normalizer writes
        # into the array it is given, so specs[i][j] is changed as well.
        spdata=Normalizer(spdata,longwave=5000,shortwave=3000)
        # Skip spectra whose normalised flux exceeds 30 anywhere.
        if np.max(spdata[:,1])>30:continue
        # Keep the flux column of rows 448..1166 (719 pixels).
        X.append(spdata[448:1167,1])
        # The number parsed from the file name is used as the positional row
        # index into the table: columns 3.. go to Y, columns 0..2 to Yaux.
        Y.append(np.array(data[i].iloc[speccounts[i][j],3:]))
        Yaux.append(np.array(data[i].iloc[speccounts[i][j],:3]))

X=np.array(X)
X=X.reshape(X.shape[0],719,1)
Y=np.array(Y)
Yaux=np.array(Yaux)
# Fourth auxiliary quantity: product of columns 1 and 2 (named 'Time' and
# 'Vph' on the next line).
YRph=Yaux[:,1]*Yaux[:,2]
Yaux=pd.DataFrame(Yaux,columns=['Lumi','Time','Vph'])
Yaux['Rph']=YRph

# Standardise each target column, then squash it into (0, 1) with tanh.
Y=(Y-Yaverage)/Ystd
Y=np.tanh(Y)/2+0.5
Y=pd.DataFrame(Y,columns=[str(i)+'_'+str(j) for i in range(6,29) for j in range(1,5)])

# Same transformation for the auxiliary columns, with hard-coded offsets and
# scales given in the order Lumi, Time, Vph, Rph.
# NOTE(review): presumably means / standard deviations of those columns --
# provenance not visible here; confirm.
YauxAver=np.array([8.68,19.5,6900,133000])
YauxStd=np.array([1.734*10**-2,2.0255,729.43,1781])
Yaux=(Yaux-YauxAver)/YauxStd
Yaux=np.tanh(Yaux)/2+0.5
Yaux=pd.DataFrame(Yaux,columns=['Lumi','Time','Vph','Rph'])

## Separate and Augment
This step splits the data into a training data set and a testing data set.  
Then the data augmentation function is applied twice to the training data set and twice to the testing data set.  

In [None]:
# 90 % of the samples go to training.  No random_state is passed, so the split
# differs from run to run.
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,train_size=0.9)
# Each DataAugmenter call triples the number of rows, so two passes give 9x.
X_train2,Y_train2=DataAugmenter(X_train,Y_train)
X_train3,Y_train3=DataAugmenter(X_train2,Y_train2)

# The test set is augmented the same way.  The cells visible in this notebook
# evaluate on X_test / Y_test and do not reference X_test2 / X_test3.
X_test2,Y_test2=DataAugmenter(X_test,Y_test)
X_test3,Y_test3=DataAugmenter(X_test2,Y_test2)

  # This is added back by InteractiveShellApp.init_path()


## The Selected Spectra
Here I list all the observed spectra that are used during training.  
As soon as a model is trained, these 11 spectra are fed to it for prediction and plotted on the truth-prediction plot.  
If the real supernova contains more of a certain element than we expected, the prediction will return a value extremely close to the upper limit of the parameter space.  
In that case, we may generate additional spectra and expand the parameter space. Such an accident did happen for one or two element-zones, and I was not aware of it until after 60k spectra had been generated.  
That may not be a big issue, as the predicted element abundances can still synthesize spectra close to the observations; in any case, I don't have enough computing hours to take care of it.  

All these spectra are available on WISeREP, except SN2011by, which was provided by Professor Ryan J. Foley of the University of California, Santa Cruz.  
The redshift data are also from WISeREP.  
The extinction values and models are from the related literature cited in my paper.  

In [None]:
# Labels of the 11 observed spectra.  Every list in this cell has one entry per
# spectrum, in this same order.
snname=[
    'SN2011fe_0.4d','SN2011fe_-2.6d','SN2011fe_3.7d',
    'SN2013dy_-3.1d','SN2013dy_-1.1d','SN2013dy_0.9d','SN2013dy_3.9d',
    'SN2011iv_0.6d',
    'SN2015F_-2.3d',
    'ASASSN-14lp_-4.4d',
    'SN2011by_-0.4d',
]

# Files holding the observed spectra.
spectralist=[
    'ObserveSpectra/Prediction/SN2011fe/SN2011fe_0.4d.dat',
    'ObserveSpectra/Prediction/SN2011fe/SN2011fe_-2.6d.dat',
    'ObserveSpectra/Prediction/SN2011fe/SN2011fe_3.7d.dat',
    'ObserveSpectra/Prediction/SN2013dy/SN2013dy_-3.1d.flm',
    'ObserveSpectra/Prediction/SN2013dy/SN2013dy_-1.1d.flm',
    'ObserveSpectra/Prediction/SN2013dy/SN2013dy_0.9d.flm',
    'ObserveSpectra/Prediction/SN2013dy/SN2013dy_3.9d.flm',
    'ObserveSpectra/Prediction/SN2011iv/SN2011iv_0.6d.flm',
    'ObserveSpectra/Prediction/SN2015F/SN2015F_-2.3d.flm',
    'ObserveSpectra/Prediction/ASASSN-14lp/ASASSN-14lp_-4.4d.flm',
    'ObserveSpectra/Prediction/SN2011by/SN2011by_-0.4d.flm',
]

# First extinction model per spectrum (handed to NewPredictor as ext1):
# CCM89 with Rv=3.1 for the first ten spectra, F99 with Rv=3.1 for the last.
ext1list=[CCM89(Rv=3.1) for _ in range(10)]+[F99(Rv=3.1)]
# Second extinction model per spectrum (ext2): GCC09_MWAvg for all eleven.
ext2list=[GCC09_MWAvg() for _ in range(11)]

# E(B-V) values used with ext1list and ext2list respectively.
ext1EbvList=[0,0,0,
             0.206,0.206,0.206,0.206,
             0,0.035,0.33,0.039]
ext2EbvList=[0,0,0,
             0.135,0.135,0.135,0.135,
             0,0.175,0.021,0.013]

# Redshift of each spectrum.
zlist=[0.000804,0.000804,0.000804,
       0.00389,0.00389,0.00389,0.00389,
       0.006494,0.0049,0.0051,0.002843]

## Store the testing data
This part is quite special compared to other deep learning paradigms, as we use the testing data set to estimate the one-sigma error.  
So we store the testing data set beforehand, and it will be used in the prediction part.  

In [None]:
# Persist the held-out set so the prediction notebook can reuse exactly the
# same samples for its one-sigma error estimate.
# Create the target directory first; both writers fail if it does not exist.
os.makedirs('DataCache/NaiveRun',exist_ok=True)
Y_test.to_csv('DataCache/NaiveRun/Ytest.csv')
np.save('DataCache/NaiveRun/Xtest.npy',X_test)

# Let the DL Run! 
First, I store all the deep learning network structures in a "NiceModels.py" file; please check that it exists.  
In the cell below, I choose the multi-residual neural network with 7 cells to run. Although the CellNumber option shows 6, **there is one additional cell not explicitly shown, so don't be misled**.  

As there are only 1000 spectra available, please save the output models and pictures into the NaiveRun folder, and please do not overwrite the HunKRun models, which were actually trained on 100k spectra.  
When the training starts, the program prints the model structure using a built-in Keras function. The learning rates and batch sizes are not arguments of the UltraDenseResNeuro function; you may need to edit NiceModels.py manually if these hyperparameters are important for your task.  
NiceModels.py also contains some other pre-stacked neural networks: DensResNeuro is an MRNN with a fixed number of cells; ConNeuro is a plain CNN with a variable number of cells; SimpleConNeuro is a plain CNN with no cell structure (**note: don't be misled, and I apologize for the misleading arguments shown in these functions**); SimpleUltraDenseResNeuro is an MRNN with no cell structure (and it seems to contain only one adding operation). SimpleUltraDenseConcatNeuro and UltraDenseConcatNeuro are densely connected residual networks, but I did not try adding bottleneck components.  
Be careful: training may occupy 8 gigabytes of memory.  

Thanks to Keras's design, I can use the same neural network function, and Keras automatically infers the dimensions of each layer. 

In [None]:
%run -i NiceModels.py

In [None]:
def ResPlotter(Yout,Yreal,res,elemzone):#Yreal=Y_test[k]
    """Draw the prediction-versus-truth scatter plot with one-sigma envelopes.

    Draws on the current matplotlib figure (resized to 10x10 inches) and
    returns None; saving or closing the figure is left to the caller.

    Parameters
    ----------
    Yout : predicted values for the test set (x axis).
    Yreal : true values for the same samples (y axis).
    res : score shown in the title after "MSE is".
    elemzone : element-zone label shown in the title.

    Notes
    -----
    Earlier versions ignored ``Yreal`` and ``elemzone`` when computing the
    envelopes and the title and read the globals ``Y_test[k]`` and ``k``
    instead, so the function only worked inside a loop defining those names.

    OneSigmaCalculator is not defined in this notebook; it is presumably
    provided by NiceModels.py via ``%run -i`` -- confirm.
    """
    plt.scatter(Yout,Yreal,label='Test Data')
    plt.xlim(0,1)
    plt.ylim(0,1)
    plt.xlabel('Predict')
    plt.ylabel('Real')
    fig=plt.gcf()
    fig.set_size_inches(10,10)
    # Identity line: a perfect prediction lies on it.
    plt.plot([0,3],[0,3],c='r',label='Reference')
    # Evaluate the envelopes on a dense grid of centres, each with a 0.02 window.
    centers=np.linspace(0.01,0.99,num=1000)
    UpperSigma=[]
    LowerSigma=[]
    for center in centers:
        lowersigma,uppersigma=OneSigmaCalculator(Yout=Yout,Yreal=Yreal,center=center,window=0.02)
        UpperSigma.append(uppersigma)
        LowerSigma.append(lowersigma)
    plt.plot(centers,np.array(UpperSigma),c='k',linewidth=4,label=r'1$\sigma$ Upper Limit')
    plt.plot(centers,np.array(LowerSigma),c='violet',linewidth=4,label=r'1$\sigma$ Lower Limit')
    plt.legend(loc='best')
    plt.title('Residual Plot of '+elemzone+', MSE is '+str(res))
    return

In [None]:
def NewPredictor(model,spectra,ext1=CCM89(Rv=3.1),ext2=GCC09_MWAvg(),ext1bv=0,ext2bv=0,z=0):
    """Prepare one observed spectrum and run the model on it.

    Steps: undo the ext2 extinction at the observed wavelengths, divide the
    wavelengths by (1+z), undo the ext1 extinction at the shifted
    wavelengths, resample onto the global grid ``wave2``, normalise, predict.

    Parameters
    ----------
    model : object with a Keras-style ``predict`` method.
    spectra : 2-D array; column 0 is wavelength in Angstrom, column 1 is
        flux.  The caller's array is NOT modified.
    ext1, ext1bv : host-galaxy extinction model and its E(B-V).
    ext2, ext2bv : Milky-Way extinction model and its E(B-V).
    z : redshift.

    Returns
    -------
    Whatever ``model.predict`` returns for the single prepared spectrum.

    Notes
    -----
    Two defects of the earlier version are fixed here:
    * It called ``Normalizer(spnew)`` with the default 6500-7500 window.
      ``wave2`` is described in this notebook as covering 3000-5200 Angstrom,
      so both window edges snapped to the last pixel and the spectrum was
      never normalised.  The 3000-5000 window used when building the training
      set is now passed explicitly.
    * It overwrote the caller's ``spectra`` array in place.
    """
    #To notice, the ext1 is the extinction of the host galaxy, while the ext2 is the extinction of milky way. 
    # Work on a private float copy so the input array is left untouched.
    spectra=np.array(spectra,dtype=float)
    spectra[:,1]=spectra[:,1]/ext2.extinguish(spectra[:,0]*u.AA,Ebv=ext2bv)
    spectra[:,0]=spectra[:,0]/(1+z)
    spectra[:,1]=spectra[:,1]/ext1.extinguish(spectra[:,0]*u.AA,Ebv=ext1bv)
    # Linear interpolation onto the network grid; values outside the observed
    # range are extrapolated.
    fw=interp1d(spectra[:,0],spectra[:,1],fill_value='extrapolate')
    flux=fw(wave2)
    spnew=np.array([wave2,flux]).T
    # Same normalisation window as the training spectra.
    spnew=Normalizer(spnew,shortwave=3000,longwave=5000)
    flux=spnew[:,1]
    # Shape (1, pixels, 1): one sample, one channel.
    Yout=model.predict(flux.reshape(1,len(wave2),1))
    return Yout

## Select the element-zone
Also, not all the models will be run, because I use a filter based on HSTMdSaver/SmallRun. In that directory I stored $23\times4=92$ models trained on 10000 spectra; their names follow the pattern element_zone.  
In DataRes.csv, I recorded the performance of these 92 models, tested on 1836 synthesized spectra. I always use it as a benchmark to decide which element_zone combinations should be trained when the data set becomes too large and the training time too long.  
Finally, I chose 34 element_zones for the real training.  

In [6]:
# Benchmark scores of the small-run models, one column per element-zone.
ProberRes=pd.read_csv('HSTMdSaver/SmallRun/DataRes.csv')
# All 92 element-zone names "<i>_<j>", i in 6..28, j in 1..4.
RawElems=['%d_%d'%(i,j) for i in range(6,29) for j in range(1,5)]
# Keep a zone unless its score in row 0 is above 0.1.
ChosenElem=[]
for k in RawElems:
    if not ProberRes.loc[0,k]>0.1:
        ChosenElem.append(k)

## Now, the deep learning starts
Before running, please make sure that the directories "HSTMdSaver/NaiveRun/" and "HSTplotout/NaiveRun/" exist and are empty.  
The models will be stored in the HSTMdSaver directory, and the related pictures will be stored in the HSTplotout directory.  
In the HSTplotout directory, prediction-truth plots of the testing data set, together with the predictions on the 11 observed SNe, are generated right after each neural network is trained.  
If you have 2 GPUs, it is okay to choose "usegpu=True". If you have only one GPU or only CPUs, please choose "usegpu=False".  

In [None]:
%run -i NiceModels.py
# Train one network per chosen element-zone, evaluate it on the held-out set,
# predict the observed spectra, and save the model, the plots and two tables.
ElemZones=[str(i)+'_'+str(j) for i in range(6,29) for j in range(1,5)]
# One row of test scores; zones that are not trained keep 0.
ElemRes=pd.DataFrame(np.zeros([1,len(ElemZones)]),columns=ElemZones)
ProberRes=pd.read_csv('HSTMdSaver/SmallRun/DataRes.csv')
# One row per observed spectrum, one column per trained zone.  Sizes are taken
# from the lists themselves instead of the former hard-coded 11 x 35.
PredData=pd.DataFrame(np.zeros([len(snname),len(ChosenElem)+1]),columns=['NameList']+ChosenElem)
PredData['NameList']=snname
for k in ChosenElem:
    model,his1,his2=UltraDenseResNeuro(NameShell=k,CellNumber=6,X_train=X_train3,Y_train=Y_train3,X_test=X_test,Y_test=Y_test,usegpu=True)
    model.save('HSTMdSaver/NaiveRun/'+k+'.hdf')
    res=model.evaluate(X_test,Y_test[k])
    ElemRes[k]=res
    Yout=model.predict(X_test,batch_size=2000)
    for i in range(len(spectralist)):
        spr=np.genfromtxt(spectralist[i])
        Ypred=NewPredictor(model,spr,ext1list[i],ext2list[i],ext1EbvList[i],ext2EbvList[i],z=zlist[i])
        # Record the prediction; previously PredData was never filled, so
        # Prediction.csv contained only zeros.
        # NOTE(review): assumes the model returns one value per spectrum,
        # i.e. Ypred has shape (1, 1) -- confirm against NiceModels.py.
        PredData.loc[i,k]=Ypred[0][0]
        # Observed spectra have no known truth, so they are marked on the
        # diagonal at their predicted value.
        plt.scatter(Ypred[0],Ypred[0],marker='^',s=200,zorder=10,c='k')
    ResPlotter(Yout=Yout,Yreal=Y_test[k],res=res,elemzone=k)
    plt.savefig('HSTplotout/NaiveRun/'+k+'.png')
    plt.savefig('HSTplotout/NaiveRun/'+k+'.eps')
    plt.savefig('HSTplotout/NaiveRun/'+k+'.pdf')
    # Close the figure so plots of different zones do not pile up.
    plt.close('all')
ElemRes.to_csv('HSTMdSaver/NaiveRun/DataRes.csv')
PredData.to_csv('HSTMdSaver/NaiveRun/Prediction.csv')

# Okay, Finished Here.  