In [1]:
from exobaconn import read_grid, add_noise, bCNN, normalize, load_cnn, true_vs_pred
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.keras as tfk
plt.rc('font', size=14)
from sklearn.preprocessing import MinMaxScaler
from tqdm import trange
import pickle

# Load the project metadata dictionary; later cells read md['Noise'][...] and
# md['Wavelength'][...] from it.
# The context manager guarantees the file handle is closed once loading is done.
# NOTE(review): pickle.load can execute arbitrary code -- only load metadata
# files that come from a trusted source.
with open('../Data/metadata.p', 'rb') as metadata_file:
    md = pickle.load(metadata_file)

## Data preparation

### Define the locations of the files with the spectra and the parameters

First define the instrument and the type of retrieval.

In [2]:
# Instrument name: used as the key into md['Noise'] and md['Wavelength'],
# and as part of the file name of the saved network weights.
instrument = 'WFC3'

# Retrieval type: selects the training-set files and the number of training
# examples (the lookup further down only defines entries for 1 and 2).
Type = 2

In [3]:
# Training-set files for the selected retrieval type.
f_trans = f'../Data/Training_Sets/trans_type{Type}.npy'
f_param = f'../Data/Training_Sets/parameters_type{Type}.npy'

# Per-instrument entries from the metadata dictionary.
noise = md['Noise'][instrument]
new_wvl = md['Wavelength'][instrument]

# NOTE(review): the source wavelength grid is always the 'NIRSPEC' one,
# independent of `instrument` -- presumably the grid the training spectra were
# generated on; confirm.
wvl = md['Wavelength']['NIRSPEC']

### Define the number of copies to make of each spectrum and the number of examples to use for training and testing

`N_aug` defines the number of noisy copies to make for each spectrum in the training and testing sets.

In [5]:
# Number of noisy copies per spectrum.
# NOTE(review): presumably ordered as [training, testing] -- confirm against read_grid.
N_aug = [20, 1]

# Number of training examples, looked up by retrieval type (only 1 and 2 are defined).
ntrain = {
    1: 68000,
    2: 64000,
}[Type]

# Number of testing examples.
ntest = 8000

### Create the training and testing datasets, and add noise

In [6]:
# Build the training and testing sets from the grid files.
# NOTE(review): the printed log ("Rebinning spectra") suggests read_grid rebins
# from `wvl` to `new_wvl` -- confirm in exobaconn.
X_train, Y_train, X_test, Y_test = read_grid(
    f_trans,
    f_param,
    wvl=wvl,
    new_wvl=new_wvl,
    n_train=ntrain,
    n_test=ntest,
    N_aug=N_aug,
)

# Add instrument noise, then rescale the noisy spectra.
# NOTE(review): the 0.752**2 factor is not explained anywhere in this notebook --
# document its meaning or move it to a named constant. No random seed is set
# either, so (assuming add_noise draws random noise) the noisy sets are not
# reproducible between runs.
X_train = 0.752**2 * add_noise(X_train, noise, floor=5)
X_test = 0.752**2 * add_noise(X_test, noise, floor=5)

Loading files...
Rebinning spectra
X_train...
Y_train...
X_test...
Y_test...


### Normalize the spectra and the parameters

In [7]:
# Normalize the training and testing spectra with identical settings.
X_train_n, X_test_n = (
    normalize(spectra, method='-mean', conc=2)
    for spectra in (X_train, X_test)
)

# Fit the parameter scaler on the training parameters only, then apply the same
# scaling to both sets so that the test set is transformed consistently.
Y_scaler = MinMaxScaler()
Y_scaler.fit(Y_train)
Y_train_n = Y_scaler.transform(Y_train)
Y_test_n = Y_scaler.transform(Y_test)

Save the fitted parameter scaler so that the normalized retrieved parameters can later be converted back into natural units.

In [8]:
# Persist the fitted parameter scaler for the selected retrieval type.
# The context manager flushes and closes the file, so the pickle is guaranteed
# to be completely written to disk before the next cell runs.
with open('../Data/yscaler_type' + str(Type) + '.p', 'wb') as scaler_file:
    pickle.dump(Y_scaler, scaler_file)

## Neural Network

### Define the architecture of the neural network

`arch_conv` is a list of tuples that defines the convolutional layers of the network. Each tuple contains the number of filters and the size of the filters in each layer.

`arch_fc` is a list with the number of neurons of each fully connected layer.

In [9]:
# Convolutional layers, one (number of filters, filter size) tuple per layer.
arch_conv = [
    (16, 17),
    (32, 9),
    (64, 7),
]

# Fully connected layers, one neuron count per layer.
arch_fc = [128]

### Create the neural network

In [10]:
# Build the network from the architecture defined above.
# The first two positional arguments are the second dimensions of the normalized
# spectra and parameter arrays.
# NOTE(review): presumably these are the input and output sizes of the network;
# confirm against the bCNN signature.
cnn = bCNN(
    X_train_n.shape[1],
    Y_train_n.shape[1],
    arch_conv,
    arch_fc,
    arch='cnn',
    activation=tfk.layers.ReLU(),
    act_mu='sigmoid',
    loss='chol',
    maxpool=True,
    bn=False,
    ncols=2,
)

### Train the neural network

In [11]:
# Train on the normalized training set and keep the returned history object.
# The file name passed as `fname` encodes the instrument and the retrieval type.
# NOTE(review): presumably `fname` is where the trained weights are written and
# `patience` controls early stopping -- confirm in exobaconn.
history = cnn.train(
    X_train_n,
    Y_train_n,
    patience=10,
    epochs=500,
    lr=1e-3,
    fname=f'../Data/CNN_weights/{instrument}_type{Type}',
)

Epoch 1/500
 121/2250 [>.............................] - ETA: 36s - loss: -7.7446

KeyboardInterrupt: 