# Configure

In [1]:
""" Use the following commented out code if on colab:
from google.colab import drive
drive.mount('/content/drive')
import sys
sys.path.append('/content/drive/My Drive/Colab Notebooks')
"""

import os

import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error, r2_score
import tensorflow as tf
from tensorflow import keras
from time import perf_counter

from get_Chen_data import get_Chen_data, load_a
from quasar_analysis import model_mean_predict_bins

# Locations of the saved Keras models, relative to the working directory.
# The "Best_*" files are the ones loaded for evaluation further down; the
# "Checkpoint_*" files are the targets handed to ModelCheckpoint in training.
_BEST_MODELS_DIR = os.path.join('models', 'Best_models')
_CHECKPOINTS_DIR = os.path.join('models', 'Checkpoints')

BEST_MODEL_FILENAME_INTERMEDIATE = os.path.join(
    _BEST_MODELS_DIR, 'Best_ANN_intermediate_stage.h5')
CHECKPOINT_FILENAME_INTERMEDIATE = os.path.join(
    _CHECKPOINTS_DIR, 'Checkpoint_intermediate_stage.h5')
BEST_MODEL_FILENAME_FINAL = os.path.join(
    _BEST_MODELS_DIR, 'Best_ANN_final_stage.h5')
CHECKPOINT_FILENAME_FINAL = os.path.join(
    _CHECKPOINTS_DIR, 'Checkpoint_final_stage.h5')

In [2]:
# get_Chen_data() hands back the quasars and spectra objects followed by the
# X/y train, validation and test splits, in exactly this order.
(quasars, spectra,
 X_train, X_val, X_test,
 y_train, y_val, y_test) = get_Chen_data()

# The number of columns per spectrum fixes the network's input shape, so all
# three splits must agree on it.  Check here so a mismatch fails immediately
# with a clear message instead of deep inside Keras during fit/predict.
spec_length = X_train.shape[1]
assert X_val.shape[1] == spec_length and X_test.shape[1] == spec_length, (
    "train/val/test spectra do not all have the same length")

Loading quasars and spectra
Forming X and y for train, val, test
Completed.


# Create ML model

In [3]:
# Reset Keras state and seed NumPy/TensorFlow *before* the layers are created.
# The first layer declares input_shape, so the weights are created (and
# randomly initialised) as soon as the Sequential model is constructed;
# seeding only in the training cell leaves the starting weights unseeded.
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

model = keras.models.Sequential([
    # Each sample is a single-channel sequence of spec_length values.
    keras.layers.Conv1D(32, kernel_size=100, strides=60,
                        activation="relu", input_shape=(spec_length, 1)),
    keras.layers.Conv1D(64, kernel_size=25, activation="relu"),
    keras.layers.BatchNormalization(),
    keras.layers.Conv1D(128, kernel_size=25, strides=5,
                        activation="relu"),
    # The first LSTM emits the whole sequence so that the second LSTM can
    # consume it; the second one emits only its final output.
    keras.layers.LSTM(128, return_sequences=True),
    keras.layers.LSTM(128),
    # NOTE(review): no activation is given, so this layer is linear and is
    # followed directly by another linear layer -- confirm this is intended.
    keras.layers.Dense(128),
    # Single linear output unit.
    keras.layers.Dense(1),
])

In [None]:
# NOTE(review): `model` was already built in the preceding cell, so clearing
# the session here does not reset that model; the call is kept as it was.
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

# Make sure the checkpoint directory exists so that saving does not depend on
# Keras creating it (older TensorFlow releases do not).
os.makedirs(os.path.dirname(CHECKPOINT_FILENAME_INTERMEDIATE), exist_ok=True)

# save_best_only=True: the file is overwritten only when the monitored
# quantity (the callback's default is the validation loss) improves.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    CHECKPOINT_FILENAME_INTERMEDIATE, save_best_only=True)

# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras versions reject.
optimizer = keras.optimizers.Nadam(learning_rate=1e-3)
model.compile(loss="mse", optimizer=optimizer)

# The inputs get a trailing axis of size 1 to match input_shape
# (spec_length, 1) of the first Conv1D layer.
history = model.fit(
    np.expand_dims(X_train.to_numpy(), -1),
    y_train.to_numpy(),
    epochs=20,
    validation_data=(np.expand_dims(X_val.to_numpy(), -1),
                     y_val.to_numpy()),
    callbacks=[checkpoint_cb])

Train on 29040 samples, validate on 8262 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20

In [None]:
"""The best set of weights came from a slightly different file format,
which was changed to avoid having to post a pickle file on the web.
"""
# Replace the in-memory model with the saved intermediate-stage model (this
# is the BEST_* file, not the checkpoint written by the training cell).
model = keras.models.load_model(BEST_MODEL_FILENAME_INTERMEDIATE)

# Convert explicitly with .to_numpy(), as the training cells do, and add the
# trailing channel axis.  Only the last axis of the prediction is squeezed so
# that a single-row input still yields a 1-D array the metrics can consume.
y_val_pred = np.squeeze(
    model.predict(np.expand_dims(X_val.to_numpy(), -1)), axis=-1)
mse = mean_squared_error(y_val, y_val_pred)
r2 = r2_score(y_val, y_val_pred)
print(f'For the validation set, mse = {mse:.04f} and r2 = {r2:.04f}')
# The "standard error" reported here is the square root of the mse.
print(f'This mse corresponds to a standard error of {np.sqrt(mse):.04f}')

In [None]:
# Second training stage: keeps training whatever `model` currently is (when
# the notebook is run in order, the intermediate-stage model loaded from
# disk) at a learning rate of 1e-5 instead of the earlier 1e-3.
np.random.seed(42)
tf.random.set_seed(42)

# Make sure the checkpoint directory exists so that saving does not depend on
# Keras creating it (older TensorFlow releases do not).
os.makedirs(os.path.dirname(CHECKPOINT_FILENAME_FINAL), exist_ok=True)

# save_best_only=True: the file is overwritten only when the monitored
# quantity (the callback's default is the validation loss) improves.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    CHECKPOINT_FILENAME_FINAL, save_best_only=True)

# `learning_rate` is the supported keyword; `lr` is a deprecated alias that
# newer Keras versions reject.
optimizer = keras.optimizers.Nadam(learning_rate=1e-5)
model.compile(loss="mse", optimizer=optimizer)

# The inputs get a trailing axis of size 1 to match the model's
# (spec_length, 1) input shape.
history = model.fit(
    np.expand_dims(X_train.to_numpy(), -1),
    y_train.to_numpy(),
    epochs=10,
    validation_data=(np.expand_dims(X_val.to_numpy(), -1),
                     y_val.to_numpy()),
    callbacks=[checkpoint_cb])

## Analyse and predict by object

In [None]:
# Replace the in-memory model with the saved final-stage model.
model = keras.models.load_model(BEST_MODEL_FILENAME_FINAL)


def _score_split(X, y):
    """Predict on one split with `model` and return (predictions, mse, r2).

    X is converted with .to_numpy() and given a trailing channel axis, as in
    the training cells.  Only the last axis of the prediction is squeezed, so
    a single-row split still produces a 1-D array the metrics can consume.
    """
    inputs = np.expand_dims(X.to_numpy(), -1)
    pred = np.squeeze(model.predict(inputs), axis=-1)
    return pred, mean_squared_error(y, pred), r2_score(y, pred)


# The "standard error" printed below is the square root of the mse.
y_val_pred, mse, r2 = _score_split(X_val, y_val)
print(f'For the validation set, mse = {mse:.04f} and r2 = {r2:.04f}')
print(f'That mse corresponds to a standard error of {np.sqrt(mse):.04f}')

y_test_pred, mse, r2 = _score_split(X_test, y_test)
print(f'\nFor the test set, mse = {mse:.04f} and r2 = {r2:.04f}')
print(f'That mse corresponds to a standard error of {np.sqrt(mse):.04f}')

In [None]:
"""Here are the bins used to stratify the objects by train/val/test in
creating X_train, ..., y_train, ...
"""

strata_bin_edges = [1, 2, 3, 60, np.inf]


def _report_strata_bins(X, y, title):
    """Call model_mean_predict_bins on one split using strata_bin_edges."""
    return model_mean_predict_bins(quasars, model, X, y, strata_bin_edges,
                                   title=title)


# One call per split; the last call stays the cell's final expression.
_report_strata_bins(X_train, y_train, "Train")
_report_strata_bins(X_val, y_val, "Val")
_report_strata_bins(X_test, y_test, "Test")

In [None]:
"""Here is a slightly more detailed set of bins"""

more_detailed_bin_edges = [1, 2, 3, 4, 5, 6, 10, 60, np.inf]


def _report_detailed_bins(X, y, title):
    """Call model_mean_predict_bins on one split using the detailed edges."""
    return model_mean_predict_bins(quasars, model, X, y,
                                   more_detailed_bin_edges, title=title)


# One call per split; the last call stays the cell's final expression.
_report_detailed_bins(X_train, y_train, "Train")
_report_detailed_bins(X_val, y_val, "Val")
_report_detailed_bins(X_test, y_test, "Test")