In [6]:
import numpy as np
import glob
import os
from opac_mixer.read import ReadOpacChubb
from opac_mixer.emulator import Emulator
from opac_mixer.emulator import DataIO

import matplotlib.pyplot as plt

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error
from ipywidgets import interact, IntSlider, FloatSlider, Dropdown
%matplotlib widget

In [8]:
# Tag selecting the opacity directories (matched as '*_R_{R}' below); it is also
# embedded in the dataset and model filenames.
R='S1'
# Root of the correlated-k line opacities. The pRT_input_data_path environment
# variable must be set, otherwise os.environ raises a KeyError here.
base = f'{os.environ["pRT_input_data_path"]}/opacities/lines/corr_k'
# glob returns matches in an arbitrary, filesystem-dependent order. Sort them so
# the input file order is the same on every run and machine; anything derived
# from that order (presumably the layout of cached datasets/models - TODO confirm)
# then stays consistent.
files = sorted(glob.glob(os.path.join(base,f'*_R_{R}/*.h5')))
if not files:
    # Fail here with a clear message rather than later inside the reader.
    raise FileNotFoundError(f'no opacity files matching *_R_{R}/*.h5 found below {base}')
# Passed to emulator.setup_sampling_grid and embedded in the dataset/model filenames.
batchsize = int(2**19)

# load: read a previously stored dataset (emulator.load_data) instead of generating it.
load = False
# load_model: forwarded as `load` to emulator.setup_model.
load_model = False

In [9]:
# Reader over the opacity files selected in the configuration cell.
opac = ReadOpacChubb(files)
# The dataset filename encodes the resolution tag and the batch size.
dataset_file = f'data/dataset_{R}_{batchsize}.h5'
emulator = Emulator(opac, filename_data=dataset_file)

In [None]:
# Obtain the train/test split: either generate it or read the stored one,
# depending on the `load` flag.
if not load:
    # Generate from scratch: set up the sampling grid, then compute the mixes.
    emulator.setup_sampling_grid(batchsize=batchsize)
    splits = emulator.setup_mix(do_parallel=True)
else:
    splits = emulator.load_data()
X_train, X_test, y_train, y_test = splits

 16%|█▌        | 82401/524288 [03:58<20:42, 355.71it/s]

In [5]:
# Model file name encodes the resolution tag and the batch size.
model_file = f'data/model_R_{R}_{batchsize}'
# Configure (or, when load_model is True, load) the model.
emulator.setup_model(
    load=load_model,
    filename=model_file,
    tree_method='gpu_hist',
    max_depth=16,
    n_estimators=30,
    subsample=1.0,
)

In [6]:
# Train the model configured by emulator.setup_model. The recorded output lists
# two RMSE columns per round (validation_0, validation_1) - presumably the train
# and test splits; confirm against Emulator.fit.
emulator.fit()

[0]	validation_0-rmse:11.03853	validation_1-rmse:11.03013
[1]	validation_0-rmse:7.74080	validation_1-rmse:7.73519
[2]	validation_0-rmse:5.43203	validation_1-rmse:5.42855
[3]	validation_0-rmse:3.81601	validation_1-rmse:3.81410
[4]	validation_0-rmse:2.68519	validation_1-rmse:2.68470
[5]	validation_0-rmse:1.89434	validation_1-rmse:1.89547
[6]	validation_0-rmse:1.34211	validation_1-rmse:1.34517
[7]	validation_0-rmse:0.95783	validation_1-rmse:0.96359
[8]	validation_0-rmse:0.69093	validation_1-rmse:0.70028
[9]	validation_0-rmse:0.50689	validation_1-rmse:0.52106
[10]	validation_0-rmse:0.38132	validation_1-rmse:0.40162
[11]	validation_0-rmse:0.29643	validation_1-rmse:0.32393
[12]	validation_0-rmse:0.23971	validation_1-rmse:0.27491
[13]	validation_0-rmse:0.20240	validation_1-rmse:0.24497
[14]	validation_0-rmse:0.17774	validation_1-rmse:0.22682
[15]	validation_0-rmse:0.16120	validation_1-rmse:0.21579
[16]	validation_0-rmse:0.14993	validation_1-rmse:0.20900
[17]	validation_0-rmse:0.14200	validati

In [7]:
# Two sets of predictions for the test split:
#  - emulator.predict(...) is compared against the raw targets emulator.y_test
#  - emulator.model.predict(...) is compared against the scaled targets
y_predict_test = emulator.predict(emulator.X_test, shape='same')
y_predict_test_scaled = emulator.model.predict(emulator.X_test)
# Scale the reference targets once instead of once per metric.
y_test_scaled = emulator.output_scaling(emulator.y_test)

# np.sqrt(mean_squared_error(...)) is the ROOT mean squared error, so label it as such.
print('r2score on test:', r2_score(y_true=emulator.y_test, y_pred=y_predict_test))
print('root mean squared error on test:', np.sqrt(mean_squared_error(y_true=emulator.y_test, y_pred=y_predict_test)))
print('mean absolute percentage error on test:', mean_absolute_percentage_error(y_true=emulator.y_test, y_pred=y_predict_test))

print('r2score on scaled test:', r2_score(y_true=y_test_scaled, y_pred=y_predict_test_scaled))
print('root mean squared error on scaled test:', np.sqrt(mean_squared_error(y_true=y_test_scaled, y_pred=y_predict_test_scaled)))
print('mean absolute percentage error on scaled test:', mean_absolute_percentage_error(y_true=y_test_scaled, y_pred=y_predict_test_scaled))

r2score on test: 0.9611614324935208
mean squared error on test: 5.230764989206132
mean absolute percentage error on : 46.49485874729342
r2score on scaled test: 0.9956844439150501
mean squared error on scaled test: 0.1896734938352187
mean absolute percentage error on scaled test: 0.08769144571545857


In [None]:
# Two sets of predictions for the training split:
#  - emulator.predict(...) is compared against the raw targets emulator.y_train
#  - emulator.model.predict(...) is compared against the scaled targets
y_predict_train = emulator.predict(emulator.X_train, shape='same')
y_predict_train_scaled = emulator.model.predict(emulator.X_train)
# Scale the reference targets once instead of once per metric.
y_train_scaled = emulator.output_scaling(emulator.y_train)

# np.sqrt(mean_squared_error(...)) is the ROOT mean squared error (not the mean
# squared or mean absolute error), so label it as such.
print('r2score on train:', r2_score(y_true=emulator.y_train, y_pred=y_predict_train))
print('root mean squared error on train:', np.sqrt(mean_squared_error(y_true=emulator.y_train, y_pred=y_predict_train)))
print('mean absolute percentage error on train:', mean_absolute_percentage_error(y_true=emulator.y_train, y_pred=y_predict_train))

print('r2score on scaled train:', r2_score(y_true=y_train_scaled, y_pred=y_predict_train_scaled, force_finite=True))
print('root mean squared error on scaled train:', np.sqrt(mean_squared_error(y_true=y_train_scaled, y_pred=y_predict_train_scaled)))
print('mean absolute percentage error on scaled train:', mean_absolute_percentage_error(y_true=y_train_scaled, y_pred=y_predict_train_scaled))

r2score on train: 0.9988487232574982


In [None]:
# Reshaped predictions and reference values, keyed by split name so the
# interactive plots below can switch between 'test' and 'train'.
y_predict = dict(
    test=emulator.reshape(y_predict_test),
    train=emulator.reshape(y_predict_train),
)
y_true = dict(
    test=emulator.reshape(emulator.y_test),
    train=emulator.reshape(emulator.y_train),
)

In [None]:
fig_tes, ax_tes = plt.subplots(1,1)
def plot_single_prediction(index_frac, mode):
    """Redraw predicted and true values of one sample, one curve per frequency bin.

    Parameters
    ----------
    index_frac : float
        Relative position of the sample within the selected split; converted to
        a sample index via int(index_frac * n_samples) and clamped to the last
        valid index.
    mode : str
        Key into y_predict / y_true ('train' or 'test').
    """
    ax_tes.cla()
    n_samples = len(y_predict[mode])
    # Clamp so that index_frac == 1 cannot run past the last sample.
    index = np.array([min(int(index_frac*n_samples), n_samples-1)])
    # Pick the sample once; fancy-indexing the full array inside the loop would
    # copy the same sample again for every frequency bin.
    pred = y_predict[mode][index][0]
    true = y_true[mode][index][0]
    # The cumulative weights are identical for every bin, so compute them once.
    weights_cum = opac.weights.cumsum()
    for fi in range(pred.shape[0]):
        # Spread the cumulative weights across the width of frequency bin fi.
        x = opac.bin_edges[fi]+weights_cum*(opac.bin_edges[fi+1]-opac.bin_edges[fi])
        lp, = ax_tes.loglog(x,pred[fi,:], color ='red', alpha=0.5)
        lt, = ax_tes.loglog(x,true[fi,:], color = 'blue', alpha=0.5)
        
    # ax_tes.set_yscale('linear')
    ax_tes.set_ylabel(r'$\kappa$ [cm2/g]')
    ax_tes.set_xlabel(r'wavenumber')
    ax_tes.set_title(f'{mode} data, {index}')
    # One legend entry per colour, using the handles of the last bin drawn.
    ax_tes.legend([lp,lt],['prediction','true'])
    # Request a redraw explicitly, as the other interactive callback does.
    fig_tes.canvas.draw_idle()

interact(plot_single_prediction, index_frac=FloatSlider(min=0., max=.99, step=0.05, value=0.), mode=Dropdown(options=['train','test']));

In [None]:
fig_t, ax_t = plt.subplots(1,1) 
# Initial content: test split, first frequency bin, first g-point.
pred0 = y_predict['test'][:,0,0]
true0 = y_true['test'][:,0,0]
lo0 = min(pred0.min(), true0.min())
hi0 = max(pred0.max(), true0.max())
lt, = ax_t.plot(pred0, true0, 'ro', ms=0.01, linestyle="None")
ax_t.set_xscale('log')
ax_t.set_yscale('log')
# 1:1 reference line spanning the joint range of predictions and true values.
dt, = ax_t.plot([lo0, hi0], [lo0, hi0], color = 'gray', ls = '--')
ax_t.set_ylabel('true values')
ax_t.set_xlabel('predicted values')
    

def update_prediction_test(freqi, gi, mode):
    """Update the predicted-vs-true scatter for one (frequency bin, g-point) pair.

    Parameters
    ----------
    freqi : int
        Index along the second axis of y_predict[mode] / y_true[mode].
    gi : int
        Index along the third axis of y_predict[mode] / y_true[mode].
    mode : str
        Key into y_predict / y_true ('train' or 'test').
    """
    pred = y_predict[mode][:,freqi,gi]
    true = y_true[mode][:,freqi,gi]
    # Use the joint range for the limits and the diagonal. The x-axis shows the
    # predictions, so limits taken from the true values alone would hide exactly
    # those predictions that fall outside the true range.
    lo = min(pred.min(), true.min())
    hi = max(pred.max(), true.max())
    lt.set_ydata(true)
    lt.set_xdata(pred)
    dt.set_ydata([lo, hi])
    dt.set_xdata([lo, hi])
    ax_t.set_xlim([lo, hi])
    ax_t.set_ylim([lo, hi])
    ax_t.set_title(f'{mode} data variance')
    fig_t.canvas.draw_idle()


interact(update_prediction_test, freqi=IntSlider(min=0, max=opac.lf[0]-1, step=1, value=0),gi=IntSlider(min=0, max=opac.lg[0]-1, step=1, value=0), mode=Dropdown(options=['train','test']));