In [1]:
import pandas as pd
import numpy as np
from tqdm.notebook import tqdm

import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.io as pio
pio.templates.default = 'plotly_white'

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Load the BDT evaluation table and keep the rows of the '5M' model for the
# two `opt` settings of interest (23 and 0).
bdt_J19 = pd.read_csv('df_bdt_eval.csv')
_is_5m = bdt_J19['model'] == '5M'
bdt_J19_5M_23 = bdt_J19[(bdt_J19['opt'] == 23) & _is_5m]
bdt_J19_5M_0 = bdt_J19[(bdt_J19['opt'] == 0) & _is_5m]

In [3]:
# Ask interactively whether the notebook runs on the VM: any answer other than
# "yes" (case-insensitive) is treated as "no".
vm = input().lower() == 'yes'

yes


In [4]:
import pickle

# Load the two pickled energy regressors; the list order (index 0 = "ideal",
# index 1 = "real") is relied upon by every later cell that indexes `xgbregs`.
# NOTE(review): pickle executes arbitrary code on load - only use model files
# from a trusted source.
xgbregs = []
for model_tag in ("ideal", "real"):
    # `with` closes the file handle as soon as the model is deserialised.
    with open("models/xgb_energy_{}.dat".format(model_tag), "rb") as model_file:
        xgbreg = pickle.load(model_file)
    xgbregs.append(xgbreg)

In [None]:
# Data root: absolute location on the VM, otherwise relative to the notebook.
path = '/mnt/cephfs/ml_data/mc_2021/' if vm else ''

# Rs = [0, 10.7, 13.5, 15.6, 17.2]

# Radial bin edges, compared against the `edepR` column. Region k covers
# Rs[k] < edepR < Rs[k+1]; the extra last region is the whole volume
# edepR < Rs[-1].
Rs = [0, 5, 7, 10, 11, 12, 13, 14, 15, 16, 16.5, 17.2]

# Test-set directory evaluated by the model with the same index in `xgbregs`.
dataset_names = ['ProcessedTestIdeal', 'ProcessedTestReal']
# Files are named 0MeV.csv.gz ... 10MeV.csv.gz.
n_energy_files = 11

# Results are indexed as y_*_all[region][model][energy_file].
y_true_all = [[[None] * n_energy_files for _ in xgbregs] for _ in Rs]
y_pred_all = [[[None] * n_energy_files for _ in xgbregs] for _ in Rs]

for j, (model, name) in enumerate(zip(xgbregs, dataset_names)):
    for i in tqdm(range(n_energy_files), desc=name):
        # Read and predict each file exactly once; the regions below are only
        # row selections of the same table, so re-reading it per region is
        # redundant work.
        test = pd.read_csv('{}processed_data/{}/{}MeV.csv.gz'.format(path, name, i))
        radius = test['edepR'].to_numpy()
        edep = test['edep'].to_numpy()
        # Features are every column except the last five.
        # NOTE(review): assumes the last five columns are truth/target columns - confirm.
        # Assumes the regressor predicts each row independently, so that
        # predicting the full file and masking equals predicting the subset.
        edep_preds = model.predict(np.array(test.iloc[:, :-5]))

        for k in range(len(Rs)):
            if k < len(Rs) - 1:
                # Strict inequalities: events exactly on a bin edge are dropped.
                in_region = (radius > Rs[k]) & (radius < Rs[k + 1])
            else:
                in_region = radius < Rs[-1]
            y_true_all[k][j][i] = edep[in_region]
            y_pred_all[k][j][i] = edep_preds[in_region]

# Kept for cells that still read these former loop variables
# (they hold the entries of the last region).
y_true_array = y_true_all[-1]
y_pred_array = y_pred_all[-1]

  0%|          | 0/12 [00:00<?, ?it/s]

In [None]:
# Residuals (prediction - truth), indexed as diffs[region, model, energy].
# Each cell holds a 1-D array whose length depends on how many events fall in
# the region, so the container must be an explicit object array: np.array()
# on ragged nested sequences raises ValueError on recent NumPy versions.
n_regions = len(y_true_all)
n_models = len(y_true_all[0])
n_energies = len(y_true_all[0][0])

diffs = np.empty((n_regions, n_models, n_energies), dtype=object)
for k in range(n_regions):
    for j in range(n_models):
        for i in range(n_energies):
            diffs[k, j, i] = y_pred_all[k][j][i] - y_true_all[k][j][i]

In [None]:
# Energy assigned to each of the 11 test files: 1.022 + file index
# (the axis label used later calls this "Visible energy, MeV").
energies = (1.022 + np.arange(11)).round(5)
energies

In [None]:
# Model labels; note that `names` is rebound to the region labels further
# down, before any plotting function reads it.
names = [
    'J21 1M Ideal',
    'J21 1M Real',
    'J19 5M Ideal',
    'J19 5M Real',
]

In [None]:
# Sanity check: number of events per region for the first model and first energy file.
[diffs[region_idx, 0, 0].shape for region_idx in range(len(Rs))]

In [None]:
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
from scipy.stats import norm

# Number of histogram bins used for every Gaussian fit.
nbins = 150


def _gauss_pdf(x, mu, sig):
    """Normal probability density with mean `mu` and standard deviation `sig`."""
    return norm.pdf(x, loc=mu, scale=sig)


# Fit a Gaussian to the residual distribution of every (region, model, energy)
# cell. a_all[j][k][i] is the fitted (mu, sigma); errors_all[j][k][i] is the
# 2x2 covariance matrix returned by curve_fit.
a_all = []
errors_all = []
for j in tqdm(range(diffs.shape[0])):
    a_array = []
    errors_array = []
    for k in range(diffs.shape[1]):
        fit_params = []
        fit_covs = []
        for i in range(diffs.shape[2]):
            # np.histogram yields the same density-normalised counts and bin
            # edges as Axes.hist for finite data, without creating (and
            # closing) a throw-away matplotlib figure for each fit.
            n, bins = np.histogram(diffs[j][k][i], bins=nbins, density=True)
            centers = 0.5 * (bins[1:] + bins[:-1])
            pars, cov = curve_fit(_gauss_pdf, centers, n, p0=[0, 1])
            fit_params.append(pars)
            fit_covs.append(cov)
        a_array.append(fit_params)
        errors_array.append(fit_covs)
    a_all.append(a_array)
    errors_all.append(errors_array)

In [None]:
# One colour per plotted region. The plotting code indexes this list by region
# number and there are 12 regions (11 radial bins + full volume), so the list
# must have at least 12 entries; the first five are the original palette.
colors = [
    'red', 'darkviolet', 'blue', 'green', 'black',
    'orange', 'teal', 'magenta', 'saddlebrown', 'olive',
    'deepskyblue', 'grey',
]

In [None]:
def _relative_fit_errors(diag):
    """Return 100 * sqrt(cov[diag][diag]) / energy for every Gaussian fit.

    `diag` selects the diagonal element of the covariance matrices stored in
    `errors_all` (0 -> mu, 1 -> sigma). The result is nested as
    [region][model][energy], matching `errors_all`.
    """
    return [
        [
            [
                100 * np.sqrt(errors_all[region][model][i][diag][diag]) / energies[i]
                for i in range(len(energies))
            ]
            for model in range(diffs.shape[1])
        ]
        for region in range(diffs.shape[0])
    ]


# Uncertainties of the fitted sigma and mu, in percent of the energy.
error_sigma_all = _relative_fit_errors(1)
error_mu_all = _relative_fit_errors(0)

In [None]:
# Fitted Gaussian parameters expressed in percent of the energy, nested as
# [region][model][energy]: sigma (index 1) gives the resolution, mu (index 0)
# gives the bias.
_n_regions, _n_models = diffs.shape[0], diffs.shape[1]
_energy_idx = range(len(energies))

res_all = [
    [
        [100 * a_all[region][model][i][1] / energies[i] for i in _energy_idx]
        for model in range(_n_models)
    ]
    for region in range(_n_regions)
]
bias_all = [
    [
        [100 * a_all[region][model][i][0] / energies[i] for i in _energy_idx]
        for model in range(_n_models)
    ]
    for region in range(_n_regions)
]

In [None]:
# Legend labels: one per pair of consecutive radial edges, then the full volume.
names = [
    'R: {}-{} m'.format(r_low, r_high)
    for r_low, r_high in zip(Rs[:-1], Rs[1:])
] + ['R: 0-17.2 m']
leg = False

def plot_results(appr=False, ylim=2.5):
    """Plot resolution (top row) and bias (bottom row) versus energy per region.

    One column per model in ``xgbregs`` (titled "Ideal" and "Real") and one
    marker series per region. Reads the notebook globals ``energies``,
    ``res_all``, ``bias_all``, ``error_sigma_all``, ``error_mu_all``,
    ``names``, ``colors``, ``diffs`` and ``xgbregs``; with ``appr=True`` it
    additionally reads ``func`` and the coefficient arrays ``a``, ``b``, ``c``.

    Parameters
    ----------
    appr : bool
        When True, overlay ``func(x, a, b, c)`` on the resolution panels and
        save to the ``appr_``-prefixed PDF.
    ylim : float
        Upper limit of the resolution axes (the lower limit is 0).

    The figure is displayed and written as a PDF into ``plots/``.
    """
    import os  # local import: only needed to make sure the output folder exists

    x_lin = np.linspace(0.8, 11.5, 1000)

    fig = make_subplots(
        rows=2, cols=2,
        shared_xaxes=True,
        vertical_spacing=0.01,
        row_width=[0.25, 0.75],
        column_widths=[0.5, 0.5],
        subplot_titles=("Ideal", "Real"),
    )

    def region_color(k):
        # Cycle through the palette so that having more regions than colours
        # cannot raise IndexError; the marker symbol still differs per region.
        return colors[k % len(colors)]

    def marker_trace(y, y_err, k, showlegend):
        # One marker series with error bars for region k.
        return go.Scatter(
            x=energies,
            y=y,
            mode='markers',
            marker=dict(color=region_color(k), symbol=k),
            showlegend=showlegend,
            error_y=dict(type='data', width=10, array=y_err, visible=True),
            name=names[k],
        )

    for i in range(len(xgbregs)):
        # Legend entries are emitted once, by the second column only.
        leg = i != 0
        for k in range(diffs.shape[0]):
            fig.add_trace(
                marker_trace(res_all[k][i], error_sigma_all[k][i], k, leg),
                row=1, col=i + 1,
            )
            fig.add_trace(
                marker_trace(bias_all[k][i], error_mu_all[k][i], k, False),
                row=2, col=i + 1,
            )

    if appr:
        for i in range(len(xgbregs)):
            for k in range(len(names)):
                fig.add_trace(
                    go.Scatter(
                        x=x_lin,
                        y=func(x_lin, a[:, i][k], b[:, i][k], c[:, i][k]),
                        mode='lines',
                        # Colour set on the line explicitly (and kept on the
                        # marker) so the curve matches its region's markers.
                        line=dict(color=region_color(k)),
                        opacity=0.5,
                        showlegend=False,
                        name=names[k],
                        marker=dict(color=region_color(k)),
                    ),
                    row=1, col=i + 1,
                )

    xaxis = dict(
        showline=True,
        ticks='outside',
        mirror=True,
        tick0=1,
        dtick=1,
        linecolor='black',
        showgrid=True,
        gridcolor='grey',
        gridwidth=0.25,
    )

    def yaxis(value_range):
        # Shared y-axis style; only the displayed range differs between rows.
        return dict(
            showline=True,
            ticks='outside',
            mirror=True,
            linecolor='black',
            range=value_range,
            showgrid=True,
            gridcolor='grey',
            gridwidth=0.25,
            zeroline=True,
            zerolinecolor='black',
            zerolinewidth=0.25,
        )

    fig.update_layout(
        xaxis3_title="Visible energy, MeV",
        xaxis4_title="Visible energy, MeV",
        yaxis1_title="Resolution, %",
        yaxis3_title="Bias, %",

        xaxis1=xaxis,
        xaxis2=xaxis,
        xaxis3=xaxis,
        xaxis4=xaxis,
        yaxis1=yaxis([0, ylim]),
        yaxis2=yaxis([0, ylim]),
        yaxis3=yaxis([-0.25, 0.25]),
        yaxis4=yaxis([-0.25, 0.25]),

        showlegend=True,
        height=500,
        width=950,
        font=dict(
            family="Times New Roman",
            size=16,
        ),

        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.05,
            xanchor="right",
            x=1,
            title_font_family="Times New Roman",
            font=dict(
                family="Times New Roman",
                size=16,
                color="black",
            ),
        ),
    )

    fig.show()

    # write_image does not create missing directories.
    os.makedirs('plots', exist_ok=True)
    if appr:
        pio.write_image(fig, 'plots/appr_compare_results_for_diff_regions.pdf', height=500, width=950)
    else:
        pio.write_image(fig, 'plots/compare_results_for_diff_regions.pdf', height=500, width=950)

In [None]:
# Draw the resolution/bias panels without fitted curves (appr defaults to
# False, ylim to 2.5) and save them to plots/compare_results_for_diff_regions.pdf.
plot_results()

In [None]:
def a(x, a):
    """Return |a / sqrt(x)|, the first term of the quadrature sum in `func`."""
    term = a / x**0.5
    return np.sqrt(term**2)


def b(x, b):
    """Return a list holding |b| once for every element of `x`."""
    magnitude = np.sqrt(b**2)
    return [magnitude] * len(x)


def c(x, c):
    """Return |c / x|, the third term of the quadrature sum in `func`."""
    term = c / x
    return np.sqrt(term**2)


def func(x, a, b, c):
    """Quadrature sum sqrt((a/sqrt(x))**2 + b**2 + (c/x)**2).

    This is the curve fitted to the resolution points by `approximated`.
    """
    sqrt_term_sq = (a / x**0.5) ** 2
    const_term_sq = b ** 2
    inverse_term_sq = (c / x) ** 2
    return np.sqrt(sqrt_term_sq + const_term_sq + inverse_term_sq)


def approximated(x, y, yerr):
    """Fit `func` to the points (x, y) weighted by `yerr`.

    Each of the three parameters is constrained to the interval [0, 5].

    Returns
    -------
    tuple
        ``(fitted_curve, popt, perr)``: `func` evaluated at `x` with the best
        parameters, the best parameters themselves, and the square roots of
        the absolute diagonal of their covariance matrix.
    """
    lower_bounds, upper_bounds = [0, 0, 0], [5, 5, 5]
    popt, pcov = curve_fit(
        func, x, y,
        sigma=yerr,
        maxfev=10**9,
        bounds=(lower_bounds, upper_bounds),
    )
    # abs() guards the square root against slightly negative diagonal entries.
    perr = np.sqrt(np.abs(np.diag(pcov)))
    return func(x, *popt), popt, perr

In [None]:
# Fit the resolution curve for every (model, region) pair. Only energy points
# 1..9 are used: the first and the last point are left out of the fit.
# NOTE: `errors_all` is rebound here from the Gaussian-fit covariances to the
# parameter errors of these fits, so the cells that derive error_sigma_all /
# error_mu_all cannot be re-run after this one without re-running the
# Gaussian fits first.
fit_window = slice(1, 10)

y_approximated_all, coefs_all, errors_all = [], [], []
for i in range(len(xgbregs)):
    fits = [
        approximated(
            energies[fit_window],
            res_all[k][i][fit_window],
            error_sigma_all[k][i][fit_window],
        )
        for k in range(diffs.shape[0])
    ]
    # Each fit is (curve, coefficients, coefficient errors); nest as [model][region].
    y_approximated_all.append([curve for curve, _, _ in fits])
    coefs_all.append([popt for _, popt, _ in fits])
    errors_all.append([perr for _, _, perr in fits])

In [None]:
# coefs_all is nested [model][region][parameter]; transposing puts the three
# fit parameters first, so each of a, b, c is indexed [region, model].
# These rebind the names of the helper functions a(), b() and c() defined above.
a, b, c = np.array(coefs_all).T

In [None]:
# Same panels with the fitted func(x, a, b, c) curves overlaid and a taller
# resolution axis; saved to plots/appr_compare_results_for_diff_regions.pdf.
plot_results(appr=True, ylim=3.5)

In [None]:
# Table of fit coefficients for the second model (index 1), one row per region.
# hstack yields the columns [a, b, c, da, db, dc]; `reindex` interleaves each
# value with its error.
reindex = [0, 3, 1, 4, 2, 5]
base_columns = ['a', r'$\Delta a$', 'b', r'$\Delta b$', 'c', r'$\Delta c$']

coefs_df_real = pd.DataFrame(
    np.hstack((coefs_all[1], errors_all[1]))
)[reindex]
coefs_df_real.columns = base_columns
coefs_df_real.index = names

# Scale used to combine the three terms into a single figure of merit.
# NOTE(review): presumably a reference energy in MeV - confirm.
E_REF = 1.6
E_REF_SQ = E_REF ** 2

# Derive a-tilde and its propagated uncertainty from the UNROUNDED fit values:
# rounding first truncates small uncertainties (possibly to 0) and biases both
# derived columns.
coefs_df_real[r'$\tilde{a}$'] = (
    coefs_df_real['a']**2
    + (E_REF * coefs_df_real['b'])**2
    + (coefs_df_real['c'] / E_REF)**2
)**0.5

# First-order error propagation: d(a~)/da = a/a~, d(a~)/db = E_REF^2 * b/a~,
# d(a~)/dc = c / (E_REF^2 * a~).
coefs_df_real[r'$\Delta \tilde{a}$'] = np.sqrt(
    (coefs_df_real['a'] * coefs_df_real[r'$\Delta a$'])**2
    + (E_REF_SQ * coefs_df_real['b'] * coefs_df_real[r'$\Delta b$'])**2
    + (coefs_df_real['c'] * coefs_df_real[r'$\Delta c$'] / E_REF_SQ)**2
) / coefs_df_real[r'$\tilde{a}$']

# Round the fitted coefficients and their errors only now, for display.
coefs_df_real[base_columns] = coefs_df_real[base_columns].round(3)

In [None]:
# Show the per-region coefficient table built from coefs_all[1] / errors_all[1].
coefs_df_real

In [None]:
# a-tilde per radial region (points with error bars) compared with the
# full-volume value (dashed line with a shaded +/- 1 error band).
a_tilde = coefs_df_real[r'$\tilde{a}$']
a_tilde_err = coefs_df_real[r'$\Delta \tilde{a}$']

# The last row is the full-volume fit. It is selected positionally with .iloc,
# because `series[-1]` on a string-labelled index relies on a deprecated
# positional fallback.
full_volume_value = a_tilde.iloc[-1]
full_volume_error = a_tilde_err.iloc[-1]

fig = go.Figure()

fig.add_trace(
    go.Scatter(
        # x, y and the error array all exclude the full-volume row so that
        # their lengths agree.
        x=names[:-1],
        y=a_tilde.iloc[:-1],
        marker=dict(color='darkblue'),
        error_y=dict(
            type='data',
            width=10,
            array=a_tilde_err.iloc[:-1],
            visible=True,
        ),
    )
)

fig.add_hline(
    y=full_volume_value,
    line=dict(
        dash='dash'
    )
)

fig.add_hrect(
    y0=full_volume_value - full_volume_error,
    y1=full_volume_value + full_volume_error,
    fillcolor="darkred",
    line_width=0,
    opacity=0.25,
)

xaxis = dict(
    showline=True,
    ticks='outside',
    mirror=True,
    tick0=1,
    dtick=1,
    linecolor='black',
    showgrid=True,
    gridcolor='grey',
    gridwidth=0.25,
)


def yaxis(value_range):
    """Return the shared y-axis style with the given displayed range."""
    return dict(
        showline=True,
        ticks='outside',
        mirror=True,
        linecolor='black',
        range=value_range,
        showgrid=True,
        gridcolor='grey',
        gridwidth=0.25,
        zeroline=True,
        zerolinecolor='black',
        zerolinewidth=0.25,
    )


fig.update_layout(
    xaxis_title="Region",
    yaxis_title=r'$\tilde{a}$',
    xaxis=xaxis,
    yaxis=yaxis([2.25, 3.5]),

    showlegend=False,
    height=500,
    width=950,
    font=dict(
        family="Times New Roman",
        size=16,
    ),
)


fig.show()