# Model testing

In [None]:
import time
import timeit
import pandas as pd
import numpy as np
from plotly import graph_objects as go
import plotly.express as px
from datetime import datetime
from IPython.display import HTML
import cufflinks
import numba as nb
from scipy.optimize import least_squares, curve_fit
from sklearn.decomposition import PCA, TruncatedSVD
from sklearn.svm import SVC # Support Vector Classifier
from sklearn.neural_network import MLPRegressor
import mlflow
import os
import mlflow.keras
import mlflow.sklearn
from gewapro.cache import cache
from gewapro.preprocessing import get_waveforms, train_test_split_cond, smoothen_waveforms, get_and_smoothen_waveforms, select_from_source
from gewapro.functions import (quadratic_arr,
                               fit_parabolas,
                               df_with_fits,
                               _fit_final_slope,
                               combine_and, combine_or,
                               calc_ab)
from gewapro.plotting.base import _fwhm_energy_df
from gewapro.util import name_to_vals, pandas_string_rep, add_notes, combine_cols_with_errors
from gewapro.plotting import (histogram,
                              corr_fig,
                              mlp_reg_fig,
                              plot_transform,
                              energy_histogram,
                              box_plot,
                              plot_predictions,
                              energy_line_plot,
                              add_energy_histogram,
                              combine_line_plots,
                              combined_channel_line_plot,
                              change_combined_line_fig)
from gewapro import plotting
from gewapro.models import regressor_model, train_model, get_model_version_map, ModelInfo, fitted_PCA
import gewapro.models
from gewapro.experiment_flow import run_experiment
import mlflow.pyfunc
import xgboost as xgb
import itertools

cufflinks.go_offline()
# gewapro.models.update_validity()

# First run 'mlflow ui' in a terminal, otherwise this will not work!!!
mlflow.set_tracking_uri("http://127.0.0.1:5000")

In [None]:
# Get all data loaded
data_g1274_name = "20231110-Na22-d0-12-tz6-ML200-g1274.dat"
data_g511_name = "20231110-Na22-d0-12-tz6-ML200-g511.dat"
data_g1274_unfiltered_name = "20231110-Na22-d0-12-tz6-ML200_nofir_noMa_D40-g1274.dat"
data_g511_unfiltered_name = "20231110-Na22-d0-12-tz6-ML200_nofir_noMa_D40-g511.dat"
data_g1274_partfltr_name_all = "20231110-Na22-d0-12-tz6-ML200_noMa_D10_soft_sc_g1274_all.parquet"
data_g511_partfltr_name_all = "20231110-Na22-d0-12-tz6-ML200_noMa_D10_soft_sc_g511_all.parquet"
data_g1274_partfltr_name_all2 = "20231117-Na22-d0-12-tz6-ML200_noMa_D10_soft_sc_g1274_all.parquet"
data_g511_partfltr_name_all2 = "20231117-Na22-d0-12-tz6-ML200_noMa_D10_soft_sc_g511_all.parquet"
def name_g1274_d(i):
    """Build the data_dict key for a channel selection of the 20231110 g1274 set.

    ``i`` is formatted straight into the name, so it may be a single channel
    number or (as done further down in this notebook) a list of channels.
    """
    return "20231110-Na22-d{}-tz6-ML200_noMa_D10_soft_sc_g1274.dat".format(i)


def name_g511_d(i):
    """Build the data_dict key for a channel selection of the 20231110 g511 set."""
    return "20231110-Na22-d{}-tz6-ML200_noMa_D10_soft_sc_g511.dat".format(i)


def name2_g1274_d(i):
    """Build the data_dict key for a channel selection of the 20231117 g1274 set."""
    return "20231117-Na22-d{}-12-tz6-ML200_noMa_D10_soft_sc_g1274_all.parquet".format(i)


def name2_g511_d(i):
    """Build the data_dict key for a channel selection of the 20231117 g511 set."""
    return "20231117-Na22-d{}-12-tz6-ML200_noMa_D10_soft_sc_g511_all.parquet".format(i)
# Registry of all loaded data sets, keyed by file name or by the per-channel
# name produced by name_g*_d / name2_g*_d. The walrus assignments additionally
# bind every full table to a module-level variable (datag1274, datag511, ...)
# so later cells can use them directly.
# The four comprehension dicts add one entry per channel 0..12, each holding
# select_from_source(<full table>, select_channels=[i]).
# NOTE(review): name2_g1274_d(0) and name2_g511_d(0) evaluate to exactly the
# same strings as data_g1274_partfltr_name_all2 and data_g511_partfltr_name_all2.
# Because the right-hand operand of `|` wins on duplicate keys, those two keys
# end up holding the channel-0 selection instead of the full table (the
# datag*partfltr_all2 variables themselves are unaffected). Confirm this
# overwrite is intended.
data_dict = {
             data_g1274_name:              (datag1274 := pd.read_csv("data/"+data_g1274_name)),
             data_g511_name:               (datag511 := pd.read_csv("data/"+data_g511_name)),
             data_g1274_unfiltered_name:   (datag1274unfiltered := pd.read_csv("data/"+data_g1274_unfiltered_name)),
             data_g511_unfiltered_name:    (datag511unfiltered := pd.read_csv("data/"+data_g511_unfiltered_name)),
             data_g1274_partfltr_name_all: (datag1274partfltr_all := pd.read_parquet("data/"+data_g1274_partfltr_name_all)),
             data_g511_partfltr_name_all:  (datag511partfltr_all := pd.read_parquet("data/"+data_g511_partfltr_name_all)),
             data_g1274_partfltr_name_all2:(datag1274partfltr_all2 := pd.read_parquet("data/"+data_g1274_partfltr_name_all2)),
             data_g511_partfltr_name_all2: (datag511partfltr_all2 := pd.read_parquet("data/"+data_g511_partfltr_name_all2)),
            } | {
                    name_g1274_d(i): select_from_source(datag1274partfltr_all, select_channels=[i]) for i in range(0,13)
            } | {
                    name_g511_d(i): select_from_source(datag511partfltr_all, select_channels=[i]) for i in range(0,13)
            } | {
                    name2_g1274_d(i): select_from_source(datag1274partfltr_all2, select_channels=[i]) for i in range(0,13)
            } | {
                    name2_g511_d(i): select_from_source(datag511partfltr_all2, select_channels=[i]) for i in range(0,13)
            }
def x_to_t(x):
    """Map ``x`` to ``160 - 4*x`` (inverse of :func:`t_to_x`).

    NOTE(review): presumably converts a sample index to a time value; the
    units are not stated in this file.
    """
    return 160 - (x * 4)


def t_to_x(t):
    """Map ``t`` to ``(160 - t) / 4`` (inverse of :func:`x_to_t`)."""
    return (160 - t) / 4

In [None]:
print("Usable waveforms:",len(datag511partfltr_all2[(datag511partfltr_all2["Ch"] != 4) & (datag511partfltr_all2["Ch"] != 6)]))
print(datag511partfltr_all2.columns)

In [None]:
# Inspect the waveforms of channel 0 of the partially filtered g1274 set.
# Each column of dfplot is one waveform (the column count is printed as the
# waveform count); row label 199 is used as the "last value" of a waveform.
dfplot = get_waveforms(select_channels=0,source_data=datag1274partfltr_all)
print("Total waveform count:",len(dfplot.columns))
# Waveforms whose value at row 199 lies strictly between 0.85 and 0.95 ...
dfplot85_95: pd.DataFrame = dfplot.loc[:,(dfplot.loc[199] > 0.85) & (dfplot.loc[199] < 0.95)]
# ... and waveforms whose value at row 199 is below 0.85.
dfplot85: pd.DataFrame = dfplot.loc[:,dfplot.loc[199] < 0.85]
# dfplot.iloc[:,:100].iplot(title="First 100 waveforms")
# The index is multiplied by 4 for plotting; the x axis is labelled "time [ns]".
# dfplot85.index is reused for dfplot85_95: both are column selections of
# dfplot and therefore share the same row index.
dfplot85_95.iloc[:,:100].set_index(dfplot85.index*4).iplot(title="First 100 waveforms with last value between 0.85 and 0.95",theme="white")
# Same plot as a figure object sized for export (show/export are commented out).
fig = dfplot85_95.iloc[:,:100].set_index(dfplot85.index*4).iplot(asFigure=True,theme="white").update_layout(height=400,width=700,margin=dict(l=20, r=20, t=20, b=20),showlegend=False,xaxis_title="time [ns]",yaxis_title="Normalised signal")
# fig.show()
# fig.write_image("0_Waveforms_8595_first100_partfltr.pdf")
dfplot85.iloc[:,:100].iplot(title="First 100 waveforms with last value below 0.85")
fig = dfplot85.iloc[:,:100].set_index(dfplot85.index*4).iplot(asFigure=True,theme="white").update_layout(height=400,width=700,margin=dict(l=20, r=20, t=20, b=20),showlegend=False,xaxis_title="time [ns]",yaxis_title="Normalised signal")
# fig.show()
# fig.write_image("1_Waveforms_lt85_first100_partfltr.pdf")
print(f"Final point <0.85 rate: {len(dfplot85.columns)/len(dfplot.columns):.2%} ({len(dfplot85.columns)} waveforms), 0.85-0.95 rate: {len(dfplot85_95.columns)/len(dfplot.columns):.2%} ({len(dfplot85_95.columns)} waveforms)")
# data_dict["raw"]["normalized"]["FIR"]["gBOTH"]["range_cut"].iloc[:,:50].iplot(title="First 50 partially filtered normalized waveforms E(4000-5000)(11050-11250)")
# First 100 waveforms of the g511 channel-0 selection with energies (4000,5000).
get_waveforms(source_data=data_dict[name_g511_d(0)], select_energies=(4000,5000)).iloc[:,:100].iplot(title="First 100 partially filtered normalized waveforms")

# NOTE(review): this repeats the get_waveforms call of the previous statement
# with identical arguments; the result could be computed once and reused.
dfw = get_waveforms(source_data=data_dict[name_g511_d(0)], select_energies=(4000,5000)).iloc[:,:100]
# Column names are cut off after their first "]" before plotting.
fig = dfw.rename(columns={c:c[:c.find("]")+1] for c in dfw.columns}).set_index(dfplot85.index*4).iplot(asFigure=True,theme="white").update_layout(height=400,width=700,margin=dict(l=20, r=20, t=20, b=20),showlegend=False,xaxis_title="time [ns]",yaxis_title="Normalised signal")
# fig.show()
# fig.write_image("2_Waveforms_fine_first100_partfltr.pdf")
# NOTE(review): this plots the <0.85 waveforms from position 100 onwards
# (iloc[:,100:]), which does not match the title ("First 100 ... with
# final_val<0.85 removed"). Confirm which data set and slice were intended.
dfplot85.iloc[:,100:].iplot(title="First 100 partially filtered normalized waveforms with final_val<0.85 removed")

In [None]:
s_diff = datag511partfltr_all["dT"] - datag511partfltr_all["Tfit"]
s_diff.name = "dT - Tfit"
# display(pd.concat([datag511partfltr_all,s_diff],axis=1).loc[datag511partfltr_all["Ch"] == 1, :])
histogram(pd.concat([datag511partfltr_all,s_diff],axis=1).loc[datag511partfltr_all["Ch"] == 3, [s_diff.name,"Tfit","dT","T0"]],bins=[-30,30,1])

print(ModelInfo.from_database(model_name="MLPRegressorModel",model_version=5))
print(ModelInfo.from_database(model_name="MLPRegressorModel",model_version=2992))
# print(ModelInfo.from_database("MLPRegressorModel",model_version=2993)) # 2993-2996 do not exist
print(energy_line_plot.cache_info())

In [None]:
# PCAcomp.  BEST / WORST (g511)     BEST / WORST (g1274)
# 20:       2353 / 2355             2373 / 2370
# 21:       2360 / 2357             2374 / 2379
# 22:       2361 / 2365             2383 / 2384
# df_results = pd.DataFrame({"model":[2353, 2360, 2361, 2373, 2374, 2383, 2355, 2357, 2365, 2370, 2379, 2384],
#                            "PCA components":[20, 21, 22]*4,"_g511": [np.nan]*12,"FWHM_g1274": [np.nan]*12,
#                            "FWHM_g511 (450-600)": [np.nan]*12,"FWHM_g1274 (450-600)": [np.nan]*12,
#                            "trained_on":(["g511"]*3+["g1274"]*3)*2,
#                            }).set_index("model")#rename_axis("limbs", axis="columns")
# for model in df_results.index:
#     model_pred_fig_511 = plot_predictions(data_g511_name, (11050,11250), model, data_dict, "MLPRegressorModel", False)
#     model_pred_fig_1274 = plot_predictions(data_g1274_name, (4000,5000), model, data_dict, "MLPRegressorModel", False)
#     model_pred_fig_511_515 = plot_predictions(data_g511_name, (450,600), model, data_dict, "MLPRegressorModel", False)
#     model_pred_fig_1274_515 = plot_predictions(data_g1274_name, (450,600), model, data_dict, "MLPRegressorModel", False)
#     df_results.loc[model, "FWHM_g511"] = model_pred_fig_511._params["dT_act - dT_pred Gaussian"]["sigma"]*2*np.sqrt(2*np.log(2))
#     df_results.loc[model, "FWHM_g1274"] = model_pred_fig_1274._params["dT_act - dT_pred Gaussian"]["sigma"]*2*np.sqrt(2*np.log(2))
#     df_results.loc[model, "FWHM_g511 (450-600)"] = model_pred_fig_511_515._params["dT_act - dT_pred Gaussian"]["sigma"]*2*np.sqrt(2*np.log(2))
#     df_results.loc[model, "FWHM_g1274 (450-600)"] = model_pred_fig_1274_515._params["dT_act - dT_pred Gaussian"]["sigma"]*2*np.sqrt(2*np.log(2))
# display(df_results)

from functools import partial

def lineariser(df: pd.DataFrame, lin_term: float, bias: float = 0):
    """Return ``lin_term * max + bias`` for every column of ``df``.

    The per-column maxima are taken with ``df.max()`` and the result is the
    underlying array (``.values``), one entry per column.
    """
    column_peaks = df.max().values
    scaled_peaks = column_peaks * lin_term
    return scaled_peaks + bias


def part_lin(lin_term: float, bias: float = 0):
    """Return :func:`lineariser` with ``lin_term`` and ``bias`` pre-bound.

    The returned ``functools.partial`` only needs the data frame as argument.
    Usage hint kept from the original cell: ``custom_func=part_lin(0.01)``.
    """
    bound_lineariser = partial(lineariser, lin_term=lin_term, bias=bias)
    return bound_lineariser

#  511 * x - 17.8370 + y = 0  -   (0.034906 * 511 = )
# 1274 * x +  8.6423 + y = 0  +     ()
# =============================
#  763 * x + 26.4793     = 0    -> x,y = -0.034704194,35.57084313
import mlflow.pyfunc
regressor = mlflow.pyfunc.load_model(model_uri=f"models:/MLPRegressorModel/2398")
# print(isinstance(regressor, mlflow.pyfunc.PyFuncModel), str(regressor.loader_module) == "mlflow.sklearn")


### Run experiments...

In [None]:
# Run a single experiment on a hand-picked set of channels and show its figure.
model_type = "sklearn" # "sklearn" or "xgboost" (the xgboost call below is currently disabled)
                                            # data511 or datag511partfltr or datag511unfiltered
# data_and_name = datag511partfltr,data_g511_partfltr_name # data_g511_name or data_g511_partfltr_name
# Tuple of (selected data, name built from the same channel list).
data_and_name = select_from_source(datag511partfltr_all,select_channels=[0,2,5,8,9,10]),name_g511_d([0,2,5,8,9,10]) # test: 1,3,7,11
# data_temp_dict = {data_and_name[0]:data_and_name[1]}

if model_type == "xgboost":
    # The xgboost experiment is kept only as a commented-out template. Without
    # it, result_single_exp would be unassigned (NameError) or left over from
    # an earlier run (stale figure) when .show() is reached, so fail explicitly.
    # result_single_exp = run_experiment(
    #     data=data_and_name[0],
    #     data_name=data_and_name[1],
    #     select_channels=[0],
    #     select_energies=(11050,11250),
    #     pca_components=None,
    #     model_type="xgboost",
    #     max_depth=50,
    #     n_estimators=3,
    #     max_leaves= 0,
    #     test_size=0.2,
    #     uniform_test_set=[5000,6000,7000,8000,9000,10000,11000,12000]#,8000,9000,10000,11000]
    # )
    raise NotImplementedError("The xgboost single experiment is disabled in this cell; uncomment its run_experiment call first.")
else:
    result_single_exp = run_experiment(
        data=data_and_name[0],
        data_name=data_and_name[1],
        select_channels=[],  # data_and_name[0] is already restricted to the wanted channels
        select_energies=(5000,15000),
        pca_components=None,
        model_type="sklearn",
        hidden_layers=[23],
        test_size=0.3,
        uniform_test_set=[5000,6000,7000,8000,9000,10000,11000,12000,13000,15000], #,8000,9000,10000,11000]
        dT_correcting=True
    )#._params
result_single_exp.show()

In [None]:
default_params= {"model_type": "SKlearn",
                 "select_channels": [0,1,3,5,7,9,10], # Test: 2, 8, 11
                 "max_iterations": 2_000,
                 "remove_nan_waveforms":True,
                #  "select_energies":(9000,13000), # DEFAULT for th06-th60: (11050,11250)
                 "include_energy": False,
                 "activation": 'relu',
                 "pca_method": PCA,
                 "uniform_test_set": [5000,6000,7000,8000,9000,10000,11000],
                 "test_size":0.3,
                 "which": "Tfit"}
source_dat = select_from_source(datag511partfltr_all2,select_channels=[0,1,3,5,7,9,10])
data_run = {name2_g511_d([0,1,3,5,7,9,10])+"+.95": source_dat.loc[source_dat["s199"] >= .95]}

# pca_components_list = [None] #[100,None] #[16,18,20,21,22,23,64]
# hidden_layers_list = [[100]] #[5,10,20,50]
# Final EXP NN: [[100,[23]],[128,[64]],[128,[128]],[None,[23]]]
pca_hidden_layers_list = [[None,[23]]] #[[4,[4]],[8,[8]],[12,[12]],[16,[16]],[20,[20]],[20,[16]],[24,[24]],[24,[16]],[28,[28]],[28,[16]],[32,[32]],[32,[16]],[48,[48]],[48,[16]],[64,[64]],[64,[16]],[128,[128]],[128,[16]],[None,[24]],[None,[16]]]
select_energies = [(5000,50000)] #[(9000,50000),(8000,50000)] #[(10000,50000),(5000,13000),()]
which = ["Tfit"]
exp_list = [[obj for obj in tup] for tup in itertools.product([k for k in data_run.keys()],pca_hidden_layers_list,select_energies,which)]
print(exp_list)
iterations = [5]*len(exp_list)
print(len(exp_list),"experiments,",sum(iterations), f"iterations (={sum(iterations)/len(exp_list)}*{len(pca_hidden_layers_list)}*{len(which)})")
results = {}
# break
mlflow.set_tracking_uri("http://127.0.0.1:5000") # 5000: local, 30000: external
iteration = 0
# Run every configured experiment `iters` times and collect the `_params` of
# each run in results[str(exp)][str(i)].
total_iterations = sum(iterations)  # loop-invariant, only used in the progress message
for exp,iters in zip(exp_list,iterations):
    # exp is [data_run key, [pca_components, hidden_layers], select_energies, which],
    # as produced by the itertools.product call that builds exp_list.
    # Merge into a NEW dict. The previous `params = default_params` followed by
    # `params |= {...}` aliased default_params and updated it in place, leaking
    # per-experiment settings into the shared defaults.
    params = default_params | {"pca_components": exp[1][0], "hidden_layers": exp[1][1], "select_energies": exp[2], "which":exp[3]}
    result = {str(i):None for i in range(iters)}
    # if [exp[1]] == exp[2] == 22:
    #     print(f"Got layers {[exp[1]]} equal to {exp[2]}, skipping experiment")
    #     continue
    for i in range(iters):
        iteration += 1
        print(f"[MLFlow run] Starting iteration {i+1}/{iters} ({iteration}/{total_iterations}) with params {params}...")
        # The former `if "raw" in exp[0]` test had two identical branches, so a
        # single call is equivalent.
        result[str(i)] = run_experiment(data_run[exp[0]], exp[0], **params)._params
    results[str(exp)] = result

In [None]:
# get_waveforms(source_data=data, select_energies=range_E, include_energy=False)
# Best results:         abs                                 avg
# E5000-6000    13.358  8000-50000, 100-[23] @ 10.485       5000-13000, 100-[16] @ 10.71
# E6000-7000    11.841  9000-13000, 23-[100] @  9.227       9000-13000, 23-[100] @  9.61
# E7000-8000    10.374  9000-50000, 23-[23]  @  8.682       9000-50000, 23-[100] @  8.82
# E8000-9000    10.292  5000-13000, ALL-[100] @ 8.371       5000-13000, ALL-[100] @ 8.49
# E9000-10000    9.439  9000-13000, 23-[100] @  7.689       5000-13000, 100-[16] @  7.92
# E10000-11000  11.282  5000-13000, ALL-[100] @ 7.034       5000-13000, ALL-[100] @ 7.12
# E11000-12000   7.064  5000-13000, 100-[16] @  6.004       5000-13000, 100-[16] @  6.28

# Eleastsquares 2949:  8000-50000 100-[23]  @ 5.599, 2897:  5000-13000 ALL-[100] @ 5.608, 2851:  5000-13000 100-[16] @ 5.605
#                739:  8000-13000 16-4*40   @ 3.444,  748:  8000-13000 16-4*50   @ 3.475,  743:  8000-50000 16-4*50  @ 3.512
# Elinear       2897:  5000-13000 ALL-[100] @ 5.651, 2949:  8000-50000 100-[23]  @ 5.654, 2851:  5000-13000 100-[16] @ 5.662
#                748:  8000-13000 16-4*50   @ 4.367,  743:  8000-50000 16-4*50   @ 4.410,  739:  8000-13000 16-4*40  @ 4.447
# Try 2995       2949 & 2897
plotting.settings(show_dt=True,show_pred=True)
ignore_values = {"Energy range used": "() eV"}
ignore_values1 = {"tree depth": None}
energy = 5000
y = f"FWHM E{energy}-{energy+1000}"

# Reference FWHM per uniform-test energy bin, used to normalise the metrics below.
# NOTE(review): the values match the first numeric column of the
# "Best results" comment table in this cell; presumably the baseline FWHM per
# bin -- confirm.
_REFERENCE_FWHM_PER_BIN = {
    "5000-6000": 13.3579,
    "6000-7000": 11.841,
    "7000-8000": 10.3735,
    "8000-9000": 10.2916,
    "9000-10000": 9.4387,
    "10000-11000": 11.2823,
    "11000-12000": 7.0638,
}


def _fwhm_ratios(df):
    """Return, bin by bin, the column ``metrics.Uniform test FWHM E<bin>`` divided by its reference."""
    return [df[f"metrics.Uniform test FWHM E{e_range}"]/e_val
            for e_range, e_val in _REFERENCE_FWHM_PER_BIN.items()]


def square_FWHM_metric(df: pd.DataFrame) -> pd.Series:
    """Sum over all energy bins of the squared FWHM-to-reference ratio, per row of ``df``."""
    total = 0
    for ratio in _fwhm_ratios(df):
        total = total + ratio**2
    return total


def linear_FWHM_metric(df: pd.DataFrame) -> pd.Series:
    """Sum over all energy bins of the FWHM-to-reference ratio, per row of ``df``."""
    total = 0
    for ratio in _fwhm_ratios(df):
        total = total + ratio
    return total


# 7 is the value both metrics take when every bin's FWHM equals its reference
# (seven bins, each ratio 1). How the attribute is consumed is not visible here.
square_FWHM_metric.FWHM = 7
linear_FWHM_metric.FWHM = square_FWHM_metric.FWHM

y = square_FWHM_metric

# df_version_mapper = get_model_version_map([102816600889877627])
# display(df_version_mapper)
# display(get_model_version_map([918309536924112984]))
box_plot([918309536924112984], x="Energy range used", y=y, color="PCA components", ignore_vals=ignore_values, facet_row="Hidden layers", height=700, hover_name="model_version").show()
# box_plot([102816600889877627], x="estimators", y=y, color="PCA components", ignore_vals=ignore_values1, facet_row="tree depth", facet_col="Energy range used", hover_name="model_version", height=800).show()
# box_plot([941575026271596123], x="PCA components", y=y, color="Hidden layers", height=700, hover_name="model_version").show()
box_plot([824072748444866548], x="PCA components", y=y, color="Energy range used", facet_col="Tref", height=700, hover_name="model_version").show()
box_plot([824072748444866548], x="PCA components", y="FWHM Test", color="Energy range used", facet_col="Tref", height=700, hover_name="model_version").show()
waveforms5000_13000 = get_waveforms(source_data=data["partfilter_g511_all2_E5000-13000"])
waveforms5000_50000 = get_waveforms(source_data=data["partfilter_g511_all2_E5000-50000"])
energy_line_plot(name_g511_d(0), 2000, 10000, 250, 3158, data, PCA_fit=fitted_PCA(3158, waveforms5000_13000),hist_limit=100,y_sd="FWHM GoF",verbose=0).show()
energy_line_plot(name_g511_d(0), 2000, 10000, 250, 3139, data, PCA_fit=fitted_PCA(3139, waveforms5000_13000),hist_limit=100,y_sd="FWHM GoF",verbose=0).show()
energy_line_plot(name_g511_d(0), 2000, 10000, 250, 3077, data, PCA_fit=fitted_PCA(3077, waveforms5000_13000),hist_limit=100,y_sd="FWHM GoF",verbose=0).show()
energy_line_plot(name_g511_d(0), 2000, 10000, 250, 3066, data, PCA_fit=fitted_PCA(3066, waveforms5000_50000),hist_limit=100,y_sd="FWHM GoF",verbose=0,which="Tfit").show()
energy_line_plot(name_g511_d(1), 2000, 10000, 250, 3158, data, PCA_fit=fitted_PCA(3158, waveforms5000_13000),hist_limit=100,y_sd="FWHM GoF",verbose=0).show()
energy_line_plot(name_g511_d(1), 2000, 10000, 250, 3139, data, PCA_fit=fitted_PCA(3139, waveforms5000_13000),hist_limit=100,y_sd="FWHM GoF",verbose=0).show()
energy_line_plot(name_g511_d(1), 2000, 10000, 250, 3077, data, PCA_fit=fitted_PCA(3077, waveforms5000_13000),hist_limit=100,y_sd="FWHM GoF",verbose=0).show()
energy_line_plot(name_g511_d(1), 2000, 10000, 250, 3066, data, PCA_fit=fitted_PCA(3066, waveforms5000_50000),hist_limit=100,y_sd="FWHM GoF",verbose=0,which="Tfit").show()
# plot_predictions(data_g511_partfltr_name, (), 3032, data, "MLPRegressorModel", PCA_fit=fitted_PCA(3032, waveforms)).show()
# plot_predictions(data_g511_partfltr_name, (), 3028, data, "MLPRegressorModel", PCA_fit=fitted_PCA(3028, waveforms)).show()
# plot_predictions(data_g511_partfltr_name, (), 3000, data, "MLPRegressorModel", PCA_fit=fitted_PCA(3000, waveforms)).show()
# plot_predictions(data_g511_partfltr_name, (), 3033, data, "MLPRegressorModel").show()
# plot_predictions(data_g511_partfltr_name, (), 2949, data, "MLPRegressorModel", PCA_transform_on="partfilter_g511_E8000-50000").show()
# plot_predictions(data_g511_partfltr_name, (), 2897, data, "MLPRegressorModel", PCA_transform_on="partfilter_g511_E5000-13000").show()
# plot_predictions(data_g511_partfltr_name, (), 2851, data, "MLPRegressorModel", PCA_transform_on="partfilter_g511_E5000-13000").show()
# plot_predictions(data_g511_partfltr_name, (), 739, data, "XGBoostedTree", PCA_transform_on="partfilter_g511_E8000-13000").show()
# plot_predictions(data_g511_partfltr_name, (), 748, data, "XGBoostedTree", PCA_transform_on="partfilter_g511_E8000-13000").show()
# plot_predictions(data_g511_partfltr_name, (), 743, data, "XGBoostedTree", PCA_transform_on="partfilter_g511_E8000-50000").show()

In [None]:
# One entry per (channel, model version, PCA fitted for that model, value passed as `which=`).
# Fitting takes about 1.5 min, plotting is nearly instant.
# Note that the loop variable `which` rebinds the module-level name `which`.
prediction_cases = [
    (0, 3058, fitted_PCA(3058, waveforms5000_13000), "T0"),
    (1, 3058, fitted_PCA(3058, waveforms5000_13000), "T0"),
    (0, 3066, fitted_PCA(3066, waveforms5000_50000), "Tfit"),
    (1, 3066, fitted_PCA(3066, waveforms5000_50000), "Tfit"),
]
for channel, model_v, fitted_pca, which in prediction_cases:
    # Energy windows of width 250 centred on 2500, 5000 and 9000.
    for centre in (2500, 5000, 9000):
        plot_predictions(
            name_g511_d(channel),
            (centre - 125, centre + 125),
            model_v,
            data,
            "MLPRegressorModel",
            channel,
            PCA_fit=fitted_pca,
            which=which,
            title=f"Predictions of NN v{model_v} on Ch{channel}, @E {centre} (arb. units, binwidth 250)",
        ).show()

In [None]:
# Predict on another detector (channel 1) with a fixed set of registered models.
detector = 1
detector_key = name_g511_d(detector)
data |= {detector_key: data_dict[detector_key]}
# (model version, registered model name, value passed as PCA_fit)
# NOTE(review): here the data-set names are passed as `PCA_fit=`, whereas the
# sibling cells for detectors 2 and 3 pass the same strings as
# `PCA_transform_on=`. Confirm which keyword plot_predictions expects.
model_cases = [
    (2949, "MLPRegressorModel", "partfilter_g511_E8000-50000"),
    (2897, "MLPRegressorModel", "partfilter_g511_E5000-13000"),
    (2851, "MLPRegressorModel", "partfilter_g511_E5000-13000"),
    (739, "XGBoostedTree", "partfilter_g511_E8000-13000"),
    (748, "XGBoostedTree", "partfilter_g511_E8000-13000"),
    (743, "XGBoostedTree", "partfilter_g511_E8000-50000"),
]
for model_version, registered_name, pca_source in model_cases:
    plot_predictions(name_g511_d(detector), (), model_version, data, registered_name, PCA_fit=pca_source).show()

In [None]:
# Predict on another detector (channel 2) with a fixed set of registered models.
detector = 2
detector_key = name_g511_d(detector)
data |= {detector_key: data_dict[detector_key]}
# (model version, registered model name, data set the PCA transform is taken from)
model_cases = [
    (2949, "MLPRegressorModel", "partfilter_g511_E8000-50000"),
    (2897, "MLPRegressorModel", "partfilter_g511_E5000-13000"),
    (2851, "MLPRegressorModel", "partfilter_g511_E5000-13000"),
    (739, "XGBoostedTree", "partfilter_g511_E8000-13000"),
    (748, "XGBoostedTree", "partfilter_g511_E8000-13000"),
    (743, "XGBoostedTree", "partfilter_g511_E8000-50000"),
]
for model_version, registered_name, pca_source in model_cases:
    plot_predictions(name_g511_d(detector), (), model_version, data, registered_name, PCA_transform_on=pca_source, xaxis_range=[-30,120]).show()

In [None]:
# Predict on another detector (channel 3) with a fixed set of registered models.
detector = 3
detector_key = name_g511_d(detector)
data |= {detector_key: data_dict[detector_key]}
# (model version, registered model name, data set the PCA transform is taken from)
model_cases = [
    (2949, "MLPRegressorModel", "partfilter_g511_E8000-50000"),
    (2897, "MLPRegressorModel", "partfilter_g511_E5000-13000"),
    (2851, "MLPRegressorModel", "partfilter_g511_E5000-13000"),
    (739, "XGBoostedTree", "partfilter_g511_E8000-13000"),
    (748, "XGBoostedTree", "partfilter_g511_E8000-13000"),
    (743, "XGBoostedTree", "partfilter_g511_E8000-50000"),
]
for model_version, registered_name, pca_source in model_cases:
    plot_predictions(name_g511_d(detector), (), model_version, data, registered_name, PCA_transform_on=pca_source, xaxis_range=[-30,120]).show()

In [None]:
# Check the same models on part of the detector-3 set: energies (5000,50000) only.
# (model version, registered model name, data set the PCA transform is taken from)
model_cases = [
    (2949, "MLPRegressorModel", "partfilter_g511_E8000-50000"),
    (2897, "MLPRegressorModel", "partfilter_g511_E5000-13000"),
    (2851, "MLPRegressorModel", "partfilter_g511_E5000-13000"),
    (739, "XGBoostedTree", "partfilter_g511_E8000-13000"),
    (748, "XGBoostedTree", "partfilter_g511_E8000-13000"),
    (743, "XGBoostedTree", "partfilter_g511_E8000-50000"),
]
for model_version, registered_name, pca_source in model_cases:
    plot_predictions(name_g511_d(3), (5000,50000), model_version, data, registered_name, PCA_transform_on=pca_source, xaxis_range=[-30,120]).show()

### Visualise experiment results

In [None]:
# Helpers that reshape the nested `results` dict of the experiment loop
# ({experiment label: {"0": params, "1": params, ...}}) into named Series.
# They are meant to be chained: series_ls(exp_res(exp_results(exp_sigma_lists(results)))).

# exp_sigma_lists: for every experiment, build a list of single-entry dicts
# {params key: Series of that key's "sigma" over iterations "0".."n-1"}.
# Keys are taken from iteration "0"; keys containing "data" are skipped.
exp_sigma_lists = lambda dic: {k:[{d_k:pd.Series([d[str(i)][d_k]["sigma"] for i in range(len(d))])} for d_k in d["0"].keys() if not "data" in d_k] for k,d in dic.items()}
# exp_results: merge the FIRST TWO dicts of each experiment's list and rename
# every key to "test" if it contains "test", otherwise to "train".
# Any further list entries are ignored; fewer than two raise IndexError.
exp_results = lambda dic: {k:{"test" if "test" in d_k else "train": ls_d for d_k,ls_d in (ls[0]|ls[1]).items()} for k,ls in dic.items()}
# exp_res: regroup by the substring running from the last "[" to the first "]"
# of each label. Within a group, entries are keyed by the label text between
# the first ", " and two characters before the next "[".
# NOTE(review): this relies on the exact str(list) layout of the labels built
# in the experiment loop -- verify if that layout changes.
exp_res = lambda dic: {key: {k[k.find(", ")+2: k.find("[",k.find(", "))-2]:v for k,v in dic.items() if key in k} for key in {ky[ky.rfind("["):ky.find("]")+1] for ky in dic.keys()}}
# series_ls: flatten the two-level result into a list of Series named
# "<group>_<inner key>_<test|train>".
series_ls = lambda dic: [pd.Series(s, name=n) for n,s in {k+"_"+i+"_"+j: v[i][j] for k,v in dic.items() for j in ["test","train"] for i in v.keys()}.items()]
# df_results = pd.DataFrame(series_ls(exp_res(exp_results(exp_sigma_lists(results)))))
# df_results.index = pd.Index(["["+i[i.rfind("'")+3:i.rfind("]")+1]+i[i.rfind("_"):] for i in df_results.index])
# display(df_results)
# print(df_results.index)
# df_results.to_csv("data/results/NN_TruncSVC_testing_gBOTHpartfltr_4000-11250.csv")
# Box plots of stored experiment results. Every CSV under data/results has an
# "experiment" label column plus one column per repetition holding a sigma.
# Each non-NaN value becomes one row of a long table, with the label split
# into its hyperparameter fields and sigma multiplied by `mult`.
mult = 2*np.sqrt(2*np.log(2))  # Gaussian sigma -> FWHM factor


def _xgb_label_fields(label):
    """Split an XGBoost label into (PCA components, # estimators, max tree depth).

    Fields are delimited by the first comma, the last comma and the first "]";
    the first character of the label is skipped.
    """
    first_comma, last_comma = label.find(","), label.rfind(",")
    return (label[1:first_comma],
            label[first_comma+1:last_comma],
            label[last_comma+1:label.find("]")])


def _nn_label_fields(label):
    """Split a NN label into (hidden layers, PCA components) at its first and last "_"."""
    first_us, last_us = label.find("_"), label.rfind("_")
    return (label[:first_us], label[first_us+1:last_us])


def _load_long_results(csv_path, label_fields, columns):
    """Read a results CSV and return ``(wide, long)`` data frames.

    ``wide`` is the CSV indexed by "experiment". ``long`` has one row per
    non-NaN value: the fields from ``label_fields(label)``, then "test" or
    "train" (depending on whether the label contains "test"), then the value
    multiplied by ``mult``. ``columns`` names the columns of ``long``.
    """
    wide = pd.read_csv(csv_path).set_index("experiment")
    rows = [(*label_fields(label), "test" if "test" in label else "train", value*mult)
            for label, series in wide.iterrows()
            for value in series.values if not pd.isna(value)]
    return wide, pd.DataFrame(data=rows, columns=columns)


_XGB_COLUMNS = ["PCA components","# estimators","Max tree depth","set","FWHM"]
_NN_COLUMNS = ["Hidden layers","PCA components","set","FWHM"]

# df / df_new stay module-level names, rebound for every file as before.
df, df_new = _load_long_results("data/results/XGBoost_g511unfiltered_10350-14000.csv", _xgb_label_fields, _XGB_COLUMNS)
px.box(df_new, x="PCA components", y="FWHM", color="# estimators",facet_col="set",
       hover_data=["set","# estimators"], points="all"
       ).update_traces(boxmean=True).update_layout(title="Box plots of XGBoost hyperparameter optimization (g511 & g1274 unfiltered)").show()

df, df_new = _load_long_results("data/results/XGBoost2_g511unfiltered_10350-14000.csv", _xgb_label_fields, _XGB_COLUMNS)
px.box(df_new, x="Max tree depth", y="FWHM", color="# estimators",facet_col="set", facet_row="PCA components",
       hover_data=["set","# estimators"], points="all"
       ).update_traces(boxmean=True).update_layout(title="Box plots of XGBoost hyperparameter optimization (g511unfiltered, 10350<E<14000)", height=750).show()

# PCA / hidden-layer scans: identical test-set box plot for each results file.
for csv_name, plot_title in [
    ("PCA_testing_g1274_4000-5000.csv", "Box plots of PCA analysis (g1274, 4000<E<5000)"),
    ("PCA_testing_g511_10350-14000.csv", "Box plots of PCA analysis (g511, 10350<E<14000)"),
    ("PCA_testing_g511_11050-11250.csv", "Box plots of PCA analysis (g511, 11050<E<11250)"),
    ("PCA_testing_g511unfiltered_11050-11250.csv", "Box plots of PCA analysis (g511 unfiltered, 11050<E<11250)"),
    ("PCA_testing_2-128_th60.csv", "Box plots of PCA analysis (th60)"),
]:
    df, df_new = _load_long_results("data/results/"+csv_name, _nn_label_fields, _NN_COLUMNS)
    px.box(df_new[df_new["set"] == "test"], x="PCA components", y="FWHM", color="Hidden layers",
           hover_data=["set"], points="all"
           ).update_traces(boxmean=True).update_layout(title=plot_title).show()

# Combined g511 & g1274 scan: the value column is called "sigma" here but still
# holds sigma*mult, hence the explicit "FWHM" axis title. Shown for both sets.
df, df_new = _load_long_results("data/results/PCA_testing_gBOTH_4000-11250.csv", _nn_label_fields,
                                ["Hidden layers","PCA components","set","sigma"])
for subset in ("test", "train"):
    px.box(df_new[df_new["set"] == subset], x="PCA components", y="sigma", color="Hidden layers",
           hover_data=["set"], points="all"
           ).update_traces(boxmean=True).update_layout(title=f"Box plots of PCA analysis g511 & g1274 COMBINED ({subset})", yaxis_title="FWHM").show()

In [None]:
# Fetch the MLflow runs of experiment 610951557482905968 and list the available columns
# (last expression of the cell, so the notebook displays it).
# NOTE(review): MLflow documents `search_all_experiments` as only honoured when
# `experiment_ids` is empty/None, so it is presumably a no-op here - confirm.
all_runs = mlflow.search_runs(experiment_ids=["610951557482905968"],search_all_experiments=True)
all_runs.columns

In [None]:
# Value filter handed to box_plot(..., ignore_vals=...) by the first live box plot of this
# cell; an empty dict presumably filters nothing (TODO confirm against box_plot).
ignore_values = {} #{"Energy range used": "() eV"}

# FROM https://jse.amstat.org/v14n3/langford.html: "CDF Method 4 includes the middle measurement in the case of n= 4k + 1 and excludes it in the case of n= 4k + 3"

# # ??? / Sklearn NN, Na22 th.60 Ch[] / Energy included for training / 7 pts/box
# box = box_plot([713354320025437357], x="Hidden layers", y="FWHM Test", units=("","ns"), color="PCA components", ignore_vals=ignore_values, height=400, width=700, title=" ") # hover_name="model_version"
# box = box.update_layout(xaxis_range=[-.4,0.4],yaxis_range=[4,8],margin=dict(l=20, r=20, t=20, b=20))
# box.show()  # original FWHM 32.1653
# box.write_image("0_PCAHiddenLayerTesting_th60_exploration.pdf")

# # Hidden layers testing / Sklearn NN, Na22 th.06 Ch[0, 69] / Energy included for training / 7 pts/box
# box = box_plot([955484227220031017], x="Hidden layers", y="FWHM Test", units=("","ns"), color="PCA components", ignore_vals=ignore_values, height=400, width=700, title=" ")
# box = box.update_layout(xaxis_range=[-.4,4.4],margin=dict(l=20, r=20, t=20, b=20))
# box.show()  # original FWHM 8.0392
# box.write_image("1_PCAHiddenLayerTesting_th06_layers.pdf")

# # Layer size testing / Sklearn NN, Na22 th.06 Ch[0, 420] / Energy included for training / 7 pts/box
# box = box_plot([782813455858471214], x="Hidden layers", y="FWHM Test", units=("","ns"), color="PCA components", ignore_vals=ignore_values, height=400, width=700, title=" ")
# box = box.update_layout(xaxis_range=[-.4,5.4],margin=dict(l=20, r=20, t=20, b=20))
# box.show()  # original FWHM 8.0392
# box.write_image("2_PCAHiddenLayerTesting_th06_PandA.pdf")

# PCA testing? / Sklearn NN, Na22 th.06 Ch[0, 360] / Energy included for training / 7 pts/box
# Box plot of "FWHM Test" against "Hidden layers" for experiment 610951557482905968,
# coloured by "PCA components" with one facet row per "Alpha"; the axis ranges are then
# fixed by hand and the figure is shown and written to PDF.
box = box_plot([610951557482905968], x="Hidden layers", y="FWHM Test", units=("","ns"), color="PCA components", ignore_vals=ignore_values, facet_row="Alpha", height=400, width=700, title=" ")
box = box.update_layout(xaxis_range=[-.4,5.4],yaxis_range=[4.7,5.9],margin=dict(l=20, r=20, t=20, b=20))
box.show()  # original FWHM 8.0392
box.write_image("3_PCAHiddenLayerTesting_th06_alpha.pdf")

# Hidden layer size == PCA analysis / Sklearn NN, Na22 th.60 Ch[0, 60] / Energy included for training / 7 pts/box #, ignore_vals={}
# `ignore_values` is rebound here: a ("Hidden layers","PCA components") tuple key mapped to
# ["[16]"] and a list of PCA component counts as strings.
# NOTE(review): presumably this drops the listed combinations so only runs with matching
# layer size and component count remain - confirm against box_plot's ignore_vals handling.
ignore_values = {("Hidden layers","PCA components"):[["[16]"],[f"{x}" for x in [2,4,6,8,10,12,14,18,20,21,22,23,24,28,32,36,40,44,48,56,64]]]}
box = box_plot([568754552756492242], x="PCA components", y="FWHM Test", units=("","ns"), load_cols="Hidden layers", ignore_vals=ignore_values, height=400, width=700, show_original_FWHM=8.0392, title=" ")
box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20))
box.show()
box.write_image("4_PCAHiddenLayerTesting_th60_PandAself.pdf")

# Hidden layer == 16 PCA analysis / Sklearn NN, Na22 th.60 Ch[0, 60] / Energy included for training / 7 pts/box
# Same experiment, now ignoring every listed "Hidden layers" value of the form "[n]";
# "[16]" is not in the list. Legend is switched off.
ignore_values = {"Hidden layers":[f"[{x}]" for x in [2,4,6,8,10,12,14,18,20,21,22,23,24,28,32,36,40,44,48,56,64]]}
box = box_plot([568754552756492242], x="PCA components", y="FWHM Test", units=("","ns"), color="Hidden layers", ignore_vals=ignore_values, height=400, width=700, show_original_FWHM=8.0392, showlegend=False, title=" ")
box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20))
box.show()
box.write_image("5_PCAHiddenLayerTesting_th60_PandA16.pdf")

In [None]:
# Print the length of the selection of the detector-2 data set with energies between
# converter[2][0] and 70000.
# NOTE(review): `converter` and `data` are assigned in cells further down the notebook, so
# those cells have to be executed before this one.
print(len(select_from_source(data[name_g511_d(2)],select_energies=(converter[2][0],70000))))

In [None]:
# Prediction histograms of XGBoostedTree model version 537 on detectors 0, 1 and 2.
# The PCA is fitted once on channel-0 waveforms with energies 10300-12000 and reused for
# all three detectors.
# NOTE(review): `fitted_pca` is a single fitted object here, while a later cell rebinds the
# same name to a dict keyed by model version - the two cells overwrite each other.
fitted_pca: TruncatedSVD = fitted_PCA(537, get_waveforms(source_data=data_dict[data_g511_partfltr_name_all],select_energies=(10300,12000),select_channels=0), "XGBoostedTree")
# One figure per detector, each starting at that detector's converter[...][0] energy and
# running up to 70000, binned as [-60,70,2].
fig_tree_predict0 = plot_predictions(name_g511_d(0),(converter[0][0],70000), 537, data, "XGBoostedTree", 0, PCA_fit=fitted_pca, mode="Line", bins=[-60,70,2]) # expected: 8.276, bins=[-50,100,.5]
fig_tree_predict1 = plot_predictions(name_g511_d(1),(converter[1][0],70000), 537, data, "XGBoostedTree", 1, PCA_fit=fitted_pca, mode="Line", bins=[-60,70,2]) # expected: 8.276, bins=[-50,100,.5]
fig_tree_predict2 = plot_predictions(name_g511_d(2),(converter[2][0],70000), 537, data, "XGBoostedTree", 2, PCA_fit=fitted_pca, mode="Line", bins=[-60,70,2]) # expected: 8.276, bins=[-50,100,.5]
# Export layout: tight margins, wide and low, no title, explicit axis titles.
fig_tree_predict0 = fig_tree_predict0.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=300, width=1100, title="", xaxis_title="time [ns]", yaxis_title="Prevalence") #, showlegend=False
fig_tree_predict1 = fig_tree_predict1.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=300, width=1100, title="", xaxis_title="time [ns]", yaxis_title="Prevalence")
fig_tree_predict2 = fig_tree_predict2.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=300, width=1100, title="", xaxis_title="time [ns]", yaxis_title="Prevalence")
# Show all three first, then write them to PDF.
fig_tree_predict0.show()
fig_tree_predict1.show()
fig_tree_predict2.show()
fig_tree_predict0.write_image("12_tree_gaussians_v537_ch0.pdf")
fig_tree_predict1.write_image("13_tree_gaussians_v537_ch1.pdf")
fig_tree_predict2.write_image("14_tree_gaussians_v537_ch2.pdf")

In [None]:
# ??? analysis / Sklearn NN, Na22 th.60 Ch[0, 60] / Energy included for training / 7 pts/box
# ignore_values = {"Hidden layers":[f"[{x}]" for x in [2,4,6,8,10,12,14,18,20,21,22,23,24,28,32,36,40,44,48,56,64]]}
# g511[0-10]: 701038535816704618, testing basic PCA & hidden layer setups
# g511[0-20]: 913292701605665259, testing basic PCA & hidden layer setups
# g1274[0-20]: 522945764508646794, testing basic PCA & hidden layer setups -> 23 is best
# g511[0-13]: 855799201150735971, testing basic PCA & hidden layer setups -> 22 is best
# 460998677625537521 - small test, 22 best
# 167269493197702931 - reasonably larger test, 23 best
# 915080864014606603 - unfiltered data set, ???
# 7: 616258580092295977 - XGBTree, Na22 g511 Ch0: XGBT models, g511 unfiltered 10.35k-14k (len 22220, te/tr: 50/50)
# 8: 737333999357308260 - XGBTree, Na22 Ch[0, 69]: XGBT models, unfiltered combined raw-5k_11.05k-11.25k, (10.35k-14k, 10.3k-12k) (len 41039, te/tr: 50/50)
# 9: 827539381019731967 - XGBTree, Na22 Ch0: XGBT models - normalized_partfiltered_data_bothGates_4-5k_11.05k-11.25k (len 41039, te/tr: 50/50)
#10: 102816600889877627 - XGBTree, Na22 Ch[0, 23]: XGBT models, dT correcting instead of Tfit
#11: 941575026271596123 - SKLearnNN, Na22 Ch[0, 2, 5, 8, 9, 10]: 23,100,None PCA; [23],[16],[100]?; (5000,13000),(5000,8000),(...)
#12: 824072748444866548 - SKLearnNN, Na22 Ch[0, 2, 5, 7, 9, 10]: 23,100,None PCA; [23],[16],[100]?; (5000,13000),(5000,8000),(...)

DEFAULT_FWHM = 8.0974

# Divisor for each "metrics.Uniform test FWHM E<range>" column, keyed by energy range.
# NOTE(review): presumably the reference FWHM measured in that energy window - confirm.
_RANGE_FWHM_DIVISORS = {"5000-6000":13.3579,"6000-7000":11.841,"7000-8000":10.3735,"8000-9000":10.2916,"9000-10000":9.4387,"10000-11000":11.2823,"11000-12000":7.0638}


def square_FWHM_metric(df: pd.DataFrame, *, scale: float = DEFAULT_FWHM) -> pd.Series:
    """Return the mean squared relative FWHM over all energy ranges, times *scale*.

    For every energy range the column "metrics.Uniform test FWHM E<range>" is
    divided by its table value and squared; the results are averaged and
    multiplied by *scale*.

    Args:
        df: frame holding one "metrics.Uniform test FWHM E<range>" column per range.
        scale: multiplier, bound to this cell's DEFAULT_FWHM when the function is
            defined so that a later cell rebinding DEFAULT_FWHM cannot change it.
    """
    ratios = (df[f"metrics.Uniform test FWHM E{e_range}"]/e_val for e_range,e_val in _RANGE_FWHM_DIVISORS.items())
    return sum(ratio**2 for ratio in ratios) / len(_RANGE_FWHM_DIVISORS) * scale


def linear_FWHM_metric(df: pd.DataFrame, *, scale: float = DEFAULT_FWHM) -> pd.Series:
    """Return the mean relative FWHM over all energy ranges, times *scale*.

    Same as ``square_FWHM_metric`` but without squaring the per-range ratios.

    Args:
        df: frame holding one "metrics.Uniform test FWHM E<range>" column per range.
        scale: multiplier, bound to this cell's DEFAULT_FWHM at definition time.
    """
    ratios = (df[f"metrics.Uniform test FWHM E{e_range}"]/e_val for e_range,e_val in _RANGE_FWHM_DIVISORS.items())
    return sum(ratios) / len(_RANGE_FWHM_DIVISORS) * scale


# Both metrics expose the FWHM value they are scaled with as a function attribute.
linear_FWHM_metric.FWHM = square_FWHM_metric.FWHM = DEFAULT_FWHM

# metric_name = "custom_metric_name"

# ignore_vals = {"Number of estimators":["5","10","20","50"]}
# box = box_plot([616258580092295977], x="Number of estimators", y="FWHM Test", units=("","ns"), color="Max tree depth", facet_row="PCA comp", ignore_vals=ignore_vals, title=" ") #, show_original_FWHM=8.0392
# box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=400, width=700) #, showlegend=False
# box.show()
# box.write_image("7a_TreeTesting_g511_leavesdepth.pdf")

# ignore_vals = {"Number of estimators":["1","2","3"]}
# box = box_plot([616258580092295977], x="Number of estimators", y="FWHM Test", units=("","ns"), color="Max tree depth", facet_row="PCA comp", ignore_vals=ignore_vals, title=" ") #, show_original_FWHM=8.0392
# box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=400, width=700) #, showlegend=False
# box.show()
# box.write_image("7b_TreeTesting_g511_leavesdepth.pdf")

# box = box_plot([737333999357308260], x="PCA components", y="FWHM Test", units=("","ns"), color="Number of estimators", facet_row="Max tree depth", ignore_vals={}, title=" ") #, show_original_FWHM=8.0392
# box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=400, width=700) #, showlegend=False
# box.show() # Unfiltered combined raw
# box.write_image("8_TreeTesting_gBOTH_leavesdepth.pdf")

# Experiment 827539381019731967: "FWHM Test" against "PCA components", coloured by
# "Number of estimators", one facet row per "Max tree depth"; shown and written to PDF.
box = box_plot([827539381019731967], x="PCA components", y="FWHM Test", units=("","ns"), color="Number of estimators", facet_row="Max tree depth", ignore_vals={}, title=" ") #, show_original_FWHM=8.0392
box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=400, width=700) #, showlegend=False
box.show()
box.write_image("9_TreeTesting_gBOTH_leavesdepth.pdf")

#DEFAULT FWHM for below: 8.0974
# Experiment 102816600889877627 with an extra facet column per "Energy range"; entries
# whose "Energy range" is None are passed as values to ignore.
ignore_vals = {"Energy range":[None]}#,"Max tree depth":[None]}
box = box_plot([102816600889877627], x="PCA components", y="FWHM Test", units=("","ns"), color="Number of estimators", facet_row="Max tree depth", facet_col="Energy range", ignore_vals=ignore_vals, title=" ",hover_name="model_version") #, show_original_FWHM=8.0392
box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=600, width=1000) #, showlegend=False
box.update_layout(height=600).show() # dT correcting instead of Tfit
box.write_image("10a_TreeTesting_g511_leavesdepth.pdf")

# Same plot with the callable square_FWHM_metric as y instead of a column name; the
# y-axis range is fixed by hand before showing.
box = box_plot([102816600889877627], x="PCA components", y=square_FWHM_metric, units=("","ns"), color="Number of estimators", facet_row="Max tree depth", facet_col="Energy range", ignore_vals=ignore_vals, title=" ",hover_name="model_version", custom_metric_name="Rel. Sq. FWHM Test") #, show_original_FWHM=8.0392
box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=600, width=1000) #, showlegend=False
box.update_layout(height=600,yaxis_range=[2.9,8.3]).show()  # dT correcting instead of Tfit
box.write_image("10b_TreeTesting_g511_leavesdepth.pdf")

# Print the stored information of XGBoostedTree model version 733.
print(ModelInfo.from_database("XGBoostedTree", 733))
# fitted_pca = fitted_PCA(733, get_waveforms(source_data=data_dict[data_g511_partfltr_name_all],select_energies=(8000,50000)), "XGBoostedTree")
# plot_predictions(data_g511_partfltr_name_all, (4000,5000), 733, data_dict, "XGBoostedTree", PCA_fit=fitted_pca, bins=[-50,100,.5]).show()
# plot_predictions(data_g511_partfltr_name_all, (6000,7000), 733, data_dict, "XGBoostedTree", PCA_fit=fitted_pca, bins=[-50,100,.5]).show()
# plot_predictions(data_g511_partfltr_name_all, (10000,11000), 733, data_dict, "XGBoostedTree", PCA_fit=fitted_pca, bins=[-50,100,.5]).show()


In [None]:
DEFAULT_FWHM = 16.9744


def square_FWHM_metric(df: pd.DataFrame, *, scale: float = DEFAULT_FWHM) -> pd.Series:
    """Return the mean squared relative FWHM over all energy ranges, times *scale*.

    For every energy range the column "metrics.Uniform test FWHM E<range>" is
    divided by its table value and squared; the results are averaged and
    multiplied by *scale*.

    Args:
        df: frame holding one "metrics.Uniform test FWHM E<range>" column per range.
        scale: multiplier, bound to this cell's DEFAULT_FWHM when the function is
            defined, so re-running another cell that rebinds DEFAULT_FWHM cannot
            silently rescale this metric.
    """
    # NOTE(review): divisors are presumably the reference FWHM per energy window - confirm.
    ranges = {"5000-6000":19.7501,"6000-7000":18.9533,"7000-8000":18.8486,"8000-9000":18.4477,"9000-10000":14.6593,"10000-11000":21.4188,"11000-12000":10.6011}
    ratios = (df[f"metrics.Uniform test FWHM E{e_range}"]/e_val for e_range,e_val in ranges.items())
    return sum(ratio**2 for ratio in ratios) / len(ranges) * scale


# The metric exposes the FWHM value it is scaled with as a function attribute.
square_FWHM_metric.FWHM = DEFAULT_FWHM
# Results [0,2,5,7,9,10] - Tfit vs T0 parameter search
# box = box_plot([824072748444866548], x="PCA components", y="FWHM Test", units=("","ns"), color="Tref", facet_row="Energy range", hover_name="model_version", ignore_vals={})#, title=" ") #, show_original_FWHM=8.0392
# box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=800, width=1000) #, showlegend=False
# box.show()
# box.write_image("11_TreeTesting_gBOTH_leavesdepth.pdf")

# box = box_plot([824072748444866548], x="PCA components", y=square_FWHM_metric, color="Tref", facet_row="Energy range", hover_name="model_version", ignore_vals={})#, title=" ") #, show_original_FWHM=8.0392
# box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=800, width=1000) #, showlegend=False
# box.show()
# box.write_image("11_TreeTesting_gBOTH_leavesdepth.pdf")

# Results [0,2] for hyperparameter search, 10x repeated, from model 2v148 onwards reliable square metric
# box = box_plot([323381473077277848], x="PCA components", y="FWHM Test", units=("","ns"), color="Hidden layers", facet_row="Energy range", hover_name="model_version", ignore_vals={"Hidden layers":"[16]","PCA components":"None"})#, title=" ") #, show_original_FWHM=8.0392
# box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=400, width=500) #, showlegend=False
# box.show()
# ignore_values = {("Hidden layers","PCA components"):[["[16]"],[str(v) for v in [4,8,12,24,28,32,48,64,128]]]}
# box = box_plot([323381473077277848], x="PCA components", y="FWHM Test", units=("","ns"), color="Hidden layers", facet_row="Energy range", hover_name="model_version", ignore_vals=ignore_values)#, title=" ") #, show_original_FWHM=8.0392
# box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=400, width=500) #, showlegend=False
# box.show()

# box.write_image("11_TreeTesting_gBOTH_leavesdepth.pdf")
# SQUARE METRIC IS BROKEN FOR OLDER TESTS, DO NOT USE:
# Experiment 323381473077277848: "FWHM Test" against "PCA components", coloured by
# "Hidden layers", one facet row per "Energy range". Shown only, not written to file.
box = box_plot([323381473077277848], x="PCA components", y="FWHM Test", units=("","ns"), color="Hidden layers", facet_row="Energy range", hover_name="model_version", ignore_vals={})#, title=" ") #, show_original_FWHM=8.0392
box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=800, width=1000) #, showlegend=False
box.show()
# Same experiment with the callable square_FWHM_metric of this cell as y; `sort_by` is
# passed through as an empty list.
sort_by = [] #"PCA components"
box = box_plot([323381473077277848], x="PCA components", y=square_FWHM_metric, units=("","ns"), color="Hidden layers", facet_row="Energy range", hover_name="model_version", ignore_vals={}, sort_by=sort_by)#, title=" ") #, show_original_FWHM=8.0392
box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=800, width=1000) #, showlegend=False
box.show()
# box.write_image("11_TreeTesting_gBOTH_leavesdepth.pdf")

In [None]:
# Final NN test (experiment 492805174353787986): "FWHM Test" against "PCA components",
# coloured by "Hidden layers", with "Hidden layers" and "PCA components" passed as
# `sort_by`. The figure is shown and written to PDF.
sort_by = ["Hidden layers","PCA components"]
box = box_plot([492805174353787986], x="PCA components", y="FWHM Test", units=("","ns"), color="Hidden layers", hover_name="model_version", ignore_vals={},sort_by=sort_by)#, title=" ") #, show_original_FWHM=8.0392
box = box.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=400, width=700, title="") #, showlegend=False
box.show()
box.write_image("19_FinalTestNN_good_boxes.pdf")

In [None]:
# Waveform fitting for last experiment
# `data`: named selections of datag511partfltr_all (one channel or a list of channels,
# each within an energy window), merged with the data_dict entries of detectors 0-11
# under both their name_g511_d and name2_g511_d keys.
data = {#data_g511_partfltr_name: datag511partfltr_all,select_channels=0,
        "partfilter_g511_E8000-50000":select_from_source(datag511partfltr_all,select_channels=0,select_energies=(8000,50000)),
        "partfilter_g511_E5000-13000":select_from_source(datag511partfltr_all,select_channels=0,select_energies=(5000,13000)),
        "partfilter_g511_all_E5000-13000":select_from_source(datag511partfltr_all,select_channels=[0,2,5,8,9,10],select_energies=(5000,13000)),
        "partfilter_g511_all2_E5000-13000":select_from_source(datag511partfltr_all,select_channels=[0,2,5,7,9,10],select_energies=(5000,13000)),
        "partfilter_g511_all2_E5000-50000":select_from_source(datag511partfltr_all,select_channels=[0,2,5,7,9,10],select_energies=(5000,50000))} | {
        name_g511_d(detector): data_dict[name_g511_d(detector)] for detector in [0,1,2,3,4,5,6,7,8,9,10,11]} | {
        name2_g511_d(detector): data_dict[name2_g511_d(detector)] for detector in [0,1,2,3,4,5,6,7,8,9,10,11]}
# Waveforms used to fit the PCA of the two "MLPRegressorModel" versions below.
waveforms5000_13000 = get_waveforms(source_data=data["partfilter_g511_E5000-13000"])
waveforms8000_50000 = get_waveforms(source_data=data["partfilter_g511_E8000-50000"])
# waveforms5000_50000 = get_waveforms(source_data=data["partfilter_g511_all2_E5000-50000"])
# `fitted_pca` maps model version -> result of fitted_PCA for that version
# (2949 uses the 8000-50000 waveforms, 2897 the 5000-13000 ones).
fitted_pca = {model_v: fitted_PCA(model_v, fitforms, "MLPRegressorModel") for model_v,fitforms in {2949:waveforms8000_50000, 2897:waveforms5000_13000}.items()}

# "MLPRegressorModel2" versions: waveforms are taken from rows whose "s199" column is at
# least 0.95, within energies 5000-50000.
# NOTE(review): the meaning of column "s199" is not visible here - confirm.
source_dat = select_from_source(datag511partfltr_all,select_channels=[0,2])
waveforms_v2 = get_waveforms(source_data=source_dat.loc[source_dat["s199"] >= .95], select_energies=(5000,50000))
source_dat_large = select_from_source(datag511partfltr_all2,select_channels=[0,1,3,5,7,9,10])
waveforms_v2_large = get_waveforms(source_data=source_dat_large.loc[source_dat_large["s199"] >= .95], select_energies=(5000,50000))
# Versions 191/198/179/166 use the channel [0,2] waveforms, 209/210/213/219 the larger
# channel set; versions 230 and 231 get no PCA fit at all (None).
fitted_pca |= {model_v: fitted_PCA(model_v, waveforms_v2, "MLPRegressorModel2") for model_v in [191,198,179,166]}
fitted_pca |= {model_v: fitted_PCA(model_v, waveforms_v2_large, "MLPRegressorModel2") for model_v in [209,210,213,219]}
fitted_pca |= {model_v: None for model_v in [230,231]}

def model_name(integer):
    """Return the registered model name belonging to a model version number.

    Version numbers below 1500 map to "MLPRegressorModel2", everything else
    to "MLPRegressorModel".
    """
    if integer < 1500:
        return "MLPRegressorModel2"
    return "MLPRegressorModel"


# Quick check of both branches.
print(model_name(213), model_name(2949))
# model_name = {166: "MLPRegressorModel2", 179: "MLPRegressorModel2", 191: "MLPRegressorModel2", 198: "MLPRegressorModel2", 2897: "MLPRegressorModel", 2949: "MLPRegressorModel"}
# Eleastsquares 2949:  8000-50000 100-[23]  @ 5.599, 2897:  5000-13000 ALL-[100] @ 5.608, 2851:  5000-13000 100-[16] @ 5.605
#                739:  8000-13000 16-4*40   @ 3.444,  748:  8000-13000 16-4*50   @ 3.475,  743:  8000-50000 16-4*50  @ 3.512
# Elinear       2897:  5000-13000 ALL-[100] @ 5.651, 2949:  8000-50000 100-[23]  @ 5.654, 2851:  5000-13000 100-[16] @ 5.662
#                748:  8000-13000 16-4*50   @ 4.367,  743:  8000-50000 16-4*50   @ 4.410,  739:  8000-13000 16-4*40  @ 4.447
# Try 2995 also (everything from 2997 and up is BAD: dt correcting), try 2935 (FWHM 7.02) but 2949 (FWHM 6.96)

# Pair of reference values handed to calc_ab for every channel that gets a fitted
# correction. Channels 4 and 6 (LaBr channels) are absent and keep the identity (1,0).
_calc_ab_inputs = {
    0: (4477,11197),
    1: (4623,11538),
    2: (4212,10512),
    3: (4672,11662),
    5: (1582,3948),
    7: (4747,11866),
    8: (4303,10727),
    9: (4750,11861),
    10: (4113,10268),
    11: (4474,11157),
}
# Channel -> (a, b) energy correction for channels 0-11, in channel order.
e_corrections = {
    channel: calc_ab(*_calc_ab_inputs[channel]) if channel in _calc_ab_inputs else (1,0)
    for channel in range(12)
}

In [None]:
# Print what three of the calc_ab corrections do to the values 5000 and 50000.
for reference_pair in [(4477,11197), (4212,10512), (1582,3948)]:
    corr_tup = calc_ab(*reference_pair)
    slope, offset = corr_tup[0], corr_tup[1]
    print(slope * 5000 + offset, slope * 50000 + offset)
# For the last correction, also print where its own two reference values end up.
print(slope * 1582 + offset, slope * 3948 + offset)

In [None]:
# Plot predictions for last experiment
# break
# for channel,model_v,which in [(0,198)]:  # Fitting takes about 1.5 min, plotting is nearly instant
#     kwargs = {"PCA_fit":fitted_pca[model_v],"which":"Tfit","bins":[-40,80,1],"xaxis_title":"time [ns]","mode":"Line","shift":0,"show_dt":False,
#               "title":f"Predictions of NN2 v{model_v} on Ch{channel}, @E 2500 (arb. units, binwidth 250)"}
#     plot_predictions(name_g511_d(channel),(2375,2625),model_v,data,"MLPRegressorModel2",channel,**kwargs).show()
#     plot_predictions(name_g511_d(channel),(4875,5125),model_v,data,"MLPRegressorModel2",channel,**kwargs).show()
#     plot_predictions(name_g511_d(channel),(8875,9125),model_v,data,"MLPRegressorModel2",channel,**kwargs).show()
# Try: 210, 213, 230, 231
# energy_line_plot(name2_g511_d(2), 100, 1300, 50, 230, data, "MLPRegressorModel2", PCA_fit=None, correct_energy=e_corrections[2],hist_limit=700,verbose=0,y_sd=None,line_shape="hvh")

In [None]:
# energy_histogram(name_g511_d(7), data, bins=[0,14_000,10])
plotting.settings(show_dt=False,show_pred=False,default_plot_mode="Line")
# Trace label per model version: "<version> TCh(<channels the model was trained on>)".
# NOTE(review): the 219 label follows the other models fitted on waveforms_v2_large - confirm.
trace_labels = {191: "191 TCh(0,2)", 198: "198 TCh(0,2)", 209: "209 TCh(0-11)", 210: "210 TCh(0-11)", 213: "213 TCh(0-11)",
                219: "219 TCh(0-11)", 230: "230 TCh(0-11)", 231: "231 TCh(0-11)", 2897: "2897 TCh(0)", 2949: "2949 TCh(0)"}
plot_models = [219] #191, 198, 2897, 2949
# Labels come from the model list so a trace can no longer carry another model's name
# (previously the single trace of model 219 was labelled "209 TCh(0-11)").
trace_names = [trace_labels[model_v] for model_v in plot_models]
# Channel -> combined figure (model energy line plots plus the channel's energy histogram).
channel_dict = {}
for ch in [0,1,2,3,4,5,7,8,9,10,11]: # 0,1,2,3,4,5,7,8,9,10,11
    fig_elines = []
    for model_v in plot_models:
        fig_eline = energy_line_plot(name2_g511_d(ch), 100, 1300, 50, model_v, data, model_name(model_v), PCA_fit=fitted_pca[model_v], correct_energy=e_corrections[ch],hist_limit=700,verbose=0,y_sd=None,line_shape="hvh")
        fig_elines.append(fig_eline)
    fig_ehist = energy_histogram(name2_g511_d(ch), data, select_energies=(0,1300), bins=[0,1400,2], correct_energy=e_corrections[ch], xaxis_title="Energy [keV]",
                                 title=f"Predictions of NNs on channel {ch} (trained on channel TCh)", colors=["rgba(0,0,0,0.5)"])
    # First line plot is the base figure, any further models are merged into it.
    combined_eline = combine_line_plots(fig_elines[0], fig_elines[1:], trace_names=trace_names, equality_tolerance=2e-3)
    channel_dict[ch] = add_energy_histogram(combined_eline, fig_ehist)

In [None]:
# Show the combined figures of channels 2, 8 and 11 (channel 4 is currently disabled).
for shown_channel in (2, 8, 11):
    channel_dict[shown_channel].show()

In [None]:
# Combined line plots of all test models, once over every listed channel and once over
# channels 2, 8 and 11 only.
plotting.settings(show_dt=False,show_pred=False,default_plot_mode="Line")
channels = [0,1,2,3,5,7,8,9,10,11] # Trained on 0, 1, 3, 5, 7, 9, 10, test on 2, 8, 11
on_data = {ch:name2_g511_d(ch) for ch in channels}
test_models = [191, 198, 209, 210, 213, 219, 230, 231, 2949]
model_names = {m:model_name(m) for m in test_models}
# One trace name per entry of test_models, in the same order.
trace_names = ["191", "198", "209", "210", "213", "219", "230", "231", "2949"]

fig_combined_all = combined_channel_line_plot(on_data, test_models, model_names, channels, fitted_pca, data, e_corrections, trace_names, equality_tolerance=1e-2, line_shape="hvh")
fig_combined_test = combined_channel_line_plot(on_data, test_models, model_names, [2,8,11], fitted_pca, data, e_corrections, trace_names, equality_tolerance=1e-2, line_shape="hvh")
# Export layout: tight margins, fixed size, no title.
fig_combined_all = fig_combined_all.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=400,width=1000, title="") #, showlegend=False #height=400, width=700
fig_combined_test = fig_combined_test.update_layout(margin=dict(l=20, r=20, t=20, b=20), height=400,width=1000, title="") #, showlegend=False #height=400, width=700

# NOTE(review): the return value of change_combined_line_fig is only shown, while the PDFs
# are written from fig_combined_all / fig_combined_test; whether the removed/hidden traces
# end up in the PDFs depends on change_combined_line_fig editing its argument in place - confirm.
change_combined_line_fig(fig_combined_all, remove_traces=[1,2,4,8,9], hide_traces=[3,5,6,7]).show()
change_combined_line_fig(fig_combined_test, remove_traces=[1,2,4,8,9], hide_traces=[3,5,6,7]).show()
fig_combined_all.write_image("20_FinalTestNN_hist_all.pdf")
fig_combined_test.write_image("21_FinalTestNN_hist_test.pdf")
# fig_combined_all._dfs[0]

In [None]:
def convert_range(s, e, ch):
    """Apply the linear correction of channel *ch* to both ends of a range.

    Returns ``(s*a + b, e*a + b)`` where ``(a, b)`` are the first two entries of
    ``e_corrections[ch]``.
    """
    a, b = e_corrections[ch][0], e_corrections[ch][1]
    return (s*a+b, e*a+b)


# Per-channel (start, end) windows; `converter2` holds a second, higher window per channel.
converter = {0:(4160,4601), 1:(4297,4750), 2:(3915,4328), 5:(1470.4,1625.5), 7:(4411,4878), 8:(4000,4421),11:(4159,4597)}
converter2 = {0:(8343,9224), 1:(8602,9508), 2:(7837,8663), 5:(2943.5,3253.3), 7:(8843,9776), 8:(7999,8841),11:(8320,9195)}

# Print each channel's correction next to its corrected window: first every window of
# `converter`, then every window of `converter2`.
for window_table in (converter, converter2):
    for window_ch, (window_start, window_end) in window_table.items():
        print(e_corrections[window_ch], convert_range(window_start, window_end, window_ch))
# print(convert_range(9195,70000,0))
# e_corrections = {0: calc_ab(4477,11197),
#                  1: calc_ab(4623,11538),
#                  2: calc_ab(4212,10512),
#                  3: calc_ab(4672,11662),
#                  4: (1,0),  # LaBr channel
#                  5: calc_ab(1582,3948),
#                  6: (1,0),  # LaBr channel
#                  7: calc_ab(4747,11866),
#                  8: calc_ab(4303,10727),
#                  9: calc_ab(4750,11861),
#                  10:calc_ab(4113,10268),
#                  11: calc_ab(4474,11157)}
plotting.settings(show_dt=False,show_pred=False,default_plot_mode="Line")

# Layout applied to every figure after it has been shown (tight margins, fixed size, no title).
export_layout = dict(margin=dict(l=20, r=20, t=20, b=20), height=400,width=1000, title="")

# For each channel: prediction histograms of MLPRegressorModel2 version 231 in four energy
# selections - the `converter` window, the `converter2` window, and from the start of
# either window up to 70000.
for i,ch in enumerate([0,1,2,5,7,11]):
    # Channel 11 gets a lower left bin edge than the other channels.
    bs = -70 if ch == 11 else -30
    print("Plots for channel",ch,":")
    fig_475_525 = plot_predictions(name_g511_d(ch),converter[ch],231,data,"MLPRegressorModel2",ch, PCA_fit=fitted_pca[231],bins=[bs,35,1],xaxis_title="time [ns]",yaxis_title="Prevalence")
    fig_475_525.show()
    fig_475_525 = fig_475_525.update_layout(**export_layout)
    fig_950_1050 = plot_predictions(name_g511_d(ch),converter2[ch],231,data,"MLPRegressorModel2",ch, PCA_fit=fitted_pca[231],bins=[bs,35,1],xaxis_title="time [ns]",yaxis_title="Prevalence")
    fig_950_1050.show()
    # Fixed: this used to call fig_475_525.update_layout(...), which replaced the
    # 950-1050 figure by the 475-525 one and left the real figure without export layout.
    fig_950_1050 = fig_950_1050.update_layout(**export_layout)
    fig_1050_70k = plot_predictions(name_g511_d(ch),(converter2[ch][0],70000),231,data,"MLPRegressorModel2",ch, PCA_fit=fitted_pca[231],bins=[bs,40,1],xaxis_title="time [ns]",yaxis_title="Prevalence")
    fig_1050_70k.show()
    fig_1050_70k = fig_1050_70k.update_layout(**export_layout)
    fig_475_70k = plot_predictions(name_g511_d(ch),(converter[ch][0],70000),231,data,"MLPRegressorModel2",ch, PCA_fit=fitted_pca[231],bins=[bs,40,1],xaxis_title="time [ns]",yaxis_title="Prevalence")
    fig_475_70k.show()
    fig_475_70k = fig_475_70k.update_layout(**export_layout)
    # fig_475_525.write_image(f"{i*4+24}_FinalTestNN_hist_475_525_ch{ch}.pdf")
    # fig_950_1050.write_image(f"{i*4+25}_FinalTestNN_hist_950_1050_ch{ch}.pdf")
    # fig_1050_70k.write_image(f"{i*4+26}_FinalTestNN_hist_1050_70k_ch{ch}.pdf")
    # fig_475_70k.write_image(f"{i*4+27}_FinalTestNN_hist_475_70k_ch{ch}.pdf")

In [None]:
# Hand-entered tail estimates for NN model 231. Each cell is "<width> ns (<from> --- <to> ns)"
# and each relative factor is the T_pred - T_fit width divided by the T_fit width.
# Fixed: the Channel 2 right T_pred - T_fit tail read "19 ns" although its interval
# (-2 --- 21 ns) is 23 ns wide, which is also what the listed factor 0.72 = 23/32 uses.
latex_table = pd.DataFrame({"Left T$_{fit}$ tail estimate":["13 ns (-18 --- -5 ns)","-","-","10 ns (-18 --- -8 ns)","12 ns (-19 --- -7 ns)"],
                            "Left T$_{pred}$ - T$_{fit}$ tail estimate":["11 ns (-25 --- -14 ns)","-","-","15 ns (-6 --- 9 ns)","14 ns (-64 --- -50 ns)"],
                            "Relative factor left tail": ["0.85","-","-","1.5","1.2"],
                            "Right T$_{fit}$ tail estimate":["30 ns (6 --- 36 ns)","32 ns (6 --- 38 ns)","26 ns (10 --- 36 ns)","44 ns (6 --- 50 ns)","38 ns (7 --- 45 ns)"],
                            "Right T$_{pred}$ - T$_{fit}$ tail estimate":["19 ns (-4 --- 15 ns)","23 ns (-2 --- 21 ns)","16 ns (-11 --- 5 ns)","20 ns (21 --- 41 ns)","22 ns (-39 --- -17 ns)"],
                            "Relative factor right tail": ["0.63","0.72","0.62","0.45","0.58"],},
                            index = ["Channel 1","Channel 2","Channel 5","Channel 7","Channel 11"])
display(latex_table)
latex_table.to_latex("NN_231_tails_table.tex")
# Tails measured from the Gaussian edge, written as
# "T_pred - T_fit interval vs T_fit interval -> width ratio = relative factor":
#   Channel 1:  -4 to 15 vs 6 to 36 -> 19/30 = 0.63, -25 to -14 vs -18 to -5 -> 11/13 = 0.85.
#   Channel 2:  -2 to 21 vs 6 to 38 -> 23/32 = 0.72.
#   Channel 5:  -11 to 5 vs 10 to 36 -> 16/26 = 0.62.
#   Channel 7:  21 to 41 vs 6 to 50 -> 20/44 = 0.45, -6 to 9 vs -18 to -8 -> 15/10 = 1.5.
#   Channel 11: -39 to -17 vs 7 to 45 -> 22/38 = 0.58, -64 to -50 vs -19 to -7 -> 14/12 = 1.2.

In [None]:
# Combined line plot of models 191 and 2949 on channels 0, 1 and 2. The call is the last
# expression of the cell, so the notebook displays whatever it returns.
channels = [0,1,2]
test_models = [191, 2949]
# One trace name per entry of test_models, in the same order.
trace_names = ["191 TCh(0,2)", "2949 TCh(0)"]
model_names = {m:model_name(m) for m in test_models}
on_data = {ch:name2_g511_d(ch) for ch in channels}

combined_channel_line_plot(on_data, test_models, model_names, channels, fitted_pca, data, e_corrections, trace_names, line_shape="hvh")

In [None]:
# Inputs for the result tables: same models and channels as the combined line plots, but
# with to_table=1 so combined_channel_line_plot is asked for a table rather than a figure.
plotting.settings(show_dt=False,show_pred=False,default_plot_mode="Line")
channels = [0,1,2,3,5,7,8,9,10,11] # Trained on 0, 1, 3, 5, 7, 9, 10, test on 2, 8, 11
on_data = {ch:name2_g511_d(ch) for ch in channels}
# Earlier selection, kept for reference only (it was assigned and immediately overwritten):
# test_models = [191, 198, 209, 210, 213, 230, 231, 2897, 2949]
# trace_names = ["191 TCh(0,2)", "198 TCh(0,2)", "209 TCh(0-11)", "210 TCh(0-11)", "213 TCh(0-11)", "230 TCh(0-11)", "231 TCh(0-11)", "2897 TCh(0)", "2949 TCh(0)"]
test_models = [191, 198, 209, 210, 213, 219, 230, 231, 2949]
model_names = {m:model_name(m) for m in test_models}
# One trace name per entry of test_models, in the same order.
trace_names = ["191", "198", "209", "210", "213", "219", "230", "231", "2949"]

df_table_all = combined_channel_line_plot(on_data, test_models, model_names, channels, fitted_pca, data, e_corrections, trace_names, to_table=1, equality_tolerance=2e-3)
df_table_test = combined_channel_line_plot(on_data, test_models, model_names, [2,8,11], fitted_pca, data, e_corrections, trace_names, to_table=1, equality_tolerance=2e-3)

def s_percentage(df: pd.DataFrame, tracename: str) -> pd.Series:
    """Return the "[<tracename>] Tpred - Tref" column divided by the "Tfit" column."""
    return df[f"[{tracename}] Tpred - Tref"] / df["Tfit"]


def s_percentage_sd(df: pd.DataFrame, tracename: str) -> pd.Series:
    """Return the SD of ``s_percentage``.

    The relative SDs of numerator and denominator are added in quadrature and
    multiplied by the ratio itself.
    """
    rel_pred = df[f"[{tracename}] Tpred - Tref SD"] / df[f"[{tracename}] Tpred - Tref"]
    rel_fit = df["Tfit SD"] / df["Tfit"]
    return s_percentage(df, tracename) * np.sqrt(rel_pred**2 + rel_fit**2)


def s_gain(df: pd.DataFrame, tracename: str) -> pd.Series:
    """Return "Tfit" minus the "[<tracename>] Tpred - Tref" column."""
    return df["Tfit"] - df[f"[{tracename}] Tpred - Tref"]


def s_gain_sd(df: pd.DataFrame, tracename: str) -> pd.Series:
    """Return the SD of ``s_gain``: both SD columns added in quadrature.

    Fixed: the second term used the "[<tracename>] Tpred - Tref" value itself
    instead of its "... SD" column.
    """
    return np.sqrt(df["Tfit SD"]**2 + df[f"[{tracename}] Tpred - Tref SD"]**2)


def get_df_gain(df: pd.DataFrame, tracename: str) -> pd.DataFrame:
    """Return value, SD, relative gain and relative-gain SD columns for one trace.

    The columns are named "<tracename>", "<tracename> SD", "<tracename> Rel.Gain"
    and "<tracename> Rel.Gain SD".
    """
    return pd.DataFrame({
        tracename:df[f"[{tracename}] Tpred - Tref"],
        tracename+" SD":df[f"[{tracename}] Tpred - Tref SD"],
        tracename+" Rel.Gain":s_percentage(df, tracename),
        tracename+" Rel.Gain SD":s_percentage_sd(df, tracename),
        # tracename+" AbsGain":s_gain(df, tracename),
        # tracename+" AbsGain SD":s_gain_sd(df, tracename)
    })


def get_df_gain_combined(df: pd.DataFrame, *tracenames: str) -> pd.DataFrame:
    """Return "Tfit" and "Tfit SD" followed by the ``get_df_gain`` columns of every trace."""
    return pd.concat([df[["Tfit","Tfit SD"]]]+[get_df_gain(df, tracename) for tracename in tracenames],axis=1)


def get_df_means(dfs: dict, *tracenames: str) -> pd.DataFrame:
    """Return one row of column means per entry of *dfs*.

    Args:
        dfs: mapping of row label -> table accepted by ``get_df_gain_combined``.
        *tracenames: traces to include.
    """
    return pd.DataFrame({k: get_df_gain_combined(v,*tracenames).mean() for k,v in dfs.items()}).T

# Traces that end up in the exported tables.
compared_traces = ("209", "213", "219", "230", "231") #"191 TCh(0,2)", "198 TCh(0,2)"


def _split_rel_abs(df_sd):
    """Return the (relative, absolute) error-combined tables of *df_sd*.

    The relative table keeps columns whose name contains "Rel" or "Tfit", the
    absolute table keeps columns whose name does not contain "Rel". Both get
    an integer index.
    """
    rel_cols = [col for col in df_sd.columns if ("Rel" in col or "Tfit" in col)]
    abs_cols = [col for col in df_sd.columns if "Rel" not in col]
    rel_table = combine_cols_with_errors(df_sd[rel_cols], round=3)
    abs_table = combine_cols_with_errors(df_sd[abs_cols], round=3)
    rel_table.index = rel_table.index.astype(int)
    abs_table.index = abs_table.index.astype(int)
    return rel_table, abs_table


df_table_all_sd = get_df_gain_combined(df_table_all, *compared_traces)
df_table_test_sd = get_df_gain_combined(df_table_test, *compared_traces)

# Means over all channels and over the test channels only.
display(df_table_test_sd)
df_means = get_df_means({"All channel data (0-11, excl. 4,6)":df_table_all,"Test channel data (2,8,11)":df_table_test}, *compared_traces)
df_table_means = combine_cols_with_errors(df_means, round=3).T
display(df_table_means)
df_table_means.to_latex("NNmeans_table.tex")

# Test-channel tables.
df_table_test_sd_rel, df_table_test_sd_abs = _split_rel_abs(df_table_test_sd)
display(df_table_test_sd_rel)
display(df_table_test_sd_abs)
df_table_test_sd_rel.to_latex("NN_FWHM_table_test_relative.tex")
df_table_test_sd_abs.to_latex("NN_FWHM_table_test_absolute.tex")

# All-channel tables.
df_table_all_sd_rel, df_table_all_sd_abs = _split_rel_abs(df_table_all_sd)
display(df_table_all_sd_rel)
display(df_table_all_sd_abs)
df_table_all_sd_rel.to_latex("NN_FWHM_table_all_relative.tex")
df_table_all_sd_abs.to_latex("NN_FWHM_table_all_absolute.tex")
# df_table.to_latex()

---