## Reading data from ROOT using UpROOT

_For flat data structures, it is straightforward to read data in with uproot,
especially when the data fits in memory. In this example we read a ROOT file
and store its contents in a DataFrame. The data is then split 80/20: the first
80% is used to form the predicted hypothesis/spectrum, and the last 20% is used for validation._

In [None]:
# Notebook setup: extend the load path, load the packages used by the cells
# below, and select the matplotlib style sheet.
# "../src/" is added so that `using Batman` can find the package sources
# (presumably Batman lives there — confirm against the repository layout).
push!(LOAD_PATH, "../src/")
using Batman
using DataFrames
# import Random: rand
using Distributions
# using StatsPlots; pyplot()
using PyPlot
# Style sheet is referenced by bare file name.
# NOTE(review): presumably resolved relative to the working directory — confirm.
plt.style.use("bat.mplstyle")


### Readiness Checklist
_Once every item on this checklist is completed, the checklist can be removed and this module is complete._
- [x] Read in an arbitrary ROOT file (produced externally)
- [x] Produce multiple 1D PDFs
- [ ] Produce a 2D PDF
- [ ] Produce a ... 4D PDF?
- [x] Mock dataset to fit to a 1D PDF
- [ ] Fit to the 2D distribution (extended likelihood)
- [ ] Produce uncertainties
- [x] Bias
- [x] Pull

In [None]:
# MODEL MC
# Load the signal and background Monte Carlo samples that the histogram PDFs
# are built from.
# NOTE(review): the second argument "bat" is presumably the name of the tree
# inside each ROOT file — confirm against DataStructures.rootreader.
signalMC = DataStructures.rootreader("assets/signal.root", "bat")
bkgMC = DataStructures.rootreader("assets/background.root", "bat")
# Bin edges from -20 to 20 in steps of 0.1; reused by the HistogramPDF calls and
# by the histogram plots in later cells. The trailing `;` suppresses cell output.
bins = collect(-20:0.1:20);

In [None]:
# Build a two-parameter (signal, background), two-observable (energy, position)
# model from the MC samples, generate a mock dataset from it, fit it, and print
# the results. Statement order matters: every call below mutates `model` or
# state held inside Batman.
import Interpolations
# Build Model: First take PDF from histogram to get generic function
# Only the signal energy PDF is given an explicit extrapolation rule
# (Interpolations.Flat()); the other three rely on HistogramPDF's default.
# NOTE(review): confirm this asymmetry is intentional.
sig_itp = HistogramPDF(signalMC, :energy; bins=bins, extrapolate=Interpolations.Flat())
sig_pos = HistogramPDF(signalMC, :position; bins=bins)
# The `,` before `bins=` here is equivalent to the `;` used on the neighbouring lines.
bkg_itp = HistogramPDF(bkgMC, :energy, bins=bins)
bkg_pos = HistogramPDF(bkgMC, :position; bins=bins)

model = SpectralMonofit()
# Register the two fit parameters with values 20.0 and 500.0.
# NOTE(review): these are presumably the initial values (read back later as
# `.init`), and σ=1.0 presumably constrains the background — confirm against
# add_parameter!.
s = add_parameter!(model, "signal", 20.0)
b = add_parameter!(model, "background", 500.0; σ=1.0 )

# Observable ranges match the span of `bins` (-20 to 20).
add_observable!(model, :energy, -20.0, 20.0)
add_observable!(model, :position, -20.0, 20.0)
# Register dataset :tb1 with an empty DataFrame that has Float64 `energy` and
# `position` columns.
# NOTE(review): presumably filled in by generate_mock_dataset below — confirm.
add_dataset(:tb1, DataFrame(energy=Float64[], position=Float64[]))

# Each dataset should have a LogPDF of its own
# The commented-out variants below are alternative configurations
# (energy only, position only, energy twice); the active pair uses both
# observables.
#add_dataset(:tb1, data)
#f1 = constructPDF!(model, s, [sig_itp], [:energy], :tb1)
#f2 = constructPDF!(model, b, [bkg_itp], [:energy], :tb1)
#f1 = constructPDF!(model, s, [sig_pos], [:position], :tb1)
#f2 = constructPDF!(model, b, [bkg_pos], [:position], :tb1)
f1 = constructPDF!(model, s, [sig_itp,sig_pos], [:energy, :position], :tb1)
f2 = constructPDF!(model, b, [bkg_itp,bkg_pos], [:energy, :position], :tb1)
#f1 = constructPDF!(model, s, [sig_itp,sig_itp], [:energy, :energy], :tb1)
#f2 = constructPDF!(model, b, [bkg_itp,bkg_itp], [:energy, :energy], :tb1)
@show f1
#combinePDFs!(model, [f1, f2], :tb1)
# Combine the signal and background components for dataset :tb1.
combinePDFs!(model, [f1, f2], :tb1)

generate_mock_dataset(model)

# Settings handed to minimize!. "initial_step" has two entries, matching the
# two parameters registered above.
# NOTE(review): the key names look like optimiser tolerances (absolute/relative
# function tolerance) — confirm how minimize! interprets them.
options = Dict(
    "ftol_abs"=>0,
    "ftol_rel"=>1e-6,
    "initial_step"=>[10.0, 10.0]
)
results = minimize!(model; options=options);

# NOTE(review): presumably profiles each parameter to obtain σ=1 intervals,
# scanning with the given step sizes — confirm against
# compute_profiled_uncertainties!.
compute_profiled_uncertainties!(results; σ=1, init_step=[1.0, 1.0], step=0.1)
pretty_results(results)

### 1D component fit: Signal vs. Background

In [None]:
## Energy projection: fitted components overlaid on the mock data
# Grid on which the PDF curves are evaluated.
sx = [-10:0.01:10;]
p = getparam(model, "signal")

# Reference curves scaled by the fixed factors 1000 and 350; they are only
# consumed by the optional overlay that is commented out just below.
fs_y = 1000 * sig_itp(sx) * (bins[2] - bins[1])
fb_y = 350 * bkg_itp(sx) * (bins[2] - bins[1])
#plt.plot(sx, fs_y, label="fake signal", color="blue" )
#plt.plot(sx, fb_y, label="fake signal", color="red")
#plt.plot(sx, fs_y+fb_y, label="fake signal", color="black")

# Each component is: fitted parameter value * PDF(sx) * bin width,
# where the bin width is bins[2] - bins[1].
signal_y = getparam(model, "signal").fit * sig_itp(sx) * (bins[2] - bins[1])
bkg_y = getparam(model, "background").fit * bkg_itp(sx) * (bins[2] - bins[1])

# Draw order is kept (total, signal, background, data) so the automatic colour
# assignment of the unstyled curves does not change.
plt.plot(sx, signal_y .+ bkg_y; label="Total", color="black")
for (curve, name) in ((signal_y, "signal"), (bkg_y, "bkg"))
    plt.plot(sx, curve; label=name)
end
plt.hist(Batman.tb1.energy; bins=bins, label="Data")
plt.legend()

In [None]:
## Position projection: fitted components overlaid on the mock data
# Grid on which the PDF curves are evaluated.
sx = [-10:0.01:10;]
p = getparam(model, "signal")

# Each component is: fitted parameter value * PDF(sx) * bin width,
# where the bin width is bins[2] - bins[1].
signal_y = getparam(model, "signal").fit * sig_pos(sx) * (bins[2] - bins[1])
bkg_y = getparam(model, "background").fit * bkg_pos(sx) * (bins[2] - bins[1])

# Draw order is kept (total, signal, background, data) so the automatic colour
# assignment of the unstyled curves does not change.
plt.plot(sx, signal_y .+ bkg_y; label="Total", color="black")
for (curve, name) in ((signal_y, "signal"), (bkg_y, "bkg"))
    plt.plot(sx, curve; label=name)
end
plt.hist(Batman.tb1.position; bins=bins, label="Data")
plt.legend()

In [None]:
# Step function: returns 1 for x >= 0 and 0 otherwise. It is not referenced
# again in this cell.
hs = x -> x >= 0 ? 1 : 0
#profile!("Signal", results; prior=nothing)
#uncertainty!("Signal", results )

# NOTE(review): presumably draws the profiled interval for the "signal"
# parameter — confirm against interval_plot. The current figure is then written
# to "profile.svg" and displayed.
interval_plot(results, "signal")
plt.savefig("profile.svg")
plt.show()

In [None]:
# NOTE(review): presumably draws parameter correlation plots from the fit
# results — confirm against correlation_plots. The figure is then displayed.
correlation_plots(results)
plt.show()

In [None]:
# NOTE(review): alternative correlation plot taking the same `results` —
# confirm how it differs from correlation_plots.
correlation_plots2(results)

In [None]:
## Bias/Pull Testing
#
# For each trial: generate a fresh mock dataset from `model`, refit it, profile
# the "signal" parameter, and record
#   bias = fit - init
#   pull = bias / |fit - bound|
# where `bound` is `low` when the fit landed at or above `init`, and `high`
# when it landed below.
# NOTE(review): `low`/`high` are presumably the interval bounds filled in by
# uncertainty! — confirm.

# Concretely typed so the summaries and histograms below do not work on Vector{Any}.
bias_vector = Float64[]
pull_vector = Float64[]
trials = collect(1:1000)

errors = 0
for t in trials
    try
        generate_mock_dataset(model)
        # Skip trials whose mock dataset came out empty; these are neither
        # recorded nor counted as failures.
        if size(model.datasets[1], 1) < 1
            continue
        end
        results = minimize!(model; options=options)
        profile!("signal", results; init_step=[1.0, 1.0], step=0.1)
        uncertainty!("signal", results;σ=1)
        sig_stats = getparam(model, "signal")
        bias = sig_stats.fit - sig_stats.init
        if bias >= 0
            pull = bias / abs(sig_stats.fit - sig_stats.low)
        else
            pull = bias / abs(sig_stats.fit - sig_stats.high)
        end
        push!(bias_vector, bias)
        push!(pull_vector, pull)
    catch e
        # A user interrupt must stop the whole scan instead of being counted as
        # one more failed trial.
        e isa InterruptException && rethrow()
        # Explicit `global` keeps the counter update correct outside the
        # interactive soft scope as well.
        global errors += 1
        # Keep the failure inspectable without flooding the notebook output.
        @debug "Bias/pull trial failed" trial=t exception=(e, catch_backtrace())
    end
    # Progress indicator, overwritten in place.
    print(t,"\r")
end
println("Failure rate: ", errors/length(trials))

In [None]:
# Summarise the bias over all recorded trials and histogram it.
# Fail with a clear message when no trial succeeded, instead of an obscure
# empty-reduction error.
isempty(bias_vector) && error("bias_vector is empty: no successful trials to summarise")
# Bound to `bias_mean` rather than `mean`, so the `mean` function brought into
# scope by `using Distributions` is neither shadowed nor blocks the assignment.
@show bias_mean = sum(bias_vector) / length(bias_vector)
plt.hist(bias_vector, bins=100);

In [None]:
# Summarise the pull distribution: mean and population standard deviation
# (divides by N, not N - 1), then histogram it.
# Fail with a clear message when no trial succeeded, instead of an obscure
# empty-reduction error.
isempty(pull_vector) && error("pull_vector is empty: no successful trials to summarise")
# Bound to `pull_mean` rather than `mean`, so the `mean` function brought into
# scope by `using Distributions` is neither shadowed nor blocks the assignment.
@show pull_mean = sum(pull_vector) / length(pull_vector)
@show dev = sqrt(sum(abs2, pull_vector .- pull_mean) / length(pull_vector))

# Fixed bin edges from -3 to 3; pulls outside that window are not drawn.
plt.hist(pull_vector, bins=collect(-3:0.1:3))

println("Pull distribution: ", pull_mean, " +- ", dev);