In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from validphys.loader import FallbackLoader as Loader
from validphys.api import API

In [None]:
# Names of the fits that make up the alpha_s scan. Alternative fit sets are
# kept as commented-out lines so they can be swapped in by hand.
# fit_names = [f"NNPDF40_nnlo_as_0{n}_corr_s{l}" for l in "12" for n in [1140,1150,1160,1170,1175,1180,1185,1190,1195,1200,1210,1220]]

#NOTE: 0.1990 was mistakenly done with theoryID 0.0120 instead
# NOTE(review): neither value above lies on the 1140..1220 grid used below;
# they look like typos (0.1190 and 0.120?) -- confirm with the author.
alphas_tags = range(1140, 1220 + 1, 10)  # 1140, 1150, ..., 1220
fit_names = [f"240218-rs-nnpdf40-corr-mhou-0{n}" for n in alphas_tags]
# fit_names = [f"240219-rs-nnpdf40-corr-0{n}" for n in range(1140,1220+1,10)]


In [None]:
# Turn every fit name into a fit object via the validphys loader
# (`Loader` is the FallbackLoader imported in the first cell).
l = Loader()
fits = list(map(l.check_fit, fit_names))

In [None]:
from collections import defaultdict

# Group the fits by the alpha_s value of the theory they were run with.
# The value is read from the theory referenced in each fit's runcard
# (`theory -> theoryid`), not parsed from the fit name.
as_fits = defaultdict(list)
for f in fits:
    th = f.as_input()["theory"]["theoryid"]
    alphas_entry = API.theory_info_table(theory_db_id=th).loc["alphas"]
    # `.loc["alphas"]` yields a scalar for a Series-like table and a
    # one-element Series for a single-column DataFrame. np.ravel(...)[0]
    # extracts the value in both cases and avoids calling float() on a
    # Series, which recent pandas versions deprecate.
    alpha = float(np.ravel(alphas_entry)[0])
    as_fits[alpha].append(f)
# Freeze into a plain dict so that later lookups of a missing alpha_s raise
# KeyError instead of silently creating an empty entry.
as_fits = dict(as_fits)

In [None]:
# Per-fit lookup tables, keyed by the fit object itself:
#   indexes      -> API.fitted_replica_indexes(pdf=<fit name>)
#   replica_data -> API.replica_data(fit=<fit name>)
indexes = dict(
    (fit, API.fitted_replica_indexes(pdf=fit.name)) for fit in fits
)
replica_data = dict(
    (fit, API.replica_data(fit=fit.name)) for fit in fits
)

In [None]:
# Plain list of the per-fit replica-index entries, in the order of `indexes`.
# No other cell visible in this notebook reads `aa`.
aa = list(indexes.values())


In [None]:
def measure(replica_data, training_weight=3, validation_weight=1):
    """Return the figure of merit of one replica used in the alpha_s scan.

    The result is the weighted sum
    ``training * training_weight + validation * validation_weight``.
    With the default weights this is ``3 * training + validation``.

    Parameters
    ----------
    replica_data
        Any object exposing ``training`` and ``validation`` attributes.
        Note that inside this function the name shadows the notebook-level
        ``replica_data`` dictionary.
    training_weight, validation_weight
        Multiplicative weights of the two contributions.

    Returns
    -------
    The weighted sum, of whatever numeric type the attributes have.
    """
    # Alternative figure of merit kept from the original cell:
    # return replica_data.chi2
    return (
        replica_data.training * training_weight
        + replica_data.validation * validation_weight
    )

In [None]:
# Build the table scanned for the best alpha_s:
#   rows    -> replica index
#   columns -> alpha_s value
# When several fits share the same alpha_s, each replica index gets the
# smallest measure found among them (DataFrame.min skips missing entries).
min_values = {}
for alpha, flist in as_fits.items():
    series = []
    for f in flist:
        s = [measure(d) for d in replica_data[f]]
        series.append(pd.Series(s, index=indexes[f]))
    # Each Series is one row here, so .min() reduces over the fits.
    min_values[alpha] = pd.DataFrame(series).min()
# Column order would otherwise follow the insertion order of `as_fits`.
# Sort by alpha_s so that cells using `data.columns` as an x-axis or as
# histogram bin edges always receive increasing values.
data = pd.DataFrame(min_values).sort_index(axis=1)

In [None]:
# For every replica (one row of `data`) fit a parabola to the measure as a
# function of alpha_s and store the position of its vertex, -b / (2a).
alphas_grid = np.asarray(data.columns, dtype=float)
mins = {}
for ind, row in data.iterrows():
    values = row.to_numpy(dtype=float)
    # A row contains NaN when the replica is missing for some alpha_s
    # (original note: "NaN if not all replicas passed postfit"). Skip such
    # rows *before* fitting: depending on the LAPACK build, np.polyfit either
    # returns NaN coefficients or raises LinAlgError on non-finite input.
    if not np.isfinite(values).all():
        continue
    a, b, _ = np.polyfit(alphas_grid, values, 2)
    mins[ind] = -b / 2 / a
    # Alternative: take the grid point with the smallest measure instead.
    # mins[ind] = data.columns[np.where(row==row.min())][0]

# Explicit dtype keeps the Series numeric even if every row was skipped.
mins = pd.Series(mins, dtype=float)

In [None]:
# Text summary of the distribution of per-replica best-fit alpha_s values.
# The 16% / 84% quantiles are taken with Series.quantile, which describe()
# itself uses for its percentile rows. This avoids indexing the
# string-labelled describe() output by integer position, which is deprecated
# in pandas 2.1 and fails in pandas 3.
q16, q84 = mins.quantile([0.16, 0.84])
print(mins.describe(percentiles=[0.16,0.84]))
print("")
print(f"cv±std = {mins.mean():.5f} ± {mins.std():.5f} ")
print(f"1std interval:  {mins.mean()-mins.std():.5f} to {mins.mean()+mins.std():.5f} ")
print(f"68% c.i:        {q16:.5f} to {q84:.5f} ")

In [None]:
from validphys.plotutils import kde_plot

# Kernel-density plot of the per-replica best-fit alpha_s values. The title
# reports the midpoint and half-width of the central 68% interval.
fig, ax = plt.subplots()
kde_plot(mins,ax=ax)
# Series.quantile returns the same numbers as the "16%" / "84%" rows of
# describe(), without integer-position lookups on a string-labelled Series
# (deprecated in pandas 2.1) and without recomputing describe() four times.
q16, q84 = mins.quantile([0.16, 0.84])
central = (q84 + q16)/2
unc = (q84 - q16)/2
# NOTE(review): the "MHOU" tag is hard-coded; update it by hand if the
# non-MHOU fit list is selected in the `fit_names` cell.
ax.set_title(f"68% c.i: {central:.5f}  ± {unc:.5f}  -- MHOU")
ax.set_xlim(0.118,0.13)
ax.set_xlabel(r"$\alpha_s$")

In [None]:
from scipy.stats import norm

# Histogram of the per-replica best-fit alpha_s values, overlaid with a
# Gaussian of the same mean and standard deviation, saved to alphashist.pdf.
fig_hist, ax_hist = plt.subplots()
# Bin edges sit 0.0005 below each scanned alpha_s value. They are sorted
# because hist() requires increasing edges and the column order of `data` is
# not guaranteed by this cell.
# NOTE(review): with N alpha_s values this gives N-1 bins, the last one ending
# 0.0005 below the largest alpha_s; minima beyond that edge are not drawn --
# confirm this is intended.
bin_edges = np.sort(np.asarray(data.columns, dtype=float)) - 0.0005
ax_hist.hist(mins, bins=bin_edges, edgecolor='black', density=True)
xmin, xmax = ax_hist.get_xlim()
x = np.linspace(xmin, xmax, 100)
# p = np.exp(-((x-mins.mean())/mins.std())**2/2)*mins.size/np.sqrt(2*np.pi)
p = norm.pdf(x, mins.mean(), mins.std())
ax_hist.plot(x, p, 'k', label=f"{mins.mean():.5f} +/- {mins.std():.5f}")
ax_hist.set_yticks([])
# Same x-axis label as the KDE plot so the figure can be read on its own.
ax_hist.set_xlabel(r"$\alpha_s$")
ax_hist.legend()
fig_hist.savefig('alphashist.pdf')

In [None]:
# Overview plot: one line per row of `data`, drawn against the column values
# (the alpha_s grid).
plt.plot(data.columns, data.to_numpy().T)