In [None]:
from collections import defaultdict
import numpy as np
import pandas as pd

from validphys.loader import FallbackLoader as Loader
from validphys.api import API

# Shared loader instance (validphys `FallbackLoader`, imported as `Loader`);
# used below to turn fit names into fit objects via `l.check_fit`.
# NOTE(review): presumably the fallback variant fetches resources that are
# not available locally - confirm against the validphys documentation.
l = Loader()

In [None]:
# Fit identifiers follow the pattern "230131-rs-nnpdf40-corr-<tag>-<variant>",
# with two variants ("a" and "b") for every tag.
# NOTE(review): the tag presumably encodes the alpha_s value of the fit
# (e.g. "01180" for 0.1180) - confirm against the fit runcards.
fit_names = [
    f"230131-rs-nnpdf40-corr-{tag}-{variant}"
    for tag in (
        "01140", "01150", "01160", "01170", "01175", "01180",
        "01185", "01190", "01195", "01200", "01210", "01220",
    )
    for variant in ("a", "b")
]

In [None]:
# Resolve every fit name into a fit object through the shared loader,
# keeping the order of `fit_names`.
fits = list(map(l.check_fit, fit_names))

In [None]:
def measure(replica_data, tr_datapoints, vl_datapoints):
    """Combine a replica's training and validation figures into one number.

    Parameters
    ----------
    replica_data
        Object exposing ``training`` and ``validation`` attributes.
        NOTE(review): presumably per-point chi2 values, so that the result
        is a total chi2 - confirm against the validphys replica data type.
    tr_datapoints
        Weight applied to ``replica_data.training`` (number of training
        points at the call site).
    vl_datapoints
        Weight applied to ``replica_data.validation`` (number of validation
        points at the call site).

    Returns
    -------
    ``training * tr_datapoints + validation * vl_datapoints``.
    """
    training_term = replica_data.training * tr_datapoints
    validation_term = replica_data.validation * vl_datapoints
    return training_term + validation_term

In [None]:
# Group the loaded fits by the "alphas" entry of their theory: the result maps
# each alpha value (float) to the list of fits that share it, in `fits` order.
as_fits = {}
for loaded_fit in fits:
    theory_id = loaded_fit.as_input()["theory"]["theoryid"]
    # NOTE(review): if `.loc["alphas"]` yields a one-element Series rather
    # than a scalar, calling float() on it is deprecated in recent pandas -
    # confirm what `theory_info_table` returns.
    alpha_value = float(API.theory_info_table(theory_db_id=theory_id).loc["alphas"])
    as_fits.setdefault(alpha_value, []).append(loaded_fit)

In [None]:
# For every fit, record the replica indices reported by
# `API.fitted_replica_indexes`, and keep in `common_replica_indices` only the
# indices (out of the candidates 1..230) that are present in all fits.
replica_indices_per_fit = {}
common_replica_indices = set(range(1, 231))
for fit in fits:
    replica_indices = API.fitted_replica_indexes(pdf=fit.name)
    replica_indices_per_fit[fit.name] = replica_indices
    common_replica_indices = common_replica_indices & set(replica_indices)

In [None]:
# For every fit, collect one record per common replica (ascending replica
# index) holding the replica's fit data and the sizes of its training and
# validation pseudodata splits. These records are the keyword arguments of
# `measure`.
fit_info = {}
for fit in fits:
    pseudodata = API.read_fit_pseudodata(fit=fit.name)
    replica_records = list(API.replica_data(fit=fit.name))
    fitted_indices = list(replica_indices_per_fit[fit.name])

    # The records are looked up by replica index instead of being consumed
    # sequentially: a fit may contain fitted replicas that are not part of the
    # common set, and popping in order would then silently attach the wrong
    # record to every later replica.
    # NOTE(review): this assumes `API.replica_data` returns its entries in the
    # same order as `API.fitted_replica_indexes` - confirm against validphys.
    if len(replica_records) != len(fitted_indices):
        raise ValueError(
            f"{fit.name}: got {len(replica_records)} replica records for "
            f"{len(fitted_indices)} fitted replica indices"
        )
    record_by_index = dict(zip(fitted_indices, replica_records))

    replica_info = []
    for rep_index in sorted(common_replica_indices):
        # Pseudodata entries are addressed with a 0-based position, replica
        # indices are 1-based; elements 1 and 2 of an entry are the training
        # and validation parts respectively.
        split = pseudodata[rep_index - 1]
        replica_info.append(
            {
                "replica_data": record_by_index[rep_index],
                "vl_datapoints": split[2].size,
                "tr_datapoints": split[1].size,
            }
        )
    fit_info[fit.name] = replica_info
    print(fit.name)  # progress indicator: this loop can be slow


In [None]:
# For every alpha value, evaluate `measure` for each replica of each fit and
# keep, replica by replica, the smallest value found among the fits sharing
# that alpha. `data` has one row per replica index and one column per alpha.

# The per-fit records in `fit_info` are stored in ascending replica order, so
# the labels must be an ascending list as well. A bare set must not be used as
# the index: its iteration order is not guaranteed to be sorted, which would
# attach values to the wrong replica index.
replica_labels = sorted(common_replica_indices)

min_values = {}
for alpha, flist in as_fits.items():
    per_fit_series = [
        pd.Series([measure(**d) for d in fit_info[f.name]], index=replica_labels)
        for f in flist
    ]
    # Rows are fits, columns are replicas: min() reduces over the fits.
    min_values[alpha] = pd.DataFrame(per_fit_series).min()
data = pd.DataFrame(min_values)


In [None]:
# Fit a parabola in alpha to every row of `data` (one row per replica) and
# store the position of its stationary point, -b / (2a). Rows for which the
# fit yields a NaN linear coefficient are left out.
vertex_by_replica = {}
alpha_grid = data.columns
for replica, profile in data.iterrows():
    quad, lin, _const = np.polyfit(alpha_grid, profile, 2)
    if np.isnan(lin):
        continue
    vertex_by_replica[replica] = -lin / 2 / quad
mins = pd.Series(vertex_by_replica)


In [None]:
# Summary statistics (count, mean, std, quantiles) of the per-replica
# parabola stationary points stored in `mins`.
mins.describe()

In [None]:
from validphys.plotutils import kde_plot

In [None]:
# Plot the distribution of the per-replica values in `mins` with the
# validphys `kde_plot` helper.
kde_plot(mins)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# One line per replica: x is the alpha value of each column of `data`,
# y is that replica's row of `data`.
plt.plot(data.columns, data.T.to_numpy())