In [None]:
# Notes:
# 1. The covariance matrix CSV files must be placed in the same folder as this notebook.
# 2. Some of the cells are computationally expensive, so their output is saved
#    as pickles that can be reloaded quickly.

In [1]:
from validphys.calcutils import calc_chi2
import numpy as np
import pandas as pd
from validphys.core import ExperimentSpec, FKTableSpec
from validphys import results
from validphys.loader import Loader
from validphys.api import API
import scipy.linalg as la

# Reading in deuteron covmats
def _read_covmat_csv(path):
    """Read one covariance-matrix CSV with a 3-level row index and 3 header rows."""
    # NOTE(review): the "user_id" dtype hint is kept as-is; these files do not
    # obviously contain such a column - confirm before removing it.
    return pd.read_csv(
        path,
        dtype={"user_id": float},
        index_col=[0, 1, 2],
        header=[0, 1, 2],
    )


it0 = _read_covmat_csv("covmatrix_global_proton.csv")
it1dw = _read_covmat_csv("covmatrix_ite.csv")
it1shift = _read_covmat_csv("covmatrix_shift_ite_1.csv")

In [2]:
# Renaming experiment DYE886R -> DYE886 to correct covariance matrix.
# Commented out because it was changed manually in the csv file.
#tups = []
# for tup in it0.index:
#     if tup[0] == "DYE886R":
#         newtup = ("DYE886", tup[1], tup[2])
#     else: newtup = tup
#     tups.append(newtup)
# newindex = pd.MultiIndex.from_tuples(tups, names=("experiment", "dataset", "id"))

# After the CSV round trip the column index holds strings rather than ints, which
# causes problems down the line. Rebuild each frame so that the columns carry
# exactly the same labels as the rows.
def _with_row_labels_as_columns(frame):
    """Return a new frame with the same values, using the row index on both axes."""
    labels = frame.index
    return pd.DataFrame(frame.values, index=labels, columns=labels)


it0 = _with_row_labels_as_columns(it0)
it1dw = _with_row_labels_as_columns(it1dw)
it1shift = _with_row_labels_as_columns(it1shift)

In [3]:
# Dataset index for fit "200609-ern-001"; every diffs frame below is wrapped with
# it so that data points stay aligned with the covariance matrices.
dsindex_bl = API.experiments_index(
    experiments={"from_": "fit"},
    fit="200609-ern-001",
    theoryid=53,
    use_cuts="fromfit",
    pdf={"from_": "fit"},
)

In [4]:
# Fetch the per-experiment results (D and T) for fit "200609-ern-001";
# they are differenced in the next cell.
datth_bl = API.experiments_results(
    experiments={"from_": "fit"},
    fit="200609-ern-001",
    theoryid=53,
    use_cuts="fromfit",
    pdf={"from_": "fit"},
)

In [25]:
# One difference array per experiment: central value of element 0 minus that of
# element 1 of each result pair.
diffs = [pair[0].central_value - pair[1].central_value for pair in datth_bl]
# Flatten the per-experiment pieces into one long list and attach the dataset index.
flattened = []
for piece in diffs:
    flattened.extend(piece)
diffs_bl = pd.DataFrame(flattened, index=dsindex_bl)


In [28]:
# Per-experiment results for fit "NNPDF31_nnlo_as_0118_global_deut".
datth_it0 = API.experiments_results(
    experiments={"from_": "fit"},
    fit="NNPDF31_nnlo_as_0118_global_deut",
    theoryid=53,
    use_cuts="fromfit",
    pdf={"from_": "fit"},
)

In [29]:
# Element 0 minus element 1 central values, one array per experiment.
diffs = [pair[0].central_value - pair[1].central_value for pair in datth_it0]
# Concatenate the pieces in order and label them with the shared dataset index.
flattened = []
for piece in diffs:
    flattened.extend(piece)
diffs_it0 = pd.DataFrame(flattened, index=dsindex_bl)


In [9]:
# Per-experiment results for fit "NNPDF31_nnlo_as_0118_global_deut_ite".
datth_it1dw = API.experiments_results(
    experiments={"from_": "fit"},
    fit="NNPDF31_nnlo_as_0118_global_deut_ite",
    theoryid=53,
    use_cuts="fromfit",
    pdf={"from_": "fit"},
)

In [12]:
# Element 0 minus element 1 central values, one array per experiment.
diffs = [pair[0].central_value - pair[1].central_value for pair in datth_it1dw]
# Concatenate the pieces in order and label them with the shared dataset index.
flattened = []
for piece in diffs:
    flattened.extend(piece)
diffs_it1dw = pd.DataFrame(flattened, index=dsindex_bl)


In [4]:
# Per-experiment results for fit "NNPDF31_nnlo_as_0118_global_deut_ite_shift".
datth_it1shifted = API.experiments_results(
    experiments={"from_": "fit"},
    fit="NNPDF31_nnlo_as_0118_global_deut_ite_shift",
    theoryid=53,
    use_cuts="fromfit",
    pdf={"from_": "fit"},
)

In [5]:
# Element 0 minus element 1 central values, one array per experiment.
diffs = [pair[0].central_value - pair[1].central_value for pair in datth_it1shifted]
# Concatenate the pieces in order and label them with the shared dataset index.
flattened = []
for piece in diffs:
    flattened.extend(piece)
diffs_it1shifted = pd.DataFrame(flattened, index=dsindex_bl)


In [7]:
# Pickling items for easy reloading - from now on can skip cells 5-13
import pickle

# Each file is written inside a `with` block so the handle is flushed and closed
# before the reload cell reads it back. The previous bare `open(...)` calls left
# the handles open until garbage collection.
for _pickle_path, _frame in (
    ("diffs_bl.p", diffs_bl),
    ("diffs_it0.p", diffs_it0),
    ("diffs_it1dw.p", diffs_it1dw),
    ("diffs_it1shifted.p", diffs_it1shifted),
):
    with open(_pickle_path, "wb") as _handle:
        pickle.dump(_frame, _handle)

In [4]:
# Reloading pickles (files written by the pickling cell above the reload point)
diffs_bl, diffs_it0, diffs_it1dw, diffs_it1shifted = (
    pd.read_pickle(pickle_path)
    for pickle_path in (
        "diffs_bl.p",
        "diffs_it0.p",
        "diffs_it1dw.p",
        "diffs_it1shifted.p",
    )
)

In [9]:
#diffs_it0 = pd.DataFrame(diffs_it0.values, index=C_orig.index)
#diffs_bl = pd.DataFrame(diffs_bl.values, index=newindex)
#diffs_it1dw = pd.DataFrame(diffs_it1dw.values, index=newindex)
#diffs_it1shifted = pd.DataFrame(diffs_it1shifted.values, index=newindex)

In [5]:
# Original (experimental) covariance matrix for fit "200609-ern-001".
C_orig = API.experiments_covmat(
    experiments={"from_": "fit"},
    fit="200609-ern-001",
    theoryid=53,
    use_cuts="fromfit",
    pdf={"from_": "fit"},
)

  exec(code_obj, self.user_global_ns, self.user_ns)
  return self.__call__(name, **kwargs)


In [6]:
# Dataset names (second entry of each index tuple), de-duplicated while keeping
# first-appearance order, for the original covmat and for the deuteron covmat.
def _ordered_dataset_names(index):
    """Return the distinct second-level labels of `index` in order of appearance."""
    seen = {}
    for entry in index:
        seen.setdefault(entry[1], None)
    return list(seen)


dslist = _ordered_dataset_names(C_orig.index)
dslist_small = _ordered_dataset_names(it0.index)

In [7]:
# Function to extend the dimensions of a small covmat to that of a big covmat, filling in the empty entries with 0s
def extend_covmat(dslist, bigcovmat, smallcovmat):
    """Pad a small covariance matrix with zeros to the labels of a big one.

    Parameters
    ----------
    dslist : list
        Dataset names (level 1 of the MultiIndex) to assemble blocks for.
    bigcovmat : pd.DataFrame
        Frame whose row index defines the labels and ordering of the result.
        Only its index is used, not its values.
    smallcovmat : pd.DataFrame
        Frame with the same kind of MultiIndex on rows and columns, covering a
        subset of the datasets.

    Returns
    -------
    pd.DataFrame
        Frame indexed on both axes by ``bigcovmat.index``. A dataset-pair block
        is taken from ``smallcovmat`` when both datasets are present there and
        is zero otherwise.

    Notes
    -----
    The blocks are assembled through transposed strips, exactly as before, so
    the result holds the zero-padded *transpose* of ``smallcovmat``. For a
    symmetric covariance matrix this is the same matrix.
    """
    # Zero template carrying the target labels. It is built from the `bigcovmat`
    # argument, not from the notebook-level `C_orig` the old code reached for.
    zeros = pd.DataFrame(0, index=bigcovmat.index, columns=bigcovmat.index)
    # Hoisted out of the double loop: datasets available in the small covmat.
    small_datasets = set(smallcovmat.index.unique(level=1))

    def _block(frame, row_ds, col_ds):
        # Sub-frame with rows belonging to row_ds and columns belonging to col_ds.
        rows = frame.xs(row_ds, level=1, drop_level=False)
        return rows.T.xs(col_ds, level=1, drop_level=False).T

    strips = []
    for ds1 in dslist:
        blocks = []
        for ds2 in dslist:
            both_in_small = ds1 in small_datasets and ds2 in small_datasets
            source = smallcovmat if both_in_small else zeros
            blocks.append(_block(source, ds1, ds2))
        # One strip per dataset: all of its blocks side by side, then transposed.
        strips.append(pd.concat(blocks, axis=1).T)

    # Join the strips, then force row and column order to match bigcovmat.
    # The final reindex makes the order in which strips are joined irrelevant.
    full_df = pd.concat(strips, axis=1)
    full_df = full_df.reindex(bigcovmat.index)
    full_df = full_df.T.reindex(bigcovmat.index).T
    return full_df

In [8]:
# Pad each deuteron covmat with zeros so that it carries the same labels as C_orig
it0total = extend_covmat(dslist=dslist, bigcovmat=C_orig, smallcovmat=it0)
it1dwtotal = extend_covmat(dslist=dslist, bigcovmat=C_orig, smallcovmat=it1dw)
it1shifttotal = extend_covmat(dslist=dslist, bigcovmat=C_orig, smallcovmat=it1shift)

In [9]:
# Total chi2 divided by the number of rows in diffs_bl, using C_orig alone
calc_chi2(
    la.cholesky(C_orig, lower=True),
    diffs_bl,
) / len(diffs_bl)

array([1.17868292])

In [10]:
# Total chi2 divided by the number of rows in diffs_it0, with it0total added to C_orig
calc_chi2(
    la.cholesky(it0total + C_orig, lower=True),
    diffs_it0,
) / len(diffs_it0)

array([1.16000901])

In [11]:
# Total chi2 divided by the number of rows in diffs_it1dw, with it1dwtotal added to C_orig
calc_chi2(
    la.cholesky(it1dwtotal + C_orig, lower=True),
    diffs_it1dw,
) / len(diffs_it1dw)

array([1.15869441])

In [12]:
# Total chi2 divided by the number of rows in diffs_it1shifted, with it1shifttotal added to C_orig
calc_chi2(
    la.cholesky(it1shifttotal + C_orig, lower=True),
    diffs_it1shifted,
) / len(diffs_it1shifted)

array([1.16601756])

In [23]:
# Function to return chi2s by dataset
def chi2s_by_dataset(covmat, diffs, datasets=None):
    """Tabulate chi2 per data point for each dataset.

    Parameters
    ----------
    covmat : pd.DataFrame
        Covariance matrix whose rows and columns carry a MultiIndex with the
        dataset name at level 1.
    diffs : pd.DataFrame
        Differences indexed by the same kind of MultiIndex.
    datasets : iterable of str, optional
        Dataset names to evaluate, in output order. When omitted, the
        notebook-level ``dslist`` is used, which was the only behaviour before
        this parameter existed.

    Returns
    -------
    pd.DataFrame
        One row per dataset with columns ``"dataset"`` and ``"chi2"``, where
        chi2 is divided by the number of rows of ``diffs`` for that dataset.
    """
    if datasets is None:
        # Backward-compatible default: fall back to the global dataset list.
        datasets = dslist
    rows = []
    for dataset in datasets:
        # Diagonal block of the covmat belonging to this dataset only.
        dscovmat = (
            covmat.xs(dataset, level=1, drop_level=False)
            .T.xs(dataset, level=1, drop_level=False)
            .T
        )
        dsdiffs = diffs.xs(dataset, level=1, drop_level=False)
        chi2 = calc_chi2(la.cholesky(dscovmat, lower=True), dsdiffs) / len(dsdiffs)
        # calc_chi2 yields an array here; its first entry is the value to tabulate.
        rows.append((dataset, chi2[0]))
    return pd.DataFrame(rows, columns=["dataset", "chi2"])
        

In [44]:
# Per-dataset chi2 table for diffs_it1dw, with it1dwtotal added to C_orig
chi2s_by_dataset(
    covmat=C_orig + it1dwtotal,
    diffs=diffs_it1dw,
)

Unnamed: 0,dataset,chi2
0,NMCPD,0.784046
1,NMC,1.542341
2,SLACP,0.91409
3,SLACD,0.493564
4,BCDMSP,1.287699
5,BCDMSD,0.908407
6,CHORUSNUPb,1.259129
7,CHORUSNBPb,0.978447
8,NTVNUDMNFe,0.687331
9,NTVNBDMNFe,0.862956


In [42]:
# Inspect the NMCPD x NMCPD block of the extended it1shift covmat
(
    it1shifttotal
    .xs("NMCPD", level=1, drop_level=False)
    .T
    .xs("NMCPD", level=1, drop_level=False)
    .T
)

Unnamed: 0_level_0,Unnamed: 1_level_0,experiment,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC,NMC
Unnamed: 0_level_1,Unnamed: 1_level_1,dataset,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD,NMCPD
Unnamed: 0_level_2,Unnamed: 1_level_2,id,0,1,2,3,4,5,6,7,8,9,...,111,112,113,114,115,116,117,118,119,120
experiment,dataset,id,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3
NMC,NMCPD,0,0.000674,0.000569,0.000138,0.000127,0.000112,0.000087,0.000075,5.660941e-05,0.000035,0.000035,...,0.000045,0.000030,0.000020,0.000011,3.068750e-07,-0.000091,-0.000094,-0.000093,-0.000090,-0.000084
NMC,NMCPD,1,0.000569,0.000586,0.000082,0.000139,0.000188,0.000018,0.000056,8.680320e-05,0.000112,-0.000045,...,0.000049,0.000038,0.000030,0.000023,1.405501e-05,-0.000036,-0.000039,-0.000039,-0.000037,-0.000033
NMC,NMCPD,2,0.000138,0.000082,0.000310,0.000217,0.000133,0.000087,0.000050,1.330676e-05,-0.000022,0.000074,...,0.000005,0.000001,-0.000003,-0.000006,-9.668036e-06,0.000013,-0.000005,-0.000021,-0.000034,-0.000048
NMC,NMCPD,3,0.000127,0.000139,0.000217,0.000203,0.000191,0.000034,0.000047,5.764563e-05,0.000067,-0.000002,...,0.000014,0.000011,0.000007,0.000003,-1.404754e-06,0.000021,0.000014,0.000008,0.000003,-0.000002
NMC,NMCPD,4,0.000112,0.000188,0.000133,0.000191,0.000245,-0.000017,0.000041,9.789617e-05,0.000151,-0.000073,...,0.000022,0.000020,0.000016,0.000012,6.688525e-06,0.000025,0.000028,0.000031,0.000033,0.000036
NMC,NMCPD,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
NMC,NMCPD,116,-0.000091,-0.000036,0.000013,0.000021,0.000025,-0.000031,-0.000020,-1.139299e-05,-0.000008,0.000059,...,0.000097,0.000089,0.000077,0.000065,5.046960e-05,0.001500,0.001329,0.001180,0.001069,0.000943
NMC,NMCPD,117,-0.000094,-0.000039,-0.000005,0.000014,0.000028,-0.000048,-0.000028,-1.048054e-05,0.000001,0.000027,...,0.000074,0.000080,0.000078,0.000076,7.313614e-05,0.001329,0.001271,0.001211,0.001163,0.001102
NMC,NMCPD,118,-0.000093,-0.000039,-0.000021,0.000008,0.000031,-0.000060,-0.000033,-8.415156e-06,0.000010,0.000001,...,0.000056,0.000073,0.000080,0.000085,9.130516e-05,0.001180,0.001211,0.001222,0.001225,0.001219
NMC,NMCPD,119,-0.000090,-0.000037,-0.000034,0.000003,0.000033,-0.000066,-0.000034,-5.165862e-06,0.000018,-0.000016,...,0.000045,0.000071,0.000085,0.000096,1.106571e-04,0.001069,0.001163,0.001225,0.001266,0.001301
