In [None]:
#------------------------------------------------------------------------------------------------------------------------------------------
# basic setup of the notebook
#------------------------------------------------------------------------------------------------------------------------------------------
import numpy as np
import scipy as sp
from scipy import *
from matplotlib import pyplot as plt

# everything in iminuit is done through the Minuit object, so we import it
from iminuit import Minuit
from iminuit.util import describe
from typing import Annotated

# display iminuit version
import iminuit
print("iminuit version:", iminuit.__version__)

# Display the value of every top-level expression in a cell, not only the last one
# (IPython's default is "last_expr"). Cells that consist of several bare expressions rely on this.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [None]:
#------------------------------------------------------------------------------------------------------------------------------------------
# Fixed parameters of the analysis
#------------------------------------------------------------------------------------------------------------------------------------------
total_size_lat = 32  # fills the "L" tag of the input file name
measured_T = 16      # number of consecutive data entries that form one sample (one row of Gc)
xi = 1               # fills the "xi" tag of the input file name

In [None]:
#------------------------------------------------------------------------------------------------------------------------------------------
# Read the raw data file and keep only its second column as a flat array
#------------------------------------------------------------------------------------------------------------------------------------------
raw_path = 'data/L{}_xi{}.dat'.format(total_size_lat, xi)
data = np.array(np.genfromtxt(raw_path)[:, 1])   # np.array makes an independent copy of the column

In [None]:
# First, store the data in a convenient format for the fits

totaltraj = int(len(data) / measured_T)        # Number of Monte Carlo samples (complete rows only)

# Row i of Gc holds the flat entries i*measured_T ... (i+1)*measured_T - 1;
# entries beyond the last complete row are dropped.
Gc = np.array(data[:totaltraj * measured_T], dtype=float).reshape(totaltraj, measured_T)

In [None]:
# Now that the data has been read, let's create a class for jackknife resampling

def mylen(x):
    """Return len(x) for numpy arrays and lists; any other object counts as one element."""
    if isinstance(x, (np.ndarray, list)):
        return len(x)
    return 1

class jackknife:
    """Leave-one-out (jackknife) resampling of an ensemble.

    The first axis of the input indexes the samples. A 1D input is treated as
    samples of a single observable, so the jackknife ensemble always has shape
    (Nj, Nt).

    Attributes:
        list:   the input ensemble, stored as given.
        Lj, Nj: number of samples (both equal len(list)).
        jackkf: jackknife ensemble; row i is the mean of all samples except sample i.
    """

    def __init__(self, list):
        """Build the jackknife ensemble.

        Args:
            list: sequence/array of samples, first axis = sample index.
                  (The name shadows the builtin; it is kept so existing calls keep working.)

        Raises:
            ValueError: if fewer than 2 samples are given (the leave-one-out mean is undefined).
        """
        self.list=list
        self.Lj=len(list)
        self.Nj=self.Lj
        if self.Nj < 2:
            raise ValueError("jackknife resampling needs at least 2 samples, got {}".format(self.Nj))
        values=np.asarray(self.list,dtype=float).reshape(self.Nj,-1)
        # Leave-one-out mean without deleting rows one by one:
        # (sum over all samples - sample i) / (Nj-1)
        self.jackkf=(values.sum(axis=0)-values)/(self.Nj-1)

    #sample values: jackknife(original_ensemble).sample()=jackknife_ensemble
    def sample(self):
        """Return the jackknife ensemble, shape (Nj, Nt)."""
        return self.jackkf

    def fcov(self):
        """Return the biased covariance of the jackknife samples times (Nj-1).

        This is the jackknife estimate of the covariance of the ensemble mean.
        """
        return np.cov(self.jackkf,rowvar=False,bias=True) * (self.Nj-1)

    #augmented sample: jackknife(jackknife_ensemble).up()=original_ensemble
    def up(self):
        """Invert the jackknife map, treating the stored input as a jackknife ensemble.

        Uses x_i = J_i + N*(mean(J) - J_i), which undoes J_i = (sum(x) - x_i)/(N-1).
        """
        Lj=len(self.list)
        mean=np.mean(self.list,axis=0)
        ensem=self.list+(Lj)*(mean-self.list)
        return ensem

In [None]:
# Now let's write another class to compute means and covariances
class ensemble_stat:
    """Mean and reduced covariance of an ensemble whose first axis indexes the samples."""

    def __init__(self, list):
        # Stored as given; every statistic is computed on demand.
        self.list = list

    def mean(self):
        """Average over the sample axis."""
        return np.mean(self.list, axis=0)

    #reduced variance
    def rcov(self):
        """Biased covariance over the samples (normalised by N), divided by N-1."""
        n_samples = len(self.list)
        biased_cov = np.cov(self.list, rowvar=False, bias=True)
        return biased_cov / (n_samples - 1)

In [None]:
# This is the basic iminuit least-squares cost-function class; let's modify it to include correlations

class LeastSquares:
    """
    Generic correlated least-squares cost function.

    Evaluates chi2 = r . invcov . r with r = y - model(x, *par).
    """

    errordef = Minuit.LEAST_SQUARES  # for Minuit to compute errors correctly

    def __init__(self, model, x, y, incov):  # IMPORTANT: incov is the INVERSE of the covariance matrix
        self.model = model               # callable predicting y for given x and parameters
        self.x, self.y = np.asarray(x), np.asarray(y)
        self.invcov = np.asarray(incov)

    def __call__(self, *par):  # variadic so that any number of model parameters is accepted
        residual = self.y - self.model(self.x, *par)
        return np.dot(np.dot(residual, self.invcov), residual)
        
class BetterLeastSquares(LeastSquares):
    """LeastSquares that exposes the model's parameter names through `_parameters`.

    The names (and annotations) of every model argument after the first one are
    stored on the instance, where iminuit's `describe` looks for them.
    """

    def __init__(self, model, x, y, incov):
        super().__init__(model, x, y, incov)
        pars = describe(model, annotations=True)
        model_args = iter(pars)
        # Skip the first model argument (the x values); the default keeps this from
        # raising StopIteration when describe() finds no arguments at all.
        next(model_args, None)
        # Must be an instance attribute: as a plain local the dict was discarded
        # and this subclass had no effect.
        self._parameters = {k: pars[k] for k in model_args}


class EvenBetterLeastSquares(BetterLeastSquares):
    """BetterLeastSquares that also reports how many data points enter the fit."""

    @property
    def ndata(self):
        """Number of data points, taken as the length of the x array."""
        n_points = len(self.x)
        return n_points

In [None]:
# Let's finish by defining a function as a sum of exponentials

def exp_np(t,*pars):
    """Sum of decaying exponentials with ordered, positive decay rates.

    Parameters come in (amplitude, rate) pairs: pars = (A0, m0, A1, d1, A2, d2, ...).
    Only absolute values are used, and every rate after the first is an increment
    on the previous one:

        f(t) = |A0| exp(-|m0| t) + |A1| exp(-(|m0|+|d1|) t) + ...

    Args:
        t:     scalar or numpy array of times.
        *pars: an even, non-zero number of parameters.

    Returns:
        The model evaluated at t (same shape as t).

    Raises:
        ValueError: if the number of parameters is zero or odd.
    """
    if len(pars) < 2 or len(pars) % 2:
        raise ValueError(
            "exp_np needs an even, non-zero number of parameters "
            "(amplitude, rate pairs), got {}".format(len(pars))
        )
    mass=np.abs(pars[1])
    total=np.abs(pars[0])*np.exp(-mass*t)
    # Remaining pairs: each rate is the previous rate plus a positive gap.
    for amplitude,gap in zip(pars[2::2],pars[3::2]):
        mass=mass+np.abs(gap)
        total=total+np.abs(amplitude)*np.exp(-mass*t)
    return total

In [None]:
# First, let's start with a simple plot and fit test to some specific data

mcalls=5000                    # call budget handed to every migrad/hesse pass
mtol=0.0001                    # NOTE(review): defined but never handed to Minuit in this cell — confirm intent


jackk_sampler=jackknife(Gc)                                    # Built once and reused for both the samples and the covariance
jackk_C_r_t=jackk_sampler.sample()                             # This is our main data sample for a fixed R
lt=len(Gc[0])
dfin=min(lt,100)

tfin=3
tffin=16

fit_window = slice(tfin-1, tffin)                                                    # This cell pairs array index i with time t = i+1

data_t   = np.linspace(tfin, tffin, tffin-tfin+1)                                     # Creating x axis values
data_y   = ensemble_stat(jackk_C_r_t).mean()[fit_window]                              # Obtaining the data averages over the Monte Carlo samples
data_cov = jackk_sampler.fcov() #ensemble_stat(jackknife(jackk_C_r_t).up()).rcov()    # Obtaining the (full) covariance matrix
data_err = np.sqrt(np.diagonal(data_cov))[fit_window]                                 # This is the error from the covariance matrix
# Restrict the covariance to the fitted points BEFORE inverting: a sub-block of the full
# inverse is not the inverse of the sub-block unless the matrix is (block-)diagonal.
data_incov=np.linalg.inv(data_cov[fit_window,fit_window])


funfit=exp_np                  # Model chosen to fit the C(R,t) for varying time t, fixed R
inipars=np.array([1.,1.])           # Initial parameters for the model to fit

least_squares_np = EvenBetterLeastSquares(funfit, data_t, data_y, data_incov)
m=Minuit(least_squares_np,*inipars)

m.migrad(mcalls).migrad(mcalls).hesse(mcalls)

chi2_total=m.fval

data_t_plot=np.linspace(data_t[0],data_t[-1],1000)
plt.errorbar(data_t, data_y, data_err, fmt="ok", label="data")
plt.plot(data_t_plot, funfit(data_t_plot, *m.values), label="fit")
plt.legend()

In [None]:
# Can you repeat the exercise, but without correlations?

mcalls=5000                    # call budget handed to every migrad/hesse pass
mtol=0.0001                    # NOTE(review): defined but never handed to Minuit in this cell — confirm intent

totaltraj=int(len(data)/measured_T)

jackk_sampler=jackknife(Gc)                                    # Built once and reused for both the samples and the covariance
jackk_C_r_t=jackk_sampler.sample()                             # This is our main data sample for a fixed R
lt=len(Gc[0])
dfin=min(lt,100)

tfin=3
tffin=16

fit_window = slice(tfin-1, tffin)                                                    # This cell pairs array index i with time t = i+1

data_t   = np.linspace(tfin, tffin, tffin-tfin+1)                                     # Creating x axis values
data_y   = ensemble_stat(jackk_C_r_t).mean()[fit_window]                              # Obtaining the data averages over the Monte Carlo samples
data_cov = jackk_sampler.fcov() #ensemble_stat(jackknife(jackk_C_r_t).up()).rcov()    # Obtaining the covariance matrix
data_cov = np.diag(np.diag(data_cov))                                                 # Keep only the variances: correlations are dropped
data_err = np.sqrt(np.diagonal(data_cov))[fit_window]                                 # This is the error from the covariance matrix
data_incov=np.linalg.inv(data_cov[fit_window,fit_window])                             # Window first, then invert


funfit=exp_np                  # Model chosen to fit the C(R,t) for varying time t, fixed R
inipars=np.array([1.,1.])           # Initial parameters for the model to fit

least_squares_np = EvenBetterLeastSquares(funfit, data_t, data_y, data_incov)
m2=Minuit(least_squares_np,*inipars)

m2.migrad(mcalls).migrad(mcalls).hesse(mcalls)


data_t_plot=np.linspace(data_t[0],data_t[-1],1000)
plt.errorbar(data_t, data_y, data_err, fmt="ok", label="data")
# Draw the curve of THIS (uncorrelated) fit, m2; the original plotted the correlated fit m here.
plt.plot(data_t_plot, funfit(data_t_plot, *m2.values), label="fit")
plt.legend()

In [None]:
# Compare now both central values and errors for the ground state energy, are they compatible?

# First line: correlated fit (m); second line: uncorrelated fit (m2).
# Parameter 1 is the ground-state rate; the model only uses its absolute value.
for fit in (m, m2):
    print(np.abs(np.array(fit.values))[1],np.abs(np.array(fit.errors))[1])

In [None]:
# Let's now produce a Jackknife set of fits to data

mcalls=5000                    # call budget handed to every migrad/hesse pass
mtol=0.0001                    # NOTE(review): defined but never handed to Minuit in this cell — confirm intent

jackk_sampler=jackknife(Gc)                                    # Built once and reused for both the samples and the covariance
jackk_C_r_t=jackk_sampler.sample()                             # This is our main data sample for a fixed R
lt=len(Gc[0])
dfin=min(lt,100)

tfin=3
tffin=16

fit_window = slice(tfin-1, tffin)                                                    # This cell pairs array index i with time t = i+1

data_t   = np.linspace(tfin, tffin, tffin-tfin+1)                                     # Creating x axis values
data_cov = jackk_sampler.fcov() #ensemble_stat(jackknife(jackk_C_r_t).up()).rcov()    # Obtaining the (full) covariance matrix
data_err = np.sqrt(np.diagonal(data_cov))[fit_window]                                 # This is the error from the covariance matrix
# Restrict the covariance to the fitted points BEFORE inverting: a sub-block of the full
# inverse is not the inverse of the sub-block unless the matrix is (block-)diagonal.
data_incov=np.linalg.inv(data_cov[fit_window,fit_window])


funfit=exp_np                  # Model chosen to fit the C(R,t) for varying time t, fixed R
inipars=np.array([1.,1.])           # Initial parameters for the model to fit


E_0=np.zeros(totaltraj)                 # Ground-state rate from each jackknife fit
chi2_jackk=np.zeros(totaltraj)          # Minimum of the cost function from each jackknife fit
for jackk in range(totaltraj):
    data_y   = (jackk_C_r_t[jackk])[fit_window]                        # Jackknife sample number `jackk`, restricted to the fit window

    least_squares_np = EvenBetterLeastSquares(funfit, data_t, data_y, data_incov)

    jackk_fit=Minuit(least_squares_np,*inipars).migrad(mcalls).migrad(mcalls).hesse(mcalls)

    # The model only depends on |pars[1]|, so the sign of the fitted value is arbitrary;
    # store the absolute value so that averaging over samples cannot mix signs.
    E_0[jackk]        = np.abs(np.array(jackk_fit.values)[1])
    chi2_jackk[jackk] = jackk_fit.fval

In [None]:
# Finally, let's compare this value with the simple fit to data

# Jackknife estimate: mean of the per-sample fits and its jackknife error.
# E_0 already is a jackknife ensemble, so it is mapped back with up() before being resampled.
E_0_mean = ensemble_stat(E_0).mean()
E_0_err = np.sqrt(jackknife(jackknife(E_0).up()).fcov())
print(E_0_mean,E_0_err)

# Single fit to the averaged data (value and error of parameter 1).
fit_E_0 = np.abs(np.array(m.values))[1]
fit_E_0_err = np.abs(np.array(m.errors))[1]
print(fit_E_0,fit_E_0_err)

In [None]:
# EXTRA: Use the formula introduced in the lectures for the relation between the total chi2 and the Jackknife or Raw sample chi2's

n_fit_points = tffin-tfin+1                            # number of fitted time slices
mean_chi2_jackk = ensemble_stat(chi2_jackk).mean()     # average chi2 over the jackknife fits

# The four bare expressions below are the displayed outputs of this cell.
chi2_total

mean_chi2_jackk

mean_chi2_jackk-n_fit_points/(totaltraj-1)

mean_chi2_jackk-(n_fit_points-len(inipars))/(totaltraj-1)