# some preliminary functions
Scroll to third cell

In [16]:
import numpy as np
import os


def experiment_nL(experiment):
    """Return the target nucleon density and target length of an experiment.

    Parameters
    ----------
    experiment : str
        One of 'FASERv', 'FASERv2', 'FLArE10', 'FLArE100', 'SND', 'AdvSND'.
        If the name contains 'optimistic', 'nocuts' or 'best', everything
        from the first underscore onwards is dropped before the lookup.

    Returns
    -------
    tuple
        ``(nucleon_density, length)``: nucleons per cm^3 of the target
        material and the target length in cm.

    Raises
    ------
    ValueError
        If the (suffix-stripped) name is not one of the known experiments.
    """
    if any(tag in experiment for tag in ('optimistic', 'nocuts', 'best')):
        experiment = experiment.split('_')[0]

    experiments = ['FASERv', 'FASERv2', 'FLArE10', 'FLArE100', 'SND', 'AdvSND']
    if experiment not in experiments:
        raise ValueError(
            'Unknown experiment %r; expected one of %s' % (experiment, experiments)
        )
    index = experiments.index(experiment)

    Na = 6.02e23
    g_per_cm3_Ar = 1.4
    MM_Ar = 39.9
    g_per_cm3_W = 19.3
    MM_W = 183

    # density / molar mass * Avogadro * atomic weight = nucleons / cm^3
    tungsten_nucleon_density = g_per_cm3_W / MM_W * Na * MM_W
    LAr_nucleon_density = g_per_cm3_Ar / MM_Ar * Na * MM_Ar

    # FASERv(2) and (Adv)SND are tungsten, FLArE is liquid argon.
    nucleon_densities_percm3 = [
        tungsten_nucleon_density,
        tungsten_nucleon_density,
        LAr_nucleon_density,
        LAr_nucleon_density,
        tungsten_nucleon_density,
        tungsten_nucleon_density,
    ]

    # Target lengths, given in metres and converted to cm; same order as
    # `experiments`.
    lengths_cm = np.array([1, 6.6, 7, 30, 0.35, 0.5]) * 100
    return nucleon_densities_percm3[index], lengths_cm[index]

def delta_exp(experiment, neutrino):
    """Return the (deltheta, delEl, delEh) triple configured for an experiment.

    The header written by ``combine_clipped_nunubar`` reports these values as
    ``del_theta`` in mrad (after multiplying by 1e3) and ``del_Elepton`` /
    ``del_Ehadron`` in percent (after multiplying by 100).

    Parameters
    ----------
    experiment : str
        Experiment name, including any '_nocuts' / '_optimistic' / '_best'
        suffix. Only the exact names listed in the branches below are
        supported.
    neutrino : int
        Neutrino PID; only its absolute value is used, and only for the
        FLArE experiments (14 = muon, 12 = electron).

    Returns
    -------
    tuple
        ``(deltheta, delEl, delEh)``.

    Raises
    ------
    ValueError
        If the experiment (or, for FLArE, the neutrino flavour) has no
        configured values. Previously this surfaced as an UnboundLocalError
        at the ``return`` statement.
    """
    if experiment in ('FASERv2', 'FASERv2_nocuts', 'FASERv', 'FASERv_nocuts'):
        return 0.06e-3, 0.3, 0.3
    if experiment in ('FASERv2_optimistic', 'FASERv_optimistic'):
        return 1e-3 / 2, 0.3 / 2, 0.5 / 2
    if experiment == 'FASERv2_best':
        return 0, 0, 0
    if experiment in ('FLArE10', 'FLArE100', 'FLArE10_nocuts'):
        flavour = np.abs(neutrino)
        if flavour == 14:  # muon
            return 15e-3, 0.3, 0.3
        if flavour == 12:  # electron
            return 15e-3, 0.05, 0.3
        raise ValueError(
            'delta_exp: no values for neutrino %r in experiment %r '
            '(only |PID| 12 and 14 are supported)' % (neutrino, experiment)
        )
    if experiment in ('SND', 'SND_nocuts', 'AdvSND'):
        return 0.06e-3, 0.3, 0.3
    raise ValueError('delta_exp: unsupported experiment %r' % (experiment,))

def thresholds(experiment, neutrino):
    """Return the kinematic cut values configured for an experiment.

    The header written by ``combine_clipped_nunubar`` reports the energy
    limits in GeV and ``theta_max`` in mrad after multiplying by 1e3.

    Parameters
    ----------
    experiment : str
        Experiment name, including any suffix. Names containing 'nocuts'
        get fully open cuts, except that 'FASERv2'-style exact matches are
        tested first (same precedence as the branch order below).
    neutrino : int
        Neutrino PID; only its absolute value is used (14 = muon,
        12 = electron). It matters for FLArE, SND and AdvSND.

    Returns
    -------
    tuple
        ``(EH_min, EH_max, theta_min, theta_max, El_min, El_max)``.

    Raises
    ------
    ValueError
        If no cuts are configured for the experiment / flavour combination.
        Previously this surfaced as an UnboundLocalError at ``return``.
    """
    # 'FASERv2_best' is accepted by delta_exp and experiment_nL, so it is
    # accepted here too; 'FASERv_best' is kept for backward compatibility.
    if experiment in ('FASERv2', 'FASERv2_optimistic', 'FASERv_optimistic',
                      'FASERv_best', 'FASERv2_best'):
        # treat minimum as same for lepton, max is maximum of neutrino energy
        return 100, 10e3, 0, np.arctan(0.05), 100, 10e3
    if experiment == 'FASERv':
        return 100, 10e3, 0, np.arctan(0.025), 100, 10e3
    if 'nocuts' in experiment:
        return 0, 10e3, 0, np.pi, 0, 10e3

    if experiment in ('FLArE10', 'FLArE100'):
        flavour = np.abs(neutrino)
        if flavour == 14:  # muon
            return 10e-3, 10e3, 0, 0.025, 2, 10e3
        if flavour == 12:  # electron
            return 10e-3, 10e3, 0, 0.5, 2, 2e3
    elif experiment == 'SND':
        # Any flavour other than muon uses the wider angular acceptance.
        theta_max = 0.15 if np.abs(neutrino) == 14 else 0.5
        return 20, 10e3, 0, theta_max, 20, 10e3
    elif experiment == 'AdvSND':
        flavour = np.abs(neutrino)
        if flavour == 14:
            return 20, 10e3, 0, 0.15, 20, 10e3
        if flavour == 12:
            return 20, 10e3, 0, 0.5, 20, 10e3

    raise ValueError(
        'thresholds: no cuts configured for experiment %r with neutrino %r'
        % (experiment, neutrino)
    )




def file_saver_clip(header , stringsave , x_lower , x_upper , x_avg , q2_lower , q2_upper , q2_avg , Enu_lower , Enu_upper , Enu_avg , diffxsec , N , stat_error , sys_error  ,fraction_variance_theta, fraction_variance_Elepton,fraction_variance_Ehadron,  MCsamples):
    """Write a binned-event table to ``stringsave`` as fixed-width text.

    The file consists of ``header`` written verbatim (the caller supplies its
    trailing newline), one line of column names, and one line per bin with
    17 whitespace-separated columns in the order of the arguments after
    ``stringsave``.

    Parameters
    ----------
    header : str
        First line of the file, written unchanged.
    stringsave : str
        Path of the output file; it is overwritten if it exists.
    x_lower, ..., MCsamples : sequences of numbers
        Per-bin column values. They are iterated in lockstep with ``zip``,
        so the table has as many rows as the shortest sequence.
    """
    column_names = (
        f"{'x_lower':<15} {'x_upper':<15} {'x_avg':<15}"
        f"{'Q2_lower[GeV^2]':<20} {'Q2_upper[GeV^2]':<20} {'Q2_avg[GeV^2]':<20}"
        f"{'E_nu_lower[GeV]':<20} {'E_nu_upper[GeV]':<20} {'E_nu_avg[GeV]':<20}"
        f"{'d^sigma/dxdQ2(x_avg,Q2_avg,Enu_avg) pb / GeV^2':<55}"
        f"{'N_events':<15} {'N_events_errs = Sqrt[N_events]':<34} {'N_sys_errs':<20}"
        f"{'fraction_error_theta': <25} {'fraction_error_Elepton': <25}"
        f"{'fraction_error_Ehadron' : <25}"
        f"{'MC_Samples'} \n"
    )
    rows = zip(x_lower , x_upper , x_avg , q2_lower , q2_upper , q2_avg ,
               Enu_lower , Enu_upper , Enu_avg , diffxsec , N , stat_error ,
               sys_error , fraction_variance_theta , fraction_variance_Elepton ,
               fraction_variance_Ehadron , MCsamples)

    # The context manager guarantees the file is flushed and closed even if
    # formatting one of the rows raises.
    with open(stringsave , 'w') as myfile:
        myfile.write(header)
        myfile.write(column_names)
        for (xl , xu , xa , q2l , q2u , q2a , Enul , Enuu , Enua , dx , NN ,
             stat , sys_err , sys_frac_theta , sys_frac_Elepton ,
             sys_frac_Ehadron , samples) in rows:
            # The systematic columns use the '.6' (general) format while the
            # rest use '.6e'; kept as-is so the file layout does not change.
            myfile.write(
                f"{format(xl, '.6e'):<15} {format(xu, '.6e'):<15} {format(xa, '.6e'):<15}"
                f"{format(q2l, '.6e'):<20} {format(q2u, '.6e'):<20} {format(q2a, '.6e'):<20}"
                f"{format(Enul, '.6e'):<20} {format(Enuu, '.6e'):<20} {format(Enua, '.6e'):<20}"
                f"{format(float(dx), '.6e'):<55}"
                f"{format(NN, '.6e'):<15} {format(stat, '.6e'):<35}"
                f"{format(sys_err, '.6'):<20}"
                f"{format(float(sys_frac_theta), '.6'):<25} {format(float(sys_frac_Elepton), '.6'):<25}"
                f"{format(float(sys_frac_Ehadron), '.6'):<25}"
                f"{format(samples, '.6e')} \n"
            )

#function to go through and save to .txt in same fashion. Have D_



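#returns x_lower, x_upper, x_avg, q2_lower, q2_upper, q2_avg, Enu_lower, Enu_upper, Enu_avg, diffxsec, N, stat error, sys error, the fractional errors from theta / E_lepton / E_hadron, and MC samples, as arrays of the same length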


def file_reader(experiment,name,smear_corr='',clipped = 0):
    """Read a binned-event table written in the ``file_saver_clip`` layout.

    The first two lines of the file (header and column names) are skipped;
    every following non-blank line must hold at least 17 whitespace-separated
    numeric columns. Extra columns are ignored.

    Parameters
    ----------
    experiment : str
        Unused; kept so existing callers keep working.
    name : str
        File path. It is joined onto the current working directory, so an
        absolute path is used unchanged.
    smear_corr : str, optional
        Unused; kept so existing callers keep working.
    clipped : int, optional
        Unused. The path is now resolved the same way for every value;
        previously any value other than 0 left the path undefined and the
        call failed.

    Returns
    -------
    tuple of 17 numpy.ndarray
        ``(x_lower, x_upper, xavg, q2_lower, q2_upper, qavg, Enu_lower,
        Enu_upper, Enuavg, diffxsec, N, stat_error, sys_error,
        frac_variance_theta, frac_variance_Elepton, frac_variance_Ehadron,
        MCsamples)``, each a 1-D float array with one entry per data line
        (empty arrays when the file has no data lines).

    Raises
    ------
    ValueError
        If a data line has fewer than 17 columns or a non-numeric entry.
    """
    n_columns = 17
    readfile = os.path.join(os.getcwd(),name)

    rows = []
    with open(readfile,'r') as f:
        for il,line in enumerate(f):
            if il <= 1:
                # header line and column-name line
                continue
            sp = line.split()
            if not sp:
                # tolerate blank lines (e.g. a trailing newline at EOF)
                continue
            if len(sp) < n_columns:
                raise ValueError(
                    '%s, line %d: expected %d columns, found %d'
                    % (readfile, il + 1, n_columns, len(sp))
                )
            try:
                rows.append([float(entry) for entry in sp[:n_columns]])
            except ValueError as err:
                raise ValueError(
                    '%s, line %d: non-numeric entry (%s)' % (readfile, il + 1, err)
                ) from err

    if not rows:
        return tuple(np.array([]) for _ in range(n_columns))

    # Build the table once instead of growing 17 arrays with np.append,
    # which copies every array on every line.
    table = np.array(rows, dtype=float)
    return tuple(table[:, column].copy() for column in range(n_columns))

def get_nanlocs(file_array,experiment,save_folder = 'INCLUSIVE',event_minimum = 0,nocharge = 0,xmin_cut = 0,fracmin_cut = 0):
    """Return the indices of the bins that survive all clipping cuts.

    Each file in ``file_array`` is read from
    ``<cwd>/<save_folder>/<experiment>/`` with ``file_reader``. A bin index
    is kept when all of the following hold:

    * ``xavg`` is not NaN in every file,
    * the event count summed over all files is ``> event_minimum``,
    * ``x_lower < xmin_cut``,
    * ``err_sys / N < fracmin_cut``.

    Parameters
    ----------
    file_array : sequence of str
        File names (not paths) of the tables to combine; must not be empty.
    experiment : str
        Experiment sub-folder name, also passed through to ``file_reader``.
    save_folder : str, optional
        Top-level folder below the current working directory.
    event_minimum : number, optional
        Exclusive lower limit on the summed event count.
    nocharge : int, optional
        Unused; kept so existing callers keep working.
    xmin_cut : number, optional
        Exclusive upper limit on ``x_lower``.
    fracmin_cut : number, optional
        Exclusive upper limit on ``err_sys / N``.

    Returns
    -------
    numpy.ndarray
        Sorted integer indices of the surviving bins.

    Raises
    ------
    ValueError
        If ``file_array`` is empty or the files have different bin counts.
    """
    if len(file_array) == 0:
        raise ValueError('get_nanlocs needs at least one file in file_array')

    print('Cutting Events below',event_minimum)
    folder = os.path.join(os.getcwd() , save_folder , experiment)

    N_summation = 0
    not_nan = None
    for file_name in file_array:
        columns = file_reader(experiment , os.path.join(folder , file_name))
        x_lower , xavg , N , err_sys = columns[0] , columns[2] , columns[10] , columns[12]

        file_not_nan = ~np.isnan(xavg)
        if not_nan is None:
            not_nan = file_not_nan
        elif len(file_not_nan) != len(not_nan):
            raise ValueError(
                'get_nanlocs: %s has %d bins but earlier files have %d'
                % (file_name , len(file_not_nan) , len(not_nan))
            )
        else:
            not_nan = not_nan & file_not_nan
        N_summation = N_summation + N

    # NOTE(review): x_lower, err_sys and N below are those of the LAST file
    # in file_array only (unchanged from the original behaviour). For a
    # multi-file call the fractional-uncertainty cut is therefore not based
    # on the combined sample -- confirm this is intended.
    with np.errstate(divide='ignore' , invalid='ignore'):
        # Empty bins give inf/NaN here; both compare False and are dropped.
        small_uncertainty = err_sys / N < fracmin_cut

    keep = not_nan & (N_summation > event_minimum) & (x_lower < xmin_cut) & small_uncertainty
    return np.arange(len(keep))[keep]

def clip_nans(file_array , locs,experiment,nocharge = 0,save_folder = 'INCLUSIVEorCHARM',append_save = 'inclusiveORcharm'):
    """Write a clipped copy of each table, keeping only the rows in ``locs``.

    Every file named in ``file_array`` is read from
    ``<cwd>/<save_folder>/<experiment>/`` and rewritten, with its first line
    reused as the header, into the ``clipped_nan`` sub-folder there.

    Parameters
    ----------
    file_array : sequence of str
        File names (not paths) to clip.
    locs : index array
        Row selection applied to every column of every file.
    experiment : str
        Experiment sub-folder name, also passed through to ``file_reader``.
    nocharge : int, optional
        0: output is named ``clipped_<file>``.
        1: output is named ``clipped_<stem>_nochargediscrimination.txt``,
        where ``<stem>`` is the file name up to its first '.'.
        Any other value: nothing is written.
    save_folder : str, optional
        Top-level folder below the current working directory.
    append_save : str, optional
        Not used by this function.
    """
    for file in file_array:
        # Only the output name depends on the mode.
        if nocharge == 0:
            file_save = 'clipped_' + file
        elif nocharge == 1:
            file_save = 'clipped_' + file.split('.')[0] + '_nochargediscrimination.txt'
        else:
            return

        source_path = os.path.join(os.getcwd() , save_folder , experiment , file)
        target_path = os.path.join(os.getcwd() , save_folder , experiment , 'clipped_nan' , file_save)

        columns = file_reader(experiment , source_path)
        with open(source_path) as handle:
            header = handle.readline()

        # Apply the same row selection to all 17 columns, in file order.
        file_saver_clip(header , target_path , *[column[locs] for column in columns])


def combine_clipped_nunubar(file_array , locs,experiment,neutrino_pid = 14,save_folder = 'INCLUSIVEorCHARM',append_save = 'inclusiveORcharm'):
    """Combine the nu and nubar tables into one clipped 'no charge' table.

    Reads ``nan_binned_sysevents_<experiment>_<append_save>_nu.txt`` and the
    matching ``_nub.txt`` file from ``<cwd>/<save_folder>/<experiment>/``,
    adds them bin by bin, keeps the rows in ``locs`` and writes
    ``clipped_nan/clipped_nan_binned_sysevents_<experiment>_<append_save>_nochargediscrimination.txt``
    with a freshly built header.

    Combination rules: event counts and differential cross sections are
    summed; the absolute systematic errors are added in quadrature; each
    fractional systematic is the quadrature sum of ``N * fraction`` over the
    two samples divided by the total ``N``; the statistical error is
    ``sqrt(N_total)``.

    Parameters
    ----------
    file_array : sequence of str
        Unused; kept so existing callers keep working.
    locs : index array
        Rows (bins) to keep.
    experiment : str
        Experiment name (sub-folder, file-name component and lookup key for
        ``delta_exp`` / ``thresholds`` / ``experiment_nL``).
    neutrino_pid : int, optional
        PID used to look up the values quoted in the header.
    save_folder, append_save : str, optional
        Folder below the working directory and file-name component.

    Raises
    ------
    ValueError
        If the seeds recorded in the two input headers differ.
    """
    folder = os.path.join(os.getcwd() , save_folder , experiment)
    file_read_nu = os.path.join(folder , 'nan_binned_sysevents_%s_%s_nu.txt' %(experiment,append_save))
    file_read_nub = os.path.join(folder , 'nan_binned_sysevents_%s_%s_nub.txt' %(experiment,append_save))

    (_ , _ , _ , _ , _ , _ , _ , _ , _ , diffxsecnu , Nnu , _ , sys_error_nu ,
     frac_sys_theta_nu , frac_sys_Elepton_nu , frac_sys_Ehadron_nu , _) = file_reader(experiment,file_read_nu)

    # Bin edges, averages and MCsamples are taken from the nubar file.
    # NOTE(review): MCsamples of the nu file is not used -- confirm whether
    # the combined table should report the sum instead.
    (x_lower , x_upper , xavg , q2_lower , q2_upper , qavg , Enu_lower , Enu_upper , Enuavg ,
     diffxsecnub , Nnub , _ , sys_error_nub ,
     frac_sys_theta_nub , frac_sys_Elepton_nub , frac_sys_Ehadron_nub , MCsamples) = file_reader(experiment,file_read_nub)

    Ntotal = Nnu + Nnub
    diffxsec = diffxsecnu[locs] + diffxsecnub[locs]
    sys_errortotal = np.sqrt(sys_error_nu**2 + sys_error_nub**2)

    frac_sys_theta_total =   np.sqrt(   (Nnu[locs]*frac_sys_theta_nu[locs])**2 +   (Nnub[locs]*frac_sys_theta_nub[locs])**2) / Ntotal[locs]
    frac_sys_Elepton_total = np.sqrt(   (Nnu[locs]*frac_sys_Elepton_nu[locs])**2 + (Nnub[locs]*frac_sys_Elepton_nub[locs])**2) / Ntotal[locs]
    # Fixed: the nubar term previously used frac_sys_Elepton_nub.
    frac_sys_Ehadron_total = np.sqrt(   (Nnu[locs]*frac_sys_Ehadron_nu[locs])**2 + (Nnub[locs]*frac_sys_Ehadron_nub[locs])**2) / Ntotal[locs]

    # The seed is the third space-separated token of the second
    # '|'-delimited field of each file's first line.
    with open(file_read_nu) as f:
        seednu = f.readline().split('|')[1].split(' ')[2]
    with open(file_read_nub) as f:
        seednub = f.readline().split('|')[1].split(' ')[2]

    print('Check seeds for nu and nubar' , seednu , seednub)
    if seednu != seednub:
        # Fixed: raise('error') raised a TypeError, since a str is not an
        # exception.
        raise ValueError(
            'nu and nubar files were generated with different seeds: %s vs %s'
            % (seednu , seednub)
        )

    deltheta,delEl,delEh = delta_exp(experiment , neutrino_pid)
    EH_min,EH_max,theta_min,theta_max,El_min,El_max = thresholds(experiment , neutrino_pid)
    #TODO: Update the header for other experiments. Only valid for FASERv2 right now
    n , L = experiment_nL(experiment )

    # The backslash continuations sit inside the string literal, so the
    # leading spaces of the continued lines are part of the header text.
    header_template = 'Experiment = %s | seed %s | Neutrino = nunubar,%s | Exp. Target density (nucleons / cm3) = 10^(%1.1f) | Exp. Target Length = %1.0f cm | Total Events = %1.1f |\
    del_theta = %1.1f mrad, theta_max = %1.1f mrad |        del_Elepton = %1.1f %s , Elepton_min = %1.1f GeV, Elepton_max = %1.1f GeV|\
    del_Ehadron = %1.1f %s, Ehadron_min = %1.1f GeV, Ehadron_max = %1.1f GeV \n'
    header = header_template % (
        experiment , seednu , str(neutrino_pid) , np.log10(n) , L , np.sum(Ntotal[locs]) ,
        deltheta * 1e3 , theta_max * 1e3 ,
        delEl * 100 , '%' , El_min , El_max ,
        delEh * 100 , '%' , EH_min , EH_max ,
    )

    stringsave = 'clipped_nan_binned_sysevents_%s_%s_nochargediscrimination.txt' %(experiment,append_save)
    file_saver_clip(header , os.path.join(folder , 'clipped_nan' , stringsave) ,
                    x_lower[locs] , x_upper[locs] , xavg[locs] ,
                    q2_lower[locs] , q2_upper[locs] , qavg[locs] ,
                    Enu_lower[locs] , Enu_upper[locs] , Enuavg[locs] ,
                    diffxsec , Ntotal[locs] , np.sqrt(Ntotal[locs]) , sys_errortotal[locs] ,
                    frac_sys_theta_total , frac_sys_Elepton_total , frac_sys_Ehadron_total ,
                    MCsamples[locs])


In [15]:
def clip_all_nans(experiment,neutrino,save_folder = 'INCLUSIVE',append_save = 'inclusive',event_minimum = 0,xmin_cut = 1.2,fracmin_cut = 1.0):
    """Produce the clipped nu, nubar and combined tables for one experiment.

    Works on ``nan_binned_sysevents_<experiment>_<append_save>_nu.txt`` and
    the matching ``_nub.txt`` file in ``<cwd>/<save_folder>/<experiment>/``
    and writes into the ``clipped_nan`` sub-folder there (created if
    missing):

    1. the nu table clipped to the bins selected from the nu file alone,
    2. the nubar table clipped to the bins selected from the nubar file alone,
    3. the combined table clipped to the bins selected from both files.

    Parameters
    ----------
    experiment : str
        Experiment name (sub-folder and file-name component).
    neutrino : int
        Neutrino PID; forwarded to ``combine_clipped_nunubar`` for the
        header of the combined table. It was previously ignored, so that
        header always used the default PID 14.
    save_folder, append_save : str, optional
        Folder below the working directory and file-name component.
    event_minimum, xmin_cut, fracmin_cut : number, optional
        Cut values forwarded to ``get_nanlocs``.
    """
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(os.path.join(os.getcwd() , save_folder , experiment , 'clipped_nan') , exist_ok=True)

    nu_file = 'nan_binned_sysevents_%s_%s_nu.txt' %(experiment,append_save)
    nub_file = 'nan_binned_sysevents_%s_%s_nub.txt' %(experiment,append_save)
    cuts = dict(save_folder = save_folder , event_minimum = event_minimum ,
                xmin_cut = xmin_cut , fracmin_cut = fracmin_cut)

    # nu first, then nubar: each is clipped with its own bin selection.
    for single_file in (nu_file , nub_file):
        file_array = [single_file]
        locs = get_nanlocs(file_array , experiment , **cuts)
        clip_nans(file_array , locs , experiment , nocharge=0 , save_folder = save_folder , append_save = append_save)

    # Combined case: bins must pass in both files. The order [nub, nu] is
    # kept because get_nanlocs takes some of its cuts from the last file.
    locsnunub = get_nanlocs([nub_file , nu_file] , experiment , nocharge = 1 , **cuts)
    combine_clipped_nunubar([nu_file , nub_file] , locsnunub , experiment ,
                            neutrino_pid = neutrino ,
                            save_folder = save_folder , append_save = append_save)
    
#     print('no nan nu locs:',locsnu,len(locsnu))




# make clips here

In [17]:
#want to clip INCLUSIVE or CHARM?
save_folder = 'INCLUSIVE'
append_save = save_folder.lower()

#some kwargs
#fracmin_cut = 1.0 : clip all bins with fractional uncertainty > fracmin_cut
#event_minimum = 0 : clip bins that do not have at least event_minimum number of events
#xmin_cut = 1.2 : clip bins with x > xmin_cut : xmin_cut >1.0 means this does nothing

# (experiment, event_minimum) pairs, processed in this order:
# FLArE first, then SND, then FASER.
for _experiment , _event_minimum in (
    ('FLArE10' , 100) ,
    ('FLArE100' , 100) ,
    ('SND' , 10) ,
    ('AdvSND' , 100) ,
    ('FASERv' , 10) ,
    ('FASERv2' , 100) ,
):
    clip_all_nans(_experiment , 14 , save_folder = save_folder , append_save = append_save ,
                  event_minimum = _event_minimum , fracmin_cut = 1.0)

Cutting Events below 100
Cutting Events below 100
Cutting Events below 100
Check seeds for nu and nubar 2 2
Cutting Events below 100
Cutting Events below 100
Cutting Events below 100
Check seeds for nu and nubar 2 2
Cutting Events below 10
Cutting Events below 10
Cutting Events below 10
Check seeds for nu and nubar 2 2
Cutting Events below 100
Cutting Events below 100
Cutting Events below 100
Check seeds for nu and nubar 2 2
Cutting Events below 10
Cutting Events below 10
Cutting Events below 10
Check seeds for nu and nubar 2 2
Cutting Events below 100
Cutting Events below 100
Cutting Events below 100
Check seeds for nu and nubar 2 2
