# Attributes available

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import pandas as pd
import uproot
from tqdm import tqdm
import os
from dataclasses import dataclass
from ROOT import TLorentzVector
import math

Welcome to JupyROOT 6.24/00


In [3]:
# Open one skimmed ROOT file and grab its 'Events' tree for inspection.
file = uproot.open('/data/Top/TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8_RunIIAutumn18/00EE0FB5-B8D0-8A48-8454-103219BD8EE9_Skim.root')
tree = file['Events']
# Load the tree's branches (columns) into memory; called without arguments,
# so no branch selection is applied here.
branches = tree.arrays() #columns

# Print the table of branch names, C++ type names and uproot interpretations.
tree.show()

name                 | typename                 | interpretation                
---------------------+--------------------------+-------------------------------
run                  | uint32_t                 | AsDtype('>u4')
luminosityBlock      | uint32_t                 | AsDtype('>u4')
event                | uint64_t                 | AsDtype('>u8')
btagWeight_CSVV2     | float                    | AsDtype('>f4')
btagWeight_DeepCSVB  | float                    | AsDtype('>f4')
CaloMET_phi          | float                    | AsDtype('>f4')
CaloMET_pt           | float                    | AsDtype('>f4')
CaloMET_sumEt        | float                    | AsDtype('>f4')
ChsMET_phi           | float                    | AsDtype('>f4')
ChsMET_pt            | float                    | AsDtype('>f4')
ChsMET_sumEt         | float                    | AsDtype('>f4')
nElectron            | uint32_t                 | AsDtype('>u4')
Electron_deltaEtaSC  | float[]                  | AsJagged

# Testing Top Quark reconstruction for specified process


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import pandas as pd
import uproot
from tqdm import tqdm
import os
from dataclasses import dataclass
from ROOT import TLorentzVector
import math
from multiprocessing import Pool
from all_functions_and_constants import *

In [None]:
import os

def list_folders_with_prefix(common_directory, prefix):
    """Return the paths of every directory below ``common_directory`` whose
    name starts with ``prefix``.

    The tree is walked recursively with ``os.walk``; each match is reported
    as the walk root joined with the directory name, in walk order.
    """
    return [
        os.path.join(parent, name)
        for parent, subdirs, _files in os.walk(common_directory)
        for name in subdirs
        if name.startswith(prefix)
    ]

# Usage:

## Specify "prefix" (examples: "SingleMuon", "TTToSemiLeptonic", ...)


### The cell below prints the folders which contain the root files which will be processed according to the pipeline of selection cuts.

In [None]:
# Root directory that is searched recursively for process folders.
common_directory = '/data/Top/'

# Name prefix of the process to select (can be signal, background, simulated, true).
# Every directory under common_directory whose name starts with it is picked up.
prefix = 'SingleMuon'

# Paths of all directories matching the prefix; consumed by the processing cell below.
matching_folders = list_folders_with_prefix(common_directory, prefix)

print(matching_folders)

### - The cell below will run the pipeline for each folder, and for each file in each folder. 
### - For each folder in "matching_folders" (printed above), a folder with the same name will be created in the current directory.
### - Each file in these folders has the same name as the .root file, but is a .txt file. 
### - Each row of a .txt file holds the weighted count of one histogram bin, so every file has one row per bin (the number of bins is specified as a configurable (global) constant in the all_functions_and_constants.py file)
### - Note that the per-event weight "eventWeightLumi" is taken into account when determining the bin heights for each .root file
### - The cell uses multiprocessing in order to live to see the result

In [None]:
# Size of the worker pool (MACHINE ON CLOUD VENETO IS 8 CORES)
num_processes = 8

for folder_path in matching_folders:
    # Helpers come from all_functions_and_constants: the first returns the
    # name of the output folder for this input folder (presumably creating
    # it, judging by its name), the second lists the entries to process.
    output_folder = get_characters_after_third_slash_and_create_folder(folder_path)
    folder_elements = list_folder_elements(folder_path)

    def process_file(j):
        """Run the selection pipeline on one ROOT file and save its histogram.

        ``j`` is the file name inside ``folder_path``. The 'Events' tree is
        read, reduced to the ``KEEP`` columns, and passed through the
        selection / reconstruction stages in order. Processing stops as soon
        as a stage leaves no events, in which case nothing is written.
        Otherwise the 'top_quark_mass' column is histogrammed into ``NBINS``
        bins, weighted by 'eventWeightLumi', and written to
        ``./<output_folder>/<file stem>.txt``.

        Returns None in every case; the function is used for its side effect.
        """
        # Close the ROOT file as soon as the arrays have been read into memory.
        with uproot.open(os.path.join(folder_path, j)) as file:
            branches = file['Events'].arrays()

        event_list = [{key: event[key] for key in KEEP} for event in branches]
        cleaned_df = pd.DataFrame(event_list)

        # Each stage maps a DataFrame to a (possibly smaller) DataFrame.
        stages = (
            # The skim filters must run on the freshly built frame; the
            # original code passed an undefined name `df` here.
            lambda frame: pd.concat([
                filter_dataframe_iSkim1(frame),
                filter_dataframe_iSkim2(frame),
                filter_dataframe_iSkim3(frame),
                filter_dataframe_iSkim4(frame),
            ]),
            lambda frame: frame[frame['CaloMET_sumEt'] > 60],
            btag_enough,
            lambda frame: frame[frame.apply(check_muon_condition, axis=1)],
            lambda frame: frame[frame.apply(filter_function_jet_pt_actual, axis=1)],
            calculate_invariant_mass_HadronicDecay_4Vector_modified,
            calculate_top_quark_masses,
            calculate_closest_mass,
        )
        for stage in stages:
            cleaned_df = stage(cleaned_df)
            if cleaned_df.empty:
                # No events survive: skip this file without writing output.
                return

        # Strip the extension (".root") rather than assuming its length.
        output_file = f"./{output_folder}/{os.path.splitext(j)[0]}.txt"
        save_weighted_binned_histogram_to_txt(cleaned_df, 'top_quark_mass', NBINS, output_file, 'eventWeightLumi')

    # One task per file; the pool is created per folder so that workers see
    # the current folder_path / output_folder.
    with Pool(processes=num_processes) as pool:
        pool.map(process_file, folder_elements)

## Once the code has been run for every process of interest we can move to *PLOTTING*

### The cell below contains the functions used for plotting. 

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D


def load_binned_histogram_from_txt(file_path):
    """Read a histogram stored as one number per line and return the values.

    Args:
        file_path: path of the text file to read.

    Returns:
        A list of floats, one per non-blank line, in file order.

    Raises:
        ValueError: if a non-blank line cannot be parsed as a float.

    Blank or whitespace-only lines (for example a trailing empty line at the
    end of the file) are skipped instead of aborting the whole load.
    """
    with open(file_path, 'r') as file:
        # float() already tolerates surrounding whitespace and the newline.
        return [float(line) for line in file if line.strip()]



def collect_and_plot_histograms(directory, color_map, output_path='new_reco.png'):
    """Sum the per-file binned histograms of each process and plot them together.

    Args:
        directory: root directory walked recursively for ``.txt`` files.
        color_map: mapping ``prefix -> colour``. A ``.txt`` file belongs to a
            prefix when the name of the folder containing it starts with
            that prefix.
        output_path: where the figure is saved (defaults to the previously
            hard-coded 'new_reco.png').

    The bin counts of all files sharing a prefix are added up. Prefixes
    starting with "SingleMuon" are drawn as black points with
    sqrt(count) error bars; every other prefix is drawn as a
    semi-transparent histogram in its mapped colour. Binning comes from the
    module-level ``LOWER_LIM``, ``UPPER_LIM`` and ``NBINS``.
    """
    # prefix -> {'color': ..., 'paths': [txt files found for that prefix]}
    data = {}
    for root, _dirs, files in os.walk(directory):
        txt_files = [name for name in files if name.endswith(".txt")]
        if not txt_files:
            continue
        folder = os.path.basename(root)  # Get the folder name
        for prefix, color in color_map.items():
            if folder.startswith(prefix):
                entry = data.setdefault(prefix, {'color': color, 'paths': []})
                entry['paths'].extend(os.path.join(root, name) for name in txt_files)

    fig, ax = plt.subplots()

    bin_edges = np.linspace(LOWER_LIM, UPPER_LIM, NBINS + 1)
    bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

    for prefix, folder_data in data.items():
        combined_counts = np.zeros(NBINS)
        for file_path in folder_data['paths']:
            # Combine counts from all files with the same prefix
            combined_counts += load_binned_histogram_from_txt(file_path)

        if prefix.startswith("SingleMuon"):
            # Plot a black dot at the bin height with Poissonian error bars
            ax.errorbar(bin_centers, combined_counts, yerr=np.sqrt(combined_counts),
                        capsize=3, fmt='ko', markersize=2, label=prefix)
        else:
            # The counts are already binned: feed one entry per bin (its left
            # edge) and use the stored counts as weights.
            ax.hist(bin_edges[:-1], bin_edges, weights=combined_counts, alpha=0.4,
                    edgecolor='black', label=prefix, color=folder_data['color'])

    ax.set_title('Combined Histogram - Top Quark')
    ax.set_xlabel('Mass (GeV)')
    ax.set_ylabel('Counts')

    # Create a custom legend: one coloured line per process that was found
    handles = [Line2D([0], [0], color=color_map[prefix], lw=4, label=prefix) for prefix in data]
    ax.legend(handles=handles, title="Processes")

    # Save before showing so the file is written even if the display step
    # blocks or releases the figure.
    fig.savefig(output_path, facecolor='w', dpi=300)
    plt.show()

### Usage:

1. **directory**: directory in which folders starting with the process prefix are contained
2. **color_map**: colors to assign to each prefix, note that SingleMuon will always be black with poissonian error bars

In [None]:
# Directory that is walked recursively for the per-process folders of .txt histograms.
directory = "/2221_LCP"
# Colour per process prefix; prefixes starting with "SingleMuon" are drawn as black points.
color_map = {"SingleMuon": 'black', 'TTToSemiLeptonic': 'red'}
# Build, display and save the combined histogram.
collect_and_plot_histograms(directory, color_map)