### Intended Versions:
- Version 1 of this script was developed within a 3 hour time constraint, as such there are some areas that could be optimized. For example, the main folder architecture could be automatically generated with a recursive approach rather than manually defining the structure.
- Version 2 polishes the details for the final output script
- The Final Version is intended to fully automate the task of outputting the script to a text file and requires adding parameters to generate config scripts for any directory of interest for the DQdisplay.

In [6]:
# IMPORTS
import ROOT
import pandas as pd


# FUNCTIONS
def map_paths(tf,file,f_path,f_path_list):  
    
    """
    
    Preprocesses ROOT runfile, outputs path information of all histograms in directory of interest.(Currently MissingEt, later this will be a function parameter).
    
    """
    
    # Main loop
    for key in tf.GetListOfKeys():    
        input = key.ReadObj()
        
        # Determine if the location in the file we are at is a directory
        if issubclass(type(input),ROOT.TDirectoryFile):   
        
            # Record the path of the directory we are looking in
            try:
                f_path = input.GetPath() 
            except:
                print("cant GetPath")

            # Split the path by '/' so we can determine where we are in the folder structure        
            try:
                split_path = f_path.split("/")
            except:
                print('cant split_path')            
            
            # Recursively go deeper into the file structure depending on the length of split_path
#             if len(split_path) == 3:
    
            if 'run' in split_path[-1]:
                # We are 2 directories deep, go deeper
                f_path,f_path_list = map_paths(input,file,f_path, f_path_list)  
            elif len(split_path) > 2 and 'MissingEt' in split_path[-1]:                
                # We are greater than 2 directories deep and these directories include MissingEt
                f_path, f_path_list = map_paths(input,file,f_path, f_path_list)     
            elif len(split_path) > 2 and any(folder in split_path for folder in ['MissingEt']):                
                # We are greater than 2 directories deep and these directories include any folders in MissingEt
                f_path, f_path_list = map_paths(input,file,f_path, f_path_list)         
            
            else:
                pass
            
            # Record the file_path that will result now that we are done with the current folder level
            #  i.e. the folder path that results from going up a level in the directory
            f_path = f_path.split('/')
            f_path = '/'.join(f_path[:-1])
            
                
        elif issubclass(type(input),ROOT.TH1):
            
            # Record the path of the directory we are looking in with the name of the hist file as part of the path
            try:
                f_path_th1 = f_path + '/' + input.GetName()                
            except:
                print("cant GetPath2")

            # Get the part of f_path that follows the ':'
            f_path_th1 = f_path_th1.split(':')
            f_path_th1 = f_path_th1[1][1:]
            
            # Adds the TH1 path to the list of paths
            f_path_list.append(f_path_th1)
    
    return f_path, f_path_list


def hist_to_df(path):
    
    """
    
    Converts ROOT histogram data from map_paths() to a pandas dataframe.
    
    """
    
    # Get a handle for the root file
    file = ROOT.TFile.Open(path)

    # Get the path_list by processing the ROOT file
    f_path,f_path_list = map_paths(file,file,'',[])
    
    # Convert that list into a dataframe for further use elsewhere
    return pd.DataFrame({'paths':f_path_list})

def gen_alltriggers(all_triggers,others,met_calo,depth):
    """
    Generates parts of the Athena DQdisplay config script specific to the AllTriggers folder in the MissingEt folder.
    """
    
    # Loop through the directories in all_Triggers
    for dir_ in all_triggers:
        
        if dir_ =='BadJets':
            # Generate the BadJets part of the script
            print(f"{depth*tab}dir {dir_}",'{')
            
            # Generate the part of the script for the directories within BadJets
            gen_others(others,met_calo,depth+1)
            
            # Genrate the closing part of this directory
            print(f'{(depth*tab)[:-1]}','}',f'#{dir_}\n')
            print('') # For formatting of the script
            
        elif dir_ =='MET_Calo':
            # Generate the MET_Calo part of the script
            print(f"{depth*tab}dir {dir_}",'{')
            
            # Generate the directories within MET_Calo
            gen_metcalo(met_calo,depth+1)
            
            # Generate the closing part of this directory
            print(f'{(depth*tab)[:-1]}','}',f'#{dir_}\n')
            print('') # For formatting of the script
            
        # If the directory is neither BadJEts or MET_Calo, generate the code for these specific directories
        else:
            print(f"{depth*tab}dir {dir_}",'{',f'\n\n{(depth*tab)[:-1]}','}',f'#{dir_}\n')
    print('') # For formatting of the script
    
    return

def gen_metcalo(met_calo,depth):
    """
    Generates the part of the script for the directories within MET_Calo
    """
    
    for dir_ in met_calo:
        print(f"{depth*tab}dir {dir_}",'{',f'\n\n{(depth*tab)[:-1]}','}',f'#{dir_}\n')
    return

def gen_others(others,met_calo,depth):
    """
    Generates the part of the script for the directories other than BadJets and MET_Calo
    """
    
    for dir_ in others:
        # There are MET_Calo directories within the other directories, generate these parts of the script as well
        if dir_ =='MET_Calo':
            # Generate the directory part of the script for this directory
            print(f"{depth*tab}dir {dir_}",'{')
            
            # Generate the folders within this MET_Calo directory
            gen_metcalo(met_calo,depth+1)
            
            # Generate the closing part of this directory
            print(f'{(depth*tab)[:-1]}','}',f'#{dir_}\n')
            print('') # For formatting of the script
            
        # For every other directory, generate the script according to the established format
        else:
            print(f"{depth*tab}dir {dir_}",'{',f'\n\n{(depth*tab)[:-1]}','}',f'#{dir_}\n')
    print('') # For formatting of the script
    return

def gen_config_MET_main(main_folder,algorithm,reference,run_forconfig):
    """
    Writes the overall script structure minus the histogram specific part of the script.
    """

    # Initialize a sub_folder list
    sub_folders = []
    
    # Initialize the reused directories
    dir2 = 'MET_AntiKt4EMTopo'
    dir3 = 'MET_Calo'

    # Define the AllTriggers directory, MET_Calo directory, and other directories
    all_triggers = ['BadJets','MET_AntiKt4EMPFlow',dir2,dir3]
    others = ['MET_AntiKt4EMPflow',dir2,dir3]
    met_calo = ['EMTopo','MET_Cell']

    # Get a list of paths from the run_forconfig file
    paths = list(hist_to_df(run_forconfig)['paths'])
    
    # Output the first part of the script
    print(f"dir {main_folder}","{")
    print(f"{tab}algorithm = {algorithm}")
    print(f"{tab}reference = {reference}")
    print('') # For formatting of the script

    # Split the paths from paths
    split_paths = [path.split('/')[2:] for path in paths]

    # Loop through the paths and determine the sub folders
    for path in split_paths:
        # Get a handle for the sub folder specific to this path
        sub_folder = path[0]
        # If this sub_folder is unique
        if sub_folder not in sub_folders:
            # Store this sub folder in the sub_folders list
            sub_folders.append(sub_folder)

    # Loop through the sub folders and begin writing the next part of the script
    for sub_folder in sub_folders:
        
        # Write the sub_folder part of the script
        print(f"{tab}dir {sub_folder}",'{',f'\n\n')
        
        # If the sub_folder is AllTriggers, write the part of the script specific to alltriggers
        if sub_folder =='AllTriggers':
            gen_alltriggers(all_triggers,others,met_calo,2)
        # For any sub folder other than AllTriggers, write that part of the script
        else: 
            gen_others(others,met_calo,2)
        print(f'{tab[:-1]}','}',f'#{sub_folder}\n')
        
    return
    
def gen_config_MET_hists(hist_algorithm,run_forconfig):
    """
    Writes the histogram specific part of the script.
    """
    
    # Get a list of paths from the run_forconfig file
    paths = list(hist_to_df(run_forconfig)['paths'])
    
    # Split the paths from paths
    split_paths = [path.split('/')[2:] for path in paths]
    
    # Loop through the paths
    for path in split_paths:
        # Print the output line for the histogram part of the script
        print(f"output = {main_folder}/{'/'.join(path[:-1])}")

        # Print the hist line for the histogram part of the script
        print(f'hist {path[-1]}','{')

#         # Programmatically determine which hist algorithm we should use for this specific histogram
#         if 'hist?' == path[-1]:
#             hist_algorithm = ''
        
        # Print the algorithm line for the histogram part of the script
        print(f'{tab}algorithm = {hist_algorithm}')
        
        # Print the display line for the histogram part of the script
        print(f'{tab}display = LogY')
        
        print('}') # For formatting of the script
    return

In [7]:
# EXAMPLE INPUTS
tab = '    '
main_folder = 'MissingEt'
algorithm = 'METGatherData'
reference = 'CentrallyManagedReferences'
run_forconfig = 'data18_13TeV.00349268.physics_Main.merge.HIST..26844909._000001.pool.root.1'
hist_algorithm = 'METChisq'

In [8]:
gen_config_MET_main(main_folder,algorithm,reference,run_forconfig)

dir MissingEt {
    algorithm = METGatherData
    reference = CentrallyManagedReferences

    dir AllTriggers { 


        dir BadJets {
            dir MET_AntiKt4EMPflow { 

            } #MET_AntiKt4EMPflow

            dir MET_AntiKt4EMTopo { 

            } #MET_AntiKt4EMTopo

            dir MET_Calo {
                dir EMTopo { 

                } #EMTopo

                dir MET_Cell { 

                } #MET_Cell

            } #MET_Calo



        } #BadJets


        dir MET_AntiKt4EMPFlow { 

        } #MET_AntiKt4EMPFlow

        dir MET_AntiKt4EMTopo { 

        } #MET_AntiKt4EMTopo

        dir MET_Calo {
            dir EMTopo { 

            } #EMTopo

            dir MET_Cell { 

            } #MET_Cell

        } #MET_Calo



    } #AllTriggers

    dir CutMet80 { 


        dir MET_AntiKt4EMPflow { 

        } #MET_AntiKt4EMPflow

        dir MET_AntiKt4EMTopo { 

        } #MET_AntiKt4EMTopo

        dir MET_Calo {
            dir EMTopo { 

            } #EMTop

In [9]:
gen_config_MET_hists(hist_algorithm,run_forconfig)

output = MissingEt/AllTriggers/BadJets/MET_AntiKt4EMPflow
hist MET_PFlow_Muon_et {
    algorithm = METChisq
    display = LogY
}
output = MissingEt/AllTriggers/BadJets/MET_AntiKt4EMPflow
hist MET_PFlow_Muon_ex {
    algorithm = METChisq
    display = LogY
}
output = MissingEt/AllTriggers/BadJets/MET_AntiKt4EMPflow
hist MET_PFlow_Muon_ey {
    algorithm = METChisq
    display = LogY
}
output = MissingEt/AllTriggers/BadJets/MET_AntiKt4EMPflow
hist MET_PFlow_Muon_phi {
    algorithm = METChisq
    display = LogY
}
output = MissingEt/AllTriggers/BadJets/MET_AntiKt4EMPflow
hist MET_PFlow_Muon_sumet {
    algorithm = METChisq
    display = LogY
}
output = MissingEt/AllTriggers/BadJets/MET_AntiKt4EMPflow
hist MET_PFlow_PVSoftTrk_et {
    algorithm = METChisq
    display = LogY
}
output = MissingEt/AllTriggers/BadJets/MET_AntiKt4EMPflow
hist MET_PFlow_PVSoftTrk_ex {
    algorithm = METChisq
    display = LogY
}
output = MissingEt/AllTriggers/BadJets/MET_AntiKt4EMPflow
hist MET_PFlow_PVSoftTrk_