In [1]:
import ROOT
import os
import torch

 
# Short aliases for the ROOT classes used by the cells below.
TMVA = ROOT.TMVA
TFile = ROOT.TFile

In [2]:
# Initialise the TMVA tools instance before any other TMVA call.
TMVA.Tools.Instance()
# Switch checked further down to decide whether the BDT method is booked.
useBDT = True  # Boosted Decision Tree

In [3]:
# Output ROOT file handed to the TMVA factory; opened in "RECREATE" mode.
outputFile = TFile.Open("Higgs_ClassificationOutput.root", "RECREATE")

In [4]:
# TMVA factory for the classification job; it is given the job name, the output
# file and the steering options as keyword arguments.
factory = TMVA.Factory(
    "TMVA_Higgs_Classification",
    outputFile,
    V=False,
    ROC=True,
    Silent=False,
    Color=True,
    AnalysisType="Classification",
)

In [5]:

def read_root_files_from_dirs(directories):
    """Recursively collect the ``.root`` files found under each directory.

    Args:
        directories: Iterable of directory paths to scan.

    Returns:
        dict mapping every entry of ``directories`` to a sorted list of the
        paths (built with ``os.path.join``) of all files ending in ``.root``
        found anywhere below it. A directory that does not exist is reported
        with ``print`` and mapped to an empty list.
    """
    root_files_dict = {}

    for directory in directories:
        # Missing directories are reported but do not abort the scan.
        if not os.path.isdir(directory):
            print(f"Directory '{directory}' does not exist.")
            root_files_dict[directory] = []
            continue

        # os.walk yields entries in an arbitrary, filesystem-dependent order;
        # sorting makes the order in which files are later consumed reproducible.
        root_files_dict[directory] = sorted(
            os.path.join(parent, filename)
            for parent, _subdirs, filenames in os.walk(directory)
            for filename in filenames
            if filename.endswith(".root")
        )

    return root_files_dict

# Folders (relative to the working directory) that hold the background and
# signal samples.
bkg_dir, sgnl_dir = "bkg", "sgnl"

# Scan both folders in a single call.
directories = [bkg_dir, sgnl_dir]
files_dict = read_root_files_from_dirs(directories)

# Per-class file lists, defaulting to an empty list when a folder has no entry.
bkg_files = files_dict[bkg_dir] if bkg_dir in files_dict else []
sgnl_files = files_dict[sgnl_dir] if sgnl_dir in files_dict else []


In [6]:
# DataLoader that receives the input trees and variables; created with the name "dataset".
loader = TMVA.DataLoader("dataset")
# Weights intended for the signal and background trees respectively.
signalWeight = 1.0
backgroundWeight = 1.0

In [None]:
# for one signal file and one bkg file:
#sgnlFileName = "signal.root"
#bkgFileName = "background.root"
 

#sgnlInputFile = TFile.Open(sgnlFileName)
#bkgInputFile = TFile.Open(bkgFileName)
#signalTree = sgnlInputFile.Get("events")
#backgroundTree = bkgInputFile.Get("events")
####signalTree.Print()
#loader = TMVA.DataLoader("dataset")
#signalWeight = 1.0
#backgroundWeight = 1.0
#loader.AddSignalTree(signalTree, signalWeight)
#loader.AddBackgroundTree(backgroundTree, backgroundWeight)

In [7]:
def _attach_event_trees(paths, add_tree, weight, tree_name="events"):
    """Open each ROOT file in ``paths`` and register its tree with the loader.

    Args:
        paths: Sequence of ROOT file paths.
        add_tree: Callable invoked as ``add_tree(tree, weight)`` for every tree,
            e.g. ``loader.AddSignalTree`` or ``loader.AddBackgroundTree``.
        weight: Weight passed along with every tree.
        tree_name: Name of the tree to fetch from each file.

    Returns:
        ``(files, trees)``: two dicts keyed by the position of the path in
        ``paths``. The file handles are kept referenced alongside their trees.

    Raises:
        OSError: If a file cannot be opened.
        KeyError: If a file does not contain ``tree_name``.
    """
    files, trees = {}, {}
    for index, path in enumerate(paths):
        root_file = TFile.Open(path)
        # Fail with a clear message instead of dereferencing a null/zombie file.
        if not root_file or root_file.IsZombie():
            raise OSError(f"Could not open ROOT file '{path}'")
        tree = root_file.Get(tree_name)
        # A missing key yields a null object, which must not reach the loader.
        if not tree:
            raise KeyError(f"No '{tree_name}' tree found in '{path}'")
        files[index], trees[index] = root_file, tree
        add_tree(tree, weight)
    return files, trees


# Signal trees are registered with signalWeight, background trees with
# backgroundWeight, so each class uses its own weight.
sgnlInputFiles, sgnlTrees = _attach_event_trees(sgnl_files, loader.AddSignalTree, signalWeight)
bkgInputFiles, bkgTrees = _attach_event_trees(bkg_files, loader.AddBackgroundTree, backgroundWeight)


DataSetInfo              : [dataset] : Added class "Signal"
                         : Add Tree events of type Signal with 100000 events
                         : Add Tree events of type Signal with 100000 events
                         : Add Tree events of type Signal with 100000 events
                         : Add Tree events of type Signal with 100000 events
                         : Add Tree events of type Signal with 100000 events
                         : Add Tree events of type Signal with 100000 events
                         : Add Tree events of type Signal with 100000 events
                         : Add Tree events of type Signal with 100000 events
                         : Add Tree events of type Signal with 100000 events
                         : Add Tree events of type Signal with 100000 events
                         : Add Tree events of type Signal with 100000 events
                         : Add Tree events of type Signal with 100000 events
DataSetInfo     



In [8]:
# Adding variables
#
# Each enabled entry is (expression, title). Entries that are commented out are
# candidates that are currently not used as inputs.
input_variables = [
    ("Jet.energy", "Jet Energy"),
    ("Jet.momentum.x", "Jet MomX"),
    ("Jet.momentum.y", "Jet MomY"),
    ("Jet.momentum.z", "Jet MomZ"),
    # ("MissingET", "MET"),
    ("Jet.mass", "Jet Mass"),
    # ("CalorimeterHits.position.x", "CalX"),
    # ("CalorimeterHits.position.y", "CalY"),
    # ("CalorimeterHits.position.z", "CalZ"),
    # ("CalorimeterHits.time", "CalTime"),
    # ("CalorimeterHits.type", "CalType"),
]

# Whole-branch variables tried earlier with the two-argument form
# loader.AddVariable(name, "F"), currently disabled:
#   "Jet", "MissingET", "MCRecoAssociations", "EFlowNeutralHadron",
#   "EFlowPhoton", "EFlowTrack", "Electron", "Muon"

# The title-carrying overload is AddVariable(expression, title, unit, type).
# With only three string arguments the trailing "F" is taken as the *unit*, not
# the type, so the unit is passed explicitly (empty) followed by the type "F".
# TODO(review): fill in the physical units of each variable if they are known.
for expression, title in input_variables:
    loader.AddVariable(expression, title, "", "F")











# Boosted Decision Trees

if useBDT:
    # Options forwarded to BookMethod as keyword arguments, in this order.
    bdt_options = {
        "V": False,
        "NTrees": 1000,
        "MinNodeSize": "0.5%",
        "MaxDepth": 5,
        "BoostType": "AdaBoost",
        "AdaBoostBeta": 0.5,
        "UseBaggedBoost": True,
        "BaggedSampleFraction": 0.5,
        "SeparationType": "GiniIndex",
        "nCuts": 50,
    }
    factory.BookMethod(loader, TMVA.Types.kBDT, "BDT", **bdt_options)

# Deep Neural Network (DNN)
useDNN = True
if useDNN:
    # Comma-separated settings that together form the TrainingStrategy option.
    training_strategy = ",".join(
        [
            "LearningRate=1e-2",
            "Momentum=0.9",
            "Repetitions=1",
            "ConvergenceSteps=20",
            "BatchSize=256",
            "TestRepetitions=10",
            "WeightDecay=1e-4",
            "Regularization=L2",
            "DropConfig=0.0+0.5+0.5+0.0",
        ]
    )
    dnn_options = [
        "Layout=TANH|128,TANH|128,TANH|128,LINEAR",  # Layers and activation functions
        "TrainingStrategy=" + training_strategy,  # Training strategy
        "Architecture=CPU",  # You can use 'GPU' if available
    ]

    # The individual options are joined with ':' into one option string.
    # NOTE(review): recent ROOT releases appear to deprecate kDNN in favour of
    # kDL - confirm against the installed ROOT version.
    dnn_option_string = ":".join(dnn_options)
    factory.BookMethod(loader, TMVA.Types.kDNN, "DNN", dnn_option_string)


In [10]:
# Support Vector Machine (SVM)
useSVM = True
if useSVM:
    # SVM-specific parameters, forwarded to BookMethod as keyword arguments.
    svm_options = {
        "Gamma": 0.25,  # RBF kernel parameter
        "C": 1.0,  # Regularization parameter
        "Tol": 0.001,  # Tolerance for stopping criterion
        # Variable transformation.
        # NOTE(review): this is Python None, not the string "None" - confirm
        # how the keyword-to-option conversion renders it.
        "VarTransform": None,
    }
    factory.BookMethod(loader, TMVA.Types.kSVM, "SVM", **svm_options)


Factory                  : Booking method: [1mSVM[0m
                         : 


In [None]:
## Train methods
 
# Train every method that was booked on the factory in the cells above.
 
factory.TrainAllMethods()
## Test all methods
 
# Test the trained methods on the test data set, then evaluate them.
factory.TestAllMethods()
 
factory.EvaluateAllMethods()
 

In [None]:
# Retrieve the ROC curve for this loader from the factory and draw it.
c1 = factory.GetROCCurve(loader)
c1.Draw()
# Close the TMVA output file that was opened at the top of the notebook.
outputFile.Close()