# TMVA Multiclass
This macro provides a simple example of training and testing a TMVA
multiclass classification.
- Project   : TMVA - a Root-integrated toolkit for multivariate data analysis
- Package   : TMVA
- Root Macro: TMVAMulticlass



**Author:** Andreas Hoecker  
<i><small>This notebook tutorial was automatically generated with <a href= "https://github.com/root-project/root/blob/master/documentation/doxygen/converttonotebook.py">ROOTBOOK-izer (Beta)</a> from the macro found in the ROOT repository  on Monday, May 01, 2017 at 05:07 AM.</small></i>

In [1]:
%%cpp -d
#include <cstdlib>
#include <iostream>
#include <map>
#include <string>

#include "TFile.h"
#include "TTree.h"
#include "TString.h"
#include "TSystem.h"
#include "TROOT.h"


#include "TMVA/Tools.h"
#include "TMVA/Factory.h"
#include "TMVA/DataLoader.h"
#include "TMVA/TMVAMultiClassGui.h"


using namespace TMVA;

 Arguments are defined. 

In [2]:
// Comma-separated list of MVA method names to run. It is left empty here,
// so the default on/off selection defined further down stays in effect.
TString myMethodList("");

This loads the library

In [3]:
// Touch the TMVA::Tools singleton once up front; the tutorial relies on this
// call to have the TMVA library loaded before anything else is used.
TMVA::Tools::Instance();

To get access to the GUI and all TMVA macros:

     TString tmva_dir(TString(gRootDir) + "/tmva");
     if(gSystem->Getenv("TMVASYS"))
        tmva_dir = TString(gSystem->Getenv("TMVASYS"));
     gROOT->SetMacroPath(tmva_dir + "/test/:" + gROOT->GetMacroPath() );
     gROOT->ProcessLine(".L TMVAMultiClassGui.C");

---------------------------------------------------------------
 Default MVA methods to be trained + tested

In [4]:
// Switchboard of the MVA methods this tutorial knows about:
// 1 = book, train and test the method, 0 = skip it.
std::map<std::string,int> Use = {
   { "MLP",     1 },
   { "BDTG",    1 },
   { "DNN",     0 },
   { "FDA_GA",  0 },
   { "PDEFoam", 0 }
};

---------------------------------------------------------------

In [5]:
// Announce the start of the tutorial.
std::cout << std::endl;
std::cout << "==> Start TMVAMulticlass" << std::endl;

// A non-empty method list overrides the defaults: switch every method off
// first, then re-enable only the ones that were explicitly requested.
if (myMethodList != "") {
   for (auto &entry : Use) entry.second = 0;

   const std::vector<TString> requested = TMVA::gTools().SplitString( myMethodList, ',' );
   for (const TString &token : requested) {
      const std::string regMethod(token);

      // Unknown name: list the valid choices and stop.
      if (Use.count(regMethod) == 0) {
         std::cout << "Method \"" << regMethod << "\" not known in TMVA under this name. Choose among the following:" << std::endl;
         for (const auto &known : Use) std::cout << known.first << " ";
         std::cout << std::endl;
         return;
      }
      Use[regMethod] = 1;
   }
}


==> Start TMVAMulticlass


Create a new root output file.

In [6]:
TString outfileName = "TMVAMulticlass.root";
TFile* outputFile = TFile::Open( outfileName, "RECREATE" );

TMVA::Factory *factory = new TMVA::Factory( "TMVAMulticlass", outputFile,
                                            "!V:!Silent:Color:!DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=multiclass" );
TMVA::DataLoader *dataloader=new TMVA::DataLoader("dataset");

dataloader->AddVariable( "var1", 'F' );
dataloader->AddVariable( "var2", "Variable 2", "", 'F' );
dataloader->AddVariable( "var3", "Variable 3", "units", 'F' );
dataloader->AddVariable( "var4", "Variable 4", "units", 'F' );

// Locate the example data file, generating it with the createData.C tutorial
// macro when it is not present in the working directory.
TString fname = "./tmva_example_multiple_background.root";
if (gSystem->AccessPathName( fname )) {
   // AccessPathName returns true when the path is NOT accessible,
   // so this branch handles the missing-file case.
   std::cout << "Creating testdata...." << std::endl;
   TString createDataMacro = gROOT->GetTutorialDir() + "/tmva/createData.C";
   gROOT->ProcessLine(TString::Format(".L %s",createDataMacro.Data()));
   gROOT->ProcessLine("create_MultipleBackground(2000)");
   std::cout << " created tmva_example_multiple_background.root for tests of the multiclass features"<<std::endl;
}
else {
   // The file already exists in the local directory.
   std::cout << "--- TMVAMulticlass   : Accessing " << fname << std::endl;
}

// Either way the file is opened from the local directory; give up if that fails.
TFile *input = TFile::Open( fname );
if (!input) {
   std::cout << "ERROR: could not open data file" << std::endl;
   exit(1);
}

// Fetch the signal tree and the three background trees from the input file.
TTree *signalTree  = static_cast<TTree*>( input->Get("TreeS") );
TTree *background0 = static_cast<TTree*>( input->Get("TreeB0") );
TTree *background1 = static_cast<TTree*>( input->Get("TreeB1") );
TTree *background2 = static_cast<TTree*>( input->Get("TreeB2") );

// Change directory to the top level of the output file before registering the trees.
gROOT->cd( outfileName + ":/" );

// Each tree is registered under its own class name.
dataloader->AddTree( signalTree,  "Signal" );
dataloader->AddTree( background0, "bg0" );
dataloader->AddTree( background1, "bg1" );
dataloader->AddTree( background2, "bg2" );

// No preselection cut (empty string); the option string controls the train/test split.
dataloader->PrepareTrainingAndTestTree( "", "SplitMode=Random:NormMode=NumEvents:!V" );

// Book every method that is switched on in the Use map.

// Gradient boosted decision trees
if (Use["BDTG"]) {
   factory->BookMethod( dataloader, TMVA::Types::kBDT, "BDTG",
                        "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.10:UseBaggedBoost:BaggedSampleFraction=0.50:nCuts=20:MaxDepth=2" );
}

// Neural network (multilayer perceptron)
if (Use["MLP"]) {
   factory->BookMethod( dataloader, TMVA::Types::kMLP, "MLP",
                        "!H:!V:NeuronType=tanh:NCycles=1000:HiddenLayers=N+5,5:TestRate=5:EstimatorType=MSE" );
}

// Functional discriminant with GA minimizer
if (Use["FDA_GA"]) {
   factory->BookMethod( dataloader, TMVA::Types::kFDA, "FDA_GA",
                        "H:!V:Formula=(0)+(1)*x0+(2)*x1+(3)*x2+(4)*x3:ParRanges=(-1,1);(-10,10);(-10,10);(-10,10);(-10,10):FitMethod=GA:PopSize=300:Cycles=3:Steps=20:Trim=True:SaveBestGen=1" );
}

// PDE-Foam approach
if (Use["PDEFoam"]) {
   factory->BookMethod( dataloader, TMVA::Types::kPDEFoam, "PDEFoam",
                        "!H:!V:TailCut=0.001:VolFrac=0.0666:nActiveCells=500:nSampl=2000:nBin=5:Nmin=100:Kernel=None:Compress=T" );
}

// Deep neural network: the option string is assembled from the general
// settings, the layer layout and the training strategy, joined with ':'.
if (Use["DNN"]) {
    const TString layout = "Layout=TANH|100,TANH|50,TANH|10,LINEAR";

    // Two sets of training settings, combined with '|' into one TrainingStrategy option.
    const TString phase0 = "LearningRate=1e-1, Momentum=0.5, Repetitions=1, ConvergenceSteps=10,"
                           " BatchSize=256, TestRepetitions=10, Multithreading=True";
    const TString phase1 = "LearningRate=1e-2, Momentum=0.0, Repetitions=1, ConvergenceSteps=10,"
                           " BatchSize=256, TestRepetitions=7, Multithreading=True";
    const TString strategy = "TrainingStrategy=" + phase0 + "|" + phase1;

    TString dnnOptions = "!H:V:ErrorStrategy=CROSSENTROPY:VarTransform=N:"
                         "WeightInitialization=XAVIERUNIFORM:Architecture=STANDARD";
    dnnOptions += ":" + layout;
    dnnOptions += ":" + strategy;

    factory->BookMethod(dataloader, TMVA::Types::kDNN, "DNN", dnnOptions );
}

Creating testdata....
... event: 0 (2000)
... event: 1000 (2000)
 var1            = -1.14361
 var2            = -0.822373
 var3            = -0.395426
 var4            = -0.529427
created tree: TreeS
... event: 0 (2000)
... event: 1000 (2000)
 var1            = -1.54361
 var2            = -1.42237
 var3            = -1.39543
 var4            = -2.02943
created tree: TreeB0
... event: 0 (2000)
... event: 1000 (2000)
 var1            = -1.54361
 var2            = -0.822373
 var3            = -0.395426
 var4            = -2.02943
created tree: TreeB1
 var1            = 0.463304
 var2            = 1.37192
 var3            = -1.16769
 var4            = -1.77551
created tree: TreeB2
created data file: tmva_example_multiple_background.root
 created tmva_example_multiple_background.root for tests of the multiclass features
DataSetInfo              : [dataset] : Added class "Signal"
                         : Add Tree TreeS of type Signal with 2000 events
DataSetInfo              : [dataset] : 

Train the MVAs using the set of training events.

In [7]:
// Train every method that was booked with the factory.
factory->TrainAllMethods();

Factory                  : Train all methods
Factory                  : [dataset] : Create Transformation "I" with events from all classes.
                         : 
                         : Transformation, Variable selection : 
                         : Input : variable 'var1' <---> Output : variable 'var1'
                         : Input : variable 'var2' <---> Output : variable 'var2'
                         : Input : variable 'var3' <---> Output : variable 'var3'
                         : Input : variable 'var4' <---> Output : variable 'var4'
Factory                  : [dataset] : Create Transformation "D" with events from all classes.
                         : 
                         : Transformation, Variable selection : 
                         : Input : variable 'var1' <---> Output : variable 'var1'
                         : Input : variable 'var2' <---> Output : variable 'var2'
                         : Input : variable 'var3' <---> Output : variable 'var

Evaluate all MVAs using the set of test events.

In [8]:
// Apply every trained method to the test sample.
factory->TestAllMethods();

Factory                  : Test all methods
Factory                  : Test method: BDTG for Multiclass classification performance
                         : 
                         : Dataset[dataset] : Create results for testing
                         : Dataset[dataset] : Multiclass evaluation of BDTG on testing sample
                         : Dataset[dataset] : Elapsed time for evaluation of 4000 events: 0.69 sec       
                         : Creating multiclass response histograms...
Factory                  : Test method: MLP for Multiclass classification performance
                         : 
                         : Dataset[dataset] : Create results for testing
                         : Dataset[dataset] : Multiclass evaluation of MLP on testing sample
                         : Dataset[dataset] : Elapsed time for evaluation of 4000 events: 0.00754 sec       
                         : Creating multiclass response histograms...


Evaluate and compare the performance of all configured MVAs.

In [9]:
// Evaluate the booked methods and compare their performance.
factory->EvaluateAllMethods();

Factory                  : Evaluate all methods
                         : Evaluate multiclass classification method: BDTG
TFHandler_BDTG           : Variable        Mean        RMS   [        Min        Max ]
                         : -----------------------------------------------------------
                         :     var1:   0.057094     1.0236   [    -3.6592     3.2749 ]
                         :     var2:    0.31579     1.0607   [    -3.6952     3.7877 ]
                         :     var3:    0.11645     1.1227   [    -4.5727     4.5640 ]
                         :     var4:  -0.079113     1.2819   [    -4.7970     4.2221 ]
                         : -----------------------------------------------------------
                         : Dataset[dataset] : Determine optimal multiclass cuts for test data...
                         : Calculating best set of cuts for class Signal
FitterBase               : <GeneticFitter> Optimisation, please be patient ... (inaccurate

--------------------------------------------------------------

Save the output

In [10]:
// Close the output file now that training, testing and evaluation are done.
outputFile->Close();

std::cout << "==> Wrote root file: " << outputFile->GetName() << std::endl;
// Name this tutorial in the completion message (the original text was
// copied from the TMVAClassification tutorial).
std::cout << "==> TMVAMulticlass is done!" << std::endl;

// The factory and the data loader were allocated with new; release them.
delete factory;
delete dataloader;

==> Wrote root file: TMVAMulticlass.root
==> TMVAClassification is done!


Launch the GUI for the ROOT macros.

In [11]:
// Open the multiclass GUI on the result file unless ROOT is running in batch mode.
if (!gROOT->IsBatch()) {
   TMVAMultiClassGui( outfileName );
}