# Particle Tagging

## Classification

### Load the training and test datasets

In [1]:
import ROOT
from larcv import larcv

def _openChain(treeName, fileName, description):
    """Build a TChain over one tree of one ROOT file and report its size.

    treeName    -- name of the tree to chain (e.g. "image2d_data_tree")
    fileName    -- path of the ROOT file added to the chain
    description -- text that completes the "Found <N> ..." status line

    Returns the ROOT.TChain with the file attached.
    """
    chain = ROOT.TChain(treeName)
    chain.AddFile(fileName)
    # A single pre-formatted argument prints the same line whether `print`
    # is the Python 2 statement or the Python 3 function.
    print('Found %s %s' % (chain.GetEntries(), description))
    return chain


# Input files: each one holds both the image tree and the truth-label tree.
TRAIN_FILE = 'InputFiles/classification_train_5k.root'
TEST_FILE = 'InputFiles/classification_test_5k.root'

# Images
train_image_chain = _openChain("image2d_data_tree", TRAIN_FILE, 'images in training dataset!')
test_image_chain = _openChain("image2d_data_tree", TEST_FILE, 'images in test dataset!')

# Truth labels
train_label_chain = _openChain("particle_mctruth_tree", TRAIN_FILE, 'labels in training dataset!')
test_label_chain = _openChain("particle_mctruth_tree", TEST_FILE, 'labels in test dataset!')

Welcome to JupyROOT 6.12/04
Found 5000 images in training dataset!
Found 5000 images in test dataset!
Found 5000 labels in training dataset!
Found 5000 labels in test dataset!


### Tools to preprocess the images and labels for Keras

In [2]:
import numpy as np
import pandas as pd
from tqdm import tqdm

# Map each PDG particle code to its class index: the position of a code in
# the tuple below is the category it is assigned.
pdgToCategory = dict(
    (pdgCode, category)
    for category, pdgCode in enumerate((
        11,    # 0: electron
        22,    # 1: gamma
        13,    # 2: muon
        211,   # 3: pion
        2212,  # 4: proton
    ))
)

def imageToTensor(imageToConvert):
    """Turn the image planes of one entry into a single 4D tensor.

    imageToConvert is a C++ larcv::EventImage2D exposed to the Python
    interpreter. Its as_vector() gives a std::vector of images, which
    PyROOT lets us iterate like any other Python sequence.

    Returns the first three planes stacked along a trailing axis, with a
    leading axis of length 1 added, i.e. shape
    (1, imageLength, imageHeight, 3).
    """
    # larcv.as_ndarray is the larcv helper that converts an image to numpy
    planes = [larcv.as_ndarray(plane) for plane in imageToConvert.as_vector()]

    # Exactly the first three planes are used; fewer than three raises IndexError
    firstPlane, secondPlane, thirdPlane = planes[0], planes[1], planes[2]
    tensor3D = np.dstack((firstPlane, secondPlane, thirdPlane))

    # Add the leading axis so the result is a 4D tensor
    return np.expand_dims(tensor3D, axis=0)

def imageChainToTensor(imageChain):
    """Convert every entry of an image chain into a tensor.

    imageChain is read entry by entry (with a tqdm progress bar); the
    content of its image2d_data_branch is passed to imageToTensor.

    Returns a Python list with one imageToTensor result per entry, in
    entry order.
    """
    def loadEntry(index):
        # GetEntry refreshes the branch content, so it has to be called
        # before the branch is read.
        imageChain.GetEntry(index)
        return imageToTensor(imageChain.image2d_data_branch)

    return [loadEntry(index) for index in tqdm(xrange(imageChain.GetEntries()))]

def pdgToLabel(labelToConvert):
    """Convert the truth particles of one entry into class indices.

    labelToConvert is the C++ object read from particle_mctruth_branch
    (presumably a larcv::EventParticle -- confirm), exposed to the Python
    interpreter. Its as_vector() gives the particles, and each particle
    reports its PDG code through pdg_code().

    Returns a list holding one category index per particle, in stored
    order, looked up in pdgToCategory. An entry without particles gives
    an empty list.

    Raises KeyError if a PDG code is not a key of pdgToCategory.
    """
    # The previous body was a copy of imageToTensor: it read the undefined
    # name `image` and tried to stack image planes, so it could never
    # produce a label.
    return [pdgToCategory[particle.pdg_code()]
            for particle in labelToConvert.as_vector()]

def labelChainToArray(labelChain):
    """Collect the class index of every truth particle in a label chain.

    labelChain is read entry by entry (with a tqdm progress bar). Every
    particle found in particle_mctruth_branch contributes one category,
    looked up from its PDG code in pdgToCategory.

    Returns a 1D numpy array of category indices, in reading order.
    Raises KeyError if a PDG code is not a key of pdgToCategory.
    """
    categories = []
    numEntries = labelChain.GetEntries()
    for index in tqdm(xrange(numEntries)):
        # GetEntry refreshes the branch content for this entry
        labelChain.GetEntry(index)
        particles = labelChain.particle_mctruth_branch.as_vector()
        categories.extend(pdgToCategory[p.pdg_code()] for p in particles)
    return np.array(categories)

In [3]:
from PIL import ImageFile                            
# NOTE(review): no code visible in this part of the notebook uses PIL; the
# flag is kept in case a later cell opens image files -- confirm it is needed.
ImageFile.LOAD_TRUNCATED_IMAGES = True

# Convert every image to a 4D tensor (one tensor per chain entry)
X_train = imageChainToTensor(train_image_chain)
X_test = imageChainToTensor(test_image_chain)

# Shape of the first tensor of each dataset
print(X_train[0].shape)
print(X_test[0].shape)

100%|██████████| 5000/5000 [00:07<00:00, 695.60it/s]
100%|██████████| 5000/5000 [00:07<00:00, 663.21it/s]

(1, 256, 256, 3)
(1, 256, 256, 3)





In [6]:
from keras.utils import np_utils

# There are 5 different particle types in the dataset
num_classes = 5

# Training set: class index per particle, then its one-hot encoding
trainLabelArray = labelChainToArray(train_label_chain)
y_train = np_utils.to_categorical(trainLabelArray, num_classes)

# Test set: same two steps
testLabelArray = labelChainToArray(test_label_chain)
y_test = np_utils.to_categorical(testLabelArray, num_classes)

# Quick look at the first ten one-hot rows
print(y_train[:10])

100%|██████████| 5000/5000 [00:00<00:00, 24981.89it/s]
100%|██████████| 5000/5000 [00:00<00:00, 25538.62it/s]


[[1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]]
