# Phi to Mu-Mu 13 TeV ML Track Reconstruction Preprocessing

Joeri R. Hermans                    
*Department of Data Science & Knowledge Engineering*  
*Maastricht University, The Netherlands*        

In [1]:
!(date +%d\ %B\ %G)

14 February 2017


In [2]:
import numpy as np
import os

List the CMS event files in the data directory.

In [3]:
# Shell escape: list the entries of ../data/ (one sub-directory per event).
!ls ../data/

RelValPhiToMuMu_13_GEN-SIM-RECO_evt4358


List the track parameter files of the Phi to Mu-Mu event. The track parameters were extracted from ROOT files using [https://github.com/diana-hep/c2numpy](https://github.com/diana-hep/c2numpy).

In [4]:
# Set the relevant data directory.
data_directory = "../data/RelValPhiToMuMu_13_GEN-SIM-RECO_evt4358/"
# List the track parameters.
!ls ../data/RelValPhiToMuMu_13_GEN-SIM-RECO_evt4358

trackparams0.npy    trackparams14.npy	trackparams19.npy   trackparams52.npy
trackparams100.npy  trackparams150.npy	trackparams1.npy    trackparams53.npy
trackparams101.npy  trackparams151.npy	trackparams200.npy  trackparams54.npy
trackparams102.npy  trackparams152.npy	trackparams201.npy  trackparams55.npy
trackparams103.npy  trackparams153.npy	trackparams202.npy  trackparams56.npy
trackparams104.npy  trackparams154.npy	trackparams203.npy  trackparams57.npy
trackparams105.npy  trackparams155.npy	trackparams204.npy  trackparams58.npy
trackparams106.npy  trackparams156.npy	trackparams205.npy  trackparams59.npy
trackparams107.npy  trackparams157.npy	trackparams206.npy  trackparams5.npy
trackparams108.npy  trackparams158.npy	trackparams207.npy  trackparams60.npy
trackparams109.npy  trackparams159.npy	trackparams208.npy  trackparams61.npy
trackparams10.npy   trackparams15.npy	trackparams209.npy  trackparams62.npy
trackparams110.npy  trackparams160.npy	trackparams20.npy   trackpar

In [5]:
# Fetch the track parameter files from the data directory. os.listdir returns
# every entry in arbitrary order, so keep only the .npy files and sort them to
# make the row order of `data` reproducible across runs and machines.
files = sorted(name for name in os.listdir(data_directory) if name.endswith(".npy"))
num_files = len(files)
if num_files == 0:
    raise IOError("No .npy track parameter files found in " + data_directory)

# Load the track parameters in memory with a single concatenation; calling
# np.append inside a loop copies the whole accumulated array on every iteration.
# np.ravel mirrors the flattening np.append performs when no axis is given.
data = np.concatenate(
    [np.ravel(np.load(os.path.join(data_directory, name))) for name in files]
)

In [6]:
# List the column names of the event files, i.e. the field names of the dtype
# of the combined `data` array (displayed as the cell's output).
data.dtype.names

('run',
 'evt',
 'lumi',
 'TrackId',
 'charge',
 'chi2',
 'ndof',
 'normalizedChi2',
 'qoverp',
 'theta',
 'lambda',
 'dxy',
 'd0',
 'dsz',
 'dz',
 'p',
 'pt',
 'px',
 'py',
 'pz',
 'eta',
 'phi',
 'vx',
 'vy',
 'vz',
 'pix_0_x',
 'pix_0_y',
 'pix_0_z',
 'pix_1_x',
 'pix_1_y',
 'pix_1_z',
 'pix_2_x',
 'pix_2_y',
 'pix_2_z',
 'pix_3_x',
 'pix_3_y',
 'pix_3_z',
 'pix_4_x',
 'pix_4_y',
 'pix_4_z',
 'sis_0_x',
 'sis_0_y',
 'sis_0_z',
 'sis_1_x',
 'sis_1_y',
 'sis_1_z',
 'sis_2_x',
 'sis_2_y',
 'sis_2_z',
 'sis_3_x',
 'sis_3_y',
 'sis_3_z',
 'sis_4_x',
 'sis_4_y',
 'sis_4_z',
 'sis_5_x',
 'sis_5_y',
 'sis_5_z',
 'sis_6_x',
 'sis_6_y',
 'sis_6_z',
 'sis_7_x',
 'sis_7_y',
 'sis_7_z',
 'sis_8_x',
 'sis_8_y',
 'sis_8_z',
 'sis_9_x',
 'sis_9_y',
 'sis_9_z',
 'sis_10_x',
 'sis_10_y',
 'sis_10_z',
 'sis_11_x',
 'sis_11_y',
 'sis_11_z',
 'sis_12_x',
 'sis_12_y',
 'sis_12_z',
 'sis_13_x',
 'sis_13_y',
 'sis_13_z',
 'sis_14_x',
 'sis_14_y',
 'sis_14_z',
 'sis_15_x',
 'sis_15_y',
 'sis_15_z',
 'sis_16

In [7]:
# Construct the header string.
num_columns = len(data.dtype.names)
header = data.dtype.names[0]
for i in range(1, num_columns):
    header += "," + data.dtype.names[i]
header += "\n"

!rm phi-to-mumu-13tev.csv
# Save the data as a CSV file for further processing in Apache Spark.
np.savetxt("phi-to-mumu-13tev.csv", data, header=header, delimiter=",")

In [None]:
# Store the data in HDFS by copying the local CSV export.
# NOTE(review): no destination path is given — presumably the file lands in the
# user's default HDFS working directory; confirm. The copy may also refuse to
# run if the file already exists there — TODO verify before re-running.
!hdfs dfs -copyFromLocal phi-to-mumu-13tev.csv