# Development notebook for graph construction methods using the TrackML dataset

In [58]:
from __future__ import print_function
from __future__ import division
from __future__ import absolute_import

import os

import numpy as np
import pandas as pd

from trackml import dataset

## Load the data

In [10]:
# Directory holding the TrackML training events. The default is the original
# shared location; set the TRACKML_DATA_DIR environment variable to point the
# notebook at a different copy without editing this cell.
data_dir = os.environ.get('TRACKML_DATA_DIR',
                          '/bigdata/shared/TrackML/train_100_events')

In [12]:
# Start with just one event
n_events = 1

# load_dataset is consumed as an iterator over events (see next() below);
# nevents caps how many events it will provide.
data_itr = dataset.load_dataset(data_dir, nevents=n_events)
# Pull the first event. It unpacks into the event id and four per-event
# objects; hits and truth are the DataFrames passed to select_hits later on.
evtid, hits, cells, particles, truth = next(data_itr)

## Select the barrel hits

In [59]:
def select_hits(hits, truth):
    """Select barrel hits and attach layer number, truth label, r and phi.

    Parameters
    ----------
    hits : pandas.DataFrame
        Must provide the columns hit_id, x, y, z, volume_id and layer_id.
    truth : pandas.DataFrame
        Must provide the columns hit_id and particle_id.

    Returns
    -------
    pandas.DataFrame
        Columns hit_id, z, layer, particle_id, r, phi. Only hits on the ten
        listed (volume_id, layer_id) pairs that also appear in `truth` are
        kept, and for every (particle_id, layer) pair only the hit with the
        smallest r survives.

    Raises
    ------
    KeyError
        From ``get_group`` if one of the listed (volume_id, layer_id) pairs
        has no hits at all.
    """
    # Barrel volume and layer ids
    vlids = [(8,2), (8,4), (8,6), (8,8),
             (13,2), (13,4), (13,6), (13,8),
             (17,2), (17,4)]
    # Select barrel layers and assign convenient layer number [0-9]
    vlid_groups = hits.groupby(['volume_id', 'layer_id'])
    hits = pd.concat([vlid_groups.get_group(vlid).assign(layer=layer)
                      for layer, vlid in enumerate(vlids)])
    # Calculate derived variables
    r = np.sqrt(hits.x**2 + hits.y**2)
    phi = np.arctan2(hits.y, hits.x)
    # Select the data columns we need.
    # r and phi carry the row labels of `hits`, while merge() returns a frame
    # with a brand new 0..n-1 index. They therefore have to be attached
    # BEFORE the merge; assigning afterwards aligns on the wrong labels and
    # gives each hit the r/phi of some other hit (or NaN).
    hits = (hits[['hit_id', 'z', 'layer']]
            .assign(r=r, phi=phi)
            .merge(truth[['hit_id', 'particle_id']], on='hit_id')
            # Keep the column order callers have seen so far
            [['hit_id', 'z', 'layer', 'particle_id', 'r', 'phi']])
    # Remove duplicate hits: keep the innermost hit per particle and layer.
    # A plain groupby yields a Series of row labels regardless of the pandas
    # version; its values are passed to .loc as an explicit list of labels.
    closest = hits.groupby(['particle_id', 'layer']).r.idxmin()
    return hits.loc[closest.values]

In [60]:
def calc_dphi(phi1, phi2):
    """Computes phi2-phi1 wrapped back into the range [-pi,pi].

    Both input angles are expected to lie in [-pi,pi] already: at most one
    shift by 2*pi is applied, which is only enough when the raw difference
    lies within [-3*pi,3*pi].

    Accepts plain scalars as well as numpy arrays and pandas Series; the
    inputs are never modified.
    """
    dphi = phi2 - phi1
    # Shift by one full turn where the raw difference left [-pi,pi]. The
    # comparison results are used as 0/1 factors instead of masks for in-place
    # assignment, so that scalar inputs (which cannot be indexed) work too.
    return dphi - 2*np.pi * (dphi > np.pi) + 2*np.pi * (dphi < -np.pi)

In [62]:
%%time

# Run the barrel-hit selection on the loaded event; the %%time magic at the
# top of this cell reports how long the call takes.
selected_hits = select_hits(hits, truth)

CPU times: user 18.5 s, sys: 0 ns, total: 18.5 s
Wall time: 18.4 s


In [63]:
# (number of selected hits, number of columns)
selected_hits.shape

(42420, 6)

## Detector sections

Let's work on splitting the events into several detector sections.

I could split into positive and negative z, as well as into phi sectors. Or maybe phi sectors alone are enough?
The two ends of the detector are mostly independent.