In [1]:
# System imports
import ast

# Data handling
import numpy as np
import pandas as pd

# Machine learning
from keras import layers, models

# Visualization
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

# Local imports
from data import load_data_events
from metrics import calc_hit_accuracy
from drawing import draw_projections
from drawing import draw_3d_event

%matplotlib notebook

Using Theano backend.
Using cuDNN version 5105 on context None
Mapped name None to device cuda: GeForce GTX 1080 (0000:0D:00.0)


In [None]:
def process_hits_data(df, copy_keys=['evtid', 'barcode', 'volid', 'layid']):
    """Build a hits table with cylindrical coordinates derived from ``gpos``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``gpos`` column whose entries can be indexed at
        positions 0, 1 and 2 (read here as x, y and z), together with every
        column named in ``copy_keys``.
    copy_keys : list of str
        Columns carried over unchanged into the result.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the ``copy_keys`` columns plus ``z``,
        ``r = sqrt(x**2 + y**2)`` and ``phi = arctan2(y, x)``.
        The input frame is not modified.
    """
    def component(axis):
        # Pull one element out of every per-row position entry.
        return df.gpos.apply(lambda position: position[axis])

    x_coord = component(0)
    y_coord = component(1)
    z_coord = component(2)

    radius = np.sqrt(x_coord**2 + y_coord**2)
    azimuth = np.arctan2(y_coord, x_coord)

    selected = df[copy_keys]
    return selected.assign(z=z_coord, r=radius, phi=azimuth)

def process_particles_data(df, copy_keys=['evtid', 'barcode', 'q']):
    """Build a particles table with kinematic variables derived from ``kin``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide a ``kin`` column whose entries can be indexed at
        positions 0, 1 and 2 (read here as p, theta and phi), together with
        every column named in ``copy_keys``.
    copy_keys : list of str
        Columns carried over unchanged into the result.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the ``copy_keys`` columns plus
        ``pt = p * sin(theta)``, ``eta = -ln(tan(theta / 2))`` and ``phi``.
        The input frame is not modified.
    """
    def component(slot):
        # Pull one element out of every per-row kinematics entry.
        return df.kin.apply(lambda kinematics: kinematics[slot])

    momentum = component(0)
    polar_angle = component(1)
    azimuth = component(2)

    transverse = momentum * np.sin(polar_angle)
    half_angle_tan = np.tan(polar_angle / 2.)
    pseudorapidity = -1. * np.log(half_angle_tan)

    selected = df[copy_keys]
    return selected.assign(pt=transverse, eta=pseudorapidity, phi=azimuth)

In [None]:
hits_file_name = '/bigdata/shared/Fermi_tmp_folder/cluster.csv'

# Column names handed to the loader for the hits file
hits_columns = [
    'hitid', 'barcode', 'volid', 'layid',
    'lpos', 'lerr', 'gpos',
    'chans', 'dir', 'direrr',
]

# Load the raw hits (progress is reported every 200 events) and derive
# the per-hit z, r and phi columns
hits = process_hits_data(
    load_data_events(hits_file_name, columns=hits_columns, print_freq=200))

# Keep only the barrel hits, i.e. those whose volume id is 8, 13 or 17
barrel_hits = hits[hits.volid.isin([8, 13, 17])]

# Keep tracks, identified by (evtid, barcode), with MORE than 8 barrel hits
# (9 or more), then regroup the surviving hits per track.
# NOTE(review): an earlier comment here said "8 or more hits", which does not
# match the `> 8` test below -- confirm which threshold is intended.
signal_tracks = (barrel_hits.groupby(['evtid', 'barcode'])
                 .filter(lambda track_hits: len(track_hits) > 8)
                 .groupby(['evtid', 'barcode']))

Loading /bigdata/shared/Fermi_tmp_folder/cluster.csv
Finished event 0
Finished event 200
Finished event 400
Finished event 600
Finished event 800
Finished event 1000
Finished event 1200
Finished event 1400
Finished event 1600
Finished event 1800
Finished event 2000
Finished event 2200
Finished event 2400
Finished event 2600
Finished event 2800
Finished event 3000
Finished event 3200
Finished event 3400
Finished event 3600
Finished event 3800
Finished event 4000
Finished event 4200
Finished event 4400
Finished event 4600
Finished event 4800
Finished event 5000
Finished event 5200
Finished event 5400
Finished event 5600
Finished event 5800
Finished event 6000
Finished event 6200
Finished event 6400
Finished event 6600
Finished event 6800
Finished event 7000
Finished event 7200
Finished event 7400
Finished event 7600
Finished event 7800
Finished event 8000
Finished event 8200
Finished event 8400
Finished event 8600
Finished event 8800
Finished event 9000
Finished event 9200
Finished event

In [None]:
# Size of the full hits table, followed by the number of selected tracks
# (one group per (evtid, barcode) pair in signal_tracks)
print('Hits data shape:', hits.shape)
print(signal_tracks.ngroups)

# Show the first barrel hit as a sample row
print('Example hit:')
print(barrel_hits.iloc[:1])

# Write the barrel hits to disk as a space-separated text file
barrel_hits.to_csv('/bigdata/shared/Fermi_tmp_folder/test_new.csv', sep=' ')

Hits data shape: (29516502, 7)
1177296
Example hit:
    evtid           barcode  volid  layid       phi          r        z
11      0  9013658885554176      8      2  3.116947  33.605406 -246.297
