In [1]:
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import Axes3D
import matplotlib.pyplot as plt
import tables
from random import shuffle
from IPython.display import clear_output
from sklearn import metrics
from tqdm import tqdm
import time
import seaborn as sns
%matplotlib inline

  return f(*args, **kwds)
  return f(*args, **kwds)


In [2]:
import ROOT
import root_numpy
import uproot



In [3]:
def read_hits(hits):
    """Load the per-event hit branches from a hits tree.

    Parameters
    ----------
    hits : object
        Tree-like object whose ``get(name)`` returns a branch exposing
        ``array()`` (presumably an uproot tree -- confirm against caller).

    Returns
    -------
    tuple
        ``(nHits, pmtID, hitTime, isDN)`` branch arrays, in that order.
    """
    branch_names = ('nHits', 'pmtID', 'hitTime', 'isDN')
    return tuple(hits.get(branch).array() for branch in branch_names)


def read_pos(pos):
    """Load the PMT position branches from a position tree.

    Parameters
    ----------
    pos : object
        Tree-like object whose ``get(name)`` returns a branch exposing
        ``array()`` (presumably an uproot tree -- confirm against caller).

    Returns
    -------
    tuple
        ``(pmt_id, pmt_x, pmt_y, pmt_z)`` branch arrays, in that order.
    """
    branch_names = ('pmt_id', 'pmt_x', 'pmt_y', 'pmt_z')
    return tuple(pos.get(branch).array() for branch in branch_names)


def read_true_info(true_info):
    """Load the per-event truth branches from a truth-information tree.

    Parameters
    ----------
    true_info : object
        Tree-like object whose ``get(name)`` returns a branch exposing
        ``array()`` (presumably an uproot tree -- confirm against caller).

    Returns
    -------
    tuple
        ``(evtID, E, x, y, z, R)`` branch arrays, in that order.
    """
    branch_names = ('evtID', 'E', 'x', 'y', 'z', 'R')
    return tuple(true_info.get(branch).array() for branch in branch_names)

In [4]:
def hits_to_hdf(t, name):
    """Flatten the per-event hit arrays of a tree into one table and save it as HDF5.

    Every hit becomes one row with the columns ``event`` (position of the
    event in the tree), ``pmtID``, ``hitTime`` and ``isDN``.

    Parameters
    ----------
    t : object
        Hits tree accepted by ``read_hits``.
    name : str
        Destination HDF5 path. The file is overwritten (``mode='w'``) and the
        table is stored under the key ``'df'``.

    Raises
    ------
    ValueError
        If the tree contains no events, so there is nothing to write.
    """
    nHits, pmtID, hitTime, isDN = read_hits(t)

    # One frame per event: pandas broadcasts the scalar event number across
    # all hits of that event. nHits is only needed for the event count.
    per_event_frames = [
        pd.DataFrame({
            'event': event,
            'pmtID': pmtID[event],
            'hitTime': hitTime[event],
            'isDN': isDN[event],
        })
        for event in tqdm(range(len(nHits)))
    ]

    # pd.concat([]) fails with an unhelpful "No objects to concatenate";
    # raise the same exception type with a message naming the actual cause.
    if not per_event_frames:
        raise ValueError('no events in tree; nothing to write to %r' % (name,))

    # NOTE: concat keeps each event's own row labels, so index values repeat
    # across events in the stored table.
    # NOTE(review): confirm that index=False has the intended effect for the
    # default HDF format used here.
    pd.concat(per_event_frames).to_hdf(name, index=False, key='df', mode='w')


def pos_to_csv(t, name):
    """Write the PMT position table of a tree to a CSV file.

    Parameters
    ----------
    t : object
        Position tree accepted by ``read_pos``.
    name : str
        Destination CSV path; the file is written without the index column.
    """
    column_names = ('pmt_id', 'pmt_x', 'pmt_y', 'pmt_z')
    # read_pos returns its arrays in the same order as column_names.
    positions = pd.DataFrame(dict(zip(column_names, read_pos(t))))
    positions.to_csv(name, index=False)


def true_info_to_csv(t, name):
    """Write the per-event truth table of a tree to a CSV file.

    Parameters
    ----------
    t : object
        Truth-information tree accepted by ``read_true_info``.
    name : str
        Destination CSV path; the file is written without the index column.
    """
    column_names = ('evtID', 'E', 'x', 'y', 'z', 'R')
    # read_true_info returns its arrays in the same order as column_names.
    truth = pd.DataFrame(dict(zip(column_names, read_true_info(t))))
    truth.to_csv(name, index=False)

In [5]:
def convert(t1, t2, t3, t4, t5):
    """Export all five trees to their on-disk tables.

    Parameters
    ----------
    t1, t2 : object
        Hits trees written as HDF5 to ``../data/lpmt_hits.h5`` and
        ``../data/spmt_hits.h5`` respectively.
    t3 : object
        Truth-information tree written to ``data/true_info.csv``.
    t4, t5 : object
        Position trees written to ``data/lpmt_pos.csv`` and
        ``data/spmt_pos.csv`` respectively.

    Notes
    -----
    The HDF5 files go to ``../data/`` while the CSV files go to ``data/``;
    both directories must already exist relative to the working directory.
    """
    # (writer, tree, destination) triples, executed in this order.
    export_jobs = (
        (hits_to_hdf, t1, '../data/lpmt_hits.h5'),
        (hits_to_hdf, t2, '../data/spmt_hits.h5'),
        (pos_to_csv, t4, 'data/lpmt_pos.csv'),
        (pos_to_csv, t5, 'data/spmt_pos.csv'),
        (true_info_to_csv, t3, 'data/true_info.csv'),
    )
    for writer, tree, destination in export_jobs:
        writer(tree, destination)

In [12]:
def read_n(hits):
    """Return only the ``nHits`` branch array of a hits tree.

    ``hits`` must provide ``get(name)`` returning a branch with ``array()``.
    """
    return hits.get('nHits').array()

In [13]:
def n_hits_to_csv(t, name):
    """Write the per-event hit counts of a tree to a CSV file.

    Parameters
    ----------
    t : object
        Hits tree accepted by ``read_n``.
    name : str
        Destination CSV path; the file is written without the index column.

    The output has two columns: ``event_id`` (0-based position of the event
    in the tree) and ``nHits``.
    """
    hit_counts = read_n(t)
    table = pd.DataFrame({
        'event_id': np.arange(0, len(hit_counts)),
        'nHits': hit_counts,
    })
    table.to_csv(name, index=False)

## Convert .root to .csv or .h5

In [6]:
# Open the input ROOT file and list the keys of the objects stored in it.
r = uproot.open("../data/eplus_hits_dn_0.root")
r.keys()

[b'lpmt_hits;187',
 b'lpmt_hits;186',
 b'spmt_hits;5',
 b'spmt_hits;4',
 b'true_info;1',
 b'lpmt_pos;1',
 b'spmt_pos;1']

In [15]:
# Look up each stored object by name (without the ";N" suffix shown by r.keys()).
# t1/t2 are the hits trees, t3 the truth information, t4/t5 the PMT positions;
# convert() and n_hits_to_csv() below consume them under exactly these names.
t1 = r.get("lpmt_hits")
t2 = r.get("spmt_hits")
t3 = r.get("true_info")
t4 = r.get("lpmt_pos")
t5 = r.get("spmt_pos")

In [11]:
# Export every tree to disk. Requires t1..t5 from the r.get(...) cell, so run
# the notebook top to bottom; the two progress bars come from hits_to_hdf.
convert(t1, t2, t3, t4, t5)

100%|██████████| 100000/100000 [01:25<00:00, 1166.22it/s]
100%|██████████| 100000/100000 [01:09<00:00, 1428.61it/s]


In [16]:
# Save the per-event hit counts of the lpmt and spmt hits trees as CSV.
n_hits_to_csv(t1, 'data/lpmt_n_hits.csv')
n_hits_to_csv(t2, 'data/spmt_n_hits.csv')

In [13]:
! ls -lh data/

total 12M
-rw-rw-r-- 1 leyla leyla 2.4M Nov 22 12:14 lpmt_n_hits.csv
-rw-rw-r-- 1 leyla leyla 622K Nov 22 12:13 lpmt_pos.csv
drwxrwxr-x 2 leyla leyla 4.0K Nov 22 12:06 presentations
-rw-rw-r-- 1 leyla leyla 2.4M Nov 22 12:14 spmt_n_hits.csv
-rw-rw-r-- 1 leyla leyla 911K Nov 22 12:13 spmt_pos.csv
-rw-rw-r-- 1 leyla leyla 5.3M Nov 22 12:13 true_info.csv


In [14]:
! ls -lh ../data/

total 28G
-rw-rw-r-- 1 leyla leyla 5.4G Nov 22 11:21 eplus_hits_dn_0.root
-rw-rw-r-- 1 leyla leyla  22G Nov 22 12:11 lpmt_hits.h5
-rw-rw-r-- 1 leyla leyla 553M Nov 22 12:13 spmt_hits.h5
