# File processor

csv_decoder.py is a packaged version of Josh's methods for reading the .csv files generated by his C++ showering code.
save_and_load.py saves and loads all the essential lists/images used in the analysis in the np.save format.

This notebook shows example usage of the methods in csv_decoder.py and save_and_load.py. It reads .csv files containing particle eta, phi, pt, ... produced by Josh's showering program, performs a fast detector simulation to generate detector and jet images, and saves the results in the faster np.save format. The save(), load() and load_cluster() methods are defined in save_and_load.py.

In [1]:
# Import local libraries
import csv_decoder
import save_and_load
import numpy as np

In [2]:
# Pixel dimensions shared by the event images and the jet images.
# Both values are passed as width=/height= to the csv_decoder calls below.
width, height = 40, 40

In [None]:
# Read Josh's files. The background sample is a single large file
# (pT cut must be 1 here for Josh's sample).
# Each read produces an event_list (collection of raw vectors) and event images.
#
# The objects built in this cell are what the saving mechanisms are tested on.

print('Loading background events')
(background_event_list,
 background_mass_list,
 background_image_list,
 num_background_files) = csv_decoder.load_events(
    "actual",
    max_Files=1,
    path="/data1/users/jzlin/MLM/background_7413/",
    contains="_actual",
    pt_cut=1,
    width=width,
    height=height)
# Overwrites the file count returned by load_events just above.
# NOTE(review): presumably the number of original files merged into the single
# large background file - confirm. The value feeds the background weight.
num_background_files = 15693

print('Loading signal events')
# max_Read is set to the number of background events read above.
(signal_event_list,
 signal_mass_list,
 signal_image_list,
 num_signal_files) = csv_decoder.load_events(
    "actual",
    max_Read=len(background_event_list),
    path="/data1/users/jzlin/MLM/heavy_signal/",
    contains="_signal",
    pt_cut=1,
    width=width,
    height=height)

# Check size of dataset
print(len(background_mass_list), len(signal_mass_list))

# Zero-center and normalize the event images (both samples in one call)
background_image_list, signal_image_list = csv_decoder.zero_center_and_normalize(
    background_image_list, signal_image_list)

# Boolean mask selecting background masses strictly between 115 and 135.
# This is not used.
background_mass_window = np.logical_and(
    np.array(background_mass_list) > 115,
    np.array(background_mass_list) < 135)

# Cluster the event lists into jets.
# The results are named background/signal_event_list_clustered.
print('Clustering')
background_event_list_clustered = csv_decoder.cluster_event(background_event_list)
signal_event_list_clustered = csv_decoder.cluster_event(signal_event_list)

# Recluster the events (i.e. clustering within events)
print('Reclustering')
background_reclustered = csv_decoder.recluster_event(background_event_list_clustered)
signal_reclustered = csv_decoder.recluster_event(signal_event_list_clustered)

# Produce jet images, then zero-center and normalize them.
# NOTE(review): the positional 0.8 is passed through unchanged; its meaning is
# defined by csv_decoder.return_fine_image_list_reclustered - confirm there.
print('Producing jet images')
background_recluster_images = csv_decoder.return_fine_image_list_reclustered(
    background_event_list, background_reclustered, 0.8,
    width=width, height=height)
signal_recluster_images = csv_decoder.return_fine_image_list_reclustered(
    signal_event_list, signal_reclustered, 0.8,
    width=width, height=height)

background_recluster_images, signal_recluster_images = csv_decoder.zero_center_and_normalize(
    background_recluster_images, signal_recluster_images)

In [None]:
# Weight calculation for Josh's sample
backgroundCross = 2.048e-06  # Cross-section of processes in millibarns, NOT USED

# Inputs to the background weight
actual_background_cross = 2.84e-9  # In barns
average_number_accepted = 2162

# Inputs to the signal weight; each is the average of two values
actual_signal_cross = np.average([1.738e-14, 1.7277e-14])
signal_accepted = np.average([8708 - 189, 8827 - 172])

# weight = cross-section * 35.9 * 1e15 / (accepted count * number of files)
# NOTE(review): 35.9 * 1e15 looks like an integrated luminosity of 35.9 fb^-1
# expressed in inverse barns - confirm.
background_weight = (
    actual_background_cross * 35.9 * 1e15
    / (average_number_accepted * num_background_files)
)
signal_weight = (
    actual_signal_cross * 35.9 * 1e15
    / (signal_accepted * num_signal_files)
)

In [3]:
# Round trip through save_and_load: save every object under 'test-all', then
# load it back into new_* names so the two sets can be compared.
# Recorded time: 19.26s
save_and_load.save(
    'test-all',
    background_event_list, signal_event_list,
    background_mass_list, signal_mass_list,
    background_weight, signal_weight,
    background_image_list, signal_image_list,
    background_recluster_images, signal_recluster_images)

# load() is unpacked in the same order the objects were passed to save().
(new_background_event_list, new_signal_event_list,
 new_background_mass_list, new_signal_mass_list,
 new_background_weight, new_signal_weight,
 new_background_image_list, new_signal_image_list,
 new_background_recluster_images, new_signal_recluster_images) = save_and_load.load('test-all')

In [4]:

# Compare each reloaded object with its in-memory original and print one
# True/False line per pair, in this order: masses, weights, event images,
# reclustered jet images (background first, then signal).
# new_background_event_list / new_signal_event_list are not compared here.
for reloaded, original in (
    (new_background_mass_list, background_mass_list),
    (new_signal_mass_list, signal_mass_list),
    (new_background_weight, background_weight),
    (new_signal_weight, signal_weight),
    (new_background_image_list, background_image_list),
    (new_signal_image_list, signal_image_list),
    (new_background_recluster_images, background_recluster_images),
    (new_signal_recluster_images, signal_recluster_images),
):
    print(np.array_equal(reloaded, original))

NameError: name 'background_mass_list' is not defined

In [None]:
# Combined number of entries in the background and signal mass lists
print(sum(len(masses) for masses in (background_mass_list, signal_mass_list)))