In [1]:
import cProfile
from io import StringIO
from functools import wraps
import time
from collections import defaultdict
import pstats
from tqdm import tqdm
import torch
from pympler import asizeof

from time_res_util import get_compiled_NF_model
from momentum_prediction_util import load_defaultdict

Using device cuda:0


In [2]:
def profile_function(func):
    """
    Decorator that runs every call of ``func`` under cProfile.

    The wrapped call's return value is passed through unchanged (and any
    exception it raises propagates). In both cases a report sorted by
    cumulative time, limited to the first 20 entries, is printed to stdout
    once the call has finished.
    """
    @wraps(func)
    def wrapper(*args, **kwargs):
        prof = cProfile.Profile()
        try:
            result = prof.runcall(func, *args, **kwargs)
        finally:
            # Build the report in memory first, then emit it with one print.
            report = StringIO()
            pstats.Stats(prof, stream=report).sort_stats('cumulative').print_stats(20)
            print(report.getvalue())
        return result
    return wrapper

'''MEMORY PROFILING'''
import linecache
import os
import tracemalloc

def display_top(snapshot, key_type='lineno', limit=3):
    """
    Print the largest memory allocations recorded in a tracemalloc snapshot.

    Args:
        snapshot: a ``tracemalloc.Snapshot``.
        key_type: grouping key forwarded to ``Snapshot.statistics``.
        limit: number of top entries printed individually; the remaining
            entries are summarised on a single "other" line.

    Returns:
        None. Everything is written to stdout.
    """
    # Drop traces from the import machinery and from unknown files.
    excluded = (
        tracemalloc.Filter(False, "<frozen importlib._bootstrap>"),
        tracemalloc.Filter(False, "<unknown>"),
    )
    ranked = snapshot.filter_traces(excluded).statistics(key_type)
    head, tail = ranked[:limit], ranked[limit:]

    print("Top %s lines" % limit)
    rank = 0
    for entry in head:
        rank += 1
        frame = entry.traceback[0]
        # Keep only the last two path components: "module/file.py".
        path_parts = frame.filename.split(os.sep)
        short_name = os.sep.join(path_parts[-2:])
        print("#%s: %s:%s: %.1f KiB"
              % (rank, short_name, frame.lineno, entry.size / 1024))
        source_line = linecache.getline(frame.filename, frame.lineno).strip()
        if source_line:
            print('    %s' % source_line)

    if tail:
        tail_bytes = sum(entry.size for entry in tail)
        print("%s other: %.1f KiB" % (len(tail), tail_bytes / 1024))

    total_bytes = sum(entry.size for entry in ranked)
    print("Total allocated size: %.1f KiB" % (total_bytes / 1024))

# Start tracing Python memory allocations from this point on, so that a
# snapshot taken in a later cell covers the work done by the cells in between.
tracemalloc.start()

In [3]:
# Relative path of the processed-events JSON file that is loaded below.
inputProcessedData = "./data/processed_data/no_inner_old_SIDIS_run_2_1000events.json"
# Model object from the project helper; a later cell samples from it as the
# normalizing flow.
model_compile = get_compiled_NF_model()
# presumably a nested mapping event -> stave -> layer -> segment -> particle
# (that is how a later cell iterates it) — confirm against load_defaultdict.
processed_data = load_defaultdict(inputProcessedData)


  self.load_state_dict(torch.load(path))


In [5]:
# Build one context row and one (time, pixel-count) row per fired pixel, then
# draw one sample per row from the normalizing flow in batches.
batch_size = 50000  # rows sent to normalizing_flow.sample per call
device = 'cuda'
normalizing_flow = model_compile

# Five-level nested containers (event -> stave -> layer -> segment -> SiPM -> list).
# They are only created here; a later cell fills them.
nn_input = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list)))))
nn_output = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(list)))))

all_context = []
all_time_pixels = []
all_metadata = []
# Index in this list is the SiPM index: 0 -> high-z pixel count, 1 -> low-z pixel count.
num_pixel_list = ["num_pixels_high_z","num_pixels_low_z"]

# particle_data keys copied into every metadata tuple. The running count of
# distinct trueIDs seen so far in the segment sits between the two groups, so
# the tuple layout is:
#   (event, stave, layer, segment, SiPM, *before_count, n_true_ids, *after_count)
metadata_fields_before_count = (
    'truemomentum', 'trueID', 'truePID', 'hitID', 'hitPID',
    'truetheta', 'truephi', 'strip_x', 'strip_y', 'strip_z',
)
metadata_fields_after_count = (
    'hit_x', 'hit_y', 'hit_z', 'KMU_trueID', 'KMU_truePID',
    'KMU_true_phi', 'KMU_true_momentum_mag', 'KMU_endpoint_x',
    'KMU_endpoint_y', 'KMU_endpoint_z',
)

print("Processing data in new_prepare_nn_input...")
for event_idx, event_data in tqdm(processed_data.items()):
    for stave_idx, stave_data in event_data.items():
        for layer_idx, layer_data in stave_data.items():
            for segment_idx, segment_data in layer_data.items():
                # Distinct trueIDs encountered so far in this segment (insertion order).
                trueID_list = []
                for particle_id, particle_data in segment_data.items():
                    base_context = torch.tensor(
                        [particle_data['z_pos'], particle_data['hittheta'], particle_data['hitmomentum']],
                        dtype=torch.float32,
                    )
                    hit_time = particle_data['time']

                    if particle_data['trueID'] not in trueID_list:
                        trueID_list.append(particle_data['trueID'])
                    num_true_ids = len(trueID_list)

                    # Everything that does not depend on the SiPM is read once per particle.
                    meta_before = tuple(particle_data[name] for name in metadata_fields_before_count)
                    meta_after = tuple(particle_data[name] for name in metadata_fields_after_count)

                    for SiPM_idx, num_pixel_tag in enumerate(num_pixel_list):
                        num_pixels = particle_data[num_pixel_tag]

                        # repeat() already returns a new tensor, so no clone is needed;
                        # a pixel count of 0 yields an empty (0, 3) block.
                        all_context.append(base_context.repeat(num_pixels, 1))

                        time_pixels = torch.tensor([hit_time, num_pixels], dtype=torch.float32)
                        all_time_pixels.append(time_pixels.repeat(num_pixels, 1))

                        metadata_row = (
                            event_idx, stave_idx, layer_idx, segment_idx, SiPM_idx,
                            *meta_before, num_true_ids, *meta_after,
                        )
                        # One reference to the same tuple per pixel keeps all_metadata
                        # aligned row-for-row with all_context.
                        all_metadata.extend([metadata_row] * num_pixels)

if all_context:
    all_context = torch.cat(all_context)
    all_time_pixels = torch.cat(all_time_pixels)
else:
    # torch.cat raises on an empty list; fall back to empty tensors with the
    # same column counts so the sampling loop below simply does nothing.
    all_context = torch.empty((0, 3), dtype=torch.float32)
    all_time_pixels = torch.empty((0, 2), dtype=torch.float32)

print("Sampling data...")
sampled_data = []
begin = time.time()
for i in tqdm(range(0, len(all_context), batch_size)):
    batch_end = min(i + batch_size, len(all_context))
    batch_context = all_context[i:batch_end].to(device)
    batch_time_pixels = all_time_pixels[i:batch_end]

    with torch.no_grad():
        samples = abs(normalizing_flow.sample(num_samples=len(batch_context), context=batch_context)[0]).squeeze(1)

    # Column 0 of batch_time_pixels is the hit time, added to each sample.
    # NOTE(review): extend() iterates the tensor element by element, so
    # sampled_data ends up holding one small tensor per row — confirm that is
    # what downstream code expects before changing it.
    sampled_data.extend(samples.cpu() + batch_time_pixels[:, 0])
end = time.time()
print(f"sampling took {end - begin} seconds")

Processing data in new_prepare_nn_input...


100%|██████████| 6/6 [00:00<00:00, 16.47it/s]


Sampling data...


100%|██████████| 15/15 [00:20<00:00,  1.36s/it]

sampling took 20.35903811454773 seconds





### Original approach: nested dictionaries (event → stave → layer → segment → SiPM)

In [6]:
# Distribute each sample and its truth values into the nested
# event -> stave -> layer -> segment -> SiPM containers.
# NOTE: this appends to the existing nn_input / nn_output, so running the cell
# a second time adds every entry again.
print("Reorganizing data...")
begin = time.time()
for metadata_row, sample in zip(all_metadata, sampled_data):
    # The first five entries locate the bucket; the remaining ones are the
    # per-hit values stored as a single tensor.
    event, stave, layer, segment, SiPM, *truth_values = metadata_row
    nn_input[event][stave][layer][segment][SiPM].append(sample)
    # NOTE(review): IDs/PIDs share one tensor with the float quantities —
    # confirm the inferred dtype represents them exactly.
    nn_output[event][stave][layer][segment][SiPM].append(torch.tensor(truth_values))
end = time.time()
print(f"reorganizing took {end - begin} seconds")

Reorganizing data...
reorganizing took 9.405727624893188 seconds


In [13]:
# Size of nn_output as reported by pympler's asizeof, converted from bytes to MiB.
asizeof.asizeof(nn_output) / 1024 / 1024

285.01953887939453

### New approach: one flat dictionary keyed by `"{event}_{stave}_{layer}_{segment}_{SiPM}"`

In [14]:
# Flat replacements for the nested containers: each key maps directly to a list.
# Kept in its own cell so the containers can be reset before the fill cell is re-run.
new_nn_input = defaultdict(list)
new_nn_output = defaultdict(list)

In [15]:
# Same redistribution as the nested version, but into flat dictionaries whose
# key is the five location indices joined with underscores.
# NOTE: entries are appended, so re-running this cell without resetting
# new_nn_input / new_nn_output adds every entry again.
print("Reorganizing data...")
begin = time.time()
for metadata_row, sample in zip(all_metadata, sampled_data):
    event, stave, layer, segment, SiPM, *truth_values = metadata_row
    flat_key = f"{event}_{stave}_{layer}_{segment}_{SiPM}"
    new_nn_input[flat_key].append(sample)
    new_nn_output[flat_key].append(torch.tensor(truth_values))
end = time.time()
print(f"reorganizing took {end - begin} seconds")

Reorganizing data...
reorganizing took 11.132388830184937 seconds


In [16]:
# Size of the flat new_nn_output as reported by pympler's asizeof, in MiB,
# for comparison with the nested nn_output measured above.
asizeof.asizeof(new_nn_output) / 1024 / 1024

128.6405258178711

In [31]:
# Time one end-to-end call of new_prepare_nn_input on the loaded data and model,
# then capture and print the current tracemalloc state.
# NOTE(review): new_prepare_nn_input is not defined in any cell visible in this
# notebook — confirm it is defined (or imported) before this cell on a fresh
# Restart & Run All.
begin = time.time()
# Rebinds nn_input / nn_output, replacing the containers filled by earlier cells.
nn_input,nn_output = new_prepare_nn_input(processed_data, model_compile,batch_size = 50000)
end = time.time()
print(f"new_prepare_nn_input took {(end - begin)} seconds")
# The snapshot covers every allocation still alive since tracemalloc.start(),
# not only those made by the call above.
snapshot = tracemalloc.take_snapshot()

display_top(snapshot)

Processing data in new_prepare_nn_input...


100%|██████████| 6/6 [00:00<00:00,  9.55it/s]


Sampling data...


100%|██████████| 15/15 [00:16<00:00,  1.07s/it]


sampling took 16.07623791694641 seconds
Reorganizing data...
reorganizing took 15.902801513671875 seconds
         2490697 function calls (2433322 primitive calls) in 32.656 seconds

   Ordered by: cumulative time
   List reduced from 245 to 20 due to restriction <20>

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        1    4.117    4.117   32.658   32.658 /tmp/ipykernel_376719/3175385229.py:1(new_prepare_nn_input)
       15    0.009    0.001   12.776    0.852 /hpc/group/vossenlab/rck32/ML_venv/lib64/python3.9/site-packages/normflows/core.py:286(sample)
22695/255    0.090    0.000   12.766    0.050 /hpc/group/vossenlab/rck32/ML_venv/lib64/python3.9/site-packages/torch/nn/modules/module.py:1549(_wrapped_call_impl)
22695/255    0.107    0.000   12.764    0.050 /hpc/group/vossenlab/rck32/ML_venv/lib64/python3.9/site-packages/torch/nn/modules/module.py:1555(_call_impl)
      120    0.002    0.000   12.568    0.105 /hpc/group/vossenlab/rck32/ML_venv/lib64/python

In [37]:
# Re-print the three largest allocation sites from the snapshot taken earlier.
display_top(snapshot)

Top 3 lines
#1: ipykernel_376719/3175385229.py:78: 60573.6 KiB
    nn_output[event][stave][layer][segment][SiPM].append(torch.tensor([momentum,trueID,truePID,hitID,hitPID,theta,phi,strip_x,strip_y,strip_z,trueID_list_len,hit_x,hit_y,hit_z,KMU_trueID,KMU_truePID,KMU_true_phi,KMU_true_momentum_mag,KMU_endpoint_x,KMU_endpoint_y,KMU_endpoint_z]))
#2: torch/_tensor.py:1053: 54715.7 KiB
    return iter(self.unbind(0))
#3: ipykernel_376719/3175385229.py:76: 5843.8 KiB
    nn_input[event][stave][layer][segment][SiPM].append(sample)
209 other: 349.9 KiB
Total allocated size: 121483.0 KiB
