# Overview

This notebook processes the collected benchmark data.

Testing data size: 800_000 events (the summary tables below are rescaled to 1_000_000 events)

In [1]:
import numpy as np
import pandas as pd

In [7]:
def getDataFromFile(filename):
    """Extract the stage timings from one benchmark log file.

    Every value is the third whitespace-separated token of a fixed line of
    the file (0-based line indices 2, 35, 56, 82 and 98).  The first value
    is returned as parsed; the other four are divided by 1e6.

    Parameters
    ----------
    filename : str
        Path of the benchmark text file to read.

    Returns
    -------
    tuple of float
        ``(total_time, h5read_time, cluster_time, clusterToImg_time,
        h5write_time)``, all intended to be in seconds.

    Raises
    ------
    IndexError
        If an expected line, or its third token, is missing.
    ValueError
        If the third token of an expected line is not a number.
    """
    with open(filename, "r") as log:
        rows = log.readlines()

    def third_token(row_idx):
        # Third whitespace-separated field of the requested line, as a float.
        return float(rows[row_idx].split()[2])

    # NOTE(review): the line indices below are tied to the exact layout of the
    # benchmark log; presumably the /1e6 converts microseconds to seconds.
    total_time = third_token(2)                 # total time in sec
    h5read_time = third_token(35) / 1e6         # read time in sec
    cluster_time = third_token(56) / 1e6        # clustering time in sec
    clusterToImg_time = third_token(82) / 1e6   # convert events to image time in sec
    h5write_time = third_token(98) / 1e6        # write to h5 time in sec

    return (
        total_time,
        h5read_time,
        cluster_time,
        clusterToImg_time,
        h5write_time,
    )

# Smoke test: parse a single benchmark log and print the extracted timings
# (these are the raw values, before any rescaling to 1_000_000 events).
testfn = "benchmark/benchmark_fastGaussian_11.txt"
print(getDataFromFile(filename=testfn))

(304.572, 0.865766, 170.72902, 131.422973, 1.314299)


## Weighted centroid

In [12]:
# One row per benchmark run (files 0..99), one column per timed stage.
headers = ["total_time", "h5read_time", "cluster_time", "clusterToImg_time", "h5write_time"]
rawdata = np.array(
    [
        getDataFromFile(f"benchmark/benchmark_weightedCentroid_{i}.txt")
        for i in range(100)
    ]
)

df_wgtCentroid = (
    pd.DataFrame(data=rawdata, columns=headers)
    # Combined HDF5 I/O cost: read + write.
    .assign(io_time=lambda runs: runs["h5read_time"] + runs["h5write_time"])
    # Benchmarks used 800_000 events; scale by 10/8 to report per 1_000_000 events.
    .div(8)
    .mul(10)
)

df_wgtCentroid.describe()

Unnamed: 0,total_time,h5read_time,cluster_time,clusterToImg_time,h5write_time,io_time
count,100.0,100.0,100.0,100.0,100.0,100.0
mean,369.910088,1.043649,214.224115,152.72893,1.622922,2.666571
std,3.304409,0.107339,2.946254,1.542076,0.065188,0.127301
min,350.635,0.67161,196.857234,151.139228,1.526083,2.197693
25%,368.212188,0.955999,213.05105,151.872479,1.586254,2.604185
50%,369.11,1.068033,213.764615,152.294651,1.614674,2.686856
75%,371.581562,1.121626,215.240393,152.705571,1.645116,2.731782
max,377.96625,1.237715,222.584271,158.542064,2.04476,3.093009


## LMFit Gaussian

In [13]:
# One row per benchmark run (files 0..99), one column per timed stage.
headers = ["total_time", "h5read_time", "cluster_time", "clusterToImg_time", "h5write_time"]
rawdata = np.array(
    [
        getDataFromFile(f"benchmark/benchmark_lmfitGaussian_{i}.txt")
        for i in range(100)
    ]
)

df_lmfit = (
    pd.DataFrame(data=rawdata, columns=headers)
    # Combined HDF5 I/O cost: read + write.
    .assign(io_time=lambda runs: runs["h5read_time"] + runs["h5write_time"])
    # Benchmarks used 800_000 events; scale by 10/8 to report per 1_000_000 events.
    .div(8)
    .mul(10)
)

df_lmfit.describe()

Unnamed: 0,total_time,h5read_time,cluster_time,clusterToImg_time,h5write_time,io_time
count,100.0,100.0,100.0,100.0,100.0,100.0
mean,494.9217,0.878128,214.331279,277.697482,1.683315,2.561443
std,10.715598,0.102889,4.808584,9.697102,0.083571,0.131582
min,486.84875,0.674916,207.146121,273.747409,1.19345,2.018779
25%,491.5225,0.808095,212.900664,275.573525,1.649495,2.489889
50%,493.219375,0.836038,213.298148,276.71398,1.686538,2.523883
75%,494.096563,0.911837,213.830216,277.590793,1.712285,2.614332
max,588.45125,1.150397,254.866644,372.198148,2.15183,2.937111


## FastGaussian

In [14]:
# One row per benchmark run (files 0..99), one column per timed stage.
headers = ["total_time", "h5read_time", "cluster_time", "clusterToImg_time", "h5write_time"]
rawdata = np.array(
    [
        getDataFromFile(f"benchmark/benchmark_fastGaussian_{i}.txt")
        for i in range(100)
    ]
)

df_fastG = (
    pd.DataFrame(data=rawdata, columns=headers)
    # Combined HDF5 I/O cost: read + write.
    .assign(io_time=lambda runs: runs["h5read_time"] + runs["h5write_time"])
    # Benchmarks used 800_000 events; scale by 10/8 to report per 1_000_000 events.
    .div(8)
    .mul(10)
)

df_fastG.describe()

Unnamed: 0,total_time,h5read_time,cluster_time,clusterToImg_time,h5write_time,io_time
count,100.0,100.0,100.0,100.0,100.0,100.0
mean,381.753075,1.065141,214.159615,164.590848,1.644161,2.709302
std,2.119583,0.084336,1.686817,1.556662,0.057349,0.098805
min,375.8725,0.67418,208.291652,163.177545,1.530251,2.204431
25%,380.555,0.997525,213.346412,163.91176,1.609633,2.645044
50%,381.141875,1.066691,213.925719,164.168284,1.639127,2.702383
75%,382.271875,1.125977,214.372809,164.663586,1.67179,2.7717
max,390.75,1.248335,220.515066,173.387395,1.91018,2.992404


## Concatenate all runs for better statistics on the common sections

In [15]:
# Stack the three per-method frames row-wise to pool all 300 runs.
# Index labels are not reset, so each method keeps its own 0..99 labels.
per_method_frames = (df_wgtCentroid, df_lmfit, df_fastG)
df_concat = pd.concat(objs=list(per_method_frames))

# NOTE(review): describe() covers every column, including ones whose values
# differ strongly between methods (e.g. total_time, clusterToImg_time).
df_concat.describe()

Unnamed: 0,total_time,h5read_time,cluster_time,clusterToImg_time,h5write_time,io_time
count,300.0,300.0,300.0,300.0,300.0,300.0
mean,415.528288,0.995639,214.238336,198.339087,1.650133,2.645772
std,56.822297,0.12915,3.3878,56.706814,0.073731,0.134921
min,350.635,0.67161,196.857234,151.139228,1.19345,2.018779
25%,371.582188,0.901634,213.056286,152.73721,1.609505,2.55943
50%,381.141875,1.009367,213.643878,164.168284,1.642868,2.658443
75%,491.475,1.107221,214.3185,275.503552,1.687895,2.73406
max,588.45125,1.248335,254.866644,372.198148,2.15183,3.093009
