# Notebook for the performance test report 
This Python code is used to generate a performance report PDF from PCM/uprof monitoring data collected during readout application tests. 

#### IMPORTANT: Before running this notebook check that you have your UPROF data or you have run the Getting_formating_data.ipynb notebook to get the PCM data.

The key functions are:
* **plot_vars_comparison()**: Plots performance metrics from PCM/uprof data for multiple tests into comparison plots. It generates a plot for each socket.
* **create_report_performance()**: Creates the full PDF report.
    * Processes the raw PCM/uprof data if needed
    * Generates the comparison plots by calling plot_vars_comparison()
    * Adds intro text, table of tests, and the plots to the PDF
    * Prints CPU core pinning info for each test
    * It takes input data from a specified folder, processes it, generates plots in an output folder, and builds the PDF report with custom text, table, and plots.

Helper functions, all defined in basic_functions.py:

* make_name_list(): Generates lists of file names in the input folder
* break_file_name(): Parses info from a file name
* add_new_time_format(): Adds a timestamp column to PCM/uprof data
* uprof_pcm_formatter(): Converts uprof data to PCM-like format
* json_info(): Prints CPU pinning info for a test

So in summary, it automates generating a performance report from raw monitoring data, including custom intro text, test info table, comparison plots, and configuration details.


#### Note: change the paths to fit yours

In [None]:
# Import the modules needed, defining paths and functions
from basic_functions import *

# Column names looked up in the PCM (grafana) CSV files, one list per socket.
# Entry k of every column list is plotted with the axis label label_names[k].
pcm_columns_list_0 = [
    'C0 Core C-state residency',
    'Socket0 Memory Bandwidth',
    'Socket0 Instructions Per Cycle',
    'Socket0 Instructions Retired Any (Million)',
    'Socket0 L2 Cache Misses',
    'Socket0 L2 Cache Hits',
    'Socket0 L3 Cache Misses',
    'Socket0 L3 Cache Hits',
]
pcm_columns_list_1 = [
    'C0 Core C-state residency',
    'Socket1 Memory Bandwidth',
    'Socket1 Instructions Per Cycle',
    'Socket1 Instructions Retired Any (Million)',
    'Socket1 L2 Cache Misses',
    'Socket1 L2 Cache Hits',
    'Socket1 L3 Cache Misses',
    'Socket1 L3 Cache Hits',
]

# Column names looked up in the (reformatted) uprof CSV files, one list per socket.
uprof_columns_list_0 = [
    # NOTE(review): this name starts with a space, unlike its Socket1 counterpart;
    # confirm it matches the header actually present in the CSV files.
    ' Utilization (%) Socket0',
    'Total Mem Bw (GB/s) Socket0',
    'IPC (Sys + User) Socket0',
    'IRA Socket0',  # <------------- we don't have this (IRA) data
    'L2 Miss (pti) Socket0',
    'L2 Access (pti) Socket0',
    'L3 Miss Socket0',
    'L3 Miss % Socket0',
]
uprof_columns_list_1 = [
    'Utilization (%) Socket1',
    'Total Mem Bw (GB/s) Socket1',
    'IPC (Sys + User) Socket1',
    'IRA Socket1',  # <------------- we don't have this (IRA) data
    'L2 Miss (pti) Socket1',
    'L2 Access (pti) Socket1',
    'L3 Miss Socket1',
    'L3 Miss % Socket1',
]

# Y-axis labels, in the same order as the column lists above.
label_names = [
    'CPU Utilization (%)',
    'Memory Bandwidth (GB/sec)',
    'Instructions Per Cycle',
    'Instructions Retired Any (Million)',
    'L2 Cache Misses (Million)',
    'L2 Cache [Misses/Accesses] (%)',
    'L3 Cache Misses (Million)',
    'L3 Cache [Misses/Accesses] (%)',
]

# Socket tags handed to get_column_val() together with each column name.
label_columns = ['Socket0', 'Socket1']

def _save_metric_grid(x_by_test, y_by_test, labels_by_test, first_metric, out_path, rows=2, cols=2):
    """Draw one grid of metric panels for every test and save it as an image.

    Panel p of the grid shows metric ``first_metric + p``; each test contributes
    one line per panel.

    Parameters:
        x_by_test: one list of x values (time) per test.
        y_by_test: per test, a list with one entry per metric; element [0] of
            each entry is the series that gets plotted.
        labels_by_test: same layout as y_by_test, holding the legend labels.
        first_metric: index of the metric shown in the first panel.
        out_path: path of the image file to write.
        rows, cols: grid layout.
    """
    # squeeze=False keeps axs two-dimensional even for a 1x1 grid, so flatten() always works.
    fig, axs = plt.subplots(rows, cols, figsize=(18, 8), squeeze=False)
    axs = axs.flatten()
    panels = rows * cols

    for test_idx, (x_vals, y_metrics, metric_labels) in enumerate(zip(x_by_test, y_by_test, labels_by_test)):
        # Cycle through the palette so more tests than colours does not raise IndexError.
        color = color_list[test_idx % len(color_list)]
        last_metric = min(first_metric + panels, len(y_metrics))
        for metric_idx in range(first_metric, last_metric):
            ax = axs[metric_idx - first_metric]
            legend_label = metric_labels[metric_idx][0].replace('_', ' ')
            ax.plot(x_vals, y_metrics[metric_idx][0], color=color, label=legend_label, linestyle=linestyle_list[0])
            ax.set_ylabel(f'{label_names[metric_idx]}')
            ax.set_xlabel('Time (min)')
            ax.grid(which='major', color='gray', linestyle='dashed')
            ax.legend(loc='upper left')

    fig.tight_layout()
    fig.savefig(out_path)
    print(out_path)
    plt.close(fig)


def plot_vars_comparison(input_dir, output_dir, all_files, pdf_name):
    """Plot the monitored metrics of several tests into four comparison figures.

    For every name in ``all_files`` the file ``{input_dir}/{name}.csv`` is read;
    its 'NewTime' column is the x axis. Files whose first name field (as returned
    by break_file_name) is 'grafana' are read with the PCM column lists, all
    others with the uprof column lists.

    Four images are written to ``output_dir`` and their paths are printed:
        Fig0_{pdf_name}_results_socket0.png        metrics 0-3, socket 0
        Fig1_{pdf_name}_results_cache_socket0.png  metrics 4-7, socket 0
        Fig2_{pdf_name}_results_socket1.png        metrics 0-3, socket 1
        Fig3_{pdf_name}_results_cache_socket1.png  metrics 4-7, socket 1

    Parameters:
        input_dir: folder holding the CSV files.
        output_dir: folder where the images are saved.
        all_files: CSV file names without the '.csv' extension, one per test.
        pdf_name: report name, used as part of the image file names.
    """
    x_by_test = []
    y_by_test = ([], [])        # index 0: socket 0, index 1: socket 1
    labels_by_test = ([], [])

    for file_i in all_files:
        info = break_file_name(file_i)
        data_frame = pd.read_csv(f'{input_dir}/{file_i}.csv')
        x_by_test.append(data_frame['NewTime'].values.tolist())

        # Only the column names differ between the two monitoring tools.
        if info[0] == 'grafana':
            column_pairs = zip(pcm_columns_list_0, pcm_columns_list_1)
        else:
            column_pairs = zip(uprof_columns_list_0, uprof_columns_list_1)

        y_0, y_1, labels_0, labels_1 = [], [], [], []
        for column_0, column_1 in column_pairs:
            values_0, label_0 = get_column_val(data_frame, [column_0], [label_columns[0]], file_i)
            values_1, label_1 = get_column_val(data_frame, [column_1], [label_columns[1]], file_i)
            y_0.append(values_0)
            labels_0.append(label_0)
            y_1.append(values_1)
            labels_1.append(label_1)

        y_by_test[0].append(y_0)
        labels_by_test[0].append(labels_0)
        y_by_test[1].append(y_1)
        labels_by_test[1].append(labels_1)

    # The 'default' style resets rcParams, so it has to be applied BEFORE the font
    # is chosen; in the opposite order the font setting is silently discarded.
    plt.style.use('default')
    matplotlib.rcParams['font.family'] = 'DejaVu Serif'

    rows = cols = 2
    panels = rows * cols
    # (socket index, first metric shown, output file) for each figure, in output order.
    figures = [
        (0, 0, f'{output_dir}/Fig0_{pdf_name}_results_socket0.png'),
        (0, panels, f'{output_dir}/Fig1_{pdf_name}_results_cache_socket0.png'),
        (1, 0, f'{output_dir}/Fig2_{pdf_name}_results_socket1.png'),
        (1, panels, f'{output_dir}/Fig3_{pdf_name}_results_cache_socket1.png'),
    ]
    for socket, first_metric, out_path in figures:
        _save_metric_grid(x_by_test, y_by_test[socket], labels_by_test[socket], first_metric, out_path, rows=rows, cols=cols)

def create_report_performance(input_dir, output_dir, all_files, readout_name, daqconf_files, core_utilization_files, parent_folder_dir, print_info=True, pdf_name='performance_report', repin_threads_file=None, comment=None):
    """Build the performance report PDF for a set of tests.

    The report contains a title and intro text, a table with one row per test,
    the comparison figures produced by plot_vars_comparison() and, optionally,
    the configuration details written by json_info() for every test. It is
    saved as ``{output_dir}/{pdf_name}_report.pdf``.

    Parameters:
        input_dir: folder holding the input CSV files.
        output_dir: folder where the figures and the PDF are written.
        all_files: file names (without extension) of the tests to include.
        readout_name: per test, the value passed to json_info() as ``var``.
        daqconf_files: per test, the value passed to json_info() as ``file_daqconf``.
        core_utilization_files: per test, the value passed to json_info() as ``file_core``.
        parent_folder_dir: folder containing ``tools/dune_logo.jpg``; also
            forwarded to json_info().
        print_info: when True, append the configuration section.
        pdf_name: report name, used in the figure and PDF file names.
        repin_threads_file: per test, the value passed to json_info() as
            ``repin_threads_file``. Defaults to None for every test.
        comment: per test, the text of the 'General comments' table column.
            Defaults to 'TBA' for every test.

    Raises:
        ValueError: if a per-test list has fewer entries than ``all_files``.
    """
    n_tests = len(all_files)

    # None sentinels instead of mutable list defaults; the old one-element
    # defaults raised IndexError as soon as more than one file was given.
    if repin_threads_file is None:
        repin_threads_file = [None] * n_tests
    if comment is None:
        comment = ['TBA'] * n_tests

    # Fail early with a clear message rather than with an IndexError half-way
    # through the report. The json_info() inputs are only needed with print_info.
    per_test_args = {'comment': comment}
    if print_info:
        per_test_args.update(readout_name=readout_name,
                             daqconf_files=daqconf_files,
                             core_utilization_files=core_utilization_files,
                             repin_threads_file=repin_threads_file)
    for arg_name, arg_values in per_test_args.items():
        if len(arg_values) < n_tests:
            raise ValueError(f'{arg_name} has {len(arg_values)} entries but all_files has {n_tests}')

    directory([input_dir, output_dir])

    # Open pdf file
    pdf = FPDF()
    pdf.add_page()
    pdf.ln(1)
    pdf.image(f'{parent_folder_dir}/tools/dune_logo.jpg', w=180)
    pdf.ln(2)
    pdf.set_font('Times', 'B', 16)
    pdf.cell(40,10,'Performance Report')
    pdf.ln(10)

    # creating report
    pdf.set_font('Times', '', 10)
    pdf.write(5, 'The tests were run for the WIBEth data format. The Figures 1 and 2 show the results of the tests ran (Table1) using the different metrics. \n')
    pdf.write(5, '    * L2-hits is the fraction of requests that make it to L2 at all. Similar for L3. \n')
    pdf.write(5, '    * L2-misses is the fraction of requests that make it to L2 at all and then miss in L2. Similar for L3. \n')
    pdf.ln(10)

    #-------------------------------------------TABLE-----------------------------------------------
    # Parse every file name once; the fields feed both the table and the figure selection.
    file_infos = [break_file_name(file_i) for file_i in all_files]

    headers = ['Test', 'Readout SRV', 'dunedaq', 'Socket', 'General comments']
    rows_data = [headers]
    for info, comment_i in zip(file_infos, comment):
        test_info = re.sub('_', ' ', info[5])
        rows_data.append([test_info, info[2], info[1], info[3], comment_i])

    line_height = pdf.font_size * 2
    col_width = [pdf.epw/3.8, pdf.epw/8, pdf.epw/7, pdf.epw/12, pdf.epw/4]

    for row in rows_data:
        # A row is as tall as its cell with the most explicit line breaks.
        max_lines = max(1, *(len(str(datum).split('\n')) for datum in row))
        line_height_table = line_height * max_lines
        for width, datum in zip(col_width, row):
            pdf.multi_cell(width, line_height_table, str(datum), border=1, align='L', new_x=XPos.RIGHT, new_y=YPos.TOP, max_line_height=pdf.font_size)
        pdf.ln(line_height_table)

    pdf.write(5, 'Table. Summary of the tests ran. \n')
    pdf.ln(10)

    #-------------------------------------------- FIGURES START ------------------------------------------------
    plot_vars_comparison(input_dir, output_dir, all_files, pdf_name)

    # Collect the socket field of ALL tests (not only the last one) and look for
    # the digits, so '0', '1', '01' and '10' all select the matching figures.
    sockets_used = ''.join(str(info[3]) for info in file_infos)

    if '0' in sockets_used:
        pdf.image(f'{output_dir}/Fig0_{pdf_name}_results_socket0.png', w=180)
        pdf.write(5, 'Figure. Socket0 results of the tests ran using the metrics CPU Utilization (%), Memory Bandwidth (GB/sec), Instructions Per Cycle, Instructions Retired Any (Million).')
        pdf.ln(10)
        pdf.image(f'{output_dir}/Fig1_{pdf_name}_results_cache_socket0.png', w=180)
        pdf.write(5, 'Figure. Socket0 results of the tests ran using the metrics L2 Cache Misses (Million), L2 Cache [Misses/Hits] (%), L3 Cache Misses (Million), and L3 Cache [Misses/Hits] (%).')
        pdf.ln(10)

    if '1' in sockets_used:
        pdf.image(f'{output_dir}/Fig2_{pdf_name}_results_socket1.png', w=180)
        pdf.write(5, 'Figure. Socket1 results of the tests ran using the metrics CPU Utilization (%), Memory Bandwidth (GB/sec), Instructions Per Cycle, Instructions Retired Any (Million).')
        pdf.ln(10)
        pdf.image(f'{output_dir}/Fig3_{pdf_name}_results_cache_socket1.png', w=180)
        pdf.write(5, 'Figure. Socket1 results of the tests ran using the metrics L2 Cache Misses (Million), L2 Cache [Misses/Hits] (%), L3 Cache Misses (Million), and L3 Cache [Misses/Hits] (%).')
        pdf.ln(10)
    #-------------------------------------------- FIGURES END ------------------------------------------------

    #---------------------------------------- CONFIGURATIONS START ---------------------------------------------
    if print_info:
        # The third positional argument of FPDF.write() is a link, not a font
        # style, so the heading is made bold through set_font() instead.
        pdf.set_font('Times', 'B', 10)
        pdf.write(5, 'Configurations: \n')
        pdf.set_font('Times', '', 10)

        for var_i, file_daqconf_i, file_core_i, repin_threads_file_i in zip(readout_name[:n_tests], daqconf_files, core_utilization_files, repin_threads_file):
            json_info(file_daqconf=file_daqconf_i, file_core=file_core_i, parent_folder_dir=parent_folder_dir, input_dir=input_dir, var=var_i, pdf=pdf, if_pdf=print_info, repin_threads_file=repin_threads_file_i)

    pdf.ln(20)
    pdf.set_font('Times', '', 10)
    pdf.write(5, f'The End, made on {current_time()}')
    pdf.output(f'{output_dir}/{pdf_name}_report.pdf')
    #---------------------------------------- CONFIGURATIONS END ---------------------------------------------

    # Report the path that was actually written (it carries the '_report' suffix).
    print(f'The report was created and saved to {output_dir}/{pdf_name}_report.pdf')

# Tell the user that the constants and functions of this cell are now defined.
print('Ready to run and process')

In [None]:
## dvargas: path to the folder where the results are stored

# Alternative (EOS) locations, kept for reference:
#basic_path ='/eos/home-d/dvargas/For_UoT'
#report_path = '/eos/home-d/dvargas/dunedaq_reports'
#performancetest_path = '/eos/home-d/dvargas/performancetest'

# Everything below lives under basic_path.
basic_path = '/Users/dvargaso/Documents/My_Codes/DUNE_Python_Tools'
report_path = f'{basic_path}/dunedaq_reports'
performancetest_path = f'{basic_path}/performancetest'

# One results folder per test campaign; results_pathN keeps its original numbering.
(
    results_path0,
    results_path1,
    results_path2,
    results_path3,
    results_path4,
    results_path5,
    results_path6,
    results_path7,
    results_path8,
    results_path9,
) = (
    f'{basic_path}/performance_results/{folder}'
    for folder in (
        '1CRP_np02srv003',
        '1CRP_np02srv001',
        '2CRPs_np02srv003',
        '2CRPs_np04srv031',
        '2CRPs_intel',
        '2CRPs_all',
        '2CRPs_amd',
        '2CRPs_np02srv001',
        '2x2CRPs_np04srv031',
        '1CRP_all',
    )
)

print('Paths were setup')

**create_report_performance(input_dir, output_dir, all_files, readout_name, daqconf_files, core_utilization_files, parent_folder_dir, print_info, pdf_name, repin_threads_file=None, comment=['general comments about the run'])**
   * **input_dir** is path to directory where the results of the test were saved by the 'extract_grafana_data' step and where you saved the UPROF and core utilisation files too.
   * **output_dir** is path to directory where you want to store the plots and report.
   * **all_files** is a list of all the files in the 'input_dir' directory that you want to include in the report.
   * **readout_name** is a list with one entry per file in 'all_files'; each entry is the list of readout app names used in that test, so it is a list of lists.
   * **daqconf_files** is a list of the configuration files for the DAQ setup used in the test per files in 'all_files' so is a list of lists.
   * **core_utilization_files** is a list of the core utilization files per files in 'all_files' so is a list of lists.
   * **parent_folder_dir** this directory refers to the folder where the 'cpupining' and 'daqconf' are present if you are running the performancetest app then it should be the performancetest folder path.
   * **print_info** is to set if you want the information of the configuration and cpupinning in the report. By default it will be True. 
   * **pdf_name** is the name of the report. By default it will be 'performance_report' but you may want to give it a more descriptive name.
   * **repin_threads_file** in the case you are using a custom cpu pinning configuration, this is the name of the file containing the pinning information. It needs to be in the same folder as the rest of the cpu pinning files.
   * **comment** is a list of strings per files in 'all_files' that will be included in the report as general comments about the run or configuration, etc. 

In [None]:
## dvargas: creating the performance report for 100G = 1xCRP

# Every list below has one entry per report (two reports in this cell);
# inside an entry there is one item per test file of that report.
paths = [results_path0, results_path1]
names = ['performancetest_1CRPs_intel-003', 'performancetest_1CRPs_amd-001']

app_003_1 = ['runp02srv003eth1']
app_001_1 = ['runp02srv001eth1']
Apps = [
    [app_003_1, app_003_1],
    [app_001_1],
]
comments = [
    ['Ice lake family', 'Ice lake family'],
    ['AMD'],
]
repin_threads = [
    [None, None],
    [None],
]

files_all = [
    [
        'grafana-v4_4_6-np02srv003-1-eth-singlenic_tp_recording_numa1',
        'grafana-v4_4_6-np02srv003-1-eth-singlenic_tp_numa1',
    ],
    [
        'reformatted_uprof-v4_4_6-np02srv001-1-eth-singlenic_tp_numa1',
    ],
]

files_daqconf = [
    ['np02_daq_tp_recording_ssh', 'np02_daq_tp_ssh'],
    ['np02_daq_tp_ssh'],
]

files_core_utilization = [
    [
        'reformatted_core_utilization-v4_4_6-np02srv003-1-eth-singlenic_tp_recording_numa1',
        'reformatted_core_utilization-v4_4_6-np02srv003-1-eth-singlenic_tp_numa1',
    ],
    [
        'reformatted_core_utilization-v4_4_6-np02srv001-1-eth-singlenic_tp_numa1',
    ],
]

# Walk the per-report lists in lockstep and build one PDF per report.
report_inputs = zip(paths, files_all, Apps, files_daqconf, files_core_utilization, names, repin_threads, comments)
for input_path, test_files, readout_apps, daqconf, core_files, report_name, repin_files, remarks in report_inputs:
    create_report_performance(
        input_dir=input_path,
        output_dir=report_path,
        all_files=test_files,
        readout_name=readout_apps,
        daqconf_files=daqconf,
        core_utilization_files=core_files,
        parent_folder_dir=performancetest_path,
        print_info=True,
        pdf_name=report_name,
        repin_threads_file=repin_files,
        comment=remarks,
    )

print('THE END')

In [None]:
## dvargas: creating the performance report for 200G = 2xCRP

# Every list below has one entry per report (two reports in this cell);
# inside an entry there is one item per test file of that report.
paths = [results_path7, results_path3]
names = ['performancetest_2CRPs_amd-001', 'performancetest_2CRPs_intel-031']

app_001_10 = ['runp02srv001eth0', 'runp02srv001eth1']
app_031_30 = ['runp04srv031eth0', 'runp04srv031eth3']
Apps = [
    [app_001_10],
    [app_031_30, app_031_30],
]
comments = [
    ['AMD'],
    ['Sapphire rapids family', 'Sapphire rapids family'],
]
repin_threads = [
    [None],
    [None, None],
]

files_all = [
    [
        'reformatted_uprof-v4_4_6-np02srv001-10-eth-dualnic_tp_numa10',
    ],
    [
        'grafana-v4_4_6-np04srv031-10-eth-dualnic_tp_recording_numa30',
        'grafana-v4_4_6-np04srv031-10-eth-dualnic_tp_numa30',
    ],
]

files_daqconf = [
    ['np02_daq_tp_ssh'],
    ['np02_daq_tp_recording_ssh', 'np02_daq_tp_ssh'],
]

files_core_utilization = [
    [
        'reformatted_core_utilization-v4_4_6-np02srv001-10-eth-dualnic_tp_numa10',
    ],
    [
        'reformatted_core_utilization-v4_4_6-np04srv031-10-eth-dualnic_tp_recording_numa30',
        'reformatted_core_utilization-v4_4_6-np04srv031-10-eth-dualnic_tp_numa30',
    ],
]

# Walk the per-report lists in lockstep and build one PDF per report.
report_inputs = zip(paths, files_all, Apps, files_daqconf, files_core_utilization, names, repin_threads, comments)
for input_path, test_files, readout_apps, daqconf, core_files, report_name, repin_files, remarks in report_inputs:
    create_report_performance(
        input_dir=input_path,
        output_dir=report_path,
        all_files=test_files,
        readout_name=readout_apps,
        daqconf_files=daqconf,
        core_utilization_files=core_files,
        parent_folder_dir=performancetest_path,
        print_info=True,
        pdf_name=report_name,
        repin_threads_file=repin_files,
        comment=remarks,
    )

print('THE END')

In [None]:
## dvargas: creating the performance report for 400G = 2x(2xCRP)

# A single report with four tests: every list below has one item per test file.
app_031_20 = ['runp04srv031eth0', 'runp04srv031eth2']
app_031_31 = ['runp04srv031eth1', 'runp04srv031eth3']
Apps = [app_031_20, app_031_31, app_031_20, app_031_31]
comments = [
    'Sapphire rapids family',
    'Sapphire rapids family',
    'Sapphire rapids family',
    'Sapphire rapids family',
]
repin_threads = [None, None, None, None]

files_all = [
    'grafana-v4_4_6-np04srv031-0-eth-double_dualnic_tp_recording_numa20',
    'grafana-v4_4_6-np04srv031-1-eth-double_dualnic_tp_recording_numa31',
    'grafana-v4_4_6-np04srv031-0-eth-double_dualnic_tp_numa20',
    'grafana-v4_4_6-np04srv031-1-eth-double_dualnic_tp_numa31',
]

files_daqconf = [
    'np02_daq_tp_recording_ssh',
    'np02_daq_tp_recording_ssh',
    'np02_daq_tp_ssh',
    'np02_daq_tp_ssh',
]

files_core_utilization = [
    'reformatted_core_utilization-v4_4_6-np04srv031-0-eth-double_dualnic_tp_recording_numa20',
    'reformatted_core_utilization-v4_4_6-np04srv031-1-eth-double_dualnic_tp_recording_numa31',
    'reformatted_core_utilization-v4_4_6-np04srv031-0-eth-double_dualnic_tp_numa20',
    'reformatted_core_utilization-v4_4_6-np04srv031-1-eth-double_dualnic_tp_numa31',
]

# Gather the keyword arguments first, then build the single report.
report_kwargs = {
    'input_dir': results_path8,
    'output_dir': report_path,
    'all_files': files_all,
    'readout_name': Apps,
    'daqconf_files': files_daqconf,
    'core_utilization_files': files_core_utilization,
    'parent_folder_dir': performancetest_path,
    'print_info': True,
    'pdf_name': 'performancetest_2x2CRPs_intel-031',
    'repin_threads_file': repin_threads,
    'comment': comments,
}
create_report_performance(**report_kwargs)

print('THE END')