# Import the modules needed and define paths and functions

In [None]:
import os
import sys
import csv
import time
import glob
import json
import requests
import re
import shutil
import subprocess
import matplotlib
import matplotlib.pyplot as plt 
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import matplotlib.dates as mdates
import numpy as np
import pickle as pkl
import pandas as pd
from pathlib import Path
from datetime import datetime as dt
from fpdf import FPDF  

# Y-axis label of each subplot drawn by plot_vars_comparison; index-aligned
# with the rows of pcm_columns_list and uprof_columns_list below.
label_names = ['CPU Utilization (%)', 
               'Memory Bandwidth (GB/sec)',
               'L2 Cache Misses (Million)', 
               'L3 Cache Misses (Million)',
               'CPU Power Consumption (Watt)']

# CSV columns read from the 'grafana-*' files, one inner list per subplot.
pcm_columns_list = [['C0 Core C-state residency'],
                    ['Socket0 Memory Bandwidth', 'Socket1 Memory Bandwidth'],
                    ['Socket0 L2 Cache Misses', 'Socket1 L2 Cache Misses'], 
                    ['Socket0 L3 Cache Misses', 'Socket1 L3 Cache Misses'], 
                    ['Package Joules Consumed Socket0 Energy Consumption', 'Package Joules Consumed Socket1 Energy Consumption']]
# CSV columns read from the 'reformatter_uprof-*' files, one inner list per subplot.
uprof_columns_list = [['CPU Utilization'], 
                      ['Total Mem Bw (GB/s) Socket0', 'Total Mem Bw (GB/s) Socket1'],
                      ['L2 Miss (pti) Socket0', 'L2 Miss (pti) Socket1'],
                      ['L3 Miss Socket0', 'L3 Miss Socket1'], 
                      ['socket0-package-power','socket1-package-power']]
# Servers that check_OS reports as 'Alma9'; every other server is reported as 'Centos8'.
alma9_os = ['np02srv004']

# Alternative columns that could be plotted instead:
#for pcm ['Socket0 L2 Cache Misses Per Instruction', 'Socket1 L2 Cache Misses Per Instruction']
#for uprof [' Utilization (%) Socket0', 'Utilization (%) Socket1', 'L2 Hit Ratio Socket0', 'L2 Hit Ratio Socket1']

# Per-socket suffix appended to legend entries by get_column_val.
label_columns = ['Socket0', 'Socket1'] 
# Line colour per input file (indexed by file position in plot_vars_comparison).
color_list = ['red', 'blue', 'green', 'cyan', 'orange', 'yellow', 'magenta', 'lime', 'purple', 'navy', 'hotpink', 'olive', 'salmon', 'teal', 'darkblue', 'darkgreen', 'darkcyan', 'darkorange', 
              'deepskyblue', 'darkmagenta', 'sienna', 'chocolate', 'orangered', 'gray', 'royalblue', 'gold', 'peru', 'seagreen', 'violet', 'tomato', 'lightsalmon', 'crimson', 'lightblue', 
              'lightgreen', 'lightpink', 'black', 'darkgray', 'lightgray', 'saddlebrown', 'brown', 'khaki', 'tan', 'turquoise', 'linen', 'lawngreen', 'coral']
# Line style per series within one subplot (indexed by series position).
linestyle_list = ['solid', 'dotted', 'dashed', 'dashdot']

# Marker symbols; not referenced by any function visible in this notebook section.
marker_list = ['s','o','.','p','P','^','<','>','*','+','x','X','d','D','h','H']

def directory(input_dir):
    """Make sure each path in *input_dir* exists, creating missing ones.

    Args:
        input_dir: Iterable of directory paths (a list, despite the name).
    """
    for target in input_dir:
        if os.path.exists(target):
            # Already there (directory or otherwise): nothing to create.
            continue
        os.makedirs(target)
            
def get_unix_timestamp(time_str):
    """Convert a 'YYYY-MM-DD HH:MM:SS[.ffffff]' string to a Unix timestamp.

    The string is parsed as a naive datetime, so it is interpreted in the
    local timezone of the machine running the code.

    Returns:
        int: milliseconds since the epoch when *time_str* contains a '.',
        otherwise whole seconds since the epoch.

    Raises:
        ValueError: if *time_str* matches neither accepted format.
    """
    for pattern in ('%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S'):
        try:
            seconds = dt.strptime(time_str, pattern).timestamp()
            # Fractional inputs are reported in milliseconds, others in seconds.
            scale = 1000 if '.' in time_str else 1
            return int(seconds * scale)
        except ValueError:
            continue
    raise ValueError('Invalid time format: {}'.format(time_str))

def make_column_list(file, input_dir):
    """Return the column names of '<input_dir>/<file>.csv' as a list.

    Args:
        file: CSV file name without the '.csv' extension.
        input_dir: Directory containing the file.

    Returns:
        list: Column labels in file order, as parsed by pandas.
    """
    data_frame = pd.read_csv('{}/{}.csv'.format(input_dir, file))
    return list(data_frame.columns)

def datenum(d, d_base):
    """Return the seconds elapsed between midnight of *d_base*'s day and *d*.

    Args:
        d: datetime to measure.
        d_base: datetime (or date) whose calendar day provides the origin.

    Returns:
        float: ``(d - midnight_of_d_base).total_seconds()``.
    """
    # Going through the ordinal drops the time-of-day part of d_base.
    midnight = dt.fromordinal(d_base.toordinal())
    return (d - midnight).total_seconds()

def is_hidden(input_dir):
    """Tell whether the last component of a path starts with a dot.

    Returns:
        str: the string 'true' or 'false' (not a bool); callers compare
        against these strings.
    """
    last_component = os.path.basename(os.path.abspath(input_dir))
    return 'true' if last_component.startswith('.') else 'false'

def make_file_list(input_dir):
    """Collect the paths of all non-hidden files below *input_dir*.

    Hidden files (name starting with '.') are deleted from disk as a side
    effect instead of being listed.

    Args:
        input_dir: Root directory to walk recursively.

    Returns:
        list: Paths of the non-hidden files found.
    """
    file_list = []
    for root, _dirs, files in os.walk(input_dir):
        for name in files:
            # Join with the directory being walked, not with input_dir:
            # otherwise files in subdirectories get a path that does not exist.
            path = os.path.join(root, name)
            if is_hidden(name) == 'true':
                print (name, ' is hidden, trying to delete it')
                os.remove(path)
            else:
                file_list.append(path)

    return file_list

def make_name_list(input_dir):
    """Classify the entries of *input_dir* by the tag found in their name.

    Each entry name is cut at its first '.' and then sorted into lists
    according to the first matching tag: 'reformatter_uprof-',
    'reformatter_timechart-', 'uprof-', 'timechart-' or 'grafana-'.
    Entries matching no tag are ignored.

    Entries are visited in sorted order. os.listdir returns names in
    arbitrary order, while callers pair these lists by position and assign
    plot colours by index, so a deterministic order is required.

    Returns:
        tuple: (pcm_list, uprof_list, time_list, reformated_uprof_list,
        reformated_time_list, all_list, all_plots_list) where all_list holds
        the grafana and raw uprof names and all_plots_list holds the grafana
        and reformatted uprof names.
    """
    name_list = [entry.split('.')[0] for entry in sorted(os.listdir(input_dir))]

    all_list = []
    all_plots_list = []
    pcm_list = []
    uprof_list = []
    time_list = []
    reformated_uprof_list = []
    reformated_time_list = []

    for name_i in name_list:
        # The 'reformatter_*' tags must be tested first: they also contain
        # the shorter 'uprof-' / 'timechart-' tags.
        if 'reformatter_uprof-' in name_i:
            reformated_uprof_list.append(name_i)
            all_plots_list.append(name_i)

        elif 'reformatter_timechart-' in name_i:
            reformated_time_list.append(name_i)

        elif 'uprof-' in name_i:
            uprof_list.append(name_i)
            all_list.append(name_i)

        elif 'timechart-' in name_i:
            time_list.append(name_i)

        elif 'grafana-' in name_i:
            pcm_list.append(name_i)
            all_list.append(name_i)
            all_plots_list.append(name_i)

    return pcm_list, uprof_list, time_list, reformated_uprof_list, reformated_time_list, all_list, all_plots_list

def fetch_grafana_panels(grafana_url, dashboard_uid, timeout=30):
    """Download a Grafana dashboard definition and return its panels.

    Args:
        grafana_url: Base URL of the Grafana server.
        dashboard_uid: UID of the dashboard to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests can block forever on an unresponsive host.

    Returns:
        The value of ``['dashboard']['panels']`` in the JSON response, or
        None (after printing an error) when the request fails or the server
        does not answer with status 200.
    """
    # Get dashboard configuration
    dashboard_url = '{}/api/dashboards/uid/{}'.format(grafana_url, dashboard_uid)
    try:
        response = requests.get(dashboard_url, timeout=timeout)
    except requests.exceptions.RequestException as err:
        # Same contract as the bad-status path below: report and return None.
        print('Error in fetch_grafana_panels: Failed to fetch dashboard data. Request error: ', err)
        return None

    if response.status_code != 200:
        print('Error in fetch_grafana_panels: Failed to fetch dashboard data. Status code: ', response.status_code)
        return None

    dashboard_data = response.json()
    # Extract panels data
    panels = dashboard_data['dashboard']['panels']
    return panels

def get_query_urls(panel, host):
    """Build the queries of one dashboard panel for a given host.

    Every target of the panel that has an 'expr' entry contributes one query
    (with the '${host}' placeholder replaced by *host*) and one label (its
    'legendFormat', or '' when the target defines none).

    Args:
        panel: Panel dictionary from the dashboard definition.
        host: Host name substituted into the query expressions.

    Returns:
        tuple: ``(queries, queries_label)``, two lists of equal length, or
        None when the panel has no target with an 'expr'.
    """
    queries = []
    queries_label = []
    for target in panel.get('targets', []):
        if 'expr' not in target:
            continue
        queries.append(target['expr'].replace('${host}', host))
        # A target is not guaranteed to carry a legend; do not fail on it.
        queries_label.append(target.get('legendFormat', ''))

    if queries:
        return queries, queries_label
    return None

def extract_data_and_stats_from_panel(grafana_url, dashboard_uid, delta_time, host, input_dir, output_csv_file):
    """Query every panel of the given dashboards and save the series as CSV.

    For each dashboard UID, each panel query is sent to the
    '/api/datasources/proxy/1/api/v1/query_range' endpoint of *grafana_url*
    with a step of 2. The first series of each answer becomes one column
    named '<legend> <panel title>'. All columns are concatenated side by
    side and written to '<input_dir>/grafana-<output_csv_file>.csv'. Every
    dashboard UID writes to that same path.

    Args:
        grafana_url: Base URL of the Grafana server.
        dashboard_uid: Iterable of dashboard UIDs.
        delta_time: ``[start, end]`` strings accepted by get_unix_timestamp.
        host: Host name substituted into the panel queries.
        input_dir: Directory receiving the CSV file.
        output_csv_file: Name of the CSV file, without the 'grafana-' prefix
            and the '.csv' extension.

    Returns:
        None. Problems are reported with print(); a failed dashboard fetch or
        a non-200 query response aborts the whole extraction.
    """
    for dashboard_uid_to_use in dashboard_uid:
        panels_data = fetch_grafana_panels(grafana_url, dashboard_uid_to_use)
        if not panels_data:
            print('Error in extract_data_and_stats_from_panel: Failed to fetch dashboard panels data.')
            return

        url = '{}/api/datasources/proxy/1/api/v1/query_range'.format(grafana_url)
        start_timestamp = get_unix_timestamp(delta_time[0])
        end_timestamp = get_unix_timestamp(delta_time[1])
        all_dataframes = []

        for panel in panels_data:
            if 'targets' not in panel:
                print('Skipping panel ', panel['title'], ' with no targets.')
                continue

            # get_query_urls returns None (not a pair) when the panel has no
            # usable query, so it must be checked before unpacking.
            query_info = get_query_urls(panel, host)
            if not query_info or not query_info[0]:
                print('Skipping panel ', panel['title'], ' with no valid query URL.')
                continue
            query_urls, queries_label = query_info

            for query_url, query_label in zip(query_urls, queries_label):
                column_name = '{} {}'.format(query_label, panel['title'])
                data = {
                    'query': query_url,
                    'start': start_timestamp,
                    'end': end_timestamp,
                    'step': 2
                }

                response = requests.post(url, data=data)
                # An error response may not carry a JSON body; do not let the
                # decoding failure hide the real status code.
                try:
                    response_data = response.json()
                except ValueError:
                    response_data = None

                if response.status_code != 200:
                    print('Error: Failed to fetch dashboard data. Status code:content ', response.status_code, ':', response.content)
                    print('Response panel:data:content for panel ', panel['title'], ':', response_data, ':', response.content)
                    return None

                payload = response_data.get('data') if isinstance(response_data, dict) else None
                if not isinstance(payload, dict) or payload.get('resultType') != 'matrix':
                    print('Skipping query with no valid response in panel: ', panel['title'])
                    continue

                # Only the first series of the answer is used. An empty result
                # list means the query matched nothing in the time window.
                results = payload.get('result') or []
                values = results[0].get('values', []) if results else []
                if not values:
                    print('Skipping query with no valid response in panel: ', panel['title'])
                    continue

                if not all_dataframes:
                    # The first column actually kept carries the timestamps,
                    # even if earlier panels or queries were skipped.
                    df_tmp = pd.DataFrame(values, columns=['Timestamp', column_name])
                    df_tmp['Timestamp'] = pd.to_datetime(df_tmp['Timestamp'], unit='s')
                else:
                    values_without_first_column = [row[1:] for row in values]
                    df_tmp = pd.DataFrame(values_without_first_column, columns=[column_name])

                all_dataframes.append(df_tmp)

        if not all_dataframes:
            # pd.concat raises on an empty list; report instead of crashing.
            print('Error in extract_data_and_stats_from_panel: No data retrieved for dashboard ', dashboard_uid_to_use, ', CSV not written.')
            continue

        # Combine all dataframes into a single dataframe
        combined_df = pd.concat(all_dataframes, axis=1)

        # Save the combined dataframe as a CSV file
        output = '{}/grafana-{}.csv'.format(input_dir, output_csv_file)
        try:
            combined_df.to_csv(output, index=False)
            print('Data saved to CSV successfully:', output)
        except Exception as e:
            print('Exception Error: Failed to save data to CSV:', str(e))

def uprof_pcm_formatter(input_dir, file):
    """Rewrite '<input_dir>/<file>.csv' as '<input_dir>/reformatter_<file>.csv'.

    The output contains:
      * one header line built from the 'Package' and 'Timestamp' header rows,
        where each metric name gets a ' Socket<N>' suffix, followed by the
        columns 'L2 Hit Ratio Socket0', 'L2 Hit Ratio Socket1' and
        'CPU Utilization';
      * one line per data row (rows matching 'HH:MM:SS:mmm,'), where the
        relative timestamp is added to the 'Profile Time:' start time and
        written as 'YYYY-MM-DD HH:MM:SS', and the three extra columns are
        computed from fixed column positions of the row.

    Both files are opened in a ``with`` block so they are closed even when a
    malformed row raises while parsing.

    Args:
        input_dir: Directory holding the input and receiving the output.
        file: Input file name without the '.csv' extension.
    """
    newfile = 'reformatter_{}'.format(file)
    source_path = '{}/{}.csv'.format(input_dir, file)
    target_path = '{}/{}.csv'.format(input_dir, newfile)

    with open(source_path, 'r') as f, open(target_path, 'w') as f_new:
        for line in f:
            # extract initial time
            if 'Profile Time:' in line:
                full_date = line[14:-1]
                full_date = full_date.replace('/', '-')
                msec0 = int(full_date[20:23])
                sec0  = int(full_date[17:19])
                min0  = int(full_date[14:16])
                hour0 = int(full_date[11:13])
                day0  = int(full_date[8:10])

            # append package numbers to headers,
            # and add headers for l2 cache hit ratio
            if 'Package' in line:
                header1 = line.split(',')
            if 'Timestamp' in line:
                header2 = line.split(',')[1:]

                package_num = '0'
                header_new = ['Timestamp']
                for package, header in zip(header1, header2):
                    # A bare newline marks the end of a header row: emit the
                    # header line with the three computed columns appended.
                    if (package == '\n') or (header == '\n'):
                        header_new += ['L2 Hit Ratio Socket0', 'L2 Hit Ratio Socket1', 'CPU Utilization', '\n']
                        header_new_str = ','.join(header_new)
                        f_new.write(header_new_str)
                    if 'Package' in package:
                        package_num = package[-1]
                    header_new += [header + ' Socket' + package_num]

            # generate full timestamps
            if re.search('..:..:..:...,', line):
                msec_n_old = int(line[9:12])
                sec_n_old = int(line[6:8])
                min_n_old = int(line[3:5])
                hour_n_old = int(line[0:2])

                # Add the relative time to the start time, carrying overflow
                # upwards; milliseconds only contribute their carry because
                # the output has one-second resolution.
                msec_carryover = (msec_n_old + msec0) // 1000
                sec_n  = (sec_n_old + sec0 + msec_carryover) % 60
                sec_carryover  = (sec_n_old + sec0 + msec_carryover) // 60
                min_n  = (min_n_old + min0 + sec_carryover) % 60
                min_carryover = (min_n_old + min0 + sec_carryover) // 60
                hour_n = (hour_n_old + hour0 + min_carryover) % 24
                hour_carryover = (hour_n_old + hour0 + min_carryover) // 24
                # NOTE(review): the day is not wrapped at the end of a month.
                day_n  = (day0 + hour_carryover)
                date_n = '{year_month}-{day:02d} {hour:02d}:{min:02d}:{sec:02d}'.format(year_month=full_date[0:7], day=day_n, hour=hour_n, min=min_n, sec=sec_n)
                line_n = re.sub('..:..:..:...', date_n, line)

                # calculate L2 Hit ratio
                # NOTE(review): raises ZeroDivisionError when both operands
                # of a ratio are 0 -- confirm whether that can occur.
                line_list = line_n.split(',')
                l2_hit_ratio_0 = float(line_list[19]) / (float(line_list[19]) + float(line_list[15])) * 100
                l2_hit_ratio_0 = str(round(l2_hit_ratio_0, 2))
                l2_hit_ratio_1 = float(line_list[41]) / (float(line_list[41]) + float(line_list[37])) * 100
                l2_hit_ratio_1 = str(round(l2_hit_ratio_1, 2))

                # CPU Utilization
                cpu_utiliz = float(line_list[1]) + float(line_list[22])
                cpu_utiliz = str(round(cpu_utiliz, 2))

                # The last field of the row is overwritten with the first
                # computed value; the others are appended after it.
                line_list[-1] = l2_hit_ratio_0
                line_list.append(l2_hit_ratio_1)
                line_list.append(cpu_utiliz)
                line_list.append('\n')
                line_n = ','.join(line_list)
                f_new.write(line_n)
    
def uprof_timechart_formatter(input_dir, file):
    """Rewrite '<input_dir>/<file>.csv' as '<input_dir>/reformatter_<file>.csv'.

    Only the 'Timestamp' header row and the rows after it are copied. In
    each copied data row the time field (second comma-separated field,
    'H:M:S:...') is replaced by 'YYYY-MM-DD HH:MM:SS', using the date taken
    from the 'Profile Start Time:' line.

    Both files are opened in a ``with`` block so they are closed even when a
    malformed row raises while parsing.

    Args:
        input_dir: Directory holding the input and receiving the output.
        file: Input file name without the '.csv' extension.
    """
    newfile = 'reformatter_{}'.format(file)
    source_path = '{}/{}.csv'.format(input_dir, file)
    target_path = '{}/{}.csv'.format(input_dir, newfile)

    with open(source_path, 'r') as f, open(target_path, 'w') as f_new:
        header = True
        for line in f:
            # get & reformat full date
            if 'Profile Start Time:' in line:
                full_date = line.split(',')[1]
                month = month2num(full_date[0:3])
                date = int(full_date[4:6])
                year = int(full_date[7:11])
                full_date_new = '{year}-{month:02d}-{date:02d}'.format(year=year, month=month, date=date)

            # Reformat timestamps
            if not header:
                timestamp_n = line.split(',')[1]
                timestamp_n = timestamp_n.split(':')
                hour_n = int(timestamp_n[0])
                min_n = int(timestamp_n[1])
                sec_n = int(timestamp_n[2])
                date_n = ',{year_month_day} {hour:02d}:{min:02d}:{sec:02d},'.format(year_month_day=full_date_new, hour=hour_n, min=min_n, sec=sec_n)

                line_n = re.sub(',.*:.*:.*:...,', date_n, line)
                f_new.write(line_n)

            # header=False indicates next line is data. This check comes
            # after the data branch so the header row itself is copied
            # verbatim and not reformatted.
            if 'Timestamp' in line:
                header = False
                f_new.write(line)

def month2num(month_str):
    """Map a three-letter English month abbreviation to its number (1-12).

    Returns:
        int: 1 for 'Jan' ... 12 for 'Dec'; None (after printing a warning)
        for any other value.
    """
    abbreviations = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if month_str in abbreviations:
        return abbreviations.index(month_str) + 1
    print('Warning: invalid month')
    return None

def combine_time_and_uprof_files(input_dir, time_file, uprof_file):
    """Outer-merge the reformatted timechart and uprof CSV files.

    Reads 'reformatter_<time_file>.csv' and 'reformatter_<uprof_file>.csv'
    from *input_dir*, merges them on their common columns (outer join) and
    writes the result back over the reformatted uprof file.
    """
    time_path = '{}/reformatter_{}.csv'.format(input_dir, time_file)
    uprof_path = '{}/reformatter_{}.csv'.format(input_dir, uprof_file)

    df_merged = pd.merge(pd.read_csv(time_path), pd.read_csv(uprof_path), how='outer')

    # Save the combined dataframe as a CSV file (replaces the uprof input)
    try:
        df_merged.to_csv(uprof_path, index=False)
        print('Data saved to CSV successfully:', uprof_path)
    except Exception as e:
        print('Error in combine_time_and_uprof_files: Failed to save data to CSV:', str(e))

def break_file_name(file):
    """Split a file name into its '-'-separated fields and return the list."""
    return file.split('-')

def check_OS(server):
    """Return 'Alma9' for servers listed in alma9_os, 'Centos8' otherwise."""
    return 'Alma9' if server in alma9_os else 'Centos8'

def add_new_time_format(input_dir, file):
    """Add a 'NewTime' column (minutes since the first row) to a CSV file.

    '<input_dir>/<file>.csv' is read, a first column 'NewTime' holding the
    minutes elapsed between each 'Timestamp' and the first 'Timestamp' is
    inserted, and the file is rewritten in place.

    The function can be run repeatedly on the same file: an existing
    'NewTime' column is replaced rather than duplicated.

    Args:
        input_dir: Directory containing the file.
        file: File name without the '.csv' extension. Its 'Timestamp' column
            must use the format 'YYYY-MM-DD HH:MM:SS'.
    """
    path = '{}/{}.csv'.format(input_dir, file)
    data_frame = pd.read_csv(path)

    # The file is rewritten in place, so a previous run has already added
    # 'NewTime'; drop it so that re-processing does not pile up duplicates.
    if 'NewTime' in data_frame.columns:
        del data_frame['NewTime']

    # Add new time format
    time_format = '%Y-%m-%d %H:%M:%S'
    d_0 = dt.strptime(data_frame['Timestamp'][0], time_format)
    newtime = [(dt.strptime(value, time_format) - d_0).total_seconds() / 60.
               for value in data_frame['Timestamp']]

    data_frame.insert(0, 'NewTime', newtime, True)
    data_frame.to_csv(path, index=False)
    
def get_column_val(df, columns, labels, file):
    """Extract (and rescale) columns of *df* together with legend labels.

    *columns* and *labels* are walked in lockstep. 'NewTime' and 'Timestamp'
    are skipped; every other column is converted to a list, after applying
    the scaling registered for it below (columns without a rule are taken
    as they are).

    Args:
        df: DataFrame holding the columns.
        columns: Column names to extract.
        labels: Per-column label suffixes.
        file: File name; fields 5 and 2 of break_file_name(file) prefix
            each label.

    Returns:
        tuple: ``(val, label)``, the list of value lists and the list of
        legend labels, in the order the columns were processed.
    """
    info = break_file_name(file)

    # (column names, Series method, operand), checked in order.
    scale_rules = (
        (('C0 Core C-state residency', 'CPU Utilization'), 'mul', 1),
        (('Socket0 L3 Cache Misses Per Instruction', 'Socket1 L3 Cache Misses Per Instruction',
          'L2 Miss (pti) Socket0', 'L2 Miss (pti) Socket1'), 'mul', 1),
        (('Socket0 L2 Cache Misses', 'Socket1 L2 Cache Misses',
          'Socket0 L3 Cache Misses', 'Socket1 L3 Cache Misses'), 'mul', 1),
        (('L3 Miss Socket0', 'L3 Miss Socket1'), 'div', 1000000000),
        (('Socket0 Memory Bandwidth', 'Socket1 Memory Bandwidth'), 'div', 1000),
        (('L2 Hit Ratio Socket0', 'L2 Hit Ratio Socket1'), 'div', 1),
        (('Socket0 L2 Cache Misses Per Instruction', 'Socket1 L2 Cache Misses Per Instruction'), 'mul', 100),
    )
    time_columns = ('NewTime', 'Timestamp')
    # These columns are not per-socket, so their label has no socket suffix.
    whole_cpu_columns = ('C0 Core C-state residency', 'CPU Utilization')

    val = []
    label = []
    for column, socket_label in zip(columns, labels):
        if column in time_columns:
            continue

        series = df[column]
        rule = next((r for r in scale_rules if column in r[0]), None)
        if rule is not None:
            series = getattr(series, rule[1])(rule[2])
        val.append(series.values.tolist())

        if column in whole_cpu_columns:
            label.append('{} {}'.format(info[5], info[2]))
        else:
            label.append('{} {} {}'.format(info[5], info[2], socket_label))

    return val, label
    
def plot_vars_comparison(input_dir, output_dir):
    """Plot the five metrics of all processed runs into one comparison PNG.

    Every 'grafana-*' and 'reformatter_uprof-*' CSV file of *input_dir* is
    read; its 'NewTime' column is the x axis. One subplot is drawn per row
    of pcm_columns_list / uprof_columns_list, with one colour per file and
    one line style per series. The figure is saved as
    '<output_dir>/performance_comparison_<f1>_<f4>.png', where f1 and f4 are
    fields 1 and 4 of the name of the last file processed.

    Returns:
        None. When no file to plot is found, an error is printed and no
        figure is written.
    """
    X_plot = []
    Y_plot = []
    label_plot = []

    pcm_file, uprof_file, time_file, reformated_uprof_file, reformated_time_file, all_file, all_plots_file = make_name_list(input_dir)

    if not all_plots_file:
        # Without this guard the output name below would use an undefined 'info'.
        print('Error in plot_vars_comparison: no grafana or reformatter_uprof files found in', input_dir)
        return

    for file_i in all_plots_file:
        info = break_file_name(file_i)
        data_frame = pd.read_csv('{}/{}.csv'.format(input_dir, file_i))
        X_plot.append(data_frame['NewTime'].values.tolist())

        # Files that are not 'grafana' ones are read with the uprof columns.
        columns_list = pcm_columns_list if info[0] == 'grafana' else uprof_columns_list

        Y_tmp = []
        label_tmp = []
        for columns in columns_list:
            Y, label = get_column_val(data_frame, columns, label_columns, file_i)
            Y_tmp.append(Y)
            label_tmp.append(label)

        Y_plot.append(Y_tmp)
        label_plot.append(label_tmp)

    # Here we make the plot:
    # Apply the base style first: plt.style.use('default') resets rcParams,
    # so the customisations must come after it and before the axes exist.
    plt.style.use('default')
    matplotlib.rcParams['font.family'] = 'DejaVu Serif'
    plt.rcParams['axes.grid'] = True
    fig, axs = plt.subplots(5, 1, figsize=(14, 18))
    axs = axs.flatten()

    for i, (x_values, y_groups) in enumerate(zip(X_plot, Y_plot)):
        # Cycle through the palettes instead of failing once they are exhausted.
        color = color_list[i % len(color_list)]
        for j, y_series in enumerate(y_groups):
            for k, y_values in enumerate(y_series):
                axs[j].plot(x_values, y_values, color=color, label=label_plot[i][j][k],
                            linestyle=linestyle_list[k % len(linestyle_list)])
            if y_series:
                axs[j].set_ylabel('{}'.format(label_names[j]))
                axs[j].set_xlabel('Time (min)')
                axs[j].legend(loc='center left', bbox_to_anchor=(1, 0.5))

    plt.tight_layout()
    plt.savefig('{}/performance_comparison_{}_{}.png'.format(output_dir, info[1], info[4]))
    plt.close()
    
def convert(s):
    """Return the items of the iterable *s* as a new list (keys for a dict)."""
    return list(s)

def json_info(file_daqconf, file_cpupins, input_dir, var, pdf, if_pdf=False):
    """Write a summary of one daqconf / cpupin configuration to the report.

    Reads '<input_dir>/cpupins/<file_cpupins>.json' and
    '<input_dir>/daqconfs/<file_daqconf>.json'. Both files are always read,
    so a missing file or key raises even when nothing is printed.

    Args:
        file_daqconf: daqconf file name without '.json'.
        file_cpupins: cpupin file name without '.json'.
        input_dir: Directory containing the 'cpupins' and 'daqconfs' folders.
        var: Application name; the entry '--name <var>' of the cpupin file's
            'daq_application' section is reported.
        pdf: Report object providing ``write(h, text)`` and ``set_font(...)``.
        if_pdf: When False nothing is written to *pdf*.
    """
    with open('{}/cpupins/{}.json'.format(input_dir, file_cpupins), 'r') as ff:
        data = json.load(ff)
    data_list = data['daq_application']['--name {}'.format(var)]
    threads = data_list['threads']
    data_threads = list(threads)
    # Threads are laid out in three columns; round up so that a count that is
    # not a multiple of three does not silently drop the last entries.
    n_rows = -(-len(data_threads) // 3)

    with open('{}/daqconfs/{}.json'.format(input_dir, file_daqconf), 'r') as f:
        data0 = json.load(f)

    info0 = json.dumps(data0['boot']['use_connectivity_service'], skipkeys = True, allow_nan = True)
    readout = data0['readout']
    info1 = json.dumps(readout['thread_pinning_file'], skipkeys = True, allow_nan = True)
    info2 = json.dumps(readout['latency_buffer_size'], skipkeys = True, allow_nan = True)
    info3 = json.dumps(readout['enable_raw_recording'], skipkeys = True, allow_nan = True)
    info4 = json.dumps(readout['raw_recording_output_dir'], skipkeys = True, allow_nan = True)
    info5 = json.dumps(readout['generate_periodic_adc_pattern'], skipkeys = True, allow_nan = True)
    info6 = json.dumps(readout['use_fake_cards'], skipkeys = True, allow_nan = True)
    info7 = json.dumps(readout['enable_tpg'], skipkeys = True, allow_nan = True)
    info8 = json.dumps(readout['tpg_threshold'], skipkeys = True, allow_nan = True)
    info9 = json.dumps(readout['tpg_algorithm'], skipkeys = True, allow_nan = True)
    info10 = json.dumps(data0['hsi']['random_trigger_rate_hz'], skipkeys = True, allow_nan = True)

    # The prescale lives in trigger['trigger_activity_config']['prescale'].
    # Comparing the whole trigger section with a string is never true, so
    # test for the key instead.
    trigger = data0['trigger']
    activity_config = trigger.get('trigger_activity_config') if isinstance(trigger, dict) else None
    has_prescale = isinstance(activity_config, dict) and 'prescale' in activity_config
    if has_prescale:
        info11 = json.dumps(activity_config['prescale'], skipkeys = True, allow_nan = True)

    if not if_pdf:
        return

    pdf.write(5, 'daqconf file: {} \n'.format(file_daqconf))
    pdf.write(5, '    * use connectivity service: {} \n'.format(info0))
    pdf.write(5, '    * cpupin file: {} \n'.format(info1))
    pdf.write(5, '        - {} \n'.format(var))
    pdf.write(5, '        - "parent": "{}" \n'.format(data_list['parent']))
    pdf.write(5, '        - "threads": \n')

    pdf.set_font('Times', '', 8)
    for i in range(n_rows):
        # Row i shows entries i, i + n_rows and i + 2 * n_rows (when present).
        cells = ['"{}": {}'.format(name, threads[name]) for name in data_threads[i::n_rows]]
        row = '                ' + cells[0]
        if len(cells) > 1:
            row += '    ' + cells[1]
        if len(cells) > 2:
            row += '     ' + cells[2]
        pdf.write(5, row + ' \n')

    pdf.set_font('Times', '', 10)
    pdf.write(5, '    * latency buffer size: {} \n'.format(info2))
    pdf.write(5, '    * generate periodic adc pattern: {} \n'.format(info5))
    pdf.write(5, '    * use fake cards: {} \n'.format(info6))
    pdf.write(5, '    * raw recording: {} \n'.format(info3))
    pdf.write(5, '    * location of the data: {} \n'.format(info4))
    pdf.write(5, '    * use tpg: {} \n'.format(info7))
    pdf.write(5, '    * tpg threshold: {} \n'.format(info8))
    pdf.write(5, '    * tpg algorithm: {} \n'.format(info9))
    pdf.write(5, '    * trigger rate: {} \n'.format(info10))

    if has_prescale:
        pdf.write(5, '    * trigger prescale: {} \n'.format(info11))
    else:
        pdf.write(5, '    * trigger prescale: False \n')

    pdf.write(5,'\n')
            
def create_report_performance(input_dir, output_dir, daqconfs_cpupins_folder_parent_dir, process_pcm_files=False, process_uprof_files=False, print_info=True):
    """Build '<output_dir>/performance_report.pdf' from the files of *input_dir*.

    Steps: optionally pre-process the raw CSV files, draw the comparison
    figure, then write a PDF holding an introduction, the figure, a summary
    table of the tests and (optionally) the configuration of each test.

    Args:
        input_dir: Directory with the 'grafana-*', 'uprof-*' and
            'timechart-*' CSV files.
        output_dir: Directory receiving the figure and the PDF.
        daqconfs_cpupins_folder_parent_dir: Directory containing the
            'daqconfs' and 'cpupins' folders read by json_info.
        process_pcm_files: Add the 'NewTime' column to every grafana file.
        process_uprof_files: Reformat and merge every timechart/uprof pair
            and add the 'NewTime' column to the result.
        print_info: Forwarded to json_info as ``if_pdf``.

    Returns:
        None. When *input_dir* holds no grafana or uprof file, an error is
        printed and no report is written.
    """
    directory([input_dir, output_dir])
    now = dt.now()
    current_dnt = now.strftime('%Y-%m-%d %H:%M:%S')

    pcm_file, uprof_file, time_file, reformated_uprof_file, reformated_time_file, all_file, all_plots_file = make_name_list(input_dir)

    if not all_file:
        # Nothing to report on; all_file[0] below would raise IndexError.
        print('Error in create_report_performance: no grafana or uprof files found in', input_dir)
        return

    # Open pdf file
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font('Times', 'B', 16)
    pdf.cell(40,10,'Performance Report')
    pdf.write(5,'\n\n')

    # Processing the data first.
    # The grafana files and the timechart/uprof pairs are handled in separate
    # loops: zipping all three lists together would skip every file as soon
    # as one of the lists is empty (e.g. a run with grafana files only).
    if process_pcm_files:
        for file_pcm_i in pcm_file:
            add_new_time_format(input_dir, file_pcm_i)
    if process_uprof_files:
        # NOTE(review): timechart and uprof files are paired by position in
        # the lists returned by make_name_list -- confirm both lists are
        # ordered consistently.
        for file_time_i, file_uprof_i in zip(time_file, uprof_file):
            uprof_timechart_formatter(input_dir, file_time_i)
            uprof_pcm_formatter(input_dir, file_uprof_i)
            combine_time_and_uprof_files(input_dir, file_time_i, file_uprof_i)
            add_new_time_format(input_dir, 'reformatter_{}'.format(file_uprof_i))

    info_pcm_basic = break_file_name(all_file[0])

    # creating report
    pdf.set_font('Times', '', 10)
    pdf.write(5, 'The tests were ran using the dunedaq verison fddaq-{} and for the WIB{} data format for 8, 16, 24, 32, 40, and 48 streams. The Figure1 shows the comparison of the tests ran (Table1) using the different metrics. \n'.format(info_pcm_basic[1], info_pcm_basic[4]))
    pdf.write(5,'\n')
    plot_vars_comparison(input_dir, output_dir)
    pdf.image('{}/performance_comparison_{}_{}.png'.format(output_dir, info_pcm_basic[1], info_pcm_basic[4]), w=180)
    pdf.write(5, 'Figure1. Comparison of the tests ran using the metrics CPU Utilization (%), Memory Bandwidth (GB/sec), L2 Cache Misses (Million), L3 Cache Misses (Million), CPU Power Consumption (Watt).')
    pdf.write(5,'\n')

    #-------------------------------------------TABLE-----------------------------------------------
    data = []
    headers = ['Test', 'Readout app SRV', 'OS', 'NODE', 'Other app SRV']
    data.append(headers)

    line_height = pdf.font_size * 2
    col_width = [pdf.epw/3.8, pdf.epw/6.5, pdf.epw/10, pdf.epw/10, pdf.epw/7]

    lh_list = [] #list with proper line_height for each row
    use_default_height = 0 #flag

    for file_i in all_file:
        info = break_file_name(file_i)
        line = [info[5], info[2], check_OS(info[2]), info[3], info[6]]
        data.append(line)

    for row in data:
        for datum in row:
            word_list = datum.split()
            number_of_words = len(word_list) #how many words
            if number_of_words>2:
                use_default_height = 1
                new_line_height = pdf.font_size * (number_of_words) #new height change according to data
        if not use_default_height:
            lh_list.append(line_height)
        else:
            lh_list.append(new_line_height)
            use_default_height = 0

    for j, row in enumerate(data):
        for k, datum in enumerate(row):
            line_height = lh_list[j] #choose right height for current row
            pdf.multi_cell(col_width[k], line_height, datum, border=1, align='L', ln=3, max_line_height=pdf.font_size)

        pdf.ln(line_height)
    #-------------------------------------------------------------------------------------------------

    pdf.write(5, 'Table1. Summary of the tests ran. \n')
    pdf.write(5,'\n')
    # The third positional argument of write() is a link target, not a font
    # style, so the heading is made bold through set_font instead.
    pdf.set_font('Times', 'B', 10)
    pdf.write(5, 'Configurations: \n')
    pdf.set_font('Times', '', 10)

    for file_i in all_file:
        info = break_file_name(file_i)
        json_info(file_daqconf='daqconf-{}-{}-{}'.format(info[4], info[5], info[2]), file_cpupins='cpupin-{}-{}-{}'.format(info[4], info[5], info[2]), input_dir=daqconfs_cpupins_folder_parent_dir, var='ru{}{}{}'.format(info[2], info[4], info[3]), pdf=pdf, if_pdf=print_info)

    pdf.write(5, '\n\n\n')
    pdf.write(5, 'The End, made on {}'.format(current_dnt))
    pdf.output('{}/performance_report.pdf'.format(output_dir))
    
# Printed once the cell has executed, i.e. all helpers above are defined.
print('Ready to run and process')

# Processing data from Grafana
Note: change the paths to fit yours

To extract the data from a given dashboard in grafana:
* 'grafana_url' is:
    * 'http://np04-srv-009.cern.ch:3000'  (legacy)
    * 'http://np04-srv-017.cern.ch:31023' (new; not working for now, use the legacy one)
* 'dashboard_uid' is the unique dashboard identifier; you can find it in the dashboard's web link, right after /d/ (i.e. /d/<dashboard_uid>/...).
    * for intel-r-performance-counter-monitor-intel-r-pcm dashboard dashboard_uid = '91zWmJEVk' 
* delta_time is [start, end] given in the format '%Y-%m-%d %H:%M:%S'
* host is the name of the server in study for example: "np02-srv-003"     

file_name: [version]-[server_app_tested]-[numa node]-[data format]-[tests_name]-[server rest_of the apps]
* example of name: v4_1_1-np02srv003-0-eth-stream_scaling-np04srv003

In [None]:
# Grafana server and dashboard(s) to query.
grafana_url = 'http://np04-srv-009.cern.ch:3000' 
dashboard_uid = ['91zWmJEVk']
# Host substituted for '${host}' in the dashboard queries.
host_used = 'np02-srv-003'  
# One [start, end] time window per test; entry i belongs to output_csv_file[i].
delta_time = [['2023-10-01 01:42:30', '2023-10-01 02:54:35'], 
              ['2023-10-06 10:31:52', '2023-10-06 11:42:41'], 
              ['2023-10-05 15:05:45', '2023-10-05 16:17:50'], 
              ['2023-10-05 16:23:30', '2023-10-05 17:36:40']]
# Output names; each is written as '<results_path>/grafana-<name>.csv'.
output_csv_file = ['NFD23_09_28-np02srv003-0-eth-stream_scaling-np04srv003', 
                   'NFD23_09_28-np02srv003-0-eth-stream_scaling_swtpg-np04srv003', 
                   'NFD23_09_28-np02srv003-0-eth-stream_scaling_recording-np04srv003', 
                   'NFD23_09_28-np02srv003-0-eth-stream_scaling_recording_swtpg-np04srv003']
results_path = '../performance_results'

# Extract one CSV per (time window, output name) pair.
for delta_time_list, output_csv_file_list in zip(delta_time, output_csv_file):
    extract_data_and_stats_from_panel(grafana_url, dashboard_uid, delta_time=delta_time_list, host=host_used, input_dir=results_path, output_csv_file=output_csv_file_list)
    
print('done :-)')

# Performance report
Note: change the paths to fit yours

In [None]:
# Directory with the CSV files to report on.
results_path = '../performance_results'
# Directory receiving the comparison figure and the PDF report.
report_path = '../reports'
# Directory containing the 'daqconfs' and 'cpupins' folders.
performancetest_path = '../sourcecode/performancetest'

# Both process_* flags are off here, so the CSV files are used as they are.
create_report_performance(input_dir=results_path, output_dir=report_path, daqconfs_cpupins_folder_parent_dir=performancetest_path, process_pcm_files=False, process_uprof_files=False, print_info=True)

print('THE END')