# Import the modules needed and define paths and functions

In [65]:
import os
import sys
import csv
import time
import glob
import json
import requests
import re
import shutil
import subprocess
import matplotlib
import matplotlib.pyplot as plt 
import matplotlib.cm as cm
import matplotlib.colors as mcolors
import matplotlib.dates as mdates
import numpy as np
import pickle as pkl
import pandas as pd
from pathlib import Path
from datetime import datetime as dt
from fpdf import FPDF  

# Y-axis labels, one per subplot drawn by plot_vars_comparison (indexed by metric).
label_names = ['CPU Utilization (%)', 
               'Memory Bandwidth (GB/sec)',
               'L2 Cache Misses (Million)', 
               'L3 Cache Misses (Million)',
               'CPU Power Consumption (Watt)']

# Column names read from the 'grafana-*' (PCM) CSV files. One inner list per
# metric; plot_vars_comparison zips this list with uprof_columns_list and uses
# the position as the subplot / label_names index.
pcm_columns_list = [['C0 Core C-state residency'],
                    ['Socket0 Memory Bandwidth', 'Socket1 Memory Bandwidth'],
                    ['Socket0 L2 Cache Misses', 'Socket1 L2 Cache Misses'], 
                    ['Socket0 L3 Cache Misses', 'Socket1 L3 Cache Misses'], 
                    ['Package Joules Consumed Socket0 Energy Consumption', 'Package Joules Consumed Socket1 Energy Consumption']]
# Column names read from the 'reformatter_uprof-*' CSV files, in the same
# per-metric order as pcm_columns_list.
uprof_columns_list = [['CPU Utilization'], 
                      ['Total Mem Bw (GB/s) Socket0', 'Total Mem Bw (GB/s) Socket1'],
                      ['L2 Miss (pti) Socket0', 'L2 Miss (pti) Socket1'],
                      ['L3 Miss Socket0', 'L3 Miss Socket1'], 
                      ['socket0-package-power','socket1-package-power']]
# Servers that check_OS reports as 'Alma9'; every other server is reported as 'Centos8'.
alma9_os = ['np02srv004']

# NOTE(review): the two lines below look like alternative column names that can
# be swapped into the lists above -- confirm against the CSV headers before use.
#for pcm ['Socket0 L2 Cache Misses Per Instruction', 'Socket1 L2 Cache Misses Per Instruction']
#for uprof [' Utilization (%) Socket0', 'Utilization (%) Socket1', 'L2 Hit Ratio Socket0', 'L2 Hit Ratio Socket1']

# Per-socket suffixes passed to get_column_val to build the legend labels.
label_columns = ['Socket0', 'Socket1'] 
# Colour palette; plot_vars_comparison takes PCM colours from the front of the
# list and uProf colours from the back.
color_list = ['red', 'blue', 'green', 'cyan', 'orange', 'yellow', 'magenta', 'lime', 'purple', 'navy', 'hotpink', 'olive', 'salmon', 'teal', 'darkblue', 'darkgreen', 'darkcyan', 'darkorange', 
              'deepskyblue', 'darkmagenta', 'sienna', 'chocolate', 'orangered', 'gray', 'royalblue', 'gold', 'peru', 'seagreen', 'violet', 'tomato', 'lightsalmon', 'crimson', 'lightblue', 
              'lightgreen', 'lightpink', 'black', 'darkgray', 'lightgray', 'saddlebrown', 'brown', 'khaki', 'tan', 'turquoise', 'linen', 'lawngreen', 'coral']
# Line styles; plot_vars_comparison selects one per (PCM, uProf) file pair by index.
linestyle_list = ['solid', 'dotted', 'dashed', 'dashdot']

# Marker symbols; not referenced by any function visible in this part of the file.
marker_list = ['s','o','.','p','P','^','<','>','*','+','x','X','d','D','h','H']

def directory(input_dir):
    """Create every directory named in *input_dir* that does not exist yet.

    Args:
        input_dir: An iterable of directory paths. A single path (``str``,
            ``bytes`` or ``os.PathLike``) is also accepted and treated as a
            one-element list.

    Raises:
        FileExistsError: If a path exists but is not a directory.
    """
    # A bare string is itself iterable; without this guard each character
    # would be treated as a separate directory name.
    if isinstance(input_dir, (str, bytes, os.PathLike)):
        input_dir = [input_dir]

    for dir_path in input_dir:
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(dir_path, exist_ok=True)
            
def get_unix_timestamp(time_str):
    """Convert a 'YYYY-MM-DD HH:MM:SS[.ffffff]' string to a Unix timestamp.

    The string is parsed as a naive datetime, so the conversion uses the
    local timezone of the machine.

    NOTE: the unit of the result depends on the input. A string containing
    a '.' yields integer *milliseconds*; any other string yields integer
    *seconds*.

    Raises:
        ValueError: If *time_str* matches neither supported format.
    """
    has_fraction = '.' in time_str
    for pattern in ('%Y-%m-%d %H:%M:%S.%f', '%Y-%m-%d %H:%M:%S'):
        try:
            seconds = dt.strptime(time_str, pattern).timestamp()
        except ValueError:
            # Not this format; try the next one.
            continue
        if has_fraction:
            return int(seconds * 1000)
        return int(seconds)
    raise ValueError('Invalid time format: {}'.format(time_str))

def make_column_list(file, input_dir):
    """Return the column names of ``<input_dir>/<file>.csv`` as a list.

    Args:
        file: CSV file name without the '.csv' extension.
        input_dir: Directory that contains the file.

    Returns:
        list: The header names in file order.
    """
    # nrows=0 parses only the header line; the data rows are not needed here.
    header_only = pd.read_csv('{}/{}.csv'.format(input_dir, file), nrows=0)
    return list(header_only.columns)

def datenum(d, d_base):
    """Return the seconds elapsed between midnight of *d_base*'s day and *d*.

    Args:
        d: The datetime to measure.
        d_base: Its calendar day (via ``toordinal``) defines the zero point;
            the time-of-day part of *d_base* is ignored.

    Returns:
        float: ``(d - midnight_of_d_base).total_seconds()``.
    """
    midnight = dt.fromordinal(d_base.toordinal())
    elapsed = d - midnight
    return elapsed.total_seconds()

def is_hidden(input_dir):
    """Tell whether the last component of a path starts with a dot.

    Returns:
        str: The string 'true' or 'false' (not a bool); callers compare
        against these literal strings.
    """
    last_component = os.path.basename(os.path.abspath(input_dir))
    return 'true' if last_component.startswith('.') else 'false'

def make_file_list(input_dir):
    """Recursively list the non-hidden files below *input_dir*.

    Side effect: every file whose name starts with '.' is deleted from disk
    instead of being listed.

    Args:
        input_dir: Root directory to walk.

    Returns:
        list: Full paths of all non-hidden files, including files that live
        in sub-directories.
    """
    file_list = []
    for root, _dirs, files in os.walk(input_dir):
        for name in files:
            # Join with the directory being walked (root), not with the top
            # directory: files in sub-directories would otherwise get a path
            # that does not exist.
            path = os.path.join(root, name)
            # Same rule as is_hidden() for a bare file name.
            if name.startswith('.'):
                print (name, ' is hidden, trying to delete it')
                os.remove(path)
            else:
                file_list.append(path)

    return file_list

def make_name_list(input_dir):
    """Classify the entries of *input_dir* by their file-name prefix.

    Every directory entry is reduced to the text before its first '.', and
    then sorted into one of five lists depending on the marker its name
    contains. Entries matching no marker are ignored.

    Args:
        input_dir: Directory to scan (not recursive).

    Returns:
        tuple: ``(pcm_list, uprof_list, time_list, reformated_uprof_list,
        reformated_time_list)``. Each list is sorted alphabetically.
    """
    # os.listdir() returns entries in arbitrary order. The returned lists are
    # paired up by position later on (zip), so sort for a reproducible pairing.
    name_list = sorted(entry.split('.')[0] for entry in os.listdir(input_dir))

    pcm_list = []
    uprof_list = []
    time_list = []
    reformated_uprof_list = []
    reformated_time_list = []

    # Order matters: the 'reformatter_*' markers contain the plain markers as
    # substrings and therefore have to be tested first.
    marker_to_list = (
        ('reformatter_uprof-', reformated_uprof_list),
        ('reformatter_timechart-', reformated_time_list),
        ('uprof-', uprof_list),
        ('timechart-', time_list),
        ('grafana-', pcm_list),
    )

    for name_i in name_list:
        for marker, target_list in marker_to_list:
            if marker in name_i:
                target_list.append(name_i)
                break

    return pcm_list, uprof_list, time_list, reformated_uprof_list, reformated_time_list

def fetch_grafana_panels(grafana_url, dashboard_uid, timeout=30):
    """Download a Grafana dashboard definition and return its panels.

    Args:
        grafana_url: Base URL of the Grafana server (no trailing slash).
        dashboard_uid: UID of the dashboard to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block forever on an unresponsive host.

    Returns:
        The dashboard's ``panels`` list (empty when the dashboard defines
        none), or ``None`` when the server does not answer with status 200.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            the timeout expires.
    """
    # Get dashboard configuration
    dashboard_url = '{}/api/dashboards/uid/{}'.format(grafana_url, dashboard_uid)
    response = requests.get(dashboard_url, timeout=timeout)

    if response.status_code != 200:
        print('Error in fetch_grafana_panels: Failed to fetch dashboard data. Status code: ', response.status_code)
        return None

    dashboard_data = response.json()
    # Extract panels data; a dashboard without panels yields an empty list
    # instead of a KeyError.
    panels = dashboard_data['dashboard'].get('panels', [])
    return panels

def get_query_urls(panel, host):
    """Collect the query expressions and legend labels of one panel.

    Args:
        panel: Panel dictionary; its optional 'targets' entry is a list of
            target dictionaries.
        host: Text substituted for every '${host}' placeholder in a query.

    Returns:
        ``(queries, queries_label)`` -- two lists of equal length -- or
        ``None`` when no target of the panel carries an 'expr' entry.
    """
    queries = []
    queries_label = []
    for target in panel.get('targets', []):
        if 'expr' not in target:
            continue
        queries.append(target['expr'].replace('${host}', host))
        # Not every target defines a legend format; fall back to an empty
        # label instead of raising KeyError.
        queries_label.append(target.get('legendFormat', ''))

    if queries:
        return queries, queries_label
    return None

def extract_data_and_stats_from_panel(grafana_url, dashboard_uid, delta_time, host, input_dir, output_csv_file, timeout=60):
    """Query every panel of the given dashboards and save the series as CSV.

    For each dashboard UID the panels are fetched, each panel query is sent
    to the ``query_range`` endpoint behind Grafana's data-source proxy
    (presumably a Prometheus-compatible source -- confirm), and the first
    series of every answer becomes one CSV column. The first column written
    is 'Timestamp'.

    Args:
        grafana_url: Base URL of the Grafana server.
        dashboard_uid: Iterable of dashboard UIDs.
        delta_time: ``[start, end]`` strings accepted by get_unix_timestamp.
        host: Host name substituted into the panel queries.
        input_dir: Directory that receives the CSV file.
        output_csv_file: The file is written as
            ``<input_dir>/grafana-<output_csv_file>.csv``. Every dashboard in
            *dashboard_uid* writes to this same path.
        timeout: Seconds to wait for each query before giving up.

    Returns:
        None. Errors are reported with ``print``; a failed dashboard fetch or
        a non-200 query response aborts the whole function.
    """
    url = '{}/api/datasources/proxy/1/api/v1/query_range'.format(grafana_url)
    start_timestamp = get_unix_timestamp(delta_time[0])
    end_timestamp = get_unix_timestamp(delta_time[1])

    for dashboard_uid_to_use in dashboard_uid:
        panels_data = fetch_grafana_panels(grafana_url, dashboard_uid_to_use)
        if not panels_data:
            print('Error in extract_data_and_stats_from_panel: Failed to fetch dashboard panels data.')
            return

        all_dataframes = []

        for panel in panels_data:
            title = panel.get('title', '')
            if 'targets' not in panel:
                print('Skipping panel ', title, ' with no targets.')
                continue

            # get_query_urls returns None (not a tuple) when the panel has no
            # usable query, so check before unpacking.
            query_info = get_query_urls(panel, host)
            if not query_info or not query_info[0]:
                print('Skipping panel ', title, ' with no valid query URL.')
                continue
            query_urls, queries_label = query_info

            for query_url, query_label in zip(query_urls, queries_label):
                column_name = '{} {}'.format(query_label, title)
                data = {
                    'query': query_url,
                    'start': start_timestamp,
                    'end': end_timestamp,
                    'step': 2
                }

                response = requests.post(url, data=data, timeout=timeout)

                # Check the status first: an error page is not guaranteed to
                # be valid JSON.
                if response.status_code != 200:
                    print('Error: Failed to fetch dashboard data. Status code:content ', response.status_code, ':', response.content)
                    return None

                try:
                    response_data = response.json()
                except ValueError:
                    print('Skipping query with no valid response in panel: ', title)
                    continue

                if 'data' not in response_data or 'resultType' not in response_data['data'] or response_data['data']['resultType'] != 'matrix':
                    print('Skipping query with no valid response in panel: ', title)
                    continue

                # Only the first returned series is used; an empty result
                # list must not raise IndexError.
                results = response_data['data'].get('result') or []
                values = results[0].get('values', []) if results else []
                if not values:
                    print('Skipping query with no valid response in panel: ', title)
                    continue

                if not all_dataframes:
                    # The first series that actually returned data carries
                    # the Timestamp column, even if earlier panels or
                    # queries were skipped.
                    df_tmp = pd.DataFrame(values, columns=['Timestamp', column_name])
                    df_tmp['Timestamp'] = pd.to_datetime(df_tmp['Timestamp'], unit='s')
                else:
                    df_tmp = pd.DataFrame([row[1:] for row in values], columns=[column_name])

                all_dataframes.append(df_tmp)

        if not all_dataframes:
            # pd.concat raises on an empty list; report and move on instead.
            print('Error in extract_data_and_stats_from_panel: No data returned for dashboard ', dashboard_uid_to_use)
            continue

        # Combine all dataframes into a single dataframe
        combined_df = pd.concat(all_dataframes, axis=1)

        # Save the combined dataframe as a CSV file
        output = '{}/grafana-{}.csv'.format(input_dir, output_csv_file)
        try:
            combined_df.to_csv(output, index=False)
            print('Data saved to CSV successfully:', output)
        except Exception as e:
            print('Exception Error: Failed to save data to CSV:', str(e))

def uprof_pcm_formatter(input_dir, file):
    """Rewrite a uProf CSV into ``reformatter_<file>.csv`` with absolute times.

    The input is scanned line by line:

    * the 'Profile Time:' line provides the start date/time (expected as
      'YYYY/MM/DD HH:MM:SS:mmm' right after the 14-character prefix);
    * the 'Package' and 'Timestamp' header lines are merged into one header
      whose column names carry a ' Socket<N>' suffix, followed by three
      extra columns: 'L2 Hit Ratio Socket0', 'L2 Hit Ratio Socket1' and
      'CPU Utilization';
    * every data line starting with an 'HH:MM:SS:mmm' stamp gets that stamp
      added to the start time and replaced by 'YYYY-MM-DD HH:MM:SS', and the
      three extra values are appended.

    NOTE(review): the extra values are computed from fixed column positions
    (1, 15, 19, 22, 37, 41). The code treats columns 19/41 as L2 hits and
    15/37 as L2 misses of socket 0/1 -- confirm against the uProf layout.

    Args:
        input_dir: Directory containing the input file; also receives the
            output file.
        file: Input file name without the '.csv' extension.
    """
    # Local import: the file only imports ``datetime.datetime`` (as ``dt``).
    from datetime import timedelta

    def _hit_ratio_percent(hits, misses):
        # hits / (hits + misses) in percent, rounded to 2 decimals, as text.
        # 'nan' when there was no access at all (would divide by zero).
        total = hits + misses
        if total == 0:
            return 'nan'
        return str(round(hits / total * 100, 2))

    newfile = 'reformatter_{}'.format(file)
    elapsed_pattern = re.compile('..:..:..:...,')

    # 'with' guarantees both files are closed even if a line fails to parse.
    with open('{}/{}.csv'.format(input_dir, file), 'r') as f, \
            open('{}/{}.csv'.format(input_dir, newfile), 'w') as f_new:
        for line in f:
            # extract initial time
            if 'Profile Time:' in line:
                full_date = line[14:-1].replace('/', '-')
                start_time = dt(int(full_date[0:4]), int(full_date[5:7]), int(full_date[8:10]),
                                int(full_date[11:13]), int(full_date[14:16]), int(full_date[17:19]),
                                int(full_date[20:23]) * 1000)

            # append package numbers to headers,
            # and add headers for l2 cache hit ratio
            if 'Package' in line:
                header1 = line.split(',')
            if 'Timestamp' in line:
                header2 = line.split(',')[1:]

                package_num = '0'
                header_new = ['Timestamp']
                for package, header in zip(header1, header2):
                    if (package == '\n') or (header == '\n'):
                        # End of the header row reached: add the derived
                        # columns and emit the header.
                        header_new += ['L2 Hit Ratio Socket0', 'L2 Hit Ratio Socket1', 'CPU Utilization', '\n']
                        f_new.write(','.join(header_new))
                    if 'Package' in package:
                        package_num = package[-1]
                    header_new += [header + ' Socket' + package_num]

            # generate full timestamps
            if elapsed_pattern.search(line):
                elapsed = timedelta(hours=int(line[0:2]), minutes=int(line[3:5]),
                                    seconds=int(line[6:8]), milliseconds=int(line[9:12]))
                # datetime arithmetic handles every carry-over, including a
                # run that crosses the end of a month.
                date_n = (start_time + elapsed).strftime('%Y-%m-%d %H:%M:%S')
                line_n = re.sub('..:..:..:...', date_n, line)

                # calculate L2 Hit ratio
                line_list = line_n.split(',')
                l2_hit_ratio_0 = _hit_ratio_percent(float(line_list[19]), float(line_list[15]))
                l2_hit_ratio_1 = _hit_ratio_percent(float(line_list[41]), float(line_list[37]))

                # CPU Utilization
                cpu_utiliz = float(line_list[1]) + float(line_list[22])
                cpu_utiliz = str(round(cpu_utiliz, 2))

                # The last element is the trailing newline; it is replaced by
                # the first derived value and re-appended at the end.
                line_list[-1] = l2_hit_ratio_0
                line_list.append(l2_hit_ratio_1)
                line_list.append(cpu_utiliz)
                line_list.append('\n')
                f_new.write(','.join(line_list))
    
def uprof_timechart_formatter(input_dir, file):
    """Rewrite a uProf timechart CSV into ``reformatter_<file>.csv``.

    Only the header line (the one containing 'Timestamp') and the data lines
    after it are copied. In every data line the time stamp in the second
    column is replaced by 'YYYY-MM-DD HH:MM:SS', using the date taken from
    the 'Profile Start Time:' line.

    NOTE(review): the date is sliced as '<Mon> <DD> <YYYY>' from the second
    comma-separated field of the 'Profile Start Time:' line -- confirm
    against a real timechart file.

    Args:
        input_dir: Directory containing the input file; also receives the
            output file.
        file: Input file name without the '.csv' extension.
    """
    newfile = 'reformatter_{}'.format(file)

    # 'with' guarantees both files are closed even if a line fails to parse.
    with open('{}/{}.csv'.format(input_dir, file), 'r') as f, \
            open('{}/{}.csv'.format(input_dir, newfile), 'w') as f_new:
        header = True
        for line in f:
            # get & reformat full date
            if 'Profile Start Time:' in line:
                full_date = line.split(',')[1]
                month = month2num(full_date[0:3])
                date = int(full_date[4:6])
                year = int(full_date[7:11])
                full_date_new = '{year}-{month:02d}-{date:02d}'.format(year=year, month=month, date=date)

            # Reformat timestamps
            if not header:
                # A blank line (e.g. at the end of the file) has no
                # timestamp column to parse.
                if not line.strip():
                    continue
                timestamp_n = line.split(',')[1].split(':')
                hour_n = int(timestamp_n[0])
                min_n = int(timestamp_n[1])
                sec_n = int(timestamp_n[2])
                date_n = ',{year_month_day} {hour:02d}:{min:02d}:{sec:02d},'.format(year_month_day=full_date_new, hour=hour_n, min=min_n, sec=sec_n)

                line_n = re.sub(',.*:.*:.*:...,', date_n, line)
                f_new.write(line_n)

            # header=False indicates next line is data
            if 'Timestamp' in line:
                header = False
                f_new.write(line)

def month2num(month_str):
    """Map a three-letter English month abbreviation to its number (1-12).

    Prints 'Warning: invalid month' and returns ``None`` for any other
    value. The comparison is case-sensitive.
    """
    abbreviations = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                     'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if month_str in abbreviations:
        return abbreviations.index(month_str) + 1
    print('Warning: invalid month')

def combine_time_and_uprof_files(input_dir, time_file, uprof_file):
    """Outer-merge the reformatted timechart CSV into the reformatted uProf CSV.

    Both ``reformatter_<time_file>.csv`` and ``reformatter_<uprof_file>.csv``
    are read from *input_dir* and merged on the columns they have in common
    (pandas' default merge keys). The result overwrites
    ``reformatter_<uprof_file>.csv``. A failure while saving is printed, not
    raised.
    """
    path_template = '{}/reformatter_{}.csv'
    timechart_path = path_template.format(input_dir, time_file)
    uprof_path = path_template.format(input_dir, uprof_file)

    timechart_frame = pd.read_csv(timechart_path)
    uprof_frame = pd.read_csv(uprof_path)
    merged = pd.merge(timechart_frame, uprof_frame, how='outer')

    # The merged table replaces the uProf file in place.
    try:
        merged.to_csv(uprof_path, index=False)
        print('Data saved to CSV successfully:', uprof_path)
    except Exception as e:
        print('Error in combine_time_and_uprof_files: Failed to save data to CSV:', str(e))
    
def add_new_time_format(input_dir, file):
    """Add a 'NewTime' column (minutes since the first row) to a CSV in place.

    ``<input_dir>/<file>.csv`` must have a 'Timestamp' column formatted as
    'YYYY-MM-DD HH:MM:SS'. 'NewTime' is inserted as the first column and the
    file is overwritten. Calling the function again on an already processed
    file recomputes the column instead of adding a second one.

    Args:
        input_dir: Directory containing the file.
        file: File name without the '.csv' extension.

    Raises:
        ValueError: If a timestamp does not match the expected format.
    """
    path = '{}/{}.csv'.format(input_dir, file)
    data_frame = pd.read_csv(path)

    # Add new time format
    time_format = '%Y-%m-%d %H:%M:%S'
    stamps = [dt.strptime(value, time_format) for value in data_frame['Timestamp']]
    # An empty file simply yields an empty column.
    start = stamps[0] if stamps else None
    newtime = [(stamp - start).total_seconds() / 60. for stamp in stamps]

    # Drop the column left over from a previous run so that re-processing
    # does not pile up duplicate 'NewTime' columns.
    if 'NewTime' in data_frame.columns:
        data_frame = data_frame.drop(columns='NewTime')

    data_frame.insert(0, 'NewTime', newtime)
    data_frame.to_csv(path, index=False)
    
def get_column_val(df, columns, labels, file):
    """Extract (and rescale) columns of *df* together with their plot labels.

    *columns* and *labels* are walked in lockstep. 'NewTime' and 'Timestamp'
    are skipped. Every other column contributes one list of values and one
    label built from the 6th and 3rd dash-separated parts of *file*; except
    for the two CPU-utilisation columns the entry of *labels* is appended to
    the label as well.

    Rescaling applied per column name:
        * 'L3 Miss Socket*'                          -> divided by 1e9
        * 'Socket* Memory Bandwidth'                 -> divided by 1000
        * 'L2 Hit Ratio Socket*'                     -> divided by 1
        * 'Socket* L2 Cache Misses Per Instruction'  -> multiplied by 100
        * CPU utilisation, cache-miss count and '(pti)' columns
                                                     -> multiplied by 1
        * anything else                              -> taken as is

    Returns:
        tuple: ``(val, label)`` -- list of value lists and list of labels.
    """
    val = []
    label = []
    info = break_file_name(file)

    skipped = ('NewTime', 'Timestamp')
    socket_independent = ('C0 Core C-state residency', 'CPU Utilization')
    # (column names, rescaling) pairs; the first matching entry wins.
    rescaling_rules = (
        (('Socket0 L3 Cache Misses Per Instruction', 'Socket1 L3 Cache Misses Per Instruction',
          'L2 Miss (pti) Socket0', 'L2 Miss (pti) Socket1'),
         lambda series: series.mul(1)),
        (('Socket0 L2 Cache Misses', 'Socket1 L2 Cache Misses',
          'Socket0 L3 Cache Misses', 'Socket1 L3 Cache Misses'),
         lambda series: series.mul(1)),
        (('L3 Miss Socket0', 'L3 Miss Socket1'),
         lambda series: series.div(1000000000)),
        (('Socket0 Memory Bandwidth', 'Socket1 Memory Bandwidth'),
         lambda series: series.div(1000)),
        (('L2 Hit Ratio Socket0', 'L2 Hit Ratio Socket1'),
         lambda series: series.div(1)),
        (('Socket0 L2 Cache Misses Per Instruction', 'Socket1 L2 Cache Misses Per Instruction'),
         lambda series: series.mul(100)),
    )

    for column_name, socket_label in zip(columns, labels):
        if column_name in skipped:
            continue

        if column_name in socket_independent:
            # One value for the whole machine: no socket suffix in the label.
            val.append(df[column_name].mul(1).values.tolist())
            label.append('{} {}'.format(info[5], info[2]))
            continue

        series = df[column_name]
        for names, rescale in rescaling_rules:
            if column_name in names:
                series = rescale(series)
                break
        val.append(series.values.tolist())
        label.append('{} {} {}'.format(info[5], info[2] , socket_label))

    return val, label

def plot_vars_comparison(input_dir, output_dir, pcm_file, uprof_file):
    """Plot the PCM and uProf metrics of every test into one comparison figure.

    *pcm_file* and *uprof_file* are paired by position. For each pair the
    PCM CSV and the matching ``reformatter_<uprof>`` CSV are read from
    *input_dir*; both must contain a 'NewTime' column, which is used as the
    x axis. One subplot is drawn per metric (see pcm_columns_list /
    uprof_columns_list / label_names).

    The figure is saved as
    ``<output_dir>/performance_comparison_<v>_<f>.png`` where ``<v>`` and
    ``<f>`` are the 2nd and 5th dash-separated parts of the last PCM file
    name.

    Raises:
        ValueError: If there is no (PCM, uProf) file pair to plot.
    """
    file_pairs = list(zip(pcm_file, uprof_file))
    if not file_pairs:
        raise ValueError('plot_vars_comparison: no PCM/uProf file pairs to plot')

    X_pcm_plot = []
    X_uprof_plot = []
    Y_pcm_plot = []
    Y_uprof_plot = []
    label_pcm_plot = []
    label_uprof_plot = []

    for file_pcm_i, file_uprof_i in file_pairs:
        refor_uprof_file = 'reformatter_{}'.format(file_uprof_i)

        data_frame0 = pd.read_csv('{}/{}.csv'.format(input_dir, file_pcm_i))
        data_frame1 = pd.read_csv('{}/{}.csv'.format(input_dir, refor_uprof_file))

        X_pcm_plot.append(data_frame0['NewTime'].values.tolist())
        X_uprof_plot.append(data_frame1['NewTime'].values.tolist())

        Y_pcm_tmp = []
        Y_uprof_tmp = []
        label_pcm_tmp = []
        label_uprof_tmp = []

        for columns_pcm, columns_uprof in zip(pcm_columns_list, uprof_columns_list):
            Y_pcm, label_pcm = get_column_val(data_frame0, columns_pcm, label_columns, file_pcm_i)
            Y_pcm_tmp.append(Y_pcm)
            label_pcm_tmp.append(label_pcm)

            Y_uprof, label_uprof = get_column_val(data_frame1, columns_uprof, label_columns, refor_uprof_file)
            Y_uprof_tmp.append(Y_uprof)
            label_uprof_tmp.append(label_uprof)

        Y_pcm_plot.append(Y_pcm_tmp)
        Y_uprof_plot.append(Y_uprof_tmp)
        label_pcm_plot.append(label_pcm_tmp)
        label_uprof_plot.append(label_uprof_tmp)

    # The output name is derived from the last PCM file, stated explicitly
    # instead of relying on a variable left over from the loop.
    info_pcm = break_file_name(file_pairs[-1][0])

    # Here we make the plot:
    matplotlib.rcParams['font.family'] = 'DejaVu Serif'
    fig, axs = plt.subplots(5, 1, figsize=(14,16))
    plt.rcParams['axes.grid'] = True
    plt.style.use('default')
    axs = axs.flatten()

    n_colors = len(color_list)
    n_styles = len(linestyle_list)

    for i in range(len(Y_pcm_plot)):
        # Cycle through the available line styles so that more tests than
        # styles do not raise IndexError.
        linestyle = linestyle_list[i % n_styles]
        for j in range(len(Y_pcm_plot[i])):
            for k in range(len(Y_pcm_plot[i][j])):
                # PCM series take colours from the front of the palette,
                # uProf series from the back; k is the series (socket) index.
                axs[j].plot(X_pcm_plot[i], Y_pcm_plot[i][j][k], color=color_list[k], label=label_pcm_plot[i][j][k], linestyle=linestyle)
                axs[j].plot(X_uprof_plot[i], Y_uprof_plot[i][j][k], color=color_list[n_colors - k - 1], label=label_uprof_plot[i][j][k], linestyle=linestyle)

            # Axis decoration is needed once per subplot, not once per line.
            if Y_pcm_plot[i][j]:
                axs[j].set_ylabel('{}'.format(label_names[j]))
                axs[j].set_xlabel('Time (min)')
                axs[j].legend(loc='center left', bbox_to_anchor=(1, 0.5))

    plt.tight_layout()
    plt.savefig('{}/performance_comparison_{}_{}.png'.format(output_dir, info_pcm[1], info_pcm[4]))
    plt.close()
    
def break_file_name(file):
    """Split a file name on '-' and return the list of parts."""
    return file.split('-')

def check_OS(server):
    """Return 'Alma9' for servers listed in alma9_os, 'Centos8' otherwise."""
    return 'Alma9' if server in alma9_os else 'Centos8'
    
def json_info(file, input_dir, var, pdf, if_cpupins=False, if_pdf=False):
    """Write selected sections of ``<input_dir>/<file>.json`` into *pdf*.

    A separator line naming *file* is always written first, before the JSON
    file is opened.

    Args:
        file: JSON file name without the '.json' extension.
        input_dir: Directory containing the file.
        var: Only used when *if_cpupins* is true: the entry
            ``data['daq_application']['--name <var>']`` is reported.
        pdf: Object with a ``write(height, text)`` method.
        if_cpupins: True to report the entry selected by *var*; False to
            report the 'readout' and 'hsi' sections.
        if_pdf: The sections are written to *pdf* only when this is true.
            They are looked up and serialised either way, so a missing key
            raises KeyError regardless.
    """
    pdf.write(5, '====================== {} ====================== \n'.format(file))

    with open('{}/{}.json'.format(input_dir, file), 'r') as f:
        data = json.load(f)

    def emit(title, payload):
        # Serialise first; nothing is written if serialisation fails.
        text = json.dumps(payload, skipkeys = True, allow_nan = True)
        if if_pdf:
            pdf.write(5, title)
            pdf.write(5, '{} \n'.format(text))

    if if_cpupins:
        emit('readout: {} \n'.format(var), data['daq_application']['--name {}'.format(var)])
        return

    for section in ('readout', 'hsi'):
        emit('{} \n'.format(section), data[section])
                    
def create_report_performance(input_dir, output_dir, pcm_file, time_file, uprof_file, daqconfs_cpupins_folder_parent_dir, process_pcm_files=False, process_uprof_files=False, print_info=True):    
    """Build ``<output_dir>/performance_report.pdf`` for a set of tests.

    Steps:
        1. Create *input_dir* and *output_dir* if needed.
        2. Optionally pre-process the raw CSV files (see the two flags).
        3. Write a short description of every (PCM, uProf) test pair.
        4. Draw the comparison figure and embed it.
        5. Append the daqconf / cpupin JSON information of every test.

    Args:
        input_dir: Directory with the CSV files.
        output_dir: Directory receiving the figure and the PDF.
        pcm_file: PCM ('grafana-*') file names without extension.
        time_file: uProf timechart file names without extension.
        uprof_file: uProf file names without extension.
        daqconfs_cpupins_folder_parent_dir: Directory containing the
            'daqconfs' and 'cpupins' sub-directories with the JSON files.
        process_pcm_files: Add the 'NewTime' column to every PCM file.
        process_uprof_files: Reformat and merge every (timechart, uProf)
            pair and add its 'NewTime' column.
        print_info: Passed to json_info as ``if_pdf``.

    Raises:
        ValueError: If *pcm_file* and *uprof_file* do not yield at least one
            pair to report on.
    """
    directory([input_dir, output_dir])
    current_dnt = dt.now().strftime('%Y-%m-%d %H:%M:%S')

    # Open pdf file
    pdf = FPDF()
    pdf.add_page()
    pdf.set_font('Arial', 'B', 16)
    pdf.cell(40,10,'Performance Report')
    pdf.write(5,'\n\n')

    # Processing the data first. The PCM files are handled in their own loop:
    # zipping them with time_file would silently skip them whenever
    # time_file is shorter (or empty).
    if process_pcm_files:
        for file_pcm_i in pcm_file:
            add_new_time_format(input_dir, file_pcm_i)
    if process_uprof_files:
        for file_time_i, file_uprof_i in zip(time_file, uprof_file):
            uprof_timechart_formatter(input_dir, file_time_i)
            uprof_pcm_formatter(input_dir, file_uprof_i)
            combine_time_and_uprof_files(input_dir, file_time_i, file_uprof_i)
            add_new_time_format(input_dir, 'reformatter_{}'.format(file_uprof_i))

    test_pairs = list(zip(pcm_file, uprof_file))
    if not test_pairs:
        raise ValueError('create_report_performance: no PCM/uProf file pairs to report on')

    info_pcm_basic = break_file_name(pcm_file[0])

    pdf.set_font('Arial', '', 8)
    pdf.write(5, 'dunedaq verison: fddaq-{} \n'.format(info_pcm_basic[1]))
    pdf.write(5, 'data format: WIB{} \n'.format(info_pcm_basic[4]))
    pdf.write(5, 'These tests where run for 8, 16, 24, 32, 40, and 48 streams. \n')

    for file_pcm_i, file_uprof_i in test_pairs:
        info_pcm = break_file_name(file_pcm_i)
        info_uprof = break_file_name(file_uprof_i)

        pdf.write(5, 'The test {} made were: \n'.format(info_pcm[5]))
        pdf.write(5, '- Running the readout app on server {} ({}) using NUMA NODE {} and running the rest of the apps on server {}. \n'.format(info_pcm[2], check_OS(info_pcm[2]), info_pcm[3], info_pcm[6]))
        pdf.write(5, '- Running the readout app on server {} ({}) using NUMA NODE {} and running the rest of the apps on server {}. \n'.format(info_uprof[2], check_OS(info_uprof[2]), info_uprof[3], info_uprof[6]))

    pdf.write(5,'\n')
    plot_vars_comparison(input_dir, output_dir, pcm_file, uprof_file)
    # plot_vars_comparison names the figure after the last PCM file of the
    # pairs; rebuild the same name here.
    info_figure = break_file_name(test_pairs[-1][0])
    pdf.image('{}/performance_comparison_{}_{}.png'.format(output_dir, info_figure[1], info_figure[4]), w=180)
    pdf.write(5,'The figure shows the comparison of the tests ran (mentioned before) using the metrics CPU Utilization (%), Memory Bandwidth (GB/sec), L2 Cache Misses (Million), L3 Cache Misses (Million), CPU Power Consumption (Watt).')
    pdf.write(5,'\n\n')

    daqconfs_dir = '{}/daqconfs'.format(daqconfs_cpupins_folder_parent_dir)
    cpupins_dir = '{}/cpupins'.format(daqconfs_cpupins_folder_parent_dir)

    for file_pcm_i, file_uprof_i in test_pairs:
        info_pcm = break_file_name(file_pcm_i)
        info_uprof = break_file_name(file_uprof_i)

        json_info(file='daqconf-{}-{}-{}'.format(info_pcm[4], info_pcm[5], info_pcm[2]), input_dir=daqconfs_dir, var=' ', pdf=pdf, if_cpupins=False, if_pdf=print_info)
        json_info(file='daqconf-{}-{}-{}'.format(info_uprof[4], info_uprof[5], info_uprof[2]), input_dir=daqconfs_dir, var=' ', pdf=pdf, if_cpupins=False, if_pdf=print_info)
        json_info(file='cpupin-{}-{}-{}'.format(info_pcm[4], info_pcm[5], info_pcm[2]), input_dir=cpupins_dir, var='ru{}{}{}'.format(info_pcm[2], info_pcm[4], info_pcm[3]), pdf=pdf, if_cpupins=True, if_pdf=print_info)
        json_info(file='cpupin-{}-{}-{}'.format(info_uprof[4], info_uprof[5], info_uprof[2]), input_dir=cpupins_dir, var='ru{}{}{}'.format(info_uprof[2], info_uprof[4], info_uprof[3]), pdf=pdf, if_cpupins=True, if_pdf=print_info)

    pdf.write(10,'\n\n\n')
    pdf.write(5,'The End, made on {}'.format(current_dnt))
    pdf.output('{}/performance_report.pdf'.format(output_dir))

# Printed once all imports and definitions of this cell have been executed.
print('Ready to run and process')

Ready to run and process


# Processing data from Grafana and UPROF
Note: change the paths to fit yours

To extract the data from a given dashboard in grafana:
* 'grafana_url' is:
    * 'http://np04-srv-009.cern.ch:3000'  (legacy)
    * 'http://np04-srv-017.cern.ch:31023' (new) — not working for now; use the legacy URL
* 'dashboard_uid' is the unique dashboard identifier. You can find it in the web link of the dashboard: it is the code that follows /d/ in the URL (.../d/<dashboard_uid>/...).
    * for intel-r-performance-counter-monitor-intel-r-pcm dashboard dashboard_uid = '91zWmJEVk' 
* delta_time is [start, end] given in the format '%Y-%m-%d %H:%M:%S'
* host is the name of the server under study, for example: "np02-srv-003"

file_name: [version]-[server_app_tested]-[numa node]-[data format]-[tests_name]-[server rest_of the apps]
* example of name: v4_1_1-np02srv003-0-eth-stream_scaling-np04srv003

In [66]:
# Grafana server and dashboard(s) to read from.
grafana_url = 'http://np04-srv-009.cern.ch:3000' 
dashboard_uid = ['91zWmJEVk']
# Host name substituted for '${host}' in the panel queries.
host_used = 'np02-srv-003'  
# One [start, end] window per test; paired by position with output_csv_file.
delta_time = [['2023-09-19 14:30:10', '2023-09-19 15:41:02'],['2023-09-19 15:49:33', '2023-09-19 17:01:30']]
# Output names; each is saved as '<results_path>/grafana-<name>.csv'.
output_csv_file = ['NFD23_09_12-np02srv003-0-eth-stream_scaling-np04srv003', 'NFD23_09_12-np02srv003-0-eth-stream_scaling_recording-np04srv003']
results_path = '/eos/home-d/dvargas/SWAN_projects/files_servers_performance'

# Extract one CSV file per time window.
for delta_time_list, output_csv_file_list in zip(delta_time, output_csv_file):
    extract_data_and_stats_from_panel(grafana_url, dashboard_uid, delta_time=delta_time_list, host=host_used, input_dir=results_path, output_csv_file=output_csv_file_list)
    
print('done :-)')

Data saved to CSV successfully: /eos/home-d/dvargas/SWAN_projects/files_servers_performance/grafana-NFD23_09_12-np02srv003-0-eth-stream_scaling-np04srv003.csv
Data saved to CSV successfully: /eos/home-d/dvargas/SWAN_projects/files_servers_performance/grafana-NFD23_09_12-np02srv003-0-eth-stream_scaling_recording-np04srv003.csv
done :-)


# Performance report
Note: change the paths to fit yours

In [67]:
# Directory with the CSV inputs, directory for the figure/PDF outputs, and the
# parent directory of the 'daqconfs' and 'cpupins' JSON folders.
results_path = '/eos/home-d/dvargas/SWAN_projects/files_servers_performance'
report_path = '/eos/home-d/dvargas/SWAN_projects/reports'
performancetest_path = '/eos/home-d/dvargas/UtilsDune/files_for_v4_perf_tests'
# Sort the files found in results_path into PCM, uProf and timechart lists.
pcm_list, uprof_list, time_list, reformated_uprof_list, reformated_time_list = make_name_list(results_path)

# Only the PCM files are pre-processed here (process_uprof_files=False), so the
# 'reformatter_uprof-*' files must already exist in results_path.
create_report_performance(input_dir=results_path, output_dir=report_path, pcm_file=pcm_list, time_file=time_list, uprof_file=uprof_list, daqconfs_cpupins_folder_parent_dir=performancetest_path, process_pcm_files=True, process_uprof_files=False, print_info=True)

print('THE END')

THE END
