In [24]:
BENCHMARK_TOOL = 'NVPROF' # or 'NVPROF'

import subprocess
import numpy as np
import pandas as pd
import io
from typing import Tuple

In [25]:
def read_nvprof(output: str) -> pd.DataFrame:
    csv = output.split('\\n')
    columns = csv[3].split('\",\"')
    data = csv[4:]

    return pd.read_csv(io.StringIO('\n'.join(data)), names=columns)

    
def read_ncu(output: str) -> pd.DataFrame:
    csv = output.split('\\n')
    columns = csv[2].split('\",\"')
    data = csv[4:]

    return pd.read_csv(io.StringIO('\n'.join(data)), names=columns)

def benchmark_kernel(exe_path: str, *args) -> str :
    if BENCHMARK_TOOL == 'NVPROF':
        return str(subprocess.check_output(
            ['nvprof', '--print-gpu-trace', '--csv', exe_path, *args], 
            stderr=subprocess.STDOUT
        ))
    elif BENCHMARK_TOOL == 'NCU':
        return str(subprocess.check_output(
            ['ncu','--csv', exe_path, *args], 
            stderr=subprocess.STDOUT, shell=True
        ))

def read_benchmark(output: str) -> pd.DataFrame:
    if BENCHMARK_TOOL == 'NVPROF':
        return read_nvprof(output)
    elif BENCHMARK_TOOL == 'NCU':
        return read_ncu(output)

def get_kernel_duration(df: pd.DataFrame) -> float:
    if BENCHMARK_TOOL == 'NVPROF':
        return float(df.loc[2, 'Duration']) * 1e-3
    elif BENCHMARK_TOOL == 'NCU':
        return float(df[df['Metric Name'] == 'Duration']['Metric Value'].to_numpy().item().replace(',','')) * 1e-6

In [26]:
arr = []
for i in range(0, 4):
    arr.append(
        get_kernel_duration(
            read_benchmark(benchmark_kernel("./hist", "-f", "./img/chateau.png", "-b", str(i)))
        ) # * 1e3
    )

print(arr) # En milliseconde

[0.025165049000000002, 0.006123537, 0.006125135, 0.006117359]


In [27]:
print("benchmark RGB To HSV:")
read_benchmark(benchmark_kernel("./hist", "-f", "./img/chateau.png", "-b", str(0))) 

banchmark RGB To HSV:


Unnamed: 0,"""Start",Duration,Grid X,Grid Y,Grid Z,Block X,Block Y,Block Z,Registers Per Thread,Static SMem,Dynamic SMem,Size,Throughput,SrcMemType,DstMemType,Device,Context,Stream,Name,"Correlation_ID"""
0,ms,ms,,,,,,,,B,B,MB,GB/s,,,,,,,
1,289.108082,0.156478,,,,,,,,,,1.759644,10.981748,Pageable,Device,NVIDIA Quadro P620 (0),1.0,7.0,[CUDA memcpy HtoD],118.0
2,289.277840,25.138172,32.0,1.0,1.0,1.0,1.0,1.0,19.0,0,0,,,,,NVIDIA Quadro P620 (0),1.0,7.0,"rgb2hsv(unsigned char const *, unsigned int, f...",119.0
3,',,,,,,,,,,,,,,,,,,,


In [28]:
print("benchmark égaliser l'histogramme:")
read_benchmark(benchmark_kernel("./hist", "-f", "./img/chateau.png", "-b", str(3))) 

banchmark égaliser l'histogramme:


Unnamed: 0,"""Start",Duration,Grid X,Grid Y,Grid Z,Block X,Block Y,Block Z,Registers Per Thread,Static SMem,Dynamic SMem,Size,Throughput,SrcMemType,DstMemType,Device,Context,Stream,Name,"Correlation_ID"""
0,ms,ms,,,,,,,,B,B,MB,GB/s,,,,,,,
1,288.981047,0.165407,,,,,,,,,,1.759644,10.388931,Pageable,Device,NVIDIA Quadro P620 (0),1.0,7.0,[CUDA memcpy HtoD],118.0
2,289.150870,8.008897,32.0,32.0,1.0,1.0,1.0,1.0,19.0,0,0,,,,,NVIDIA Quadro P620 (0),1.0,7.0,"rgb2hsv(unsigned char const *, unsigned int, f...",119.0
3,297.160343,8.204735,32.0,1.0,1.0,1.0,1.0,1.0,10.0,0,0,,,,,NVIDIA Quadro P620 (0),1.0,7.0,"histogram(float const *, unsigned int, unsigne...",120.0
4,305.366006,0.037344,32.0,1.0,1.0,1.0,1.0,1.0,28.0,0,0,,,,,NVIDIA Quadro P620 (0),1.0,7.0,"repart(unsigned int const *, unsigned int, uns...",121.0
5,305.403926,13.511733,32.0,1.0,1.0,1.0,1.0,1.0,12.0,0,0,,,,,NVIDIA Quadro P620 (0),1.0,7.0,"equalization(unsigned int const *, unsigned in...",122.0
6,',,,,,,,,,,,,,,,,,,,
