# FAH GPU performance analysis example

**DISCLAIMER: the GPU benchmark data in this repo is experimental and not final. For now, the code and data presented here is intended as an example of parsing performance data from fah-core logs, and of the types of analysis that might be useful to automate intelligent assignment of work units to specific devices.**

Here we analyze performance data (`ns_per_day`) parsed from log files created by `fah-core` for the project `PROJ17113` and `PROJ17111` . This project is intended to be a benchmark suite and contains runs representative of the range of workloads run on FAH today.

In [13]:
import glob
import os
import altair as alt
import numpy as np
import pandas as pd
import seaborn as sns
from fah_log_parser import ParseError, parse, science_log
from fah_log_parser.util.pandas import parse_project_logs

## Parsing a single `science.log` file

In [14]:
log = parse(science_log, "../PROJ17113/RUN0/CLONE0/results0/science.log") # linux 


In [15]:
log = parse(science_log, "../PROJ17113/RUN12/CLONE172/results0/science.log")  # windows

In [16]:
log = parse(science_log, "../PROJ17110/RUN0/CLONE0/results0/science.log")  # older version of log

In [17]:
log.core_header.platform

'win32 10'

In [18]:
platform, device = log.get_active_device()

In [19]:
platform

PlatformInfo(profile='FULL_PROFILE', version='OpenCL 3.0 ', name='Intel(R) OpenCL HD Graphics', vendor='Intel(R) Corporation')

In [20]:
device

Device(name='Intel(R) UHD Graphics 630', vendor='Intel(R) Corporation', version='OpenCL 3.0 NEO ', driver_version='27.20.100.8935')

In [21]:
log.core_log.average_perf_ns_day

9.394

## Parsing all logs in a project

In [22]:
data = parse_project_logs("../PROJ17113/", sample=1000)
data.reset_index().to_feather("checkpoint")
    
data.info()


3201 files found in path ../PROJ17113/






<class 'pandas.core.frame.DataFrame'>
MultiIndex: 943 entries, (10, 132, 0) to (5, 121, 0)
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   os                     943 non-null    object 
 1   platform_name          943 non-null    object 
 2   platform_vendor        943 non-null    object 
 3   platform_version       943 non-null    object 
 4   device_name            943 non-null    object 
 5   device_vendor          943 non-null    object 
 6   device_version         943 non-null    object 
 7   device_driver_version  943 non-null    object 
 8   cuda_enabled           943 non-null    bool   
 9   perf_ns_per_day        942 non-null    float64
dtypes: bool(1), float64(1), object(8)
memory usage: 77.4+ KB


In [23]:
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,os,platform_name,platform_vendor,platform_version,device_name,device_vendor,device_version,device_driver_version,cuda_enabled,perf_ns_per_day
run,clone,gen,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
10,132,0,win32 10,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.1.98,NVIDIA GeForce RTX 4070 Ti,NVIDIA Corporation,OpenCL 3.0 CUDA,531.41,True,44.2214
5,17,0,linux 5.15.0-1034-azure,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.0.133,NVIDIA GeForce GTX 1080 Ti,NVIDIA Corporation,OpenCL 3.0 CUDA,525.78.01,True,63.6218
3,119,0,win32 10,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.0.151,NVIDIA RTX A4500,NVIDIA Corporation,OpenCL 3.0 CUDA,528.89,True,483.2050
12,93,0,linux 5.15.0-1034-azure,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.0.133,NVIDIA GeForce GTX 1080 Ti,NVIDIA Corporation,OpenCL 3.0 CUDA,525.78.01,True,187.1220
6,9,0,linux 5.15.0-1034-azure,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.0.147,NVIDIA GeForce GTX 970,NVIDIA Corporation,OpenCL 3.0 CUDA,525.89.02,True,44.5413
...,...,...,...,...,...,...,...,...,...,...,...,...
9,50,0,linux 5.15.0-1034-azure,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.0.133,NVIDIA GeForce GTX 1080 Ti,NVIDIA Corporation,OpenCL 3.0 CUDA,525.78.01,True,405.6220
8,167,0,win32 10,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 11.7.101,NVIDIA GeForce RTX 3060,NVIDIA Corporation,OpenCL 3.0 CUDA,516.94,True,42.3532
6,106,0,linux 5.15.0-1034-azure,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.0.133,NVIDIA GeForce GTX 1080 Ti,NVIDIA Corporation,OpenCL 3.0 CUDA,525.78.01,True,125.4900
4,166,0,win32 10,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 11.7.101,NVIDIA GeForce RTX 3060,NVIDIA Corporation,OpenCL 3.0 CUDA,516.94,True,64.3957


In [12]:
df = (data
 ['perf_ns_per_day']
 .groupby('device_name').filter(lambda x: len(x) > 10)
 .groupby('run').transform(lambda x: (x - x.mean()) / x.std()).rename('z_score_by_run')
 .groupby('device_name').mean()
).reset_index()

alt.Chart(df).mark_bar().encode(
    x=alt.X("z_score_by_run:Q", aggregate='mean'),
    y=alt.Y("device_name:N", sort='-x')
)

KeyError: 'device_name'

In [None]:
df = (data
 .set_index(['run', 'clone', 'gen', 'device_name'])
 ['perf_ns_per_day']
 .groupby(['run', 'device_name']).filter(lambda x: len(x) > 10)
 .groupby('device_name').filter(lambda x: len(x.groupby('run')) == 15)
 .groupby(['run', 'device_name']).mean()
 .rename('mean_ns_per_day')
).reset_index()

alt.Chart(df).mark_point(filled=True).encode(
    x=alt.X('mean_ns_per_day:Q', scale=alt.Scale(type='log')),
    y=alt.Y('device_name:O', sort='-x'),
    color='run:N',
)

In [None]:
sns.clustermap(
    data
    .groupby(['run', 'device_name']).filter(lambda x: len(x) > 10)
    .groupby('device_name').filter(lambda x: len(x.groupby('run')) == 15)
    .groupby(['run', 'device_name'])
    ['perf_ns_per_day']
    .mean()
    .pipe(lambda x: np.log(x))
    .unstack('run'))