# FAH GPU performance analysis example

**DISCLAIMER: the GPU benchmark data in this repo is experimental and not final. For now, the code and data presented here is intended as an example of parsing performance data from fah-core logs, and of the types of analysis that might be useful to automate intelligent assignment of work units to specific devices.**

Here we analyze performance data (`ns_per_day`) parsed from log files created by `fah-core` for the project `PROJ17113` and `PROJ17111` . This project is intended to be a benchmark suite and contains runs representative of the range of workloads run on FAH today.

In [1]:
import glob
import os
import altair as alt
import numpy as np
import pandas as pd
import seaborn as sns
from fah_log_parser import ParseError, parse, science_log
from fah_log_parser.util.pandas import parse_project_logs

* 'allow_mutation' has been removed


## Parsing a single `science.log` file

In [2]:
log = parse(science_log, "../PROJ17113/RUN0/CLONE0/results0/science.log") # linux 


In [3]:
log = parse(science_log, "../PROJ17113/RUN12/CLONE172/results0/science.log")  # windows

In [None]:
log = parse(science_log, "../PROJ17111/RUN0/CLONE0/results0/science.log")  # older version of log

In [5]:
log.core_header.platform

'linux 5.11.0-1025-azure'

In [6]:
platform, device = log.get_active_device()

In [7]:
platform

PlatformInfo(profile='FULL_PROFILE', version='OpenCL 3.0 CUDA 11.5.103', name='NVIDIA CUDA', vendor='NVIDIA Corporation')

In [8]:
device

Device(name='NVIDIA GeForce GTX 980 Ti', vendor='NVIDIA Corporation', version='OpenCL 3.0 CUDA', driver_version='495.46')

In [9]:
log.core_log.average_perf_ns_day

1083.61

## Parsing all logs in a project

In [10]:
if os.path.exists("checkpoint"):
    data = pd.read_feather("checkpoint")
else:
    data = parse_project_logs("../PROJ17113/", sample=1000)
    data.reset_index().to_feather("checkpoint")
    
data.info()


3201 files found in path ../PROJ17113/






<class 'pandas.core.frame.DataFrame'>
MultiIndex: 925 entries, (7, 164, 0) to (9, 80, 0)
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   os                     925 non-null    object 
 1   platform_name          925 non-null    object 
 2   platform_vendor        925 non-null    object 
 3   platform_version       925 non-null    object 
 4   device_name            925 non-null    object 
 5   device_vendor          925 non-null    object 
 6   device_version         925 non-null    object 
 7   device_driver_version  925 non-null    object 
 8   cuda_enabled           925 non-null    bool   
 9   perf_ns_per_day        925 non-null    float64
dtypes: bool(1), float64(1), object(8)
memory usage: 76.0+ KB


In [11]:
data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,os,platform_name,platform_vendor,platform_version,device_name,device_vendor,device_version,device_driver_version,cuda_enabled,perf_ns_per_day
run,clone,gen,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
7,164,0,win32 10,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 11.7.101,NVIDIA GeForce RTX 3060,NVIDIA Corporation,OpenCL 3.0 CUDA,516.94,True,127.82900
8,195,0,linux 5.15.0-1034-azure,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.0.151,NVIDIA GeForce GTX 1070,NVIDIA Corporation,OpenCL 3.0 CUDA,525.105.17,True,35.72810
11,191,0,linux 5.15.0-1034-azure,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.0.151,NVIDIA GeForce GTX 1070,NVIDIA Corporation,OpenCL 3.0 CUDA,525.105.17,True,8.49244
12,74,0,linux 5.15.0-1034-azure,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.0.133,NVIDIA GeForce GTX 1080 Ti,NVIDIA Corporation,OpenCL 3.0 CUDA,525.78.01,True,188.69500
9,79,0,linux 5.15.0-1034-azure,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.0.151,NVIDIA GeForce GTX 970,NVIDIA Corporation,OpenCL 3.0 CUDA,525.105.17,True,244.06100
...,...,...,...,...,...,...,...,...,...,...,...,...
13,3,0,linux 5.15.0-1034-azure,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.0.133,NVIDIA GeForce GTX 1080 Ti,NVIDIA Corporation,OpenCL 3.0 CUDA,525.78.01,True,57.40890
11,55,0,linux 5.15.0-1034-azure,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.0.133,NVIDIA GeForce GTX 1080 Ti,NVIDIA Corporation,OpenCL 3.0 CUDA,525.78.01,True,12.92010
14,134,0,win32 10,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.1.98,NVIDIA GeForce RTX 4070 Ti,NVIDIA Corporation,OpenCL 3.0 CUDA,531.41,True,223.63800
11,108,0,linux 5.15.0-1034-azure,NVIDIA CUDA,NVIDIA Corporation,OpenCL 3.0 CUDA 12.0.133,NVIDIA GeForce GTX 1080 Ti,NVIDIA Corporation,OpenCL 3.0 CUDA,525.78.01,True,13.03150


In [13]:
df = (data
 ['perf_ns_per_day']
 .groupby('device_name').filter(lambda x: len(x) > 10)
 .groupby('run').transform(lambda x: (x - x.mean()) / x.std()).rename('z_score_by_run')
 .groupby('device_name').mean()
).reset_index()

alt.Chart(df).mark_bar().encode(
    x=alt.X("z_score_by_run:Q", aggregate='mean'),
    y=alt.Y("device_name:N", sort='-x')
)

KeyError: 'device_name'

In [14]:
df = (data
 .set_index(['run', 'clone', 'gen', 'device_name'])
 ['perf_ns_per_day']
 .groupby(['run', 'device_name']).filter(lambda x: len(x) > 10)
 .groupby('device_name').filter(lambda x: len(x.groupby('run')) == 15)
 .groupby(['run', 'device_name']).mean()
 .rename('mean_ns_per_day')
).reset_index()

alt.Chart(df).mark_point(filled=True).encode(
    x=alt.X('mean_ns_per_day:Q', scale=alt.Scale(type='log')),
    y=alt.Y('device_name:O', sort='-x'),
    color='run:N',
)

KeyError: "None of ['run', 'clone', 'gen'] are in the columns"

In [15]:
sns.clustermap(
    data
    .groupby(['run', 'device_name']).filter(lambda x: len(x) > 10)
    .groupby('device_name').filter(lambda x: len(x.groupby('run')) == 15)
    .groupby(['run', 'device_name'])
    ['perf_ns_per_day']
    .mean()
    .pipe(lambda x: np.log(x))
    .unstack('run'))

RuntimeError: clustermap requires scipy to be available