In [1]:
import altair as alt
import pandas as pd

# CUDA vs. OpenCL performance analysis example

First, generate parsed log dataset using

```bash
parse_fah_log dataframe --data-dir ../data --output ../data/benchmark_data PROJ17101 PROJ17102
```

In [2]:
feather_filename = '2020-09-25-benchmark-data.feather'
benchmark_dir = 'benchmark' # path to store benchmark output

In [3]:
import shutil
shutil.copyfile(f'../data/{feather_filename}', f'{benchmark_dir}/{feather_filename}')

'benchmark/2020-09-25-benchmark-data.feather'

In [4]:
data = pd.read_feather(f'../data/{feather_filename}').set_index(["project", "run", "clone", "gen"]).sort_index()
data.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 113638 entries, ('PROJ17101', 0, 0, 0) to ('PROJ17102', 16, 1368, 0)
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   os                     113638 non-null  object 
 1   platform_name          113638 non-null  object 
 2   platform_vendor        113638 non-null  object 
 3   platform_version       113638 non-null  object 
 4   device_name            113638 non-null  object 
 5   device_vendor          113638 non-null  object 
 6   device_version         113638 non-null  object 
 7   device_driver_version  21954 non-null   object 
 8   cuda_enabled           113638 non-null  bool   
 9   perf_ns_per_day        112130 non-null  float64
dtypes: bool(1), float64(1), object(8)
memory usage: 8.8+ MB


In [5]:
device_name = "GeForce RTX 2080 Ti"

df = (data
      .pipe(lambda df: df[df["device_name"] == device_name])
      .assign(platform=lambda df: df["cuda_enabled"].replace({True: "CUDA", False: "OpenCL"}))
      .sample(5000)
     ).reset_index()

bars = alt.Chart().mark_bar().encode(
    x="platform:N",
    y=alt.Y("max(perf_ns_per_day):Q", title="max ns/day"),
    color='platform:N',
)

error_bars = alt.Chart().mark_errorbar(extent='ci').encode(
    x=alt.X("platform:N", title=None),
    y=alt.Y("perf_ns_per_day:Q", title=None)
)

alt.layer(bars, error_bars, data=df).facet(
    column="run:O",
    title=device_name
)

In [6]:
# List the 50 most "popular" cards (for BETA)

In [7]:
devices = data["device_name"].value_counts()
top_devices = devices[:50]
print(top_devices)

GeForce RTX 2080 Ti       10422
Tesla V100-SXM2-16GB       9729
A100-SXM4-40GB             6356
GeForce GTX 1070           5851
GeForce GTX 1080 Ti        5832
Hainan                     5078
GeForce GTX 1080           4347
Ellesmere                  3933
Tesla K80                  3929
GeForce RTX 2060 SUPER     3426
GeForce RTX 2070 SUPER     3189
GeForce RTX 2080 SUPER     3153
Tesla M60                  3148
GeForce GTX 970            2754
GeForce RTX 2080           2666
GeForce RTX 2060           2269
Capeverde                  2107
GeForce RTX 2070           2078
Tesla T4                   2047
GeForce GTX 1060 6GB       2026
gfx1010                    1701
GeForce GTX 1050 Ti        1255
GeForce GTX 1660 Ti        1150
GeForce GTX 980            1144
Tesla V100-SXM2-32GB       1138
GeForce GTX 1070 Ti        1058
gfx900                     1045
GeForce GTX 1060 3GB        966
GeForce GTX 1660 SUPER      961
GeForce GTX 980 Ti          906
gfx906                      820
GeForce 

In [8]:
# Generate HTML files for each GPU

In [9]:
import os
from rich.progress import track

benchmark_dir = 'benchmark' # path to store benchmark output

devices = data["device_name"].value_counts()
for device_name in track(devices.keys(), description="Generating plots for all GPUs..."):
    # Create plot
    df = data[data["device_name"] == device_name].reset_index()

    bars = alt.Chart().mark_bar().encode(
        x="cuda_enabled:N",
        y=alt.Y("mean(perf_ns_per_day):Q", title="mean ns/day"),
        color='cuda_enabled:N',
    )

    error_bars = alt.Chart().mark_errorbar(extent='ci').encode(
        x=alt.X("cuda_enabled:N", title="cuda"),
        y=alt.Y("perf_ns_per_day:Q", title="")
    )

    chart = alt.layer(bars, error_bars, data=df).facet(
        column="run:O",
        title=device_name
    )

    html_filename = os.path.join(benchmark_dir, f'{device_name}.html')
    chart.save(html_filename)    

Output()

In [10]:
import os
from rich.progress import track

benchmark_dir = 'benchmark' # path to store benchmark output

devices = data["device_name"].value_counts()
for device_name in track(devices.keys(), description="Generating plots for all GPUs..."):
    # Create plot
    df = data[data["device_name"] == device_name].reset_index()

    bars = alt.Chart().mark_bar().encode(
        x="cuda_enabled:N",
        y=alt.Y("max(perf_ns_per_day):Q", title="max ns/day"),
        color='cuda_enabled:N',
    )

    chart = alt.layer(bars, data=df).facet(
        column="run:O",
        title=device_name
    )

    html_filename = os.path.join(benchmark_dir, f'{device_name} - max.html')
    chart.save(html_filename)    

Output()

In [11]:
import os
from rich.progress import track

benchmark_dir = 'benchmark' # path to store benchmark output
osfullnames = {'linux' : 'linux2 4.19.76-linuxkit', 'win' : 'win32 10'}

devices = data["device_name"].value_counts()
for device_name in track(devices.keys(), description="Generating plots for all GPUs..."):    

    for osname in ['win', 'linux']:
        osfullname = osfullnames[osname]

        df = (data
              .pipe(lambda df: df[(df["os"] == osfullname) & (df["device_name"] == device_name)])
              .assign(platform=lambda df: df["cuda_enabled"].replace({True: "CUDA", False: "OpenCL"}))
             ).reset_index()

        bars = alt.Chart().mark_bar().encode(
            x="platform:N",
            y=alt.Y("mean(perf_ns_per_day):Q", title="mean ns/day"),
            color='platform:N',
        )

        error_bars = alt.Chart().mark_errorbar(extent='ci').encode(
            x=alt.X("platform:N", title=None),
            y=alt.Y("perf_ns_per_day:Q", title=None)
        )

        chart = alt.layer(bars, error_bars, data=df).facet(
            column="run:O",
            title=f'{device_name} ({osname})'
        )

        html_filename = os.path.join(benchmark_dir, f'{device_name} - {osname}.html')
        print(html_filename)
        chart.save(html_filename)    

Output()

benchmark/GeForce RTX 2080 Ti - win.html
benchmark/GeForce RTX 2080 Ti - linux.html
benchmark/Tesla V100-SXM2-16GB - win.html
benchmark/Tesla V100-SXM2-16GB - linux.html
benchmark/A100-SXM4-40GB - win.html
benchmark/A100-SXM4-40GB - linux.html
benchmark/GeForce GTX 1070 - win.html
benchmark/GeForce GTX 1070 - linux.html
benchmark/GeForce GTX 1080 Ti - win.html
benchmark/GeForce GTX 1080 Ti - linux.html
benchmark/Hainan - win.html
benchmark/Hainan - linux.html
benchmark/GeForce GTX 1080 - win.html
benchmark/GeForce GTX 1080 - linux.html
benchmark/Ellesmere - win.html
benchmark/Ellesmere - linux.html
benchmark/Tesla K80 - win.html
benchmark/Tesla K80 - linux.html
benchmark/GeForce RTX 2060 SUPER - win.html
benchmark/GeForce RTX 2060 SUPER - linux.html
benchmark/GeForce RTX 2070 SUPER - win.html
benchmark/GeForce RTX 2070 SUPER - linux.html
benchmark/GeForce RTX 2080 SUPER - win.html
benchmark/GeForce RTX 2080 SUPER - linux.html
benchmark/Tesla M60 - win.html
benchmark/Tesla M60 - linux.h

benchmark/GeForce 940MX - win.html
benchmark/GeForce 940MX - linux.html
benchmark/GeForce 930MX - win.html
benchmark/GeForce 930MX - linux.html
benchmark/GeForce RTX 2070 with Max-Q Design - win.html
benchmark/GeForce RTX 2070 with Max-Q Design - linux.html
benchmark/GeForce GTX 1060 with Max-Q Design - win.html
benchmark/GeForce GTX 1060 with Max-Q Design - linux.html
benchmark/GeForce GTX 680 - win.html
benchmark/GeForce GTX 680 - linux.html
benchmark/Oland - win.html
benchmark/Oland - linux.html
benchmark/Quadro P620 - win.html
benchmark/Quadro P620 - linux.html
benchmark/GeForce MX150 - win.html
benchmark/GeForce MX150 - linux.html
benchmark/Tesla K20m - win.html
benchmark/Tesla K20m - linux.html
benchmark/GeForce GTX 745 - win.html
benchmark/GeForce GTX 745 - linux.html
benchmark/GeForce RTX 2080 with Max-Q Design - win.html
benchmark/GeForce RTX 2080 with Max-Q Design - linux.html
benchmark/GeForce GTX 950M - win.html
benchmark/GeForce GTX 950M - linux.html
benchmark/Intel(R) HD 

benchmark/Raccoons - win.html
benchmark/Raccoons - linux.html
benchmark/Quadro M3000M - win.html
benchmark/Quadro M3000M - linux.html
benchmark/Quadro RTX 5000 with Max-Q Design - win.html
benchmark/Quadro RTX 5000 with Max-Q Design - linux.html
benchmark/GeForce 920M - win.html
benchmark/GeForce 920M - linux.html
benchmark/GRID V100D-16Q - win.html
benchmark/GRID V100D-16Q - linux.html
benchmark/Quadro P2000 with Max-Q Design - win.html
benchmark/Quadro P2000 with Max-Q Design - linux.html
benchmark/Intel(R) UHD Graphics 620 - win.html
benchmark/Intel(R) UHD Graphics 620 - linux.html
benchmark/GRID M60-8Q - win.html
benchmark/GRID M60-8Q - linux.html
benchmark/Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz - win.html
benchmark/Intel(R) Core(TM) i7-4790K CPU @ 4.00GHz - linux.html
benchmark/Quadro M4000M - win.html
benchmark/Quadro M4000M - linux.html
benchmark/Tesla K20c - win.html
benchmark/Tesla K20c - linux.html
benchmark/GeForce RTX 2070 Super - win.html
benchmark/GeForce RTX 2070 Super

benchmark/Quadro K1000M - win.html
benchmark/Quadro K1000M - linux.html
benchmark/GRID T4-8C - win.html
benchmark/GRID T4-8C - linux.html
benchmark/GeForce GT 640M - win.html
benchmark/GeForce GT 640M - linux.html
benchmark/Tesla M4 - win.html
benchmark/Tesla M4 - linux.html
benchmark/GRID T4-16Q - win.html
benchmark/GRID T4-16Q - linux.html
benchmark/GRID V100DX-32C - win.html
benchmark/GRID V100DX-32C - linux.html
benchmark/GeForce 730A - win.html
benchmark/GeForce 730A - linux.html
benchmark/GeForce GT 620 - win.html
benchmark/GeForce GT 620 - linux.html
benchmark/GeForce 830M - win.html
benchmark/GeForce 830M - linux.html
benchmark/GeForce GTX 1060 5GB - win.html
benchmark/GeForce GTX 1060 5GB - linux.html


In [12]:
import os
from rich.progress import track

benchmark_dir = 'benchmark' # path to store benchmark output
osfullnames = {'linux' : 'linux2 4.19.76-linuxkit', 'win' : 'win32 10'}

devices = data["device_name"].value_counts()
for device_name in track(devices.keys(), description="Generating plots for all GPUs..."):    

    for osname in ['win', 'linux']:
        osfullname = osfullnames[osname]

        df = (data
              .pipe(lambda df: df[(df["os"] == osfullname) & (df["device_name"] == device_name)])
              .assign(platform=lambda df: df["cuda_enabled"].replace({True: "CUDA", False: "OpenCL"}))
             ).reset_index()

        bars = alt.Chart().mark_bar().encode(
            x="platform:N",
            y=alt.Y("max(perf_ns_per_day):Q", title="max ns/day"),
            color='platform:N',
        )

        error_bars = alt.Chart().mark_errorbar(extent='ci').encode(
            x=alt.X("platform:N", title=None),
            y=alt.Y("perf_ns_per_day:Q", title=None)
        )

        chart = alt.layer(bars, data=df).facet(
            column="run:O",
            title=f'{device_name} ({osname})'
        )

        html_filename = os.path.join(benchmark_dir, f'{device_name} - {osname} - max.html')
        print(html_filename)
        chart.save(html_filename)    

Output()

benchmark/GeForce RTX 2080 Ti - win - max.html
benchmark/GeForce RTX 2080 Ti - linux - max.html
benchmark/Tesla V100-SXM2-16GB - win - max.html
benchmark/Tesla V100-SXM2-16GB - linux - max.html
benchmark/A100-SXM4-40GB - win - max.html
benchmark/A100-SXM4-40GB - linux - max.html
benchmark/GeForce GTX 1070 - win - max.html
benchmark/GeForce GTX 1070 - linux - max.html
benchmark/GeForce GTX 1080 Ti - win - max.html
benchmark/GeForce GTX 1080 Ti - linux - max.html
benchmark/Hainan - win - max.html
benchmark/Hainan - linux - max.html
benchmark/GeForce GTX 1080 - win - max.html
benchmark/GeForce GTX 1080 - linux - max.html
benchmark/Ellesmere - win - max.html
benchmark/Ellesmere - linux - max.html
benchmark/Tesla K80 - win - max.html
benchmark/Tesla K80 - linux - max.html
benchmark/GeForce RTX 2060 SUPER - win - max.html
benchmark/GeForce RTX 2060 SUPER - linux - max.html
benchmark/GeForce RTX 2070 SUPER - win - max.html
benchmark/GeForce RTX 2070 SUPER - linux - max.html
benchmark/GeForce 

benchmark/P104-100 - linux - max.html
benchmark/Bonaire - win - max.html
benchmark/Bonaire - linux - max.html
benchmark/Quadro K620 - win - max.html
benchmark/Quadro K620 - linux - max.html
benchmark/GeForce GTX 650 Ti BOOST - win - max.html
benchmark/GeForce GTX 650 Ti BOOST - linux - max.html
benchmark/TITAN RTX - win - max.html
benchmark/TITAN RTX - linux - max.html
benchmark/Tesla P4 - win - max.html
benchmark/Tesla P4 - linux - max.html
benchmark/TITAN Xp COLLECTORS EDITION - win - max.html
benchmark/TITAN Xp COLLECTORS EDITION - linux - max.html
benchmark/GeForce RTX 3080 - win - max.html
benchmark/GeForce RTX 3080 - linux - max.html
benchmark/GeForce GTX 660 Ti - win - max.html
benchmark/GeForce GTX 660 Ti - linux - max.html
benchmark/Quadro M1000M - win - max.html
benchmark/Quadro M1000M - linux - max.html
benchmark/GeForce GT 730 - win - max.html
benchmark/GeForce GT 730 - linux - max.html
benchmark/Quadro RTX 3000 - win - max.html
benchmark/Quadro RTX 3000 - linux - max.html


benchmark/Quadro RTX 3000 with Max-Q Design - linux - max.html
benchmark/Tesla V100-DGXS-32GB - win - max.html
benchmark/Tesla V100-DGXS-32GB - linux - max.html
benchmark/Quadro 4000 - win - max.html
benchmark/Quadro 4000 - linux - max.html
benchmark/GeForce GTX 1050 Ti with Max-Q Design - win - max.html
benchmark/GeForce GTX 1050 Ti with Max-Q Design - linux - max.html
benchmark/GRID T4-2Q - win - max.html
benchmark/GRID T4-2Q - linux - max.html
benchmark/Stoney - win - max.html
benchmark/Stoney - linux - max.html
benchmark/GeForce GTX 850M - win - max.html
benchmark/GeForce GTX 850M - linux - max.html
benchmark/Quadro T1000 - win - max.html
benchmark/Quadro T1000 - linux - max.html
benchmark/Tesla T10 - win - max.html
benchmark/Tesla T10 - linux - max.html
benchmark/GeForce GPU - win - max.html
benchmark/GeForce GPU - linux - max.html
benchmark/Kalindi - win - max.html
benchmark/Kalindi - linux - max.html
benchmark/GeForce GTX 690 - win - max.html
benchmark/GeForce GTX 690 - linux - 

benchmark/GeForce MX330 - win - max.html
benchmark/GeForce MX330 - linux - max.html
benchmark/Quadro M520 - win - max.html
benchmark/Quadro M520 - linux - max.html
benchmark/GRID T4-1Q - win - max.html
benchmark/GRID T4-1Q - linux - max.html
benchmark/GeForce GTX 775M - win - max.html
benchmark/GeForce GTX 775M - linux - max.html
benchmark/GRID RTX8000P-4Q - win - max.html
benchmark/GRID RTX8000P-4Q - linux - max.html
benchmark/Intel(R) HD Graphics 610 - win - max.html
benchmark/Intel(R) HD Graphics 610 - linux - max.html
benchmark/Quadro K620M - win - max.html
benchmark/Quadro K620M - linux - max.html
benchmark/GeForce GT 720M - win - max.html
benchmark/GeForce GT 720M - linux - max.html
benchmark/GeForce GTX 480 - win - max.html
benchmark/GeForce GTX 480 - linux - max.html
benchmark/Quadro K3000M - win - max.html
benchmark/Quadro K3000M - linux - max.html
benchmark/GeForce 930A - win - max.html
benchmark/GeForce 930A - linux - max.html
benchmark/Quadro K2000M - win - max.html
benchma

In [13]:
# Generate YAML and JSON files
import os
import yaml
import json
from rich.progress import track


with open('run-metadata.yaml', 'rt') as infile:
    runs = yaml.load(infile.read())

print('Writing metadata...')
yaml_filename = os.path.join(benchmark_dir, f'run-metadata.yaml')
with open(yaml_filename, 'wt') as outfile:
    outfile.write(yaml.dump(runs))
    
json_filename = os.path.join(benchmark_dir, f'run-metadata.json')
with open(json_filename, 'wt') as outfile:
    outfile.write(json.dumps(runs))
        
osfullnames = {'linux' : 'linux2 4.19.76-linuxkit', 'win' : 'win32 10'}
devices = data["device_name"].value_counts()
for device_name in track(devices.keys(), description="Generating plots for all GPUs..."):
    device_df = data[data["device_name"] == device_name].reset_index()
    
    # Drop NaNs
    df.dropna(subset=['perf_ns_per_day'], inplace=True)
        
    # Create data record of summary statistics
    device_data = dict()
    device_data['device_name'] = device_name
    device_data['perf_ns_per_day'] = dict()        
    for run in runs.keys():
        run_index = int(run[3:])
        
        run_df = device_df[device_df["run"] == run_index].reset_index()
        device_data['perf_ns_per_day'][run] = { 
            'name' : runs[run]['name'],
            'num_atoms' : runs[run]['num_atoms'],
        }        
        
        for osname in osfullnames.keys():
            osfullname = osfullnames[osname]
            os_df = run_df[run_df["os"] == osfullname].reset_index()        

            device_data['perf_ns_per_day'][run][osname] = dict()
            for platform in ['CUDA', 'OpenCL']:
                
                cuda_enabled = (platform == 'CUDA')
                #if cuda_enabled not in set(os_df["cuda_enabled"]):
                #    continue
                    
                platform_df = os_df[os_df["cuda_enabled"] == cuda_enabled]              
                perf = platform_df['perf_ns_per_day']
                
                device_data['perf_ns_per_day'][run][osname][platform] = {
                    'mean' : float(perf.mean()),
                    'std' : float(perf.std()),
                    'min' : float(perf.min()),
                    'max' : float(perf.max()),
                    'nsamples' : len(perf),
                }        
            
    yaml_filename = os.path.join(benchmark_dir, f'{device_name}.yaml')
    with open(yaml_filename, 'wt') as outfile:
        outfile.write(yaml.dump(device_data))

    json_filename = os.path.join(benchmark_dir, f'{device_name}.json')
    with open(json_filename, 'wt') as outfile:
        outfile.write(json.dumps(device_data))



Writing metadata...


  if __name__ == '__main__':


Output()

# Generate a simple index.html of all benchmark HTML files

In [14]:
contents = """

<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Folding@home core22 0.0.11-0.0.13 benchmark suite (PROJ1710x): Preliminary analysis</title>
<meta name="viewport" content="width=device-width, initial-scale=1.0, shrink-to-fit=no">
<!--[if lt IE 9]><script src="js/html5shiv-printshiv.js" media="all"></script><![endif]-->
<link href="https://fonts.googleapis.com/css2?family=Roboto&family=Roboto+Mono&display=swap" rel="stylesheet">
<style>
html * {
  font-family: 'Roboto', sans-serif;
}

span.updated {
    color: gray;
    font-style: italic;
}

table {
  border-collapse: collapse;
  width: 100%;
}

td, th {
  border: 1px solid #ddd;
  padding: 8px;
}

table.top20 td {
    font-size: 1.25em;
}

td.smiles {
    font-family: 'Roboto Mono', monospace;
}

td.binding span.estimate {
    white-space: nowrap;
}

td.binding span.point {
    font-weight: bold;
}

td.binding span.point > span.negative {
    color: green;
}

td.binding span.stderr {
    color: #555;
}

td.thumbnail {
    text-align: center;
}

td.thumbnail img {
    height: 100px;
}

tr:nth-child(even){background-color: #f2f2f2;}

tr:hover {background-color: #ddd;}

th {
  padding-top: 12px;
  padding-bottom: 12px;
  text-align: left;
  background-color: #1f77b4;
  color: white;
}

div.progress {
    border-style: solid;
    border-width: 2px;
    border-color: #1f77b4;
    background-color: white;
    padding: 3px;
}

div.progress > div.progress-bar {
    background-color: #ff7f0e;
    background-color: #1f77b4;
    color: white;
    font-size: 1.25em;
    font-weight: bold;
    font-style: italic;
    text-align: right;
    padding: 5px 20px 5px;
}
</style>
</head>
"""

from urllib.parse import quote
from datetime import datetime
contents += "<body>\n"
contents += "<h1>Folding@home core22 0.0.11-0.0.13 benchmark suite (PROJ1710x): Preliminary analysis</h1>\n"
contents += f'<span class="updated">Last updated: {datetime.now()}</span>\n'
contents += f'<p>Metadata: <a href="run-metadata.yaml">[YAML]</a> <a href="run-metadata.yaml">[JSON]</a></p>\n'
contents += f'<p>Pandas dataframe (use pandas.read_feather): <a href="{feather_filename}">[Feather]</a></p>\n'
contents += "<ul>\n"
for device_name in devices.keys():
    win_filename = f'{device_name} - win - max.html'
    linux_filename = f'{device_name} - linux - max.html'
    aggregate_filename = f'{device_name} - max.html'
    contents += f'  <li> <a href="{quote(win_filename)}">[win]</a> <a href="{quote(linux_filename)}">[linux]</a> <a href="{quote(aggregate_filename)}">[combined]</a> <a href="{quote(device_name)}.html">[HTML]</a> <a href="{quote(device_name)}.yaml">[YAML]</a> <a href="{quote(device_name)}.json">[JSON]</a> : {device_name} </li>\n'
contents += "</ul>\n"
contents += "</body>\n"
with open(os.path.join(benchmark_dir, 'index.html'), 'wt') as outfile:
    outfile.write(contents)

# Transfer
```
aws s3 sync benchmark s3://fah-ws3/benchmark --profile jchodera --acl public-read
```    

In [15]:
data = pd.read_feather(f'../data/{feather_filename}').set_index(["project", "run", "clone", "gen"]).sort_index()
data.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 113638 entries, ('PROJ17101', 0, 0, 0) to ('PROJ17102', 16, 1368, 0)
Data columns (total 10 columns):
 #   Column                 Non-Null Count   Dtype  
---  ------                 --------------   -----  
 0   os                     113638 non-null  object 
 1   platform_name          113638 non-null  object 
 2   platform_vendor        113638 non-null  object 
 3   platform_version       113638 non-null  object 
 4   device_name            113638 non-null  object 
 5   device_vendor          113638 non-null  object 
 6   device_version         113638 non-null  object 
 7   device_driver_version  21954 non-null   object 
 8   cuda_enabled           113638 non-null  bool   
 9   perf_ns_per_day        112130 non-null  float64
dtypes: bool(1), float64(1), object(8)
memory usage: 8.8+ MB


In [16]:
# Evaluate average speedup for each GPU
devices = data["device_name"].value_counts()
device_speedups = dict()
nruns = 17
import numpy as np
for device_name in track(devices.keys(), description="Estimating speedups..."):    
    df = data[data["device_name"] == device_name].reset_index()
    # Drop NaNs
    df.dropna(subset=['perf_ns_per_day'], inplace=True)

    ncounts = len(df)    
    if (ncounts < 100):
        continue
    
    cuda_perf = np.zeros([nruns])
    opencl_perf = np.zeros([nruns])
    for run in range(nruns):
        cuda_perf[run] = df[(df["run"]==run) & (df["cuda_enabled"]==True)]['perf_ns_per_day'].max()
        opencl_perf[run] = df[(df["run"]==run) & (df["cuda_enabled"]==False)]['perf_ns_per_day'].max()        
        
    indices = np.isfinite(cuda_perf) & np.isfinite(opencl_perf)
    if sum(indices) == 0:
        continue    
    speedup = cuda_perf[indices] / opencl_perf[indices]

    device_speedups[device_name] = {
        'device_name' : device_name, 
        'max' : speedup.max(),
        'max_percent' : 100*(speedup.max() - 1),
        'mean' : speedup.mean(),
        'mean_percent' : 100*(speedup.mean() - 1),
        'median' : np.median(speedup),
        'median_percent' : 100*(np.median(speedup) - 1),
    }
    
    is_moonshot = [False, False, False, False, False, False, False, False, False, True, True, False, False, False, False, True, True]
    moonshot_indices = np.isfinite(cuda_perf) & np.isfinite(opencl_perf) & is_moonshot
    if sum(moonshot_indices) == 0:
        continue
    moonshot_speedup = cuda_perf[moonshot_indices] / opencl_perf[moonshot_indices]

    device_speedups[device_name]['moonshot'] = moonshot_speedup.max()
    device_speedups[device_name]['moonshot_percent'] = 100*(moonshot_speedup.max() - 1)
              

Output()

In [17]:
device_names = list(device_speedups.keys())
device_names.sort(key=lambda device_name : device_speedups[device_name]['max'], reverse=True)

In [18]:
speedups = pd.DataFrame.from_dict(device_speedups, orient='index', columns=['device_name', 'max', 'mean', 'median', 'moonshot', 'max_percent', 'mean_percent', 'median_percent', 'moonshot_percent'])

In [19]:
alt.Chart(speedups).mark_bar().encode(
    x=alt.X("median_percent", title='% speedup with CUDA'),
    y=alt.Y('device_name:N', sort='-x', title='NVIDIA GPU')
)

In [20]:
alt.Chart(speedups).mark_bar().encode(
    x=alt.X("moonshot_percent", title='% speedup with CUDA for COVID Moonshot Sprints'),
    y=alt.Y('device_name:N', sort='-x', title='NVIDIA GPU')
)

In [21]:
speedup1 = alt.Chart(speedups).mark_bar().encode(
    x=alt.X("median_percent", title='% speedup with CUDA'),
    y=alt.Y('device_name:N', sort='-x', title='NVIDIA GPU')
)

speedup2 = alt.Chart(speedups).mark_bar().encode(
    x=alt.X("moonshot_percent", title='% speedup with CUDA for COVID Moonshot Sprints'),
    y=alt.Y('device_name:N', sort='-x', title='NVIDIA GPU')
)

speedup1 | speedup2

In [22]:
alt.Chart(speedups).mark_bar().encode(
    y=alt.Y("median_percent", title='% speedup with CUDA'),
    x=alt.X('device_name:N', sort='-y', title='NVIDIA GPU')
)

In [23]:
alt.Chart(speedups).mark_bar().encode(
    y=alt.Y("moonshot_percent", title='% speedup with CUDA for COVID Moonshot Sprints'),
    x=alt.X('device_name:N', sort='-y', title='NVIDIA GPU')
)

In [24]:
# Evaluate performance for each GPU for DHFR PME 4 fs
devices = data["device_name"].value_counts()
device_performance = dict()
run = 3
import numpy as np
index = 0
for device_name in track(devices.keys(), description="Estimating speedups..."):    
    df = data[(data["device_name"]==device_name)].reset_index()
    # Drop NaNs
    df.dropna(subset=['perf_ns_per_day'], inplace=True)
    # Select run
    df = df[df["run"]==run].reset_index()

    ncounts = len(df)    
    #if (ncounts < 5):
    #    continue
    
    cuda_perf = np.median(df[df["cuda_enabled"]==True]['perf_ns_per_day'])
    opencl_perf = np.median(df[df["cuda_enabled"]==False]['perf_ns_per_day'])      
        
    if np.isnan(cuda_perf) or np.isnan(opencl_perf):
        continue
        
    device_performance[index] = {
        'device_name' : device_name,
        'ns_per_day' : cuda_perf,
        'platform' : 'CUDA',
    }
    index += 1
                  
    device_performance[index] = {
        'device_name' : device_name,
        'ns_per_day' : opencl_perf,
        'platform' : 'OpenCL',
    }
    index += 1

    

Output()

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **k

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


In [25]:
device_performance = pd.DataFrame.from_dict(device_performance, orient='index', columns=['device_name', 'ns_per_day', 'platform'])
device_performance

Unnamed: 0,device_name,ns_per_day,platform
0,GeForce RTX 2080 Ti,2635.660,CUDA
1,GeForce RTX 2080 Ti,2268.960,OpenCL
2,A100-SXM4-40GB,3106.320,CUDA
3,A100-SXM4-40GB,2899.230,OpenCL
4,GeForce GTX 1070,1213.630,CUDA
...,...,...,...
77,Quadro M1000M,229.117,OpenCL
78,GeForce RTX 2070 with Max-Q Design,1418.100,CUDA
79,GeForce RTX 2070 with Max-Q Design,1056.400,OpenCL
80,Quadro M1200,343.330,CUDA


In [26]:
alt.Chart(device_performance.reset_index()).mark_bar(opacity=0.5).encode(
    y=alt.Y("ns_per_day:Q", title="DHFR performance (ns/day)", stack=None),
    x=alt.X('device_name:N', sort='-y', title='NVIDIA GPU'),  
    color='platform:N'
)