In [1]:
# Copyright (c) 2021, S. VenkataKeerthy, Rohit Aggarwal
# Department of Computer Science and Engineering, IIT Hyderabad
#
# This software is available under the BSD 4-Clause License. Please see LICENSE
# file in the top-level directory for more details.
#
import pandas as pd
import numpy as np
import heapq
import sys, re
from sklearn.model_selection import KFold
import os
import xgboost as xgb
from scipy.stats import gmean

In [2]:
# Check the data

In [3]:
# Fail fast when the dataset is incomplete, and name exactly which of the
# required paths are missing so the reader knows what to fetch.
_required_data_paths = (
    "data/kernels_ir",
    "data/pact-2014-oracles.csv",
    "data/pact-2014-runtimes.csv",
)
_missing_data_paths = [
    path for path in _required_data_paths if not os.path.exists(path)
]
assert not _missing_data_paths, (
    "Dataset is not present. Please download it. Missing: "
    + ", ".join(_missing_data_paths)
)

In [4]:
# The evaluation cells read their inputs from this directory, so stop early
# when it has not been produced yet.
assert os.path.exists(
    "output/embeddings"
), "Embeddings are not generated"

# Read data from input file

In [5]:
def readEmd_program(filename):
    """Read a tab-separated embeddings file.

    Every non-empty line must hold an integer in its first field followed by
    the float components of one vector, all separated by tabs. Leading and
    trailing tabs/newlines on a line are ignored, and empty lines are skipped.

    Args:
        filename: Path of the embeddings file to read.

    Returns:
        A tuple ``(rep, targetLabel)``: ``rep`` is a list with one list of
        floats per line, ``targetLabel`` the list of integers taken from the
        first field of each line, both in file order.

    Raises:
        ValueError: If a field cannot be parsed as ``int`` / ``float``.
    """
    rep = []
    targetLabel = []
    # The context manager closes the handle even when parsing fails.
    with open(filename) as emb_file:
        for raw_line in emb_file:
            line = raw_line.strip("\n\t")
            if not line:
                # e.g. a trailing blank line at the end of the file
                continue
            fields = line.split("\t")
            targetLabel.append(int(fields[0]))
            rep.append([float(val) for val in fields[1:]])
    return rep, targetLabel

In [6]:
# Short platform flag (the suffix of the `cf_*` / `runtime_*` CSV columns)
# -> device name used when reporting results.
_FLAG_TO_DEVICE_NAME = dict(
    Cypress="AMD Radeon HD 5900",
    Tahiti="AMD Tahiti 7970",
    Fermi="NVIDIA GTX 480",
    Kepler="NVIDIA Tesla K20c",
)

# Platforms in the order they are evaluated and reported.
device_list = [
    "Cypress",
    "Tahiti",
    "Fermi",
    "Kepler",
]

# Oracle table, keyed by kernel.
oracle_file = "./data/pact-2014-oracles.csv"
oracles = pd.read_csv(oracle_file)

# Runtime table; it has at least `kernel`, `cf` and `runtime_<platform>` columns.
runtimes_file = "./data/pact-2014-runtimes.csv"
df = pd.read_csv(runtimes_file)

# Results from other works

The accuracies and speedups below are the values quoted by NCC in their paper and are used here for comparison. For the detailed analysis (discussed later), we ran these models ourselves; the results obtained are stored as pickle files in `./data/prior_art_results`.

In [7]:
# Speedups of the prior works, one value per platform in the row order of the
# table printed by evaluate() (AMD Radeon HD 5900, AMD Tahiti 7970,
# NVIDIA GTX 480, NVIDIA Tesla K20c). Each *_sp_mean is kept as a one-element
# list so that it can be appended to the matching *_sp_vals with `+`.
magni_sp_vals, magni_sp_mean = [1.21, 1.01, 0.86, 0.94], [1.005]
deeptune_sp_vals, deeptune_sp_mean = [1.10, 1.05, 1.10, 0.99], [1.06]
deeptuneTL_sp_vals, deeptuneTL_sp_mean = [1.17, 1.23, 1.14, 0.93], [1.1175]
ncc_sp_vals, ncc_sp_mean = [1.29, 1.07, 0.97, 1.01], [1.086]

In [8]:
# Presumably the candidate thread-coarsening factors (powers of two) - confirm
# against the cells that consume `cfs`.
cfs = np.array([1, 2, 4, 8, 16, 32])

# Number of rows `df` holds per kernel, ordered by kernel name.
# The column names are pinned explicitly: "index" holds the kernel name and
# "kernel" the row count. Without this, pandas >= 2.0 names the columns
# "kernel"/"count", so kernel_freq["kernel"] would return names, not counts.
kernel_freq = (
    df["kernel"]
    .value_counts()
    .sort_index()
    .rename_axis("index")
    .reset_index(name="kernel")
)

# Classification Model

In [9]:
def find_runtime(df, kernel, cf, platform):
    """Look up the runtime recorded for one kernel / cf pair on a platform.

    Args:
        df: Frame with ``kernel``, ``cf`` and ``runtime_<platform>`` columns.
        kernel: Value to match in the ``kernel`` column.
        cf: Value to match in the ``cf`` column.
        platform: Suffix selecting the ``runtime_<platform>`` column.

    Returns:
        A Series with the non-null runtimes of the matching rows; it keeps the
        original row labels and is empty when nothing matches.
    """
    matches = (df["kernel"] == kernel) & (df["cf"] == cf)
    runtime_column = df["runtime_" + platform]
    # Blank out the rows that do not match, then discard every blank entry.
    return runtime_column.where(matches).dropna()


def evaluate(max_depth, learning_rate, n_estimators):
    """Leave-one-out evaluation of an XGBoost classifier on the embeddings.

    One sample is built per (platform, kernel) pair from the embedding vector
    plus a one-hot platform encoding; the label is the oracle ``cf_<platform>``
    value. Each sample is predicted by a model trained on all other samples,
    and the runtimes in ``df`` turn the prediction into a speedup over cf == 1.
    A table comparing the per-platform mean speedup with the prior works is
    printed.

    The function reads these notebook-level names, so the cells defining them
    must have been run first: ``raw_embeddings`` and ``fileIndex`` (as returned
    by ``readEmd_program``), ``oracles``, ``df``, ``device_list``,
    ``_FLAG_TO_DEVICE_NAME`` and the ``*_sp_vals`` / ``*_sp_mean`` lists.

    Side effects: reads ``./data/all.txt``, adds a ``kernel_path`` column to
    the global ``oracles`` and ``df`` frames, and prints the speedup table.

    Args:
        max_depth: ``max_depth`` passed to ``xgb.XGBClassifier``.
        learning_rate: ``learning_rate`` passed to ``xgb.XGBClassifier``.
        n_estimators: ``n_estimators`` passed to ``xgb.XGBClassifier``.

    Returns:
        DataFrame with one row per evaluated sample and the columns ``Model``,
        ``Platform``, ``Kernel``, ``Oracle-CF``, ``Predicted-CF``, ``Speedup``,
        ``Oracle`` and ``OracleSpeedUp``.
    """
    # Column 0 holds the file number, columns 1..D the embedding dimensions
    # (D is taken from the data instead of being fixed to 300).
    raw_embeddings_pd = pd.DataFrame(raw_embeddings)
    raw_embeddings_pd.columns = range(1, raw_embeddings_pd.shape[1] + 1)
    embeddings = pd.concat([pd.DataFrame(fileIndex), raw_embeddings_pd], axis=1)

    # Raw string: "\s" in a normal string literal is an invalid escape sequence.
    llfiles = pd.read_csv("./data/all.txt", sep=r"\s+")

    # Kept as in-place additions to the shared frames so that any later cell
    # relying on the `kernel_path` column keeps working.
    oracles["kernel_path"] = "./" + oracles["kernel"] + ".ll"
    df["kernel_path"] = "./" + df["kernel"] + ".ll"

    # The joins do not depend on the platform, so do them once.
    merged = embeddings.merge(llfiles, left_on=0, right_on="FileNum").merge(
        oracles, left_on="ProgramName", right_on="kernel_path"
    )

    # One copy of the merged rows per platform: label = that platform's oracle
    # cf, `device` = 1-based position of the platform in device_list.
    per_platform = [
        merged.assign(cf=merged["cf_" + platform], device=device_id)
        for device_id, platform in enumerate(device_list, start=1)
    ]
    resultant_data = pd.concat(per_platform, ignore_index=True)

    # Remember each row's platform before `device` is one-hot encoded, instead
    # of recovering it later from hard-coded feature column positions.
    device_ids = resultant_data["device"].to_numpy()

    # dtype=float: newer pandas defaults to bool dummies, which do not support
    # the subtraction used by the min-max scaling below.
    resultant_data = pd.get_dummies(resultant_data, columns=["device"], dtype=float)

    targetLabel = np.array(resultant_data["cf"])
    features = resultant_data.drop(
        columns=[
            0,
            "FileNum",
            "ProgramName",
            "kernel",
            "cf_Fermi",
            "runtime_Fermi",
            "cf_Kepler",
            "runtime_Kepler",
            "cf_Cypress",
            "runtime_Cypress",
            "cf_Tahiti",
            "runtime_Tahiti",
            "kernel_path",
            "cf",
        ]
    )

    # Column-wise min-max scaling to [0, 1].
    features = (features - features.min()) / (features.max() - features.min())
    features = np.array(features)

    # NOTE(review): as in the original code, the kernel of a sample is derived
    # from its position inside its platform group. This assumes the merged rows
    # follow the same kernel order as the sorted kernel names of `df` and as
    # the rows of `oracles` - confirm against the data files.
    rows_per_platform = len(targetLabel) // len(device_list)
    kernel_names = sorted(set(df["kernel"]))
    # Rows of `df` per kernel, in the same sorted-by-name order as kernel_names.
    kernel_counts = df["kernel"].value_counts().sort_index()

    records = []
    # n_splits == number of samples, i.e. leave-one-out: every test fold holds
    # exactly one sample.
    kf = KFold(n_splits=len(targetLabel), shuffle=False)
    for train_index, test_index in kf.split(targetLabel):
        test_pos = test_index[0]
        kernel_pos = test_pos % rows_per_platform
        kernel = kernel_names[kernel_pos]
        platform = device_list[device_ids[test_pos] - 1]

        # Recent XGBoost releases require class labels 0..n_classes-1, so the
        # labels present in this training fold are encoded explicitly and the
        # prediction is decoded back to a cf value.
        classes, encoded_labels = np.unique(
            targetLabel[train_index], return_inverse=True
        )
        gbc = xgb.XGBClassifier(
            max_depth=max_depth,
            learning_rate=learning_rate,
            n_estimators=n_estimators,
            n_jobs=10,
        )
        gbc.fit(features[train_index], encoded_labels)
        prediction = classes[int(gbc.predict(features[test_index])[0])]

        # Cap the prediction at 2 ** (rows recorded for this kernel - 1);
        # presumably the largest cf with a measured runtime - confirm.
        prediction = min(prediction, 2 ** (kernel_counts.iloc[kernel_pos] - 1))
        oracle = targetLabel[test_pos]

        # .item() requires exactly one matching runtime and avoids the
        # deprecated float(<Series>) conversion.
        rt_baseline = float(find_runtime(df, kernel, 1, platform).item())
        rt_pred = float(find_runtime(df, kernel, prediction, platform).item())
        rt_oracle = float(oracles["runtime_" + platform].iloc[kernel_pos])
        records.append(
            {
                "Model": "IR2vec",
                "Platform": _FLAG_TO_DEVICE_NAME[platform],
                "Kernel": kernel,
                "Oracle-CF": oracle,
                "Predicted-CF": prediction,
                "Speedup": rt_baseline / rt_pred,
                "Oracle": rt_oracle / rt_pred,
                "OracleSpeedUp": rt_baseline / rt_oracle,
            }
        )

    ir2vec = pd.DataFrame(
        records,
        columns=[
            "Model",
            "Platform",
            "Kernel",
            "Oracle-CF",
            "Predicted-CF",
            "Speedup",
            "Oracle",
            "OracleSpeedUp",
        ],
    )

    print("\nSpeedup Matrix: IR2Vec Vs. others\n")
    # Order the per-platform means explicitly by device_list so they line up
    # with the prior-art lists, instead of relying on groupby's sort order.
    platform_names = [_FLAG_TO_DEVICE_NAME[flag] for flag in device_list]
    ir2vec_sp_vals = (
        ir2vec.groupby("Platform")["Speedup"].mean().reindex(platform_names).values
    )
    ir2vec_sp_mean = ir2vec_sp_vals.mean()
    sp_df = pd.DataFrame(
        {
            "Magni et al.": magni_sp_vals + magni_sp_mean,
            "DeepTune": deeptune_sp_vals + deeptune_sp_mean,
            "DeepTune-TL": deeptuneTL_sp_vals + deeptuneTL_sp_mean,
            "NCC": ncc_sp_vals + ncc_sp_mean,
            "IR2Vec": list(ir2vec_sp_vals) + [ir2vec_sp_mean],
        },
        index=platform_names + ["Average"],
    )
    print(sp_df)
    return ir2vec

# IR2Vec Symbolic Vs. Others

In [10]:
# evaluate() takes its input from the notebook-level names `raw_embeddings`
# and `fileIndex` rather than from arguments, so they must be (re)assigned
# from the Symbolic embeddings file before the call.
raw_embeddings, fileIndex = readEmd_program(
    "./output/embeddings/Thread_Coarsening_Symbolic_llvm12.txt"
)
ir2vec_sym = evaluate(max_depth=1, learning_rate=0.05, n_estimators=140)


Speedup Matrix: IR2Vec Vs. others

                    Magni et al.  DeepTune  DeepTune-TL    NCC    IR2Vec
AMD Radeon HD 5900         1.210      1.10       1.1700  1.290  1.240354
AMD Tahiti 7970            1.010      1.05       1.2300  1.070  1.280314
NVIDIA GTX 480             0.860      1.10       1.1400  0.970  1.229201
NVIDIA Tesla K20c          0.940      0.99       0.9300  1.010  1.153582
Average                    1.005      1.06       1.1175  1.086  1.225863


# IR2Vec Flow-Aware Vs. Others

In [11]:
# Overwrites the notebook-level `raw_embeddings` / `fileIndex` with the
# Flow-Aware embeddings; evaluate() reads these names instead of taking them
# as arguments, so this assignment has to precede the call.
raw_embeddings, fileIndex = readEmd_program(
    "./output/embeddings/Thread_Coarsening_FlowAware_llvm12.txt"
)
ir2vec_fa = evaluate(max_depth=1, learning_rate=0.05, n_estimators=140)


Speedup Matrix: IR2Vec Vs. others

                    Magni et al.  DeepTune  DeepTune-TL    NCC    IR2Vec
AMD Radeon HD 5900         1.210      1.10       1.1700  1.290  1.240354
AMD Tahiti 7970            1.010      1.05       1.2300  1.070  1.301152
NVIDIA GTX 480             0.860      1.10       1.1400  0.970  1.256957
NVIDIA Tesla K20c          0.940      0.99       0.9300  1.010  1.155468
Average                    1.005      1.06       1.1175  1.086  1.238483


In [12]:
# oracle = ir2vec_fa.copy()
# oracle["Model"] = "Oracle"
# oracle["Speedup"] = oracle["OracleSpeedUp"]
# oracle["Predicted-CF"] = oracle["Oracle-CF"]
# oracle.drop(columns=['OracleSpeedUp'],inplace=True)

# Other related observations
For this comparison, we use the results obtained by training the models from the earlier works ourselves.

In [13]:
# Per-sample results of the prior works; the cells below read their
# "Speedup" and "Platform" columns.
# NOTE(review): pd.read_pickle unpickles the files, which can execute arbitrary
# code - only load result files that come from a trusted source.
magni_res = pd.read_pickle("data/prior_art_results/magni_tf.results")
deeptune_res = pd.read_pickle("data/prior_art_results/deeptune_tf.results")
deeptune_tl_res = pd.read_pickle("data/prior_art_results/deeptune_tl_tf.results")
ncc_res = pd.read_pickle("data/prior_art_results/ncc_fix_tf.results")

## Speedup comparison

In [14]:
# Geometric mean of the per-sample speedups over all platforms, per model.
magni_geomean = gmean(magni_res["Speedup"].values)
deeptune_geomean = gmean(deeptune_res["Speedup"].values)
deeptune_tl_geomean = gmean(deeptune_tl_res["Speedup"].values)
ncc_geomean = gmean(ncc_res["Speedup"].values)
ir2vec_sym_geomean = gmean(ir2vec_sym["Speedup"].values)
ir2vec_fa_geomean = gmean(ir2vec_fa["Speedup"].values)


print(f"Geometric mean of Magni et al. {magni_geomean:.2f}x")
print(f"Geometric mean of DeepTune {deeptune_geomean:.2f}x")
# DeepTune-TL was computed above but never reported; print it like
# calcSpeedup() does for the per-platform numbers.
print(f"Geometric mean of DeepTune-TL {deeptune_tl_geomean:.2f}x")
print(f"Geometric mean of Inst2Vec {ncc_geomean:.2f}x")
print(f"Geometric mean of IR2Vec Symbolic {ir2vec_sym_geomean:.2f}x")
print(f"Geometric mean of IR2Vec Flow-Aware {ir2vec_fa_geomean:.2f}x")

Geometric mean of Magni et al. 0.86x
Geometric mean of DeepTune 1.00x
Geometric mean of Inst2Vec 1.04x
Geometric mean of IR2Vec Symbolic 1.17x
Geometric mean of IR2Vec Flow-Aware 1.18x


In [15]:
def calcSpeedup(platform):
    """Print and return the per-model geometric-mean speedup on one platform.

    For every model, the rows of its results frame whose ``Platform`` equals
    ``platform`` are selected and the geometric mean of their ``Speedup``
    values is taken. The four prior works are reported with two decimals, the
    two IR2Vec variants with three.

    Args:
        platform: Device name as stored in the ``Platform`` column.

    Returns:
        Tuple of rounded geometric means in the order Magni et al., DeepTune,
        DeepTune-TL, Inst2Vec, IR2Vec Symbolic, IR2Vec Flow-Aware.
    """
    # (label used in the printout, results frame, decimals shown/returned)
    models = (
        ("Magni et al.", magni_res, 2),
        ("DeepTune", deeptune_res, 2),
        ("DeepTune-TL", deeptune_tl_res, 2),
        ("Inst2Vec", ncc_res, 2),
        ("IR2Vec Symbolic", ir2vec_sym, 3),
        ("IR2Vec Flow-Aware", ir2vec_fa, 3),
    )

    # Compute every mean before printing anything, as the original did.
    geomeans = [
        gmean(results.loc[results["Platform"] == platform, "Speedup"].values)
        for _label, results, _digits in models
    ]

    for (label, _results, digits), value in zip(models, geomeans):
        print(f"Geometric mean of {label} {value:.{digits}f}x")

    return tuple(
        round(value, digits)
        for (_label, _results, digits), value in zip(models, geomeans)
    )

In [16]:
# Rounded per-model geometric-mean speedups on the AMD Radeon HD 5900, kept
# for the percentage comparison further down.
rad_magni, rad_dt, rad_dtTL, rad_ncc, rad_ir2vSym, rad_ir2vFA = calcSpeedup(
    "AMD Radeon HD 5900"
)

Geometric mean of Magni et al. 0.94x
Geometric mean of DeepTune 1.14x
Geometric mean of DeepTune-TL 1.14x
Geometric mean of Inst2Vec 1.15x
Geometric mean of IR2Vec Symbolic 1.200x
Geometric mean of IR2Vec Flow-Aware 1.200x


In [17]:
# Rounded per-model geometric-mean speedups on the AMD Tahiti 7970, kept for
# the percentage comparison further down.
tah_magni, tah_dt, tah_dtTL, tah_ncc, tah_ir2vSym, tah_ir2vFA = calcSpeedup(
    "AMD Tahiti 7970"
)

Geometric mean of Magni et al. 0.98x
Geometric mean of DeepTune 0.95x
Geometric mean of DeepTune-TL 0.90x
Geometric mean of Inst2Vec 1.04x
Geometric mean of IR2Vec Symbolic 1.204x
Geometric mean of IR2Vec Flow-Aware 1.229x


In [18]:
# Rounded per-model geometric-mean speedups on the NVIDIA GTX 480, kept for
# the percentage comparison further down.
gtx_magni, gtx_dt, gtx_dtTL, gtx_ncc, gtx_ir2vSym, gtx_ir2vFA = calcSpeedup(
    "NVIDIA GTX 480"
)

Geometric mean of Magni et al. 0.81x
Geometric mean of DeepTune 0.94x
Geometric mean of DeepTune-TL 0.99x
Geometric mean of Inst2Vec 0.95x
Geometric mean of IR2Vec Symbolic 1.132x
Geometric mean of IR2Vec Flow-Aware 1.177x


In [19]:
# Rounded per-model geometric-mean speedups on the NVIDIA Tesla K20c, kept for
# the percentage comparison further down.
tes_magni, tes_dt, tes_dtTL, tes_ncc, tes_ir2vSym, tes_ir2vFA = calcSpeedup(
    "NVIDIA Tesla K20c"
)

Geometric mean of Magni et al. 0.74x
Geometric mean of DeepTune 0.98x
Geometric mean of DeepTune-TL 1.01x
Geometric mean of Inst2Vec 1.01x
Geometric mean of IR2Vec Symbolic 1.129x
Geometric mean of IR2Vec Flow-Aware 1.131x


# Percentage increase in speedup of IR2Vec Flow-Aware encodings over the other models

In [20]:
def percentage(value1, value2):
    """Return the signed percentage change of ``value1`` relative to ``value2``.

    The result is positive when ``value1`` exceeds ``value2`` and negative when
    it is lower, so a slowdown is no longer reported as an "increase" (the
    previous version took the absolute difference). Whenever ``value1`` is not
    below ``value2`` the result is the same as before.

    Args:
        value1: The value being assessed (e.g. the IR2Vec Flow-Aware speedup).
        value2: The reference value; must be non-zero.

    Returns:
        ``(value1 - value2) / value2 * 100`` rounded to two decimals.
    """
    return round(((value1 - value2) / value2) * 100, 2)


def _print_increase(header, fa, magni, deeptune, deeptune_tl, inst2vec, symbolic):
    """Print one platform's header and the IR2Vec Flow-Aware gain over each model."""
    print(header)
    print(" % Increase in SpeedUp over Magni et al - ", percentage(fa, magni))
    print(" % Increase in SpeedUp over DeepTune - ", percentage(fa, deeptune))
    print(" % Increase in SpeedUp over DeepTune_TL - ", percentage(fa, deeptune_tl))
    print(" % Increase in SpeedUp over Inst2Vec - ", percentage(fa, inst2vec))
    print(" % Increase in SpeedUp over IR2Vec Symbolic - ", percentage(fa, symbolic))


# Every header after the first starts with a newline to separate the groups.
_print_increase(
    "AMD Radeon HD 5900",
    rad_ir2vFA, rad_magni, rad_dt, rad_dtTL, rad_ncc, rad_ir2vSym,
)
_print_increase(
    "\nAMD Tahiti 7970",
    tah_ir2vFA, tah_magni, tah_dt, tah_dtTL, tah_ncc, tah_ir2vSym,
)
_print_increase(
    "\nNVIDIA GTX 480",
    gtx_ir2vFA, gtx_magni, gtx_dt, gtx_dtTL, gtx_ncc, gtx_ir2vSym,
)
_print_increase(
    "\nNVIDIA Tesla K20c",
    tes_ir2vFA, tes_magni, tes_dt, tes_dtTL, tes_ncc, tes_ir2vSym,
)

AMD Radeon HD 5900
 % Increase in SpeedUp over Magni et al -  27.66
 % Increase in SpeedUp over DeepTune -  5.26
 % Increase in SpeedUp over DeepTune_TL -  5.26
 % Increase in SpeedUp over Inst2Vec -  4.35
 % Increase in SpeedUp over IR2Vec Symbolic -  0.0

AMD Tahiti 7970
 % Increase in SpeedUp over Magni et al -  25.41
 % Increase in SpeedUp over DeepTune -  29.37
 % Increase in SpeedUp over DeepTune_TL -  36.56
 % Increase in SpeedUp over Inst2Vec -  18.17
 % Increase in SpeedUp over IR2Vec Symbolic -  2.08

NVIDIA GTX 480
 % Increase in SpeedUp over Magni et al -  45.31
 % Increase in SpeedUp over DeepTune -  25.21
 % Increase in SpeedUp over DeepTune_TL -  18.89
 % Increase in SpeedUp over Inst2Vec -  23.89
 % Increase in SpeedUp over IR2Vec Symbolic -  3.98

NVIDIA Tesla K20c
 % Increase in SpeedUp over Magni et al -  52.84
 % Increase in SpeedUp over DeepTune -  15.41
 % Increase in SpeedUp over DeepTune_TL -  11.98
 % Increase in SpeedUp over Inst2Vec -  11.98
 % Increase in Sp