In [1]:
# After running 2-build_neuralnets you obtain the best configuration for the NN model.
# This notebook retrains the model with that identified best configuration
# and then saves the trained model to disk for future use.

In [2]:
#
# Out of laziness, I copy-pasted the data-preparation code from 2-xxx; you could instead save the prepared data in 2-xxx and load it here to avoid the overhead.
#

In [3]:
import os,sys
from sklearn import preprocessing

import json
import codecs
import yaml
import numpy as np
import pandas as pd

sys.path.append('../utils')
from cudaMetrics import *

In [4]:
# Read the metrics table (the file name suggests scaled profiling metrics per app).
df_app_scale = pd.read_csv(
    'app_proflingmetrics_scaled.csv',
)
# Discard the leading column; all remaining columns are kept unchanged.
df_metrics = df_app_scale.drop(
    labels=df_app_scale.columns[:1],
    axis=1,
)

In [5]:
# Preview only the first rows; rendering the whole frame bloats the notebook.
df_metrics.head(10)

Unnamed: 0,AppName,inst_control,stall_constant_memory_dependency,gst_efficiency,inst_fp_64,local_store_transactions,stall_not_selected,global_hit_rate,stall_exec_dependency,shared_store_transactions,...,tex_fu_utilization,l2_write_throughput,l2_tex_write_throughput,atomic_transactions,gld_efficiency,sm_efficiency,l2_tex_write_hit_rate,shared_store_transactions_per_request,local_memory_overhead,flop_count_sp_fma
0,rodinia_b+tree,2.309659e-02,0.000236,0.225000,0.000000,0.000000,0.050379,0.684131,0.046910,0.000000,...,0.3,0.006123,0.006123,0.000000,0.631774,0.998709,9.937400e-01,0.000000,0.000000e+00,0.000000e+00
1,cudasdk_threadFenceReduction,2.686329e-04,0.007774,0.125000,0.000000,0.000000,0.009624,0.572213,0.034944,0.000101,...,0.2,0.000500,0.000241,0.000757,1.000000,0.928899,9.696970e-01,0.149817,0.000000e+00,0.000000e+00
2,cudasdk_convolutionFFT2D,1.908833e-03,0.360177,1.000000,0.000000,0.000000,0.139878,0.858320,0.299212,0.031246,...,0.3,0.310734,0.310734,0.000000,1.000000,0.995167,9.995885e-01,0.172893,0.000000e+00,2.685552e-03
3,shoc_lev1BFS,4.382805e-05,0.030554,0.154152,0.000000,0.000000,0.040641,0.822558,0.176929,0.000000,...,0.1,0.001057,0.001007,0.000000,0.133829,0.122310,9.000000e-01,0.000000,0.000000e+00,0.000000e+00
4,rodinia_lavaMD,2.613412e-01,0.000001,0.250000,1.000000,0.000000,0.000211,0.043188,1.000000,0.305451,...,0.1,0.006502,0.006502,0.000000,0.270166,0.997335,1.000000e+00,0.973673,0.000000e+00,0.000000e+00
5,poly_gemm,1.906970e-03,0.000079,1.000000,0.000000,0.000000,0.004901,0.667458,0.027770,0.000000,...,0.8,0.899023,0.899023,0.000000,0.825273,0.978193,1.000000e+00,0.000000,0.000000e+00,1.562500e-02
6,rodinia_hybridsort,9.688398e-03,0.002168,1.000000,0.000000,0.000000,0.032906,0.572213,0.444358,0.063761,...,0.3,0.344550,0.344543,0.096881,1.000000,0.994942,9.990244e-01,0.466314,0.000000e+00,3.417968e-03
7,cudasdk_MCEstimatePiInlineQ,1.696558e-04,0.001200,0.125000,0.000000,0.033385,0.000888,0.273338,0.085343,0.000139,...,0.1,0.283169,0.283169,0.000000,0.125000,0.971919,2.984118e-01,0.172893,2.752721e-01,3.492460e-05
8,cudasdk_MCEstimatePiInlineP,2.303844e-04,0.053062,0.232143,0.000000,0.020093,0.097135,0.831953,0.113852,0.000139,...,0.8,0.031818,0.031818,0.000000,0.250000,0.940444,9.587629e-01,0.172893,2.784494e-02,3.492460e-05
9,cudasdk_shflscan,8.838533e-04,0.119633,1.000000,0.000000,0.000000,0.067034,0.572213,0.331733,0.023229,...,0.3,0.444296,0.444284,0.000000,1.000000,0.981929,1.000000e+00,0.259021,0.000000e+00,0.000000e+00


In [6]:
# Persist the metrics table (AppName + metric columns) so it can be reloaded
# later in this notebook. No `index=` argument is given, so the row index is
# written as the first CSV column; the reload step drops column 0 for that reason.
df_metrics.to_csv('appmetrics_with_appname.csv', encoding='utf-8')

In [7]:

# Ground truth: a dict keyed by application name (presumably app -> fastest
# device id), stored as a pickled object inside a .npy file, hence `.item()`.
# NOTE: allow_pickle=True is mandatory on NumPy >= 1.16.3. Unpickling can run
# arbitrary code, so only load this file from a trusted source.
fastdev_dd = np.load("../05_whichgpufast/home-fastdev.npy", allow_pickle=True).item()
targetpps = list(fastdev_dd.keys())

# drop apps not needed: keep only the rows whose AppName has a ground-truth entry
has_label = df_metrics['AppName'].isin(targetpps)
for appName in df_metrics.loc[~has_label, 'AppName']:
    print("[warning] delete current row in the dataframe ({})".format(appName))
# Build a filtered copy in one step instead of dropping rows in place inside a
# loop (quadratic, and it mutated the frame that was displayed/saved above).
df_metrics = df_metrics.loc[has_label].copy()

# add ground truth: look up every remaining app in a single vectorized pass
df_metrics['best_dev'] = df_metrics['AppName'].map(fastdev_dd)
    

def gen_model_input(df_dataset):
    """Split a labeled metrics frame into model features and integer targets.

    Parameters
    ----------
    df_dataset : pandas.DataFrame
        Must contain the columns 'AppName' and 'best_dev'; every other
        column is treated as a feature.

    Returns
    -------
    (features, targets)
        `features` is `df_dataset` without 'AppName' and 'best_dev';
        `targets` is the 'best_dev' column cast to int64.
        The input frame itself is not modified.
    """
    label_column = 'best_dev'
    non_feature_columns = ['AppName', label_column]

    # Cast the label column to a fixed integer dtype (it may arrive as object).
    targets = df_dataset[label_column].astype('int64')
    features = df_dataset.drop(non_feature_columns, axis=1)
    return features, targets

# Split into features / labels, then convert both to plain NumPy arrays.
df_X, df_y = gen_model_input(df_metrics)
# `.values` replaces `as_matrix()` (removed in pandas 1.0) and `np.float64`
# replaces the `np.float` alias (removed in NumPy 1.24). The resulting arrays
# are the same as before on older library versions.
df_X = df_X.values.astype(np.float64)
df_y = df_y.values.astype(np.int32)



In [8]:
from sklearn.model_selection import StratifiedKFold  # StratifiedKFold
from sklearn import metrics
from sklearn.neural_network import MLPClassifier
import pickle

### Build the NN model with the target parameters

In [9]:
# Train one MLP per stratified fold and keep, on disk, the model of the fold
# with the lowest held-out error.
pkl_filename = "output_model.pkl"

fold_k = 5
kf = StratifiedKFold(n_splits=fold_k, random_state=314159, shuffle=True)

# Lowest held-out error seen so far. Starting at +inf guarantees that the first
# fold is always saved, so the pickle on disk always comes from this run.
minError = float("inf")
for train_index, test_index in kf.split(df_X, df_y):
    X_train, X_test = df_X[train_index], df_X[test_index]
    y_train, y_test = df_y[train_index], df_y[test_index]

    # NOTE(review): no random_state is passed to the classifier, so the scores
    # may differ between runs even though the fold split itself is seeded.
    clsfy = MLPClassifier(hidden_layer_sizes=(100, 100, 100),
                          activation='identity',
                          solver='adam',
                          alpha=1.0,
                          max_iter=1000) # max 1K iterations

    clsfy.fit(X_train, y_train)
    err = metrics.mean_absolute_error(y_test, clsfy.predict(X_test))
    result = clsfy.score(X_test, y_test)
    print("error : {}".format(err))
    print("result: {}\n".format(result))
    if err < minError:
        # Track the best error so that only a strictly better fold overwrites
        # the saved model (otherwise the last fold would always win).
        minError = err
        # save the model
        with open(pkl_filename, 'wb') as model_file:
            pickle.dump(clsfy, model_file)

error : 0.0
result: 1.0

error : 0.0666666666667
result: 0.933333333333

error : 0.0666666666667
result: 0.933333333333

error : 0.0
result: 1.0

error : 0.0714285714286
result: 0.928571428571



In [10]:
# # load the model from disk
# loaded_model = pickle.load(open(pkl_filename, 'rb'))
# result = loaded_model.score(X_test, y_test)
# print(result)

### test the trained model

In [11]:
# Read back the CSV that was written earlier in this notebook.
df_app_metrics = pd.read_csv(
    'appmetrics_with_appname.csv',
)
# Strip the leading column (presumably the row index that to_csv saved).
df_app_metrics = df_app_metrics.drop(
    labels=df_app_metrics.columns[:1],
    axis=1,
)

In [12]:
# Preview only the first rows; rendering the whole frame bloats the notebook.
df_app_metrics.head(10)

Unnamed: 0,AppName,inst_control,stall_constant_memory_dependency,gst_efficiency,inst_fp_64,local_store_transactions,stall_not_selected,global_hit_rate,stall_exec_dependency,shared_store_transactions,...,tex_fu_utilization,l2_write_throughput,l2_tex_write_throughput,atomic_transactions,gld_efficiency,sm_efficiency,l2_tex_write_hit_rate,shared_store_transactions_per_request,local_memory_overhead,flop_count_sp_fma
0,rodinia_b+tree,2.309659e-02,0.000236,0.225000,0.000000,0.000000,0.050379,0.684131,0.046910,0.000000,...,0.3,0.006123,0.006123,0.000000,0.631774,0.998709,9.937400e-01,0.000000,0.000000e+00,0.000000e+00
1,cudasdk_threadFenceReduction,2.686329e-04,0.007774,0.125000,0.000000,0.000000,0.009624,0.572213,0.034944,0.000101,...,0.2,0.000500,0.000241,0.000757,1.000000,0.928899,9.696970e-01,0.149817,0.000000e+00,0.000000e+00
2,cudasdk_convolutionFFT2D,1.908833e-03,0.360177,1.000000,0.000000,0.000000,0.139878,0.858320,0.299212,0.031246,...,0.3,0.310734,0.310734,0.000000,1.000000,0.995167,9.995885e-01,0.172893,0.000000e+00,2.685552e-03
3,shoc_lev1BFS,4.382805e-05,0.030554,0.154152,0.000000,0.000000,0.040641,0.822558,0.176929,0.000000,...,0.1,0.001057,0.001007,0.000000,0.133829,0.122310,9.000000e-01,0.000000,0.000000e+00,0.000000e+00
4,rodinia_lavaMD,2.613412e-01,0.000001,0.250000,1.000000,0.000000,0.000211,0.043188,1.000000,0.305451,...,0.1,0.006502,0.006502,0.000000,0.270166,0.997335,1.000000e+00,0.973673,0.000000e+00,0.000000e+00
5,poly_gemm,1.906970e-03,0.000079,1.000000,0.000000,0.000000,0.004901,0.667458,0.027770,0.000000,...,0.8,0.899023,0.899023,0.000000,0.825273,0.978193,1.000000e+00,0.000000,0.000000e+00,1.562500e-02
6,rodinia_hybridsort,9.688398e-03,0.002168,1.000000,0.000000,0.000000,0.032906,0.572213,0.444358,0.063761,...,0.3,0.344550,0.344543,0.096881,1.000000,0.994942,9.990244e-01,0.466314,0.000000e+00,3.417968e-03
7,cudasdk_MCEstimatePiInlineQ,1.696558e-04,0.001200,0.125000,0.000000,0.033385,0.000888,0.273338,0.085343,0.000139,...,0.1,0.283169,0.283169,0.000000,0.125000,0.971919,2.984118e-01,0.172893,2.752721e-01,3.492460e-05
8,cudasdk_MCEstimatePiInlineP,2.303844e-04,0.053062,0.232143,0.000000,0.020093,0.097135,0.831953,0.113852,0.000139,...,0.8,0.031818,0.031818,0.000000,0.250000,0.940444,9.587629e-01,0.172893,2.784494e-02,3.492460e-05
9,cudasdk_shflscan,8.838533e-04,0.119633,1.000000,0.000000,0.000000,0.067034,0.572213,0.331733,0.023229,...,0.3,0.444296,0.444284,0.000000,1.000000,0.981929,1.000000e+00,0.259021,0.000000e+00,0.000000e+00


In [13]:
# load the model from disk
# NOTE: unpickling can execute arbitrary code; only load a file that this
# notebook (or another trusted source) produced.
# The context manager closes the file handle, which the bare open() leaked.
with open('output_model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

### test 1

In [14]:
# Work on a copy so the reloaded frame stays untouched.
df_test1 = df_app_metrics.copy()

# Feature matrix: everything except the 1st column ("AppName").
df_X1 = df_test1.drop(
    labels=df_test1.columns[:1],
    axis=1,
)

# Predicted label for every row of the table.
loaded_model.predict(df_X1)

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1], dtype=int32)

### test 2

In [15]:
# Select the row(s) of a single application from a copy of the metrics table.
df_test2 = df_app_metrics.copy()
df_test2 = df_test2[df_test2['AppName'].eq('rodinia_b+tree')]

In [16]:
# Show the selected row(s) for 'rodinia_b+tree' before predicting on them.
df_test2

Unnamed: 0,AppName,inst_control,stall_constant_memory_dependency,gst_efficiency,inst_fp_64,local_store_transactions,stall_not_selected,global_hit_rate,stall_exec_dependency,shared_store_transactions,...,tex_fu_utilization,l2_write_throughput,l2_tex_write_throughput,atomic_transactions,gld_efficiency,sm_efficiency,l2_tex_write_hit_rate,shared_store_transactions_per_request,local_memory_overhead,flop_count_sp_fma
0,rodinia_b+tree,0.023097,0.000236,0.225,0.0,0.0,0.050379,0.684131,0.04691,0.0,...,0.3,0.006123,0.006123,0.0,0.631774,0.998709,0.99374,0.0,0.0,0.0


In [17]:
# Drop the "AppName" column (the 1st column) by label. errors='ignore' makes
# this cell safe to re-run: dropping "column 0" a second time would silently
# remove a real feature and break the prediction below.
df_test2 = df_test2.drop('AppName', axis=1, errors='ignore')

loaded_model.predict(df_test2)

array([1], dtype=int32)