# SGEMM GPU kernel performance (Multinomial)

In [1]:
#importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from sklearn.impute import SimpleImputer

#Importing Concrete Compressive Strength Dataset
dataset = pd.read_csv('C:/Users/Manan/Desktop/Fall 2019/Machine Learning COMP6321/Project/Machine-Learning-Project/regression-models/SGEMM GPU kernel performance/data/sgemm_product.csv',sep=",",header=1)
data = pd.DataFrame(dataset)

X = data.iloc[:, :14].values
Y = data.iloc[:, 14:].values
# Has 4 dependant variables; so taking mean of them
Y = Y.mean(axis=1)
print(data.shape)
print(X.shape)
print(Y.shape)

(241599, 18)
(241599, 14)
(241599,)


In [2]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 241599 entries, 0 to 241598
Data columns (total 18 columns):
16        241599 non-null int64
16.1      241599 non-null int64
16.2      241599 non-null int64
8         241599 non-null int64
8.1       241599 non-null int64
8.2       241599 non-null int64
8.3       241599 non-null int64
2         241599 non-null int64
1         241599 non-null int64
1.1       241599 non-null int64
0         241599 non-null int64
0.1       241599 non-null int64
0.2       241599 non-null int64
0.3       241599 non-null int64
115.26    241599 non-null float64
115.87    241599 non-null float64
118.55    241599 non-null float64
115.8     241599 non-null float64
dtypes: float64(4), int64(14)
memory usage: 33.2 MB


In [10]:
# Check if dataset has null values
result = dataset.isna()
print("-> Contains Missing values                 ",end='')
print(result.values.any())
print("-> Total Number of Missing values:         ",end='')
print(result.sum().sum())
print("-> Number of Missing values by column:     ",end='')
print(result.sum())

-> Contains Missing values                 False
-> Total Number of Missing values:         0
-> Number of Missing values by column:     16        0
16.1      0
16.2      0
8         0
8.1       0
8.2       0
8.3       0
2         0
1         0
1.1       0
0         0
0.1       0
0.2       0
0.3       0
115.26    0
115.87    0
118.55    0
115.8     0
dtype: int64


In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.33, random_state = 0)

In [None]:
#Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler().fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

In [None]:
# Model Accuracy
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import r2_score
from sklearn.gaussian_process.kernels import DotProduct, WhiteKernel

#Defining kernel for GaussianProcessRegressor
#kernel=None would take less time to train, but would give less accuracy
kernel = DotProduct() + WhiteKernel()

names = ['SVR', 'DecisionTreeRegressor', 'RandomForestRegressor', 'AdaBoostRegressor','GaussianProcessRegressor','LinearRegression','MLPRegressor']
models = [SVR(gamma='scale', C=1.0, epsilon=0.2),
          DecisionTreeRegressor(min_samples_split=5, max_leaf_nodes=10, max_depth=4),
          RandomForestRegressor(max_depth=2, random_state=0,n_estimators=100),
          AdaBoostRegressor(random_state=0, n_estimators=100),
          GaussianProcessRegressor(kernel=kernel,random_state=0),
          LinearRegression(),
          MLPRegressor(hidden_layer_sizes=(100, ), activation='relu', solver='adam')]

accuracy= np.zeros(7)

for counter, model in enumerate(models):
    model.fit(X_train, Y_train)
    Y_pred=model.predict(X_test)
    accuracy[counter] = r2_score(Y_test, Y_pred)*100
    print("Accuracy for " + names[counter] + ":",accuracy[counter])

In [None]:
#Comparision graph between all models
import seaborn as sns
y_pos = np.arange(len(names))
heights = [accuracy[0],accuracy[1],accuracy[2],accuracy[3],accuracy[4],accuracy[5],accuracy[6]]

fig, ax=plt.subplots(1,1,figsize=(12,6))

plt.xticks(rotation='90')
sns.barplot(x=names, y=heights)
plt.ylabel('accuracy score')
plt.title('Wine Quality Dataset model accuracy')