## Import libraries

In [1]:
import numpy as np
import pandas as pd
import shap
import lime
import os
import pickle


from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn import svm

from sklearn import tree
import xgboost as xgb
from sklearn.datasets import make_regression
import sklearn.metrics as metrics
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score


import matplotlib.pyplot as plt

## Dataset Importing

In [3]:
df = pd.read_csv('/Users/saumenduroy/Documents/PhD Life/Explainability + SOAR/Explainers code/JM1_Single dataset and single file/Data/jm1.csv')
obj_list = df.columns[df.dtypes == 'object'].values.tolist()
for l in obj_list:
    df[l] = pd.to_numeric(df[l], errors='coerce')
df["defects"] = df["defects"].astype(int)
df.dropna(inplace=True)

In [4]:
df.dtypes

loc                  float64
v(g)                 float64
ev(g)                float64
iv(g)                float64
n                    float64
v                    float64
l                    float64
d                    float64
i                    float64
e                    float64
b                    float64
t                    float64
lOCode                 int64
lOComment              int64
lOBlank                int64
locCodeAndComment      int64
uniq_Op              float64
uniq_Opnd            float64
total_Op             float64
total_Opnd           float64
branchCount          float64
defects                int64
dtype: object

## Select a specific row or column from the dataset and dataset splitting

In [5]:
X = df.iloc[:, :-1]
y = df.iloc[:, -1] 

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

## Dataset Balancing

In [6]:
from imblearn.over_sampling import RandomOverSampler

In [7]:
from collections import Counter
oversample =  RandomOverSampler()
X_train,y_train = oversample.fit_resample(X_train,y_train)

In [8]:
Counter(y_train)

Counter({0: 6158, 1: 6158})

In [9]:
Counter(y_test)

Counter({0: 2619, 1: 645})

In [10]:
print("X_Train shape: ", X_train.shape)
print("Train label shape: ", y_train.shape)
print("X_Test shape: ", X_test.shape)
print("Test label shape: ", y_test.shape)

X_Train shape:  (12316, 21)
Train label shape:  (12316,)
X_Test shape:  (3264, 21)
Test label shape:  (3264,)


In [11]:
X_train

Unnamed: 0,loc,v(g),ev(g),iv(g),n,v,l,d,i,e,...,t,lOCode,lOComment,lOBlank,locCodeAndComment,uniq_Op,uniq_Opnd,total_Op,total_Opnd,branchCount
0,4.0,1.0,1.0,1.0,5.0,11.61,0.67,1.50,7.74,17.41,...,0.97,2,0,0,0,3.0,2.0,3.0,2.0,1.0
1,7.0,1.0,1.0,1.0,23.0,89.86,0.30,3.33,26.96,299.53,...,16.64,4,0,1,0,6.0,9.0,13.0,10.0,1.0
2,30.0,3.0,3.0,2.0,0.0,0.00,0.00,0.00,0.00,0.00,...,0.00,0,0,0,0,0.0,0.0,0.0,0.0,5.0
3,104.0,13.0,10.0,13.0,265.0,1640.30,0.04,27.14,60.44,44517.84,...,2473.21,74,25,3,0,23.0,50.0,147.0,118.0,25.0
4,17.0,3.0,1.0,3.0,37.0,162.52,0.09,10.67,15.24,1733.50,...,96.31,12,0,3,0,12.0,9.0,21.0,16.0,5.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12311,60.0,7.0,5.0,7.0,141.0,750.39,0.07,14.69,51.10,11019.64,...,612.20,41,8,9,0,13.0,27.0,80.0,61.0,13.0
12312,67.0,5.0,1.0,4.0,309.0,1725.75,0.03,32.04,53.85,55302.55,...,3072.36,52,1,8,0,15.0,32.0,167.0,140.0,9.0
12313,74.0,13.0,6.0,8.0,205.0,1105.43,0.03,29.24,37.81,32322.63,...,1795.70,50,12,10,0,17.0,25.0,119.0,86.0,25.0
12314,190.0,39.0,3.0,35.0,695.0,4757.62,0.03,30.71,154.91,146112.75,...,8117.38,155,0,33,0,18.0,97.0,364.0,331.0,77.0


In [12]:
df.columns

Index(['loc', 'v(g)', 'ev(g)', 'iv(g)', 'n', 'v', 'l', 'd', 'i', 'e', 'b', 't',
       'lOCode', 'lOComment', 'lOBlank', 'locCodeAndComment', 'uniq_Op',
       'uniq_Opnd', 'total_Op', 'total_Opnd', 'branchCount', 'defects'],
      dtype='object')

## Checking the null value

In [13]:
df.isnull().sum()

loc                  0
v(g)                 0
ev(g)                0
iv(g)                0
n                    0
v                    0
l                    0
d                    0
i                    0
e                    0
b                    0
t                    0
lOCode               0
lOComment            0
lOBlank              0
locCodeAndComment    0
uniq_Op              0
uniq_Opnd            0
total_Op             0
total_Opnd           0
branchCount          0
defects              0
dtype: int64

In [72]:
#our_rf_model = RandomForestClassifier(random_state=0)
#our_rf_model.fit(X_train, y_train) 

In [14]:
## For SVM data fitting
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
y_train = le.fit_transform(y_train)

## Create classifier object, Train, and Test 

In [15]:
# Create classifer object
our_rf_model = RandomForestClassifier()
our_dt_model = DecisionTreeClassifier()
our_lr_model = LogisticRegression()
our_mlp_model = MLPClassifier()
our_xgb_model = XGBClassifier()
our_svm_model = svm.SVC()

# Train Classifer
our_rf_model = our_rf_model.fit(X_train,y_train)
our_dt_model = our_dt_model.fit(X_train,y_train)
our_lr_model = our_lr_model.fit(X_train,y_train)
our_mlp_model = our_mlp_model.fit(X_train,y_train)
our_xgb_model = our_xgb_model.fit(X_train,y_train)
our_svm_model = our_svm_model.fit(X_train [:500], y_train[:500])

#Predict the response for test dataset

y_pred_our_rf_model = our_rf_model.predict(X_test)
y_pred_our_dt_model = our_dt_model.predict(X_test)
y_pred_our_lr_model = our_lr_model.predict(X_test)
y_pred_our_mlp_model = our_mlp_model.predict(X_test)
y_pred_our_xgb_model = our_xgb_model.predict(X_test)
y_pred_our_svm_model = our_svm_model.predict(X_test)

## Measuring Accuracy

In [16]:
# Model Accuracy, how often is the classifier correct?
print("Accuracy for Random Forest (RF):",metrics.accuracy_score(y_test, y_pred_our_rf_model))
print("Accuracy for Decission Tree (DT):",metrics.accuracy_score(y_test, y_pred_our_dt_model))
print("Accuracy for Logistic Regression (LR):",metrics.accuracy_score(y_test, y_pred_our_lr_model))
print("Accuracy for Multi-Layer Perceptron Neural Network (MLP):",metrics.accuracy_score(y_test, y_pred_our_mlp_model))
print("Accuracy for Gradient Boosting (XGB):",metrics.accuracy_score(y_test, y_pred_our_xgb_model))
print("Accuracy for Support Vector Machine (SVM):",metrics.accuracy_score(y_test, y_pred_our_svm_model))


Accuracy for Random Forest (RF): 0.7931985294117647
Accuracy for Decission Tree (DT): 0.7401960784313726
Accuracy for Logistic Regression (LR): 0.6387867647058824
Accuracy for Multi-Layer Perceptron Neural Network (MLP): 0.6878063725490197
Accuracy for Gradient Boosting (XGB): 0.7601102941176471
Accuracy for Support Vector Machine (SVM): 0.8017769607843137


## Precision, Recall, and f1 Score, and AUC for RF

In [17]:
print("Precision:",metrics.precision_score(y_test, y_pred_our_rf_model, average = "macro"))
print("Recall:",metrics.recall_score(y_test, y_pred_our_rf_model, average = "macro"))
print("f1 score:",metrics.f1_score(y_test,y_pred_our_rf_model, average = "macro"))

Precision: 0.6619503997093024
Recall: 0.6350844011117327
f1 score: 0.6454598602211982


In [18]:
from sklearn import metrics
u_value= list(y_test.unique())
all_auc = []
for i in u_value:
    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred_our_rf_model, pos_label=i)
    all_auc.append(metrics.auc(fpr, tpr))
print(all_auc)
print("AUC for RF:",sum(all_auc)/len(all_auc))

[0.36491559888826736, 0.6350844011117327]
AUC for RF: 0.5


In [19]:
list(y_test.unique())

[0, 1]

## Precision, Recall, and f1 Score, and AUC for DT

In [20]:
print("Precision:",metrics.precision_score(y_test, y_pred_our_dt_model, average = "macro"))
print("Recall:",metrics.recall_score(y_test, y_pred_our_dt_model, average = "macro"))
print("f1 score:",metrics.f1_score(y_test,y_pred_our_dt_model, average = "macro"))

Precision: 0.5958267885498476
Recall: 0.5991351216956587
f1 score: 0.5973746683424103


In [21]:
from sklearn import metrics
u_value= list(y_test.unique())
all_auc = []
for i in u_value:
    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred_our_dt_model, pos_label=i)
    all_auc.append(metrics.auc(fpr, tpr))
print(all_auc)
print("AUC for DT:",sum(all_auc)/len(all_auc))

[0.40086487830434125, 0.5991351216956586]
AUC for DT: 0.49999999999999994


## Precision, Recall, and f1 Score, and AUC for LR

In [22]:
print("Precision:",metrics.precision_score(y_test, y_pred_our_lr_model, average = "micro"))
print("Recall:",metrics.recall_score(y_test, y_pred_our_lr_model, average = "macro"))
print("f1 score:",metrics.f1_score(y_test,y_pred_our_lr_model, average = "macro"))

Precision: 0.6387867647058824
Recall: 0.6463722173384125
f1 score: 0.5784336719698184


In [23]:
from sklearn import metrics
u_value= list(y_test.unique())
all_auc = []
for i in u_value:
    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred_our_lr_model, pos_label=i)
    all_auc.append(metrics.auc(fpr, tpr))
print(all_auc)
print("AUC for LR:",sum(all_auc)/len(all_auc))

[0.35362778266158756, 0.6463722173384124]
AUC for LR: 0.5


## Precision, Recall, and f1 Score, and AUC for MLP

In [24]:
print("Precision:",metrics.precision_score(y_test, y_pred_our_mlp_model, average = "macro"))
print("Recall:",metrics.recall_score(y_test, y_pred_our_mlp_model, average = "macro"))
print("f1 score:",metrics.f1_score(y_test,y_pred_our_mlp_model, average = "macro"))

Precision: 0.5603846117020768
Recall: 0.5746690108953355
f1 score: 0.5628650522541978


In [25]:
from sklearn import metrics
u_value= list(y_test.unique())
all_auc = []
for i in u_value:
    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred_our_mlp_model, pos_label=i)
    all_auc.append(metrics.auc(fpr, tpr))
print(all_auc)
print("AUC for MLP:",sum(all_auc)/len(all_auc))

[0.4253309891046645, 0.5746690108953355]
AUC for MLP: 0.5


## Precision, Recall, and f1 Score, and AUC for XGB

In [26]:
print("Precision:",metrics.precision_score(y_test, y_pred_our_xgb_model, average = "macro"))
print("Recall:",metrics.recall_score(y_test, y_pred_our_xgb_model, average = "macro"))
print("f1 score:",metrics.f1_score(y_test,y_pred_our_xgb_model, average = "macro"))

Precision: 0.6338762690423934
Recall: 0.6466013124128684
f1 score: 0.6392975671718152


In [27]:
from sklearn import metrics
u_value= list(y_test.unique())
all_auc = []
for i in u_value:
    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred_our_xgb_model, pos_label=i)
    all_auc.append(metrics.auc(fpr, tpr))
print(all_auc)
print("AUC for XGB:",sum(all_auc)/len(all_auc))

[0.3533986875871316, 0.6466013124128684]
AUC for XGB: 0.5


## Precision, Recall, and f1 Score, and AUC for SVM

In [28]:
print("Precision:",metrics.precision_score(y_test, y_pred_our_svm_model, average = "micro"))
print("Recall:",metrics.recall_score(y_test, y_pred_our_svm_model, average = "micro"))
print("f1 score:",metrics.f1_score(y_test,y_pred_our_svm_model, average = "micro"))

Precision: 0.8017769607843137
Recall: 0.8017769607843137
f1 score: 0.8017769607843137


In [29]:
from sklearn import metrics
u_value= list(y_test.unique())
all_auc = []
for i in u_value:
    fpr, tpr, thresholds = metrics.roc_curve(y_test, y_pred_our_svm_model, pos_label=i)
    all_auc.append(metrics.auc(fpr, tpr))
print(all_auc)
print("AUC for SVM:",sum(all_auc)/len(all_auc))

[0.49979754388769015, 0.5002024561123098]
AUC for SVM: 0.5


## CV Score for RF

In [30]:
scores = cross_val_score(our_rf_model, X_train, y_train, cv=10)
print('Cross-Validation Accuracy Scores for RF', scores)

Cross-Validation Accuracy Scores for RF [0.93181818 0.90746753 0.94155844 0.93425325 0.94074675 0.93262987
 0.93663688 0.93988627 0.93744923 0.94394801]


## After CV: Minimum, Mean, and Maximum Value for RF

In [31]:
scores = pd.Series(scores)
scores.min(), scores.mean(), scores.max()

(0.9074675324675324, 0.9346394415900914, 0.9439480097481722)

In [32]:
print('Mean Accuracy Scores for RF', scores.mean())

Mean Accuracy Scores for RF 0.9346394415900914


## CV Score for DT

In [33]:
scores = cross_val_score(our_dt_model, X_train, y_train, cv=10)
print('Cross-Validation Accuracy Scores for DT', scores)

Cross-Validation Accuracy Scores for DT [0.87662338 0.87581169 0.91314935 0.89772727 0.90909091 0.90584416
 0.89358245 0.89926889 0.88464663 0.90901706]


## After CV: Minimum, Mean, and Maximum Value for DT

In [34]:
scores = pd.Series(scores)
scores.min(), scores.mean(), scores.max()

(0.8758116883116883, 0.8964761781678922, 0.9131493506493507)

In [35]:
print('Mean Accuracy Scores for DT', scores.mean())

Mean Accuracy Scores for DT 0.8964761781678922


## CV Score for LR

In [36]:
scores = cross_val_score(our_lr_model, X_train, y_train, cv=10)
print('Cross-Validation Accuracy Scores for LR', scores)

Cross-Validation Accuracy Scores for LR [0.63311688 0.5        0.55194805 0.53652597 0.66233766 0.5413961
 0.63119415 0.65150284 0.63444354 0.50040617]


lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
lbfgs failed to converge (status=1):
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


## After CV: Minimum, Mean, and Maximum Value for LR

In [37]:
scores = pd.Series(scores)
scores.min(), scores.mean(), scores.max()

(0.5, 0.5842871385316553, 0.6623376623376623)

In [38]:
print('Mean Accuracy Scores for LR', scores.mean())

Mean Accuracy Scores for LR 0.5842871385316553


## CV Score for MLP

In [39]:
scores = cross_val_score(our_mlp_model, X_train, y_train, cv=10)
print('Cross-Validation Accuracy Scores for MLP', scores)

Cross-Validation Accuracy Scores for MLP [0.61769481 0.64042208 0.57386364 0.52922078 0.625      0.50649351
 0.6320065  0.58082859 0.54995938 0.61575955]


## After CV: Minimum, Mean, and Maximum Value for MLP 

In [40]:
scores = pd.Series(scores)
scores.min(), scores.mean(), scores.max()

(0.5064935064935064, 0.5871248826315846, 0.640422077922078)

In [41]:
print('Mean Accuracy Scores for MLP', scores.mean())

Mean Accuracy Scores for MLP 0.5871248826315846


## CV Score for XGB

In [42]:
scores = cross_val_score(our_xgb_model, X_train, y_train, cv=10)
print('Cross-Validation Accuracy Scores for XGB', scores)

Cross-Validation Accuracy Scores for XGB [0.85551948 0.85551948 0.88717532 0.85064935 0.87662338 0.86850649
 0.87164907 0.86515028 0.86271324 0.87327376]


## After CV: Minimum, Mean, and Maximum Value for XGB

In [43]:
scores = pd.Series(scores)
scores.min(), scores.mean(), scores.max()

(0.8506493506493507, 0.86667798590524, 0.8871753246753247)

In [44]:
print('Mean Accuracy Scores for XGB', scores.mean())

Mean Accuracy Scores for XGB 0.86667798590524


## CV Score for SVM

In [45]:
scores = cross_val_score(our_svm_model, X_train, y_train, cv=10)
print('Cross-Validation Accuracy Scores for SVM', scores)

Cross-Validation Accuracy Scores for SVM [0.56899351 0.58035714 0.56412338 0.56899351 0.57142857 0.56493506
 0.5792039  0.58651503 0.58082859 0.56701868]


## After CV: Minimum, Mean, and Maximum Value for SVM

In [46]:
scores = pd.Series(scores)
scores.min(), scores.mean(), scores.max()

(0.5641233766233766, 0.5732397375167481, 0.586515028432169)

In [47]:
print('Mean Accuracy Scores for XGB', scores.mean())

Mean Accuracy Scores for XGB 0.5732397375167481
