In [20]:

import pandas as pd
import numpy as np

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from catboost import CatBoostClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.model_selection import train_test_split

from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, roc_auc_score

import warnings
warnings.filterwarnings("ignore")

# Load data
opcodeDataset = pd.read_csv(r'feature/opcodeFeature.csv',)
accountDataSet = pd.read_csv(r'feature/accountFeature.csv',)

# Merge dataset
dataset = pd.merge(opcodeDataset,accountDataSet,on='addr')

# Split data into train and test sets
X = dataset.iloc[:,1:92]
Y = dataset.iloc[:,92]
X_train,X_test,y_train,y_test=train_test_split(X,Y,test_size=0.33,random_state=7)

# Create a model dictionary
models = {"Logistic Regression": LogisticRegression(),
          "K-Nearest Neighbors": KNeighborsClassifier(),
          "Support Vector Machine": SVC(probability=True),
          "Decision Tree": DecisionTreeClassifier(),
          "Random Forest": RandomForestClassifier(),
          "Ada Boost": AdaBoostClassifier(),
          "XGBoost": XGBClassifier(),
          "LightGBM": LGBMClassifier(),
          "CatBoost": CatBoostClassifier(verbose=0),
          "Neural Network": MLPClassifier()
         }

# Fit the models
for name, model in models.items():
    model.fit(X_train, y_train)

[LightGBM] [Info] Number of positive: 140, number of negative: 2401
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.015783 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 8357
[LightGBM] [Info] Number of data points in the train set: 2541, number of used features: 91
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.055096 -> initscore=-2.841998
[LightGBM] [Info] Start training from score -2.841998


In [22]:
# Accuracy
print("Accuracy")
for name, model in models.items():
    print(name + ": {:.3f}".format(accuracy_score(y_test, model.predict(X_test))))

Accuracy
Logistic Regression: 0.950
K-Nearest Neighbors: 0.958
Support Vector Machine: 0.950
Decision Tree: 0.968
Random Forest: 0.984
Ada Boost: 0.970
XGBoost: 0.982
LightGBM: 0.983
CatBoost: 0.981
Neural Network: 0.937


In [23]:
# Precision
print("Precision")
for name, model in models.items():
    print(name + ": {:.3f}".format(precision_score(y_test, model.predict(X_test))))

Precision
Logistic Regression: 0.000
K-Nearest Neighbors: 0.634
Support Vector Machine: 0.000
Decision Tree: 0.672
Random Forest: 0.957
Ada Boost: 0.778
XGBoost: 0.956
LightGBM: 0.977
CatBoost: 0.915
Neural Network: 0.395


In [24]:
# Recall
print("Recall")
for name, model in models.items():
    print(name + ": {:.3f}".format(recall_score(y_test, model.predict(X_test))))

Recall
Logistic Regression: 0.000
K-Nearest Neighbors: 0.413
Support Vector Machine: 0.000
Decision Tree: 0.714
Random Forest: 0.714
Ada Boost: 0.556
XGBoost: 0.683
LightGBM: 0.683
CatBoost: 0.683
Neural Network: 0.476


In [25]:
# F1-score
print("F1-Score")
for name, model in models.items():
    print(name + ": {:.3f}".format(f1_score(y_test, model.predict(X_test))))

F1-Score
Logistic Regression: 0.000
K-Nearest Neighbors: 0.500
Support Vector Machine: 0.000
Decision Tree: 0.692
Random Forest: 0.818
Ada Boost: 0.648
XGBoost: 0.796
LightGBM: 0.804
CatBoost: 0.782
Neural Network: 0.432


In [26]:
# AUC Score
print("AUC Score")
for name, model in models.items():
    print(name + ": {:.3f}".format(roc_auc_score(y_test,model.predict_proba(X_test)[:,1])))

AUC Score
Logistic Regression: 0.441
K-Nearest Neighbors: 0.772
Support Vector Machine: 0.561
Decision Tree: 0.848
Random Forest: 0.947
Ada Boost: 0.931
XGBoost: 0.938
LightGBM: 0.952
CatBoost: 0.940
Neural Network: 0.632
