In [1]:
import numpy as np
import pandas as pd
import seaborn as sns # libreria utile per matrice di confusione
# import plotly.express as px
from collections import Counter
from matplotlib import pyplot as plt
from sklearn import tree
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, fbeta_score, make_scorer, confusion_matrix, precision_recall_curve
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
#import shap
import lightgbm as lgb
import glob
from sklearn.feature_selection import RFE

In [2]:
plt.rcParams["mathtext.fontset"]

'dejavusans'

In [3]:
plt.rcParams.update({'font.size': 28, 'font.family': 'STIXGeneral', 'mathtext.fontset': 'stix'})

In [4]:
from sklearn.metrics import roc_curve, roc_auc_score

def plot_roc_curve(fpr, tpr, auc_score,label=None):
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.plot([0, 1], [0, 1], 'k--') # dashed diagonal
    plt.axis([0, 1, 0, 1])
    plt.xlabel('False Positive Rate (Fall-Out)', fontsize=16)
    plt.ylabel('True Positive Rate (Recall)', fontsize=16)
    plt.title('AUC Score: {}'.format(auc_score))
    plt.grid(True)
    plt.show()

In [5]:
def best_classifiers_cv(df_cv,data,X_train,y_train,X_test,y_test):
    df_cv['classifier'] = df_cv.param_classifier.apply(lambda x: x.__str__().split('(')[0])
    df_cv['ranking_by_classifier'] = df_cv.sort_values(by='rank_test_score').groupby(['classifier'])['rank_test_score'].cumcount() + 1
    for row in df_cv[df_cv['ranking_by_classifier']==1].iterrows():
        cls = row[1]['param_classifier']
        name_classifier = row[1]['classifier']
        print(row[1]['classifier'])
        cls.fit(X_train,y_train.ravel())
        y_pred_train = cls.predict(X_train)
        data['precision_training_{}'.format(name_classifier)] = [precision_score(y_train, y_pred_train)] # TP / (FP + TP)
        data['recall_training_{}'.format(name_classifier)] = [recall_score(y_train, y_pred_train)]	     # TP / (FN + TP)
        data['accuracy_training_{}'.format(name_classifier)] = [accuracy_score(y_train, y_pred_train)]	 # (TP + TN)/ (TP + FN + TN + FP)
        data['f1_training_{}'.format(name_classifier)] = [f1_score(y_train, y_pred_train)]

        # Confusion matrix
        y_pred = cls.predict(X_test)
        data['precision_test_{}'.format(name_classifier)] = [precision_score(y_test, y_pred)] # TP / (FP + TP)
        data['recall_test_{}'.format(name_classifier)] = [recall_score(y_test, y_pred)]	     # TP / (FN + TP)
        data['accuracy_test_{}'.format(name_classifier)] = [accuracy_score(y_test, y_pred)]	 # (TP + TN)/ (TP + FN + TN + FP)
        data['f1_test_{}'.format(name_classifier)] = [f1_score(y_test, y_pred)]

        y_pred_proba = cls.predict_proba(X_test)[:, 1]

        fpr, tpr, thresholds = roc_curve(y_test,y_pred_proba)
        auc_score = roc_auc_score(y_test,y_pred_proba)

        data['auc_score_test_{}'.format(name_classifier)] = auc_score

In [6]:
transaction_frequency_opcode = ['address', 'balance', 'lifetime', 'tx_in', 'tx_out', 'investment_in',
 'payment_out', 'investment_to_contract/tx_in', 'payment_from_contract/tx_out',
 '#addresses_paying_contract', '#addresses_paid_by_contract', 'mean_v1', 'sdev_v1', 'mean_v2', 'sdev_v2', 'paid_rate',
 'paid_one', 'percentage_some_tx_in', 'sdev_tx_in', 'percentage_some_tx_out', 'sdev_tx_out', 'owner_gets_eth_Wo_investing',
 'owner_gets_eth_investing', 'owner_no_eth', 'PUSH', 'DUP', 'JUMPDEST', 'STOP', 'MSTORE', 'JUMPI', 'REVERT', 'CALLVALUE',
 'ISZERO', 'CODECOPY', 'RETURN', 'LOG', 'SHA3', 'MSTORE8', 'SWAP', 'POP', 'ADD', 'MLOAD', 'AND', 'SUB', 'CALLDATALOAD', 'EXP',
 'MUL', 'SLOAD', 'EQ', 'JUMP', 'DIV', 'CALLER', 'CALLDATACOPY', 'SSTORE', 'NOT', 'CALL', 'LT', 'GT', 'OR', 'ADDRESS', 'TIMESTAMP',
 'GASLIMIT', 'GAS', 'ORIGIN', 'BALANCE', 'CALLDATASIZE', 'SAR', 'MSIZE', 'CODESIZE', 'COINBASE', 'CREATE2', 'EXTCODESIZE', 'CALLCODE', 'SHL',
 'BLOCKHASH', 'RETURNDATASIZE', 'SHR', 'GETPC', 'DELEGATECALL', 'MOD', 'ADDMOD', 'NUMBER', 'XOR', 'SLT', 'EXTCODECOPY', 'MULMOD', 'CREATE', 'SELFDESTRUCT',
 'STATICCALL', 'RETURNDATACOPY', 'SGT', 'DIFFICULTY', 'SMOD', 'BYTE', 'SIGNEXTEND', 'CHAINID', 'SELFBALANCE', 'GASPRICE', 'EXTCODEHASH', 'SDIV', 'target']

In [7]:
only_opcode = [ 'address','PUSH', 'DUP', 'JUMPDEST', 'STOP', 'MSTORE', 'JUMPI', 'REVERT', 'CALLVALUE', 'ISZERO', 'CODECOPY', 'RETURN', 'LOG',
 'SHA3', 'MSTORE8', 'SWAP', 'POP', 'ADD', 'MLOAD', 'AND', 'SUB', 'CALLDATALOAD', 'EXP', 'MUL', 'SLOAD', 'EQ', 'JUMP', 'DIV', 'CALLER', 'CALLDATACOPY',
 'SSTORE', 'NOT', 'CALL', 'LT', 'GT', 'OR', 'ADDRESS', 'TIMESTAMP', 'GASLIMIT', 'GAS', 'ORIGIN', 'BALANCE', 'CALLDATASIZE', 'SAR', 'MSIZE', 'CODESIZE',
 'COINBASE', 'CREATE2', 'EXTCODESIZE', 'CALLCODE', 'SHL', 'BLOCKHASH', 'RETURNDATASIZE', 'SHR', 'GETPC', 'DELEGATECALL', 'MOD', 'ADDMOD', 'NUMBER', 'XOR',
 'SLT', 'EXTCODECOPY', 'MULMOD', 'CREATE', 'SELFDESTRUCT', 'STATICCALL', 'RETURNDATACOPY', 'SGT', 'DIFFICULTY', 'SMOD', 'BYTE', 'SIGNEXTEND', 'CHAINID',
 'SELFBALANCE', 'GASPRICE', 'EXTCODEHASH', 'SDIV',
 'target']

In [8]:
pd.set_option('display.max_columns', None)

In [9]:
ds_full_path= (r"C:\Users\lucap\OneDrive\Desktop\Scam on blockchain\Smart Ponzi\DS_full_bytecode.csv")
db = pd.read_csv(ds_full_path, header = 0, delimiter = ',')
db = db.fillna(0)
db

Unnamed: 0,address,balance,lifetime,tx_in,tx_out,investment_in,payment_out,investment_to_contract/tx_in,payment_from_contract/tx_out,#addresses_paying_contract,#addresses_paid_by_contract,mean_v1,sdev_v1,mean_v2,sdev_v2,paid_rate,paid_one,percentage_some_tx_in,sdev_tx_in,percentage_some_tx_out,sdev_tx_out,owner_gets_eth_Wo_investing,owner_gets_eth_investing,owner_no_eth,PUSH,DUP,JUMPDEST,STOP,MSTORE,JUMPI,REVERT,CALLVALUE,ISZERO,CODECOPY,RETURN,LOG,SHA3,MSTORE8,SWAP,POP,ADD,MLOAD,AND,SUB,CALLDATALOAD,EXP,MUL,SLOAD,EQ,JUMP,DIV,CALLER,CALLDATACOPY,SSTORE,NOT,CALL,LT,GT,OR,ADDRESS,TIMESTAMP,GASLIMIT,GAS,ORIGIN,BALANCE,CALLDATASIZE,SAR,MSIZE,CODESIZE,COINBASE,CREATE2,EXTCODESIZE,CALLCODE,SHL,BLOCKHASH,RETURNDATASIZE,SHR,GETPC,DELEGATECALL,MOD,ADDMOD,NUMBER,XOR,SLT,EXTCODECOPY,MULMOD,CREATE,SELFDESTRUCT,STATICCALL,RETURNDATACOPY,SGT,DIFFICULTY,SMOD,BYTE,SIGNEXTEND,CHAINID,SELFBALANCE,GASPRICE,EXTCODEHASH,SDIV,target
0,0x0006157838d5a6b33ab66588a6a693a57c869999,0.000691,117,2,0,1,0,0.5000,0.0,1,0,-1.000000,0.000000,-0.000691,0.000000,0.0000,0.0,0.0171,0.000000,0.0000,0.000000,0,0,1,13.0,3.0,3.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0x001a589dda0d6be37632925eaf1256986b2c6ad0,29.408568,318,11346,4,9685,4,0.8536,1.0,39,1,-242.025000,509.123977,-0.735203,7.530951,0.0004,0.0,0.5252,92.723961,0.0126,0.000000,0,0,1,1040.0,1126.0,90.0,2.0,166.0,86.0,0.0,4.0,71.0,5.0,3.0,3.0,33.0,0.0,913.0,468.0,345.0,105.0,97.0,92.0,91.0,79.0,67.0,60.0,54.0,52.0,45.0,41.0,34.0,21.0,20.0,15.0,15.0,9.0,6.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0x00674045bb7c17f0aa1cde34780d6c51af548728,151.824186,1366,29,12,2,12,0.0690,1.0,2,7,1.111111,1.448712,-16.869354,765.672046,6.0000,0.0,0.0124,0.455645,0.0088,0.000000,0,0,1,1175.0,720.0,340.0,151.0,159.0,165.0,49.0,29.0,153.0,2.0,19.0,13.0,58.0,0.0,547.0,199.0,193.0,109.0,114.0,109.0,28.0,78.0,33.0,112.0,34.0,110.0,17.0,12.0,1.0,33.0,21.0,1.0,40.0,20.0,12.0,5.0,3.0,0.0,1.0,0.0,0.0,1.0,0.0,8.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0x006bea43baa3f7a6f765f14f10a1a1b08334ef45,0.000000,21,9708,0,0,0,0.0000,0.0,0,0,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,1.0476,236.751919,0.0000,0.000000,0,0,1,826.0,570.0,216.0,8.0,133.0,103.0,14.0,21.0,105.0,5.0,15.0,12.0,31.0,0.0,464.0,127.0,118.0,95.0,123.0,112.0,20.0,67.0,15.0,55.0,32.0,79.0,16.0,15.0,0.0,28.0,21.0,2.0,7.0,13.0,9.0,7.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0x00a9f7d093c46d95f0318e4a6ffc6ed68f73044c,0.000000,0,78,1,76,1,0.9744,1.0,65,1,-1.136364,0.488857,0.000000,97.843007,0.0132,0.0,1.0000,0.000000,1.0000,0.000000,0,0,1,660.0,353.0,188.0,211.0,60.0,102.0,18.0,31.0,107.0,2.0,19.0,7.0,5.0,0.0,311.0,76.0,87.0,86.0,79.0,74.0,3.0,44.0,15.0,72.0,28.0,67.0,17.0,14.0,0.0,29.0,13.0,8.0,9.0,8.0,9.0,3.0,3.0,0.0,6.0,1.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,6.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7441,0xfd784da5c740c617aafb80399fa81b86e1da99a5,0.000000,461,9964,0,0,0,0.0000,0.0,0,0,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.9718,19.446353,0.0000,0.000000,0,0,1,422.0,231.0,94.0,8.0,65.0,42.0,6.0,14.0,37.0,1.0,8.0,4.0,21.0,0.0,178.0,95.0,81.0,30.0,84.0,21.0,12.0,10.0,6.0,18.0,20.0,34.0,6.0,12.0,0.0,11.0,5.0,0.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7442,0xfd7e33bc01a7493b189ddfdcc047500463be573a,0.000000,1,3,0,0,0,0.0000,0.0,0,0,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,2.0000,0.500000,0.0000,0.000000,0,0,1,549.0,352.0,92.0,3.0,84.0,56.0,27.0,17.0,43.0,1.0,6.0,7.0,23.0,0.0,257.0,68.0,69.0,46.0,86.0,68.0,18.0,44.0,13.0,34.0,19.0,50.0,12.0,12.0,1.0,18.0,7.0,2.0,7.0,4.0,5.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7443,0xfd82ffc0d08dcfd902cbf6e48550033b01f919cc,0.000000,48,8,2,2,2,0.2500,1.0,2,1,0.000000,1.414214,0.000000,0.350660,1.0000,0.0,0.1458,0.349927,0.0417,0.000000,0,0,1,406.0,200.0,136.0,10.0,29.0,58.0,25.0,21.0,49.0,1.0,11.0,2.0,5.0,0.0,139.0,55.0,41.0,33.0,45.0,42.0,9.0,23.0,6.0,35.0,23.0,56.0,7.0,8.0,0.0,21.0,6.0,1.0,8.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7444,0xfd9683e9f2c62e08b6bf68123e18e527efa8fbbc,0.000000,1483,6,2,2,2,0.3333,1.0,2,2,0.000000,0.000000,0.000000,0.500000,1.0000,1.0,0.0027,0.866025,0.0013,0.000000,0,1,0,1593.0,719.0,414.0,37.0,166.0,183.0,84.0,38.0,178.0,2.0,31.0,15.0,45.0,0.0,583.0,181.0,94.0,125.0,243.0,171.0,23.0,158.0,18.0,120.0,68.0,144.0,41.0,39.0,0.0,41.0,23.0,6.0,9.0,41.0,20.0,1.0,14.0,1.0,3.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# Recall Optimization

In [10]:
def get_grid_search_recall(dataset,name):
    data = {}
    data['test'] = name
    X = dataset.iloc[:, 1:-1].values # rimuovo la prima colonna che contiene gli indirizzi
    y = dataset.loc[:, ['target']].values


    print(dataset.target.value_counts())
    print(X.shape,y.shape,dataset.columns)

    # divido in train set (TR) e test set (TS)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state = 42, stratify=y)

    flat_list_train = [item for sublist in y_train.tolist() for item in sublist]
    print('Number of smart contract in train set::', flat_list_train.count(0))
    print('Number of smart Ponzi in train set::', flat_list_train.count(1))
    flat_list_test = [item for sublist in y_test.tolist() for item in sublist]
    print('Number of smart contract in test set::', flat_list_test.count(0))
    print('Number of smart Ponzi in test set::', flat_list_test.count(1))
    pipeline = Pipeline(steps=[('classifier',RandomForestClassifier(random_state=42))])

    params =[
      {
          'classifier': [DecisionTreeClassifier(random_state=42)],
          'classifier__criterion' : ['entropy', 'gini'],
          'classifier__max_depth' : [5, 9],
          'classifier__min_samples_split' : [5, 15], # il valore tipico sta fra 1 e 40
          'classifier__max_features' : range(6,X_train.shape[1],3),
         'classifier__class_weight' : [{0:1, 1:4}, {0:1, 1:4.5}] # pesi, il valore raccomandato e' sum(negative instances)/sum(positive instances)
     },

     {
         'classifier': [RandomForestClassifier(random_state=42)],
         'classifier__n_estimators':[150, 200, 250],
         'classifier__min_samples_split' : [5,15],
         'classifier__criterion':['gini', 'entropy'],
         'classifier__class_weight' : [{0:1, 1:5.5}],
         'classifier__bootstrap': [True, False]
         },

      {
          'classifier': [lgb.LGBMClassifier(boosting_type='gbdt', n_jobs=4, importance_type='split',random_state=42)],
          'classifier__learning_rate' : [0.1,0.01],
          'classifier__n_estimators' : [80, 100, 120, 140],
          'classifier__max_depth' : [10,15,20],
          'classifier__colsample_bytree' : [0.5,0.8,1],
          'classifier__reg_alpha' : [0, 0.1, 0.2],
          'classifier__reg_lambda' : [1,10,15]
      }

         ]
    grid_search = GridSearchCV(pipeline, params, cv=10,
                               scoring='recall',
                               return_train_score=True,n_jobs=4,verbose=2)
    grid_search.fit(X_train, y_train.ravel())
    df_cv = pd.DataFrame(grid_search.cv_results_)
    best_classifiers_cv(df_cv,data,X_train,y_train,X_test,y_test)

    data['best hyperparameters'] = [grid_search.best_params_]
    data['best score'] = [grid_search.best_score_]

    y_pred_train = grid_search.best_estimator_.predict(X_train)
    data['best_precision_training'] = [precision_score(y_train, y_pred_train)] # TP / (FP + TP)
    data['best_recall_training'] = [recall_score(y_train, y_pred_train)]	     # TP / (FN + TP)
    data['best_accuracy_training'] = [accuracy_score(y_train, y_pred_train)]	 # (TP + TN)/ (TP + FN + TN + FP)
    data['best_f1_training'] = [f1_score(y_train, y_pred_train)]

    # Confusion matrix
    y_pred = grid_search.best_estimator_.predict(X_test)
    data['best_precision_test'] = [precision_score(y_test, y_pred)] # TP / (FP + TP)
    data['best_recall_test'] = [recall_score(y_test, y_pred)]	     # TP / (FN + TP)
    data['best_accuracy_test'] = [accuracy_score(y_test, y_pred)]	 # (TP + TN)/ (TP + FN + TN + FP)
    data['best_f1_test'] = [f1_score(y_test, y_pred)]
    return grid_search,data

df_transaction_frequency_opcode

In [11]:
df_trans_freq_dpl_opcode =  db[transaction_frequency_opcode].copy()
df_trans_freq_dpl_opcode = df_trans_freq_dpl_opcode.drop_duplicates(subset='address')

In [12]:
grid_search_transaction_frequency_opcode,data_transaction_frequency_opcode = get_grid_search_recall(df_trans_freq_dpl_opcode,'trans_freq_dpl_opcode')

target
0.0    6566
1.0     880
Name: count, dtype: int64
(7446, 99) (7446, 1) Index(['address', 'balance', 'lifetime', 'tx_in', 'tx_out', 'investment_in',
       'payment_out', 'investment_to_contract/tx_in',
       'payment_from_contract/tx_out', '#addresses_paying_contract',
       ...
       'DIFFICULTY', 'SMOD', 'BYTE', 'SIGNEXTEND', 'CHAINID', 'SELFBALANCE',
       'GASPRICE', 'EXTCODEHASH', 'SDIV', 'target'],
      dtype='object', length=101)
Number of smart contract in train set:: 5581
Number of smart Ponzi in train set:: 748
Number of smart contract in test set:: 985
Number of smart Ponzi in test set:: 132
Fitting 10 folds for each of 1168 candidates, totalling 11680 fits


DecisionTreeClassifier
RandomForestClassifier
LGBMClassifier


In [13]:
df_results_depl = pd.DataFrame(data_transaction_frequency_opcode)
df_results_depl

Unnamed: 0,test,precision_training_DecisionTreeClassifier,recall_training_DecisionTreeClassifier,accuracy_training_DecisionTreeClassifier,f1_training_DecisionTreeClassifier,precision_test_DecisionTreeClassifier,recall_test_DecisionTreeClassifier,accuracy_test_DecisionTreeClassifier,f1_test_DecisionTreeClassifier,auc_score_test_DecisionTreeClassifier,precision_training_RandomForestClassifier,recall_training_RandomForestClassifier,accuracy_training_RandomForestClassifier,f1_training_RandomForestClassifier,precision_test_RandomForestClassifier,recall_test_RandomForestClassifier,accuracy_test_RandomForestClassifier,f1_test_RandomForestClassifier,auc_score_test_RandomForestClassifier,precision_training_LGBMClassifier,recall_training_LGBMClassifier,accuracy_training_LGBMClassifier,f1_training_LGBMClassifier,precision_test_LGBMClassifier,recall_test_LGBMClassifier,accuracy_test_LGBMClassifier,f1_test_LGBMClassifier,auc_score_test_LGBMClassifier,best hyperparameters,best score,best_precision_training,best_recall_training,best_accuracy_training,best_f1_training,best_precision_test,best_recall_test,best_accuracy_test,best_f1_test
0,trans_freq_dpl_opcode,0.688385,0.974599,0.944857,0.806862,0.542714,0.818182,0.897046,0.652568,0.888498,1.0,1.0,1.0,1.0,0.94898,0.704545,0.960609,0.808696,0.970378,0.997333,1.0,0.999684,0.998665,0.924528,0.742424,0.962399,0.823529,0.973404,{'classifier': DecisionTreeClassifier(class_we...,0.816937,0.688385,0.974599,0.944857,0.806862,0.542714,0.818182,0.897046,0.652568


df_only_opcode

In [11]:
df_only_opcode = db[only_opcode].copy()

In [12]:
df_only_opcode

Unnamed: 0,address,PUSH,DUP,JUMPDEST,STOP,MSTORE,JUMPI,REVERT,CALLVALUE,ISZERO,CODECOPY,RETURN,LOG,SHA3,MSTORE8,SWAP,POP,ADD,MLOAD,AND,SUB,CALLDATALOAD,EXP,MUL,SLOAD,EQ,JUMP,DIV,CALLER,CALLDATACOPY,SSTORE,NOT,CALL,LT,GT,OR,ADDRESS,TIMESTAMP,GASLIMIT,GAS,ORIGIN,BALANCE,CALLDATASIZE,SAR,MSIZE,CODESIZE,COINBASE,CREATE2,EXTCODESIZE,CALLCODE,SHL,BLOCKHASH,RETURNDATASIZE,SHR,GETPC,DELEGATECALL,MOD,ADDMOD,NUMBER,XOR,SLT,EXTCODECOPY,MULMOD,CREATE,SELFDESTRUCT,STATICCALL,RETURNDATACOPY,SGT,DIFFICULTY,SMOD,BYTE,SIGNEXTEND,CHAINID,SELFBALANCE,GASPRICE,EXTCODEHASH,SDIV,target
0,0x0006157838d5a6b33ab66588a6a693a57c869999,13.0,3.0,3.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0x001a589dda0d6be37632925eaf1256986b2c6ad0,1040.0,1126.0,90.0,2.0,166.0,86.0,0.0,4.0,71.0,5.0,3.0,3.0,33.0,0.0,913.0,468.0,345.0,105.0,97.0,92.0,91.0,79.0,67.0,60.0,54.0,52.0,45.0,41.0,34.0,21.0,20.0,15.0,15.0,9.0,6.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0x00674045bb7c17f0aa1cde34780d6c51af548728,1175.0,720.0,340.0,151.0,159.0,165.0,49.0,29.0,153.0,2.0,19.0,13.0,58.0,0.0,547.0,199.0,193.0,109.0,114.0,109.0,28.0,78.0,33.0,112.0,34.0,110.0,17.0,12.0,1.0,33.0,21.0,1.0,40.0,20.0,12.0,5.0,3.0,0.0,1.0,0.0,0.0,1.0,0.0,8.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0x006bea43baa3f7a6f765f14f10a1a1b08334ef45,826.0,570.0,216.0,8.0,133.0,103.0,14.0,21.0,105.0,5.0,15.0,12.0,31.0,0.0,464.0,127.0,118.0,95.0,123.0,112.0,20.0,67.0,15.0,55.0,32.0,79.0,16.0,15.0,0.0,28.0,21.0,2.0,7.0,13.0,9.0,7.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0x00a9f7d093c46d95f0318e4a6ffc6ed68f73044c,660.0,353.0,188.0,211.0,60.0,102.0,18.0,31.0,107.0,2.0,19.0,7.0,5.0,0.0,311.0,76.0,87.0,86.0,79.0,74.0,3.0,44.0,15.0,72.0,28.0,67.0,17.0,14.0,0.0,29.0,13.0,8.0,9.0,8.0,9.0,3.0,3.0,0.0,6.0,1.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,6.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7441,0xfd784da5c740c617aafb80399fa81b86e1da99a5,422.0,231.0,94.0,8.0,65.0,42.0,6.0,14.0,37.0,1.0,8.0,4.0,21.0,0.0,178.0,95.0,81.0,30.0,84.0,21.0,12.0,10.0,6.0,18.0,20.0,34.0,6.0,12.0,0.0,11.0,5.0,0.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7442,0xfd7e33bc01a7493b189ddfdcc047500463be573a,549.0,352.0,92.0,3.0,84.0,56.0,27.0,17.0,43.0,1.0,6.0,7.0,23.0,0.0,257.0,68.0,69.0,46.0,86.0,68.0,18.0,44.0,13.0,34.0,19.0,50.0,12.0,12.0,1.0,18.0,7.0,2.0,7.0,4.0,5.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7443,0xfd82ffc0d08dcfd902cbf6e48550033b01f919cc,406.0,200.0,136.0,10.0,29.0,58.0,25.0,21.0,49.0,1.0,11.0,2.0,5.0,0.0,139.0,55.0,41.0,33.0,45.0,42.0,9.0,23.0,6.0,35.0,23.0,56.0,7.0,8.0,0.0,21.0,6.0,1.0,8.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7444,0xfd9683e9f2c62e08b6bf68123e18e527efa8fbbc,1593.0,719.0,414.0,37.0,166.0,183.0,84.0,38.0,178.0,2.0,31.0,15.0,45.0,0.0,583.0,181.0,94.0,125.0,243.0,171.0,23.0,158.0,18.0,120.0,68.0,144.0,41.0,39.0,0.0,41.0,23.0,6.0,9.0,41.0,20.0,1.0,14.0,1.0,3.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
subset = df_only_opcode.iloc[:, 1:78]
duplicati = df_only_opcode[subset.duplicated(keep=False)]
df_only_opcode_clean = df_only_opcode.drop(duplicati.index)
df_only_opcode_clean

Unnamed: 0,address,PUSH,DUP,JUMPDEST,STOP,MSTORE,JUMPI,REVERT,CALLVALUE,ISZERO,CODECOPY,RETURN,LOG,SHA3,MSTORE8,SWAP,POP,ADD,MLOAD,AND,SUB,CALLDATALOAD,EXP,MUL,SLOAD,EQ,JUMP,DIV,CALLER,CALLDATACOPY,SSTORE,NOT,CALL,LT,GT,OR,ADDRESS,TIMESTAMP,GASLIMIT,GAS,ORIGIN,BALANCE,CALLDATASIZE,SAR,MSIZE,CODESIZE,COINBASE,CREATE2,EXTCODESIZE,CALLCODE,SHL,BLOCKHASH,RETURNDATASIZE,SHR,GETPC,DELEGATECALL,MOD,ADDMOD,NUMBER,XOR,SLT,EXTCODECOPY,MULMOD,CREATE,SELFDESTRUCT,STATICCALL,RETURNDATACOPY,SGT,DIFFICULTY,SMOD,BYTE,SIGNEXTEND,CHAINID,SELFBALANCE,GASPRICE,EXTCODEHASH,SDIV,target
0,0x0006157838d5a6b33ab66588a6a693a57c869999,13.0,3.0,3.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0x001a589dda0d6be37632925eaf1256986b2c6ad0,1040.0,1126.0,90.0,2.0,166.0,86.0,0.0,4.0,71.0,5.0,3.0,3.0,33.0,0.0,913.0,468.0,345.0,105.0,97.0,92.0,91.0,79.0,67.0,60.0,54.0,52.0,45.0,41.0,34.0,21.0,20.0,15.0,15.0,9.0,6.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0x00674045bb7c17f0aa1cde34780d6c51af548728,1175.0,720.0,340.0,151.0,159.0,165.0,49.0,29.0,153.0,2.0,19.0,13.0,58.0,0.0,547.0,199.0,193.0,109.0,114.0,109.0,28.0,78.0,33.0,112.0,34.0,110.0,17.0,12.0,1.0,33.0,21.0,1.0,40.0,20.0,12.0,5.0,3.0,0.0,1.0,0.0,0.0,1.0,0.0,8.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0x006bea43baa3f7a6f765f14f10a1a1b08334ef45,826.0,570.0,216.0,8.0,133.0,103.0,14.0,21.0,105.0,5.0,15.0,12.0,31.0,0.0,464.0,127.0,118.0,95.0,123.0,112.0,20.0,67.0,15.0,55.0,32.0,79.0,16.0,15.0,0.0,28.0,21.0,2.0,7.0,13.0,9.0,7.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0x00a9f7d093c46d95f0318e4a6ffc6ed68f73044c,660.0,353.0,188.0,211.0,60.0,102.0,18.0,31.0,107.0,2.0,19.0,7.0,5.0,0.0,311.0,76.0,87.0,86.0,79.0,74.0,3.0,44.0,15.0,72.0,28.0,67.0,17.0,14.0,0.0,29.0,13.0,8.0,9.0,8.0,9.0,3.0,3.0,0.0,6.0,1.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,6.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7441,0xfd784da5c740c617aafb80399fa81b86e1da99a5,422.0,231.0,94.0,8.0,65.0,42.0,6.0,14.0,37.0,1.0,8.0,4.0,21.0,0.0,178.0,95.0,81.0,30.0,84.0,21.0,12.0,10.0,6.0,18.0,20.0,34.0,6.0,12.0,0.0,11.0,5.0,0.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7442,0xfd7e33bc01a7493b189ddfdcc047500463be573a,549.0,352.0,92.0,3.0,84.0,56.0,27.0,17.0,43.0,1.0,6.0,7.0,23.0,0.0,257.0,68.0,69.0,46.0,86.0,68.0,18.0,44.0,13.0,34.0,19.0,50.0,12.0,12.0,1.0,18.0,7.0,2.0,7.0,4.0,5.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7443,0xfd82ffc0d08dcfd902cbf6e48550033b01f919cc,406.0,200.0,136.0,10.0,29.0,58.0,25.0,21.0,49.0,1.0,11.0,2.0,5.0,0.0,139.0,55.0,41.0,33.0,45.0,42.0,9.0,23.0,6.0,35.0,23.0,56.0,7.0,8.0,0.0,21.0,6.0,1.0,8.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7444,0xfd9683e9f2c62e08b6bf68123e18e527efa8fbbc,1593.0,719.0,414.0,37.0,166.0,183.0,84.0,38.0,178.0,2.0,31.0,15.0,45.0,0.0,583.0,181.0,94.0,125.0,243.0,171.0,23.0,158.0,18.0,120.0,68.0,144.0,41.0,39.0,0.0,41.0,23.0,6.0,9.0,41.0,20.0,1.0,14.0,1.0,3.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [14]:
subset2 = df_only_opcode_clean.iloc[:, 1:77]
duplicati2 = df_only_opcode_clean[subset2.duplicated(keep=False)]
duplicati2 = duplicati2.sort_values(by='PUSH', ascending=False)
duplicati2

df_only_opcode_clean = df_only_opcode_clean.drop(duplicati2.index)
df_only_opcode_clean

Unnamed: 0,address,PUSH,DUP,JUMPDEST,STOP,MSTORE,JUMPI,REVERT,CALLVALUE,ISZERO,CODECOPY,RETURN,LOG,SHA3,MSTORE8,SWAP,POP,ADD,MLOAD,AND,SUB,CALLDATALOAD,EXP,MUL,SLOAD,EQ,JUMP,DIV,CALLER,CALLDATACOPY,SSTORE,NOT,CALL,LT,GT,OR,ADDRESS,TIMESTAMP,GASLIMIT,GAS,ORIGIN,BALANCE,CALLDATASIZE,SAR,MSIZE,CODESIZE,COINBASE,CREATE2,EXTCODESIZE,CALLCODE,SHL,BLOCKHASH,RETURNDATASIZE,SHR,GETPC,DELEGATECALL,MOD,ADDMOD,NUMBER,XOR,SLT,EXTCODECOPY,MULMOD,CREATE,SELFDESTRUCT,STATICCALL,RETURNDATACOPY,SGT,DIFFICULTY,SMOD,BYTE,SIGNEXTEND,CHAINID,SELFBALANCE,GASPRICE,EXTCODEHASH,SDIV,target
0,0x0006157838d5a6b33ab66588a6a693a57c869999,13.0,3.0,3.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0x001a589dda0d6be37632925eaf1256986b2c6ad0,1040.0,1126.0,90.0,2.0,166.0,86.0,0.0,4.0,71.0,5.0,3.0,3.0,33.0,0.0,913.0,468.0,345.0,105.0,97.0,92.0,91.0,79.0,67.0,60.0,54.0,52.0,45.0,41.0,34.0,21.0,20.0,15.0,15.0,9.0,6.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0x00674045bb7c17f0aa1cde34780d6c51af548728,1175.0,720.0,340.0,151.0,159.0,165.0,49.0,29.0,153.0,2.0,19.0,13.0,58.0,0.0,547.0,199.0,193.0,109.0,114.0,109.0,28.0,78.0,33.0,112.0,34.0,110.0,17.0,12.0,1.0,33.0,21.0,1.0,40.0,20.0,12.0,5.0,3.0,0.0,1.0,0.0,0.0,1.0,0.0,8.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0x006bea43baa3f7a6f765f14f10a1a1b08334ef45,826.0,570.0,216.0,8.0,133.0,103.0,14.0,21.0,105.0,5.0,15.0,12.0,31.0,0.0,464.0,127.0,118.0,95.0,123.0,112.0,20.0,67.0,15.0,55.0,32.0,79.0,16.0,15.0,0.0,28.0,21.0,2.0,7.0,13.0,9.0,7.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0x00a9f7d093c46d95f0318e4a6ffc6ed68f73044c,660.0,353.0,188.0,211.0,60.0,102.0,18.0,31.0,107.0,2.0,19.0,7.0,5.0,0.0,311.0,76.0,87.0,86.0,79.0,74.0,3.0,44.0,15.0,72.0,28.0,67.0,17.0,14.0,0.0,29.0,13.0,8.0,9.0,8.0,9.0,3.0,3.0,0.0,6.0,1.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,6.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7441,0xfd784da5c740c617aafb80399fa81b86e1da99a5,422.0,231.0,94.0,8.0,65.0,42.0,6.0,14.0,37.0,1.0,8.0,4.0,21.0,0.0,178.0,95.0,81.0,30.0,84.0,21.0,12.0,10.0,6.0,18.0,20.0,34.0,6.0,12.0,0.0,11.0,5.0,0.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7442,0xfd7e33bc01a7493b189ddfdcc047500463be573a,549.0,352.0,92.0,3.0,84.0,56.0,27.0,17.0,43.0,1.0,6.0,7.0,23.0,0.0,257.0,68.0,69.0,46.0,86.0,68.0,18.0,44.0,13.0,34.0,19.0,50.0,12.0,12.0,1.0,18.0,7.0,2.0,7.0,4.0,5.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7443,0xfd82ffc0d08dcfd902cbf6e48550033b01f919cc,406.0,200.0,136.0,10.0,29.0,58.0,25.0,21.0,49.0,1.0,11.0,2.0,5.0,0.0,139.0,55.0,41.0,33.0,45.0,42.0,9.0,23.0,6.0,35.0,23.0,56.0,7.0,8.0,0.0,21.0,6.0,1.0,8.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7444,0xfd9683e9f2c62e08b6bf68123e18e527efa8fbbc,1593.0,719.0,414.0,37.0,166.0,183.0,84.0,38.0,178.0,2.0,31.0,15.0,45.0,0.0,583.0,181.0,94.0,125.0,243.0,171.0,23.0,158.0,18.0,120.0,68.0,144.0,41.0,39.0,0.0,41.0,23.0,6.0,9.0,41.0,20.0,1.0,14.0,1.0,3.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
grid_search_only_opcode, data_only_opcode = get_grid_search_recall(df_only_opcode_clean,'only_opcode')

target
0.0    6258
1.0     763
Name: count, dtype: int64
(7021, 76) (7021, 1) Index(['address', 'PUSH', 'DUP', 'JUMPDEST', 'STOP', 'MSTORE', 'JUMPI',
       'REVERT', 'CALLVALUE', 'ISZERO', 'CODECOPY', 'RETURN', 'LOG', 'SHA3',
       'MSTORE8', 'SWAP', 'POP', 'ADD', 'MLOAD', 'AND', 'SUB', 'CALLDATALOAD',
       'EXP', 'MUL', 'SLOAD', 'EQ', 'JUMP', 'DIV', 'CALLER', 'CALLDATACOPY',
       'SSTORE', 'NOT', 'CALL', 'LT', 'GT', 'OR', 'ADDRESS', 'TIMESTAMP',
       'GASLIMIT', 'GAS', 'ORIGIN', 'BALANCE', 'CALLDATASIZE', 'SAR', 'MSIZE',
       'CODESIZE', 'COINBASE', 'CREATE2', 'EXTCODESIZE', 'CALLCODE', 'SHL',
       'BLOCKHASH', 'RETURNDATASIZE', 'SHR', 'GETPC', 'DELEGATECALL', 'MOD',
       'ADDMOD', 'NUMBER', 'XOR', 'SLT', 'EXTCODECOPY', 'MULMOD', 'CREATE',
       'SELFDESTRUCT', 'STATICCALL', 'RETURNDATACOPY', 'SGT', 'DIFFICULTY',
       'SMOD', 'BYTE', 'SIGNEXTEND', 'CHAINID', 'SELFBALANCE', 'GASPRICE',
       'EXTCODEHASH', 'SDIV', 'target'],
      dtype='object')
Number of smart contr

In [46]:
df_results_depl = pd.concat([df_results_depl,pd.DataFrame(data_only_opcode)])

df_transaction_weighted_opcode

In [21]:
df_transaction_weighted_opcode = db[transaction_frequency_opcode].copy()

In [22]:
y = df_transaction_weighted_opcode.loc[:, ['target']].values
y = pd.DataFrame(y,columns=['target'])

In [23]:
dataset_account = db.iloc[:, 0:24]
dataset_account

Unnamed: 0,address,balance,lifetime,tx_in,tx_out,investment_in,payment_out,investment_to_contract/tx_in,payment_from_contract/tx_out,#addresses_paying_contract,#addresses_paid_by_contract,mean_v1,sdev_v1,mean_v2,sdev_v2,paid_rate,paid_one,percentage_some_tx_in,sdev_tx_in,percentage_some_tx_out,sdev_tx_out,owner_gets_eth_Wo_investing,owner_gets_eth_investing,owner_no_eth
0,0x0006157838d5a6b33ab66588a6a693a57c869999,0.000691,117,2,0,1,0,0.5000,0.0,1,0,-1.000000,0.000000,-0.000691,0.000000,0.0000,0.0,0.0171,0.000000,0.0000,0.000000,0,0,1
1,0x001a589dda0d6be37632925eaf1256986b2c6ad0,29.408568,318,11346,4,9685,4,0.8536,1.0,39,1,-242.025000,509.123977,-0.735203,7.530951,0.0004,0.0,0.5252,92.723961,0.0126,0.000000,0,0,1
2,0x00674045bb7c17f0aa1cde34780d6c51af548728,151.824186,1366,29,12,2,12,0.0690,1.0,2,7,1.111111,1.448712,-16.869354,765.672046,6.0000,0.0,0.0124,0.455645,0.0088,0.000000,0,0,1
3,0x006bea43baa3f7a6f765f14f10a1a1b08334ef45,0.000000,21,9708,0,0,0,0.0000,0.0,0,0,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,1.0476,236.751919,0.0000,0.000000,0,0,1
4,0x00a9f7d093c46d95f0318e4a6ffc6ed68f73044c,0.000000,0,78,1,76,1,0.9744,1.0,65,1,-1.136364,0.488857,0.000000,97.843007,0.0132,0.0,1.0000,0.000000,1.0000,0.000000,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7441,0xfd784da5c740c617aafb80399fa81b86e1da99a5,0.000000,461,9964,0,0,0,0.0000,0.0,0,0,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.9718,19.446353,0.0000,0.000000,0,0,1
7442,0xfd7e33bc01a7493b189ddfdcc047500463be573a,0.000000,1,3,0,0,0,0.0000,0.0,0,0,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,2.0000,0.500000,0.0000,0.000000,0,0,1
7443,0xfd82ffc0d08dcfd902cbf6e48550033b01f919cc,0.000000,48,8,2,2,2,0.2500,1.0,2,1,0.000000,1.414214,0.000000,0.350660,1.0000,0.0,0.1458,0.349927,0.0417,0.000000,0,0,1
7444,0xfd9683e9f2c62e08b6bf68123e18e527efa8fbbc,0.000000,1483,6,2,2,2,0.3333,1.0,2,2,0.000000,0.000000,0.000000,0.500000,1.0000,1.0,0.0027,0.866025,0.0013,0.000000,0,1,0


In [24]:
dataset_opcode = db.iloc[:, 24:100]
dataset_opcode

Unnamed: 0,PUSH,DUP,JUMPDEST,STOP,MSTORE,JUMPI,REVERT,CALLVALUE,ISZERO,CODECOPY,RETURN,LOG,SHA3,MSTORE8,SWAP,POP,ADD,MLOAD,AND,SUB,CALLDATALOAD,EXP,MUL,SLOAD,EQ,JUMP,DIV,CALLER,CALLDATACOPY,SSTORE,NOT,CALL,LT,GT,OR,ADDRESS,TIMESTAMP,GASLIMIT,GAS,ORIGIN,BALANCE,CALLDATASIZE,SAR,MSIZE,CODESIZE,COINBASE,CREATE2,EXTCODESIZE,CALLCODE,SHL,BLOCKHASH,RETURNDATASIZE,SHR,GETPC,DELEGATECALL,MOD,ADDMOD,NUMBER,XOR,SLT,EXTCODECOPY,MULMOD,CREATE,SELFDESTRUCT,STATICCALL,RETURNDATACOPY,SGT,DIFFICULTY,SMOD,BYTE,SIGNEXTEND,CHAINID,SELFBALANCE,GASPRICE,EXTCODEHASH,SDIV
0,13.0,3.0,3.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1040.0,1126.0,90.0,2.0,166.0,86.0,0.0,4.0,71.0,5.0,3.0,3.0,33.0,0.0,913.0,468.0,345.0,105.0,97.0,92.0,91.0,79.0,67.0,60.0,54.0,52.0,45.0,41.0,34.0,21.0,20.0,15.0,15.0,9.0,6.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1175.0,720.0,340.0,151.0,159.0,165.0,49.0,29.0,153.0,2.0,19.0,13.0,58.0,0.0,547.0,199.0,193.0,109.0,114.0,109.0,28.0,78.0,33.0,112.0,34.0,110.0,17.0,12.0,1.0,33.0,21.0,1.0,40.0,20.0,12.0,5.0,3.0,0.0,1.0,0.0,0.0,1.0,0.0,8.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,826.0,570.0,216.0,8.0,133.0,103.0,14.0,21.0,105.0,5.0,15.0,12.0,31.0,0.0,464.0,127.0,118.0,95.0,123.0,112.0,20.0,67.0,15.0,55.0,32.0,79.0,16.0,15.0,0.0,28.0,21.0,2.0,7.0,13.0,9.0,7.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,660.0,353.0,188.0,211.0,60.0,102.0,18.0,31.0,107.0,2.0,19.0,7.0,5.0,0.0,311.0,76.0,87.0,86.0,79.0,74.0,3.0,44.0,15.0,72.0,28.0,67.0,17.0,14.0,0.0,29.0,13.0,8.0,9.0,8.0,9.0,3.0,3.0,0.0,6.0,1.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,6.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7441,422.0,231.0,94.0,8.0,65.0,42.0,6.0,14.0,37.0,1.0,8.0,4.0,21.0,0.0,178.0,95.0,81.0,30.0,84.0,21.0,12.0,10.0,6.0,18.0,20.0,34.0,6.0,12.0,0.0,11.0,5.0,0.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7442,549.0,352.0,92.0,3.0,84.0,56.0,27.0,17.0,43.0,1.0,6.0,7.0,23.0,0.0,257.0,68.0,69.0,46.0,86.0,68.0,18.0,44.0,13.0,34.0,19.0,50.0,12.0,12.0,1.0,18.0,7.0,2.0,7.0,4.0,5.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
7443,406.0,200.0,136.0,10.0,29.0,58.0,25.0,21.0,49.0,1.0,11.0,2.0,5.0,0.0,139.0,55.0,41.0,33.0,45.0,42.0,9.0,23.0,6.0,35.0,23.0,56.0,7.0,8.0,0.0,21.0,6.0,1.0,8.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7444,1593.0,719.0,414.0,37.0,166.0,183.0,84.0,38.0,178.0,2.0,31.0,15.0,45.0,0.0,583.0,181.0,94.0,125.0,243.0,171.0,23.0,158.0,18.0,120.0,68.0,144.0,41.0,39.0,0.0,41.0,23.0,6.0,9.0,41.0,20.0,1.0,14.0,1.0,3.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [25]:
dataset_opcode['all_opcode'] = dataset_opcode[list(dataset_opcode.columns)].sum(axis=1) #generate all_opcode
dataset_opcode = dataset_opcode[:].div(dataset_opcode['all_opcode'], axis=0)
dataset_opcode = dataset_opcode.drop(['all_opcode'], axis = 1)
dataset_opcode

Unnamed: 0,PUSH,DUP,JUMPDEST,STOP,MSTORE,JUMPI,REVERT,CALLVALUE,ISZERO,CODECOPY,RETURN,LOG,SHA3,MSTORE8,SWAP,POP,ADD,MLOAD,AND,SUB,CALLDATALOAD,EXP,MUL,SLOAD,EQ,JUMP,DIV,CALLER,CALLDATACOPY,SSTORE,NOT,CALL,LT,GT,OR,ADDRESS,TIMESTAMP,GASLIMIT,GAS,ORIGIN,BALANCE,CALLDATASIZE,SAR,MSIZE,CODESIZE,COINBASE,CREATE2,EXTCODESIZE,CALLCODE,SHL,BLOCKHASH,RETURNDATASIZE,SHR,GETPC,DELEGATECALL,MOD,ADDMOD,NUMBER,XOR,SLT,EXTCODECOPY,MULMOD,CREATE,SELFDESTRUCT,STATICCALL,RETURNDATACOPY,SGT,DIFFICULTY,SMOD,BYTE,SIGNEXTEND,CHAINID,SELFBALANCE,GASPRICE,EXTCODEHASH,SDIV
0,0.371429,0.085714,0.085714,0.085714,0.057143,0.057143,0.057143,0.028571,0.028571,0.028571,0.028571,0.028571,0.028571,0.028571,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
1,0.197306,0.213622,0.017075,0.000379,0.031493,0.016316,0.000000,0.000759,0.013470,0.000949,0.000569,0.000569,0.006261,0.000000,0.173212,0.088788,0.065452,0.019920,0.018403,0.017454,0.017264,0.014988,0.012711,0.011383,0.010245,0.009865,0.008537,0.007778,0.006450,0.003984,0.003794,0.002846,0.002846,0.001707,0.001138,0.000569,0.000379,0.000379,0.000379,0.000190,0.000190,0.000190,0.00019,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
2,0.240976,0.147662,0.069729,0.030968,0.032609,0.033839,0.010049,0.005947,0.031378,0.000410,0.003897,0.002666,0.011895,0.000000,0.112182,0.040812,0.039582,0.022354,0.023380,0.022354,0.005742,0.015997,0.006768,0.022970,0.006973,0.022559,0.003486,0.002461,0.000205,0.006768,0.004307,0.000205,0.008203,0.004102,0.002461,0.001025,0.000615,0.000000,0.000205,0.000000,0.000000,0.000205,0.00000,0.001641,0.000205,0.000205,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
3,0.236541,0.163230,0.061856,0.002291,0.038087,0.029496,0.004009,0.006014,0.030069,0.001432,0.004296,0.003436,0.008877,0.000000,0.132875,0.036369,0.033792,0.027205,0.035223,0.032073,0.005727,0.019187,0.004296,0.015750,0.009164,0.022623,0.004582,0.004296,0.000000,0.008018,0.006014,0.000573,0.002005,0.003723,0.002577,0.002005,0.000000,0.000000,0.000573,0.000000,0.000000,0.000286,0.00000,0.000000,0.000000,0.000000,0.000573,0.000286,0.000286,0.000286,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
4,0.232476,0.124340,0.066221,0.074322,0.021134,0.035928,0.006340,0.010919,0.037689,0.000704,0.006692,0.002466,0.001761,0.000000,0.109546,0.026770,0.030645,0.030292,0.027827,0.026066,0.001057,0.015498,0.005284,0.025361,0.009863,0.023600,0.005988,0.004931,0.000000,0.010215,0.004579,0.002818,0.003170,0.002818,0.003170,0.001057,0.001057,0.000000,0.002113,0.000352,0.000704,0.000352,0.00000,0.000704,0.000000,0.000000,0.000000,0.002113,0.000000,0.000000,0.000352,0.000352,0.000352,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7441,0.265576,0.145374,0.059157,0.005035,0.040906,0.026432,0.003776,0.008811,0.023285,0.000629,0.005035,0.002517,0.013216,0.000000,0.112020,0.059786,0.050975,0.018880,0.052863,0.013216,0.007552,0.006293,0.003776,0.011328,0.012587,0.021397,0.003776,0.007552,0.000000,0.006923,0.003147,0.000000,0.001888,0.001888,0.001888,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.001888,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000629,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
7442,0.260313,0.166904,0.043623,0.001422,0.039829,0.026553,0.012802,0.008061,0.020389,0.000474,0.002845,0.003319,0.010906,0.000000,0.121859,0.032243,0.032717,0.021811,0.040778,0.032243,0.008535,0.020863,0.006164,0.016121,0.009009,0.023708,0.005690,0.005690,0.000474,0.008535,0.003319,0.000948,0.003319,0.001897,0.002371,0.000474,0.000000,0.000000,0.000948,0.000000,0.000000,0.000474,0.00000,0.000000,0.000000,0.000000,0.000000,0.000948,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000474,0.0,0.000474,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000474,0.0,0.0,0.0,0.0,0.0,0.0
7443,0.265533,0.130804,0.088947,0.006540,0.018967,0.037933,0.016351,0.013734,0.032047,0.000654,0.007194,0.001308,0.003270,0.000000,0.090909,0.035971,0.026815,0.021583,0.029431,0.027469,0.005886,0.015043,0.003924,0.022891,0.015043,0.036625,0.004578,0.005232,0.000000,0.013734,0.003924,0.000654,0.005232,0.003924,0.003924,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000654,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.002616,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000654,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0
7444,0.280310,0.126518,0.072849,0.006511,0.029210,0.032201,0.014781,0.006687,0.031321,0.000352,0.005455,0.002639,0.007918,0.000000,0.102587,0.031849,0.016541,0.021995,0.042759,0.030090,0.004047,0.027802,0.003167,0.021116,0.011966,0.025339,0.007214,0.006863,0.000000,0.007214,0.004047,0.001056,0.001584,0.007214,0.003519,0.000176,0.002463,0.000176,0.000528,0.000000,0.000176,0.000352,0.00000,0.000176,0.000000,0.000000,0.000176,0.000528,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000176,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000176,0.0,0.0,0.0,0.000000,0.000176,0.000000,0.0,0.0,0.0,0.0,0.0,0.0


In [26]:
df_transaction_weighted_opcode = pd.concat([dataset_account, dataset_opcode, y], axis=1)
df_transaction_weighted_opcode = df_transaction_weighted_opcode.fillna(0)

In [27]:
df_transaction_weighted_opcode

Unnamed: 0,address,balance,lifetime,tx_in,tx_out,investment_in,payment_out,investment_to_contract/tx_in,payment_from_contract/tx_out,#addresses_paying_contract,#addresses_paid_by_contract,mean_v1,sdev_v1,mean_v2,sdev_v2,paid_rate,paid_one,percentage_some_tx_in,sdev_tx_in,percentage_some_tx_out,sdev_tx_out,owner_gets_eth_Wo_investing,owner_gets_eth_investing,owner_no_eth,PUSH,DUP,JUMPDEST,STOP,MSTORE,JUMPI,REVERT,CALLVALUE,ISZERO,CODECOPY,RETURN,LOG,SHA3,MSTORE8,SWAP,POP,ADD,MLOAD,AND,SUB,CALLDATALOAD,EXP,MUL,SLOAD,EQ,JUMP,DIV,CALLER,CALLDATACOPY,SSTORE,NOT,CALL,LT,GT,OR,ADDRESS,TIMESTAMP,GASLIMIT,GAS,ORIGIN,BALANCE,CALLDATASIZE,SAR,MSIZE,CODESIZE,COINBASE,CREATE2,EXTCODESIZE,CALLCODE,SHL,BLOCKHASH,RETURNDATASIZE,SHR,GETPC,DELEGATECALL,MOD,ADDMOD,NUMBER,XOR,SLT,EXTCODECOPY,MULMOD,CREATE,SELFDESTRUCT,STATICCALL,RETURNDATACOPY,SGT,DIFFICULTY,SMOD,BYTE,SIGNEXTEND,CHAINID,SELFBALANCE,GASPRICE,EXTCODEHASH,SDIV,target
0,0x0006157838d5a6b33ab66588a6a693a57c869999,0.000691,117,2,0,1,0,0.5000,0.0,1,0,-1.000000,0.000000,-0.000691,0.000000,0.0000,0.0,0.0171,0.000000,0.0000,0.000000,0,0,1,0.371429,0.085714,0.085714,0.085714,0.057143,0.057143,0.057143,0.028571,0.028571,0.028571,0.028571,0.028571,0.028571,0.028571,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0x001a589dda0d6be37632925eaf1256986b2c6ad0,29.408568,318,11346,4,9685,4,0.8536,1.0,39,1,-242.025000,509.123977,-0.735203,7.530951,0.0004,0.0,0.5252,92.723961,0.0126,0.000000,0,0,1,0.197306,0.213622,0.017075,0.000379,0.031493,0.016316,0.000000,0.000759,0.013470,0.000949,0.000569,0.000569,0.006261,0.000000,0.173212,0.088788,0.065452,0.019920,0.018403,0.017454,0.017264,0.014988,0.012711,0.011383,0.010245,0.009865,0.008537,0.007778,0.006450,0.003984,0.003794,0.002846,0.002846,0.001707,0.001138,0.000569,0.000379,0.000379,0.000379,0.000190,0.000190,0.000190,0.00019,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0x00674045bb7c17f0aa1cde34780d6c51af548728,151.824186,1366,29,12,2,12,0.0690,1.0,2,7,1.111111,1.448712,-16.869354,765.672046,6.0000,0.0,0.0124,0.455645,0.0088,0.000000,0,0,1,0.240976,0.147662,0.069729,0.030968,0.032609,0.033839,0.010049,0.005947,0.031378,0.000410,0.003897,0.002666,0.011895,0.000000,0.112182,0.040812,0.039582,0.022354,0.023380,0.022354,0.005742,0.015997,0.006768,0.022970,0.006973,0.022559,0.003486,0.002461,0.000205,0.006768,0.004307,0.000205,0.008203,0.004102,0.002461,0.001025,0.000615,0.000000,0.000205,0.000000,0.000000,0.000205,0.00000,0.001641,0.000205,0.000205,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0x006bea43baa3f7a6f765f14f10a1a1b08334ef45,0.000000,21,9708,0,0,0,0.0000,0.0,0,0,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,1.0476,236.751919,0.0000,0.000000,0,0,1,0.236541,0.163230,0.061856,0.002291,0.038087,0.029496,0.004009,0.006014,0.030069,0.001432,0.004296,0.003436,0.008877,0.000000,0.132875,0.036369,0.033792,0.027205,0.035223,0.032073,0.005727,0.019187,0.004296,0.015750,0.009164,0.022623,0.004582,0.004296,0.000000,0.008018,0.006014,0.000573,0.002005,0.003723,0.002577,0.002005,0.000000,0.000000,0.000573,0.000000,0.000000,0.000286,0.00000,0.000000,0.000000,0.000000,0.000573,0.000286,0.000286,0.000286,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0x00a9f7d093c46d95f0318e4a6ffc6ed68f73044c,0.000000,0,78,1,76,1,0.9744,1.0,65,1,-1.136364,0.488857,0.000000,97.843007,0.0132,0.0,1.0000,0.000000,1.0000,0.000000,0,0,1,0.232476,0.124340,0.066221,0.074322,0.021134,0.035928,0.006340,0.010919,0.037689,0.000704,0.006692,0.002466,0.001761,0.000000,0.109546,0.026770,0.030645,0.030292,0.027827,0.026066,0.001057,0.015498,0.005284,0.025361,0.009863,0.023600,0.005988,0.004931,0.000000,0.010215,0.004579,0.002818,0.003170,0.002818,0.003170,0.001057,0.001057,0.000000,0.002113,0.000352,0.000704,0.000352,0.00000,0.000704,0.000000,0.000000,0.000000,0.002113,0.000000,0.000000,0.000352,0.000352,0.000352,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7441,0xfd784da5c740c617aafb80399fa81b86e1da99a5,0.000000,461,9964,0,0,0,0.0000,0.0,0,0,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,0.9718,19.446353,0.0000,0.000000,0,0,1,0.265576,0.145374,0.059157,0.005035,0.040906,0.026432,0.003776,0.008811,0.023285,0.000629,0.005035,0.002517,0.013216,0.000000,0.112020,0.059786,0.050975,0.018880,0.052863,0.013216,0.007552,0.006293,0.003776,0.011328,0.012587,0.021397,0.003776,0.007552,0.000000,0.006923,0.003147,0.000000,0.001888,0.001888,0.001888,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.001888,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000629,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7442,0xfd7e33bc01a7493b189ddfdcc047500463be573a,0.000000,1,3,0,0,0,0.0000,0.0,0,0,0.000000,0.000000,0.000000,0.000000,0.0000,0.0,2.0000,0.500000,0.0000,0.000000,0,0,1,0.260313,0.166904,0.043623,0.001422,0.039829,0.026553,0.012802,0.008061,0.020389,0.000474,0.002845,0.003319,0.010906,0.000000,0.121859,0.032243,0.032717,0.021811,0.040778,0.032243,0.008535,0.020863,0.006164,0.016121,0.009009,0.023708,0.005690,0.005690,0.000474,0.008535,0.003319,0.000948,0.003319,0.001897,0.002371,0.000474,0.000000,0.000000,0.000948,0.000000,0.000000,0.000474,0.00000,0.000000,0.000000,0.000000,0.000000,0.000948,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000474,0.0,0.000474,0.000000,0.000000,0.0,0.0,0.0,0.000000,0.000000,0.000474,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7443,0xfd82ffc0d08dcfd902cbf6e48550033b01f919cc,0.000000,48,8,2,2,2,0.2500,1.0,2,1,0.000000,1.414214,0.000000,0.350660,1.0000,0.0,0.1458,0.349927,0.0417,0.000000,0,0,1,0.265533,0.130804,0.088947,0.006540,0.018967,0.037933,0.016351,0.013734,0.032047,0.000654,0.007194,0.001308,0.003270,0.000000,0.090909,0.035971,0.026815,0.021583,0.029431,0.027469,0.005886,0.015043,0.003924,0.022891,0.015043,0.036625,0.004578,0.005232,0.000000,0.013734,0.003924,0.000654,0.005232,0.003924,0.003924,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000654,0.00000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.0,0.0,0.002616,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.0,0.0,0.0,0.000654,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7444,0xfd9683e9f2c62e08b6bf68123e18e527efa8fbbc,0.000000,1483,6,2,2,2,0.3333,1.0,2,2,0.000000,0.000000,0.000000,0.500000,1.0000,1.0,0.0027,0.866025,0.0013,0.000000,0,1,0,0.280310,0.126518,0.072849,0.006511,0.029210,0.032201,0.014781,0.006687,0.031321,0.000352,0.005455,0.002639,0.007918,0.000000,0.102587,0.031849,0.016541,0.021995,0.042759,0.030090,0.004047,0.027802,0.003167,0.021116,0.011966,0.025339,0.007214,0.006863,0.000000,0.007214,0.004047,0.001056,0.001584,0.007214,0.003519,0.000176,0.002463,0.000176,0.000528,0.000000,0.000176,0.000352,0.00000,0.000176,0.000000,0.000000,0.000176,0.000528,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000176,0.0,0.0,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000176,0.0,0.0,0.0,0.000000,0.000176,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
grid_transaction_weighted_opcode, data_transaction_weighted_opcode= get_grid_search_recall(df_transaction_weighted_opcode,'weighted_opcode')

target
0.0    6566
1.0     880
Name: count, dtype: int64
(7446, 99) (7446, 1) Index(['address', 'balance', 'lifetime', 'tx_in', 'tx_out', 'investment_in',
       'payment_out', 'investment_to_contract/tx_in',
       'payment_from_contract/tx_out', '#addresses_paying_contract',
       ...
       'DIFFICULTY', 'SMOD', 'BYTE', 'SIGNEXTEND', 'CHAINID', 'SELFBALANCE',
       'GASPRICE', 'EXTCODEHASH', 'SDIV', 'target'],
      dtype='object', length=101)
Number of smart contract in train set:: 5581
Number of smart Ponzi in train set:: 748
Number of smart contract in test set:: 985
Number of smart Ponzi in test set:: 132
Fitting 10 folds for each of 1168 candidates, totalling 11680 fits
DecisionTreeClassifier
RandomForestClassifier
LGBMClassifier


In [29]:
df_results_depl = pd.concat([df_results_depl,pd.DataFrame(data_transaction_weighted_opcode)])

In [48]:
df_melted_deployed = pd.melt(df_results_depl,id_vars='test',value_vars=['precision_test_DecisionTreeClassifier',
       'recall_test_DecisionTreeClassifier',
       'accuracy_test_DecisionTreeClassifier',
       'f1_test_DecisionTreeClassifier',
       'auc_score_test_DecisionTreeClassifier','precision_test_RandomForestClassifier',
       'recall_test_RandomForestClassifier',
       'accuracy_test_RandomForestClassifier',
       'f1_test_RandomForestClassifier',
       'auc_score_test_RandomForestClassifier','precision_test_LGBMClassifier', 'recall_test_LGBMClassifier',
       'accuracy_test_LGBMClassifier', 'f1_test_LGBMClassifier',
       'auc_score_test_LGBMClassifier'],var_name='metric_classifier',value_name='score')

df_melted_deployed['classifier'] = df_melted_deployed['metric_classifier'].apply(lambda x: x.split('_')[-1])
df_melted_deployed['metric'] = df_melted_deployed['metric_classifier'].apply(lambda x: x.split('_')[0])

In [49]:
df_pivot_recall = df_melted_deployed.pivot(index=['test','classifier'],columns='metric',values='score').sort_values(['test','classifier','auc'])
df_pivot_recall

ValueError: Index contains duplicate entries, cannot reshape

In [32]:
s_recall = df_pivot_recall.to_latex(float_format="%.3f")
with open('comparison_table.tex','w') as fout:
    fout.write(s_recall)

In [33]:
print(s_recall)

\begin{tabular}{llrrrrr}
\toprule
 & metric & accuracy & auc & f1 & precision & recall \\
test & classifier &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{only_opcode} & DecisionTreeClassifier & 0.914 & 0.919 & 0.686 & 0.603 & 0.795 \\
 & LGBMClassifier & 0.964 & 0.970 & 0.833 & 0.926 & 0.758 \\
 & RandomForestClassifier & 0.964 & 0.966 & 0.826 & 0.969 & 0.720 \\
\cline{1-7}
\multirow[t]{3}{*}{trans_freq_dpl_opcode} & DecisionTreeClassifier & 0.897 & 0.888 & 0.653 & 0.543 & 0.818 \\
 & LGBMClassifier & 0.962 & 0.973 & 0.824 & 0.925 & 0.742 \\
 & RandomForestClassifier & 0.961 & 0.970 & 0.809 & 0.949 & 0.705 \\
\cline{1-7}
\multirow[t]{3}{*}{weighted_opcode} & DecisionTreeClassifier & 0.879 & 0.875 & 0.602 & 0.493 & 0.773 \\
 & LGBMClassifier & 0.965 & 0.976 & 0.840 & 0.919 & 0.773 \\
 & RandomForestClassifier & 0.962 & 0.980 & 0.812 & 0.959 & 0.705 \\
\cline{1-7}
\bottomrule
\end{tabular}



# AUC Optimization

In [18]:
def get_grid_search_auc(dataset,name):
    data = {}
    data['test'] = name
    X = dataset.iloc[:, 1:-1].values # rimuovo la prima colonna che contiene gli indirizzi
    y = dataset.loc[:, ['target']].values


    print(dataset.target.value_counts())
    print(X.shape,y.shape,dataset.columns)

    # divido in train set (TR) e test set (TS)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state = 42, stratify=y)

    flat_list_train = [item for sublist in y_train.tolist() for item in sublist]
    print('Number of smart contract in train set::', flat_list_train.count(0))
    print('Number of smart Ponzi in train set::', flat_list_train.count(1))
    flat_list_test = [item for sublist in y_test.tolist() for item in sublist]
    print('Number of smart contract in test set::', flat_list_test.count(0))
    print('Number of smart Ponzi in test set::', flat_list_test.count(1))
    pipeline = Pipeline(steps=[('classifier',RandomForestClassifier(random_state=42))])

    params =[
   {
          'classifier': [DecisionTreeClassifier(random_state=42)],
          'classifier__criterion' : ['entropy', 'gini'],
          'classifier__max_depth' : [5, 9],
          'classifier__min_samples_split' : [5, 15], # il valore tipico sta fra 1 e 40
          'classifier__max_features' : range(6,X_train.shape[1],3),
         'classifier__class_weight' : [{0:1, 1:4}, {0:1, 1:4.5}] # pesi, il valore raccomandato e' sum(negative instances)/sum(positive instances)
     },

     {
         'classifier': [RandomForestClassifier(random_state=42)],
         'classifier__n_estimators':[150, 200, 250],
         'classifier__min_samples_split' : [5,15],
         'classifier__criterion':['gini', 'entropy'],
         'classifier__class_weight' : [{0:1, 1:5.5}],
         'classifier__bootstrap': [True, False]
         },

      {
          'classifier': [lgb.LGBMClassifier(boosting_type='gbdt', n_jobs=4, importance_type='split',random_state=42)],
          'classifier__learning_rate' : [0.1,0.01],
          'classifier__n_estimators' : [80, 100, 120, 140],
          'classifier__max_depth' : [10,15,20],
          'classifier__colsample_bytree' : [0.5,0.8,1],
          'classifier__reg_alpha' : [0, 0.1, 0.2],
          'classifier__reg_lambda' : [1,10,15]
      }


         ]
    grid_search = GridSearchCV(pipeline, params, cv=10,
                               scoring='roc_auc',
                               return_train_score=True,n_jobs=4,verbose=2)
    grid_search.fit(X_train, y_train.ravel())
    df_cv = pd.DataFrame(grid_search.cv_results_)
    best_classifiers_cv(df_cv,data,X_train,y_train,X_test,y_test)

    data['best hyperparameters'] = [grid_search.best_params_]
    data['best score'] = [grid_search.best_score_]

    y_pred_train = grid_search.best_estimator_.predict(X_train)
    data['best_precision_training'] = [precision_score(y_train, y_pred_train)] # TP / (FP + TP)
    data['best_recall_training'] = [recall_score(y_train, y_pred_train)]	     # TP / (FN + TP)
    data['best_accuracy_training'] = [accuracy_score(y_train, y_pred_train)]	 # (TP + TN)/ (TP + FN + TN + FP)
    data['best_f1_training'] = [f1_score(y_train, y_pred_train)]

    # Confusion matrix
    y_pred = grid_search.best_estimator_.predict(X_test)
    data['best_precision_test'] = [precision_score(y_test, y_pred)] # TP / (FP + TP)
    data['best_recall_test'] = [recall_score(y_test, y_pred)]	     # TP / (FN + TP)
    data['best_accuracy_test'] = [accuracy_score(y_test, y_pred)]	 # (TP + TN)/ (TP + FN + TN + FP)
    data['best_f1_test'] = [f1_score(y_test, y_pred)]
    return grid_search,data

In [35]:
grid_search_transaction_frequency_opcode,data_transaction_frequency_opcode = get_grid_search_auc(df_trans_freq_dpl_opcode,'frequency_opcode')

target
0.0    6566
1.0     880
Name: count, dtype: int64
(7446, 99) (7446, 1) Index(['address', 'balance', 'lifetime', 'tx_in', 'tx_out', 'investment_in',
       'payment_out', 'investment_to_contract/tx_in',
       'payment_from_contract/tx_out', '#addresses_paying_contract',
       ...
       'DIFFICULTY', 'SMOD', 'BYTE', 'SIGNEXTEND', 'CHAINID', 'SELFBALANCE',
       'GASPRICE', 'EXTCODEHASH', 'SDIV', 'target'],
      dtype='object', length=101)
Number of smart contract in train set:: 5581
Number of smart Ponzi in train set:: 748
Number of smart contract in test set:: 985
Number of smart Ponzi in test set:: 132
Fitting 10 folds for each of 1168 candidates, totalling 11680 fits
DecisionTreeClassifier
RandomForestClassifier
LGBMClassifier


In [36]:
df_results_crea = pd.DataFrame(data_transaction_frequency_opcode)

In [44]:
df_only_opcode

Unnamed: 0,address,PUSH,DUP,JUMPDEST,STOP,MSTORE,JUMPI,REVERT,CALLVALUE,ISZERO,CODECOPY,RETURN,LOG,SHA3,MSTORE8,SWAP,POP,ADD,MLOAD,AND,SUB,CALLDATALOAD,EXP,MUL,SLOAD,EQ,JUMP,DIV,CALLER,CALLDATACOPY,SSTORE,NOT,CALL,LT,GT,OR,ADDRESS,TIMESTAMP,GASLIMIT,GAS,ORIGIN,BALANCE,CALLDATASIZE,SAR,MSIZE,CODESIZE,COINBASE,CREATE2,EXTCODESIZE,CALLCODE,SHL,BLOCKHASH,RETURNDATASIZE,SHR,GETPC,DELEGATECALL,MOD,ADDMOD,NUMBER,XOR,SLT,EXTCODECOPY,MULMOD,CREATE,SELFDESTRUCT,STATICCALL,RETURNDATACOPY,SGT,DIFFICULTY,SMOD,BYTE,SIGNEXTEND,CHAINID,SELFBALANCE,GASPRICE,EXTCODEHASH,SDIV,target
0,0x0006157838d5a6b33ab66588a6a693a57c869999,13.0,3.0,3.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0x001a589dda0d6be37632925eaf1256986b2c6ad0,1040.0,1126.0,90.0,2.0,166.0,86.0,0.0,4.0,71.0,5.0,3.0,3.0,33.0,0.0,913.0,468.0,345.0,105.0,97.0,92.0,91.0,79.0,67.0,60.0,54.0,52.0,45.0,41.0,34.0,21.0,20.0,15.0,15.0,9.0,6.0,3.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0x00674045bb7c17f0aa1cde34780d6c51af548728,1175.0,720.0,340.0,151.0,159.0,165.0,49.0,29.0,153.0,2.0,19.0,13.0,58.0,0.0,547.0,199.0,193.0,109.0,114.0,109.0,28.0,78.0,33.0,112.0,34.0,110.0,17.0,12.0,1.0,33.0,21.0,1.0,40.0,20.0,12.0,5.0,3.0,0.0,1.0,0.0,0.0,1.0,0.0,8.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0x006bea43baa3f7a6f765f14f10a1a1b08334ef45,826.0,570.0,216.0,8.0,133.0,103.0,14.0,21.0,105.0,5.0,15.0,12.0,31.0,0.0,464.0,127.0,118.0,95.0,123.0,112.0,20.0,67.0,15.0,55.0,32.0,79.0,16.0,15.0,0.0,28.0,21.0,2.0,7.0,13.0,9.0,7.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0x00a9f7d093c46d95f0318e4a6ffc6ed68f73044c,660.0,353.0,188.0,211.0,60.0,102.0,18.0,31.0,107.0,2.0,19.0,7.0,5.0,0.0,311.0,76.0,87.0,86.0,79.0,74.0,3.0,44.0,15.0,72.0,28.0,67.0,17.0,14.0,0.0,29.0,13.0,8.0,9.0,8.0,9.0,3.0,3.0,0.0,6.0,1.0,2.0,1.0,0.0,2.0,0.0,0.0,0.0,6.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7441,0xfd784da5c740c617aafb80399fa81b86e1da99a5,422.0,231.0,94.0,8.0,65.0,42.0,6.0,14.0,37.0,1.0,8.0,4.0,21.0,0.0,178.0,95.0,81.0,30.0,84.0,21.0,12.0,10.0,6.0,18.0,20.0,34.0,6.0,12.0,0.0,11.0,5.0,0.0,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7442,0xfd7e33bc01a7493b189ddfdcc047500463be573a,549.0,352.0,92.0,3.0,84.0,56.0,27.0,17.0,43.0,1.0,6.0,7.0,23.0,0.0,257.0,68.0,69.0,46.0,86.0,68.0,18.0,44.0,13.0,34.0,19.0,50.0,12.0,12.0,1.0,18.0,7.0,2.0,7.0,4.0,5.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7443,0xfd82ffc0d08dcfd902cbf6e48550033b01f919cc,406.0,200.0,136.0,10.0,29.0,58.0,25.0,21.0,49.0,1.0,11.0,2.0,5.0,0.0,139.0,55.0,41.0,33.0,45.0,42.0,9.0,23.0,6.0,35.0,23.0,56.0,7.0,8.0,0.0,21.0,6.0,1.0,8.0,6.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7444,0xfd9683e9f2c62e08b6bf68123e18e527efa8fbbc,1593.0,719.0,414.0,37.0,166.0,183.0,84.0,38.0,178.0,2.0,31.0,15.0,45.0,0.0,583.0,181.0,94.0,125.0,243.0,171.0,23.0,158.0,18.0,120.0,68.0,144.0,41.0,39.0,0.0,41.0,23.0,6.0,9.0,41.0,20.0,1.0,14.0,1.0,3.0,0.0,1.0,2.0,0.0,1.0,0.0,0.0,1.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [19]:
grid_search_only_opcode, data_only_opcode = get_grid_search_auc(df_only_opcode_clean,'only_opcode')

target
0.0    6258
1.0     763
Name: count, dtype: int64
(7021, 76) (7021, 1) Index(['address', 'PUSH', 'DUP', 'JUMPDEST', 'STOP', 'MSTORE', 'JUMPI',
       'REVERT', 'CALLVALUE', 'ISZERO', 'CODECOPY', 'RETURN', 'LOG', 'SHA3',
       'MSTORE8', 'SWAP', 'POP', 'ADD', 'MLOAD', 'AND', 'SUB', 'CALLDATALOAD',
       'EXP', 'MUL', 'SLOAD', 'EQ', 'JUMP', 'DIV', 'CALLER', 'CALLDATACOPY',
       'SSTORE', 'NOT', 'CALL', 'LT', 'GT', 'OR', 'ADDRESS', 'TIMESTAMP',
       'GASLIMIT', 'GAS', 'ORIGIN', 'BALANCE', 'CALLDATASIZE', 'SAR', 'MSIZE',
       'CODESIZE', 'COINBASE', 'CREATE2', 'EXTCODESIZE', 'CALLCODE', 'SHL',
       'BLOCKHASH', 'RETURNDATASIZE', 'SHR', 'GETPC', 'DELEGATECALL', 'MOD',
       'ADDMOD', 'NUMBER', 'XOR', 'SLT', 'EXTCODECOPY', 'MULMOD', 'CREATE',
       'SELFDESTRUCT', 'STATICCALL', 'RETURNDATACOPY', 'SGT', 'DIFFICULTY',
       'SMOD', 'BYTE', 'SIGNEXTEND', 'CHAINID', 'SELFBALANCE', 'GASPRICE',
       'EXTCODEHASH', 'SDIV', 'target'],
      dtype='object')
Number of smart contr

In [53]:
df_results_crea = pd.concat([df_results_crea,pd.DataFrame(data_only_opcode)])

In [39]:
grid_transaction_weighted_opcode, data_transaction_weighted_opcode= get_grid_search_auc(df_transaction_weighted_opcode,'weighted_opcode')

target
0.0    6566
1.0     880
Name: count, dtype: int64
(7446, 99) (7446, 1) Index(['address', 'balance', 'lifetime', 'tx_in', 'tx_out', 'investment_in',
       'payment_out', 'investment_to_contract/tx_in',
       'payment_from_contract/tx_out', '#addresses_paying_contract',
       ...
       'DIFFICULTY', 'SMOD', 'BYTE', 'SIGNEXTEND', 'CHAINID', 'SELFBALANCE',
       'GASPRICE', 'EXTCODEHASH', 'SDIV', 'target'],
      dtype='object', length=101)
Number of smart contract in train set:: 5581
Number of smart Ponzi in train set:: 748
Number of smart contract in test set:: 985
Number of smart Ponzi in test set:: 132
Fitting 10 folds for each of 1168 candidates, totalling 11680 fits
DecisionTreeClassifier
RandomForestClassifier
LGBMClassifier


In [40]:
df_results_crea = pd.concat([df_results_crea,pd.DataFrame(data_transaction_weighted_opcode)])

In [41]:
df_melted = pd.melt(df_results_crea,id_vars='test',value_vars=['precision_test_DecisionTreeClassifier',
       'recall_test_DecisionTreeClassifier',
       'accuracy_test_DecisionTreeClassifier',
       'f1_test_DecisionTreeClassifier',
       'auc_score_test_DecisionTreeClassifier','precision_test_RandomForestClassifier',
       'recall_test_RandomForestClassifier',
       'accuracy_test_RandomForestClassifier',
       'f1_test_RandomForestClassifier',
       'auc_score_test_RandomForestClassifier','precision_test_LGBMClassifier', 'recall_test_LGBMClassifier',
       'accuracy_test_LGBMClassifier', 'f1_test_LGBMClassifier',
       'auc_score_test_LGBMClassifier'],var_name='metric_classifier',value_name='score')

df_melted['classifier'] = df_melted['metric_classifier'].apply(lambda x: x.split('_')[-1])
df_melted['metric'] = df_melted['metric_classifier'].apply(lambda x: x.split('_')[0])

df_pivot_auc= df_melted.pivot(index=['test','classifier'],columns='metric',values='score').sort_values(['test','classifier','auc'])
df_pivot_auc

Unnamed: 0_level_0,metric,accuracy,auc,f1,precision,recall
test,classifier,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
frequency_opcode,DecisionTreeClassifier,0.93017,0.832476,0.704545,0.704545,0.704545
frequency_opcode,LGBMClassifier,0.962399,0.973404,0.823529,0.924528,0.742424
frequency_opcode,RandomForestClassifier,0.962399,0.974396,0.817391,0.959184,0.712121
only_opcode,DecisionTreeClassifier,0.940913,0.874542,0.759124,0.732394,0.787879
only_opcode,LGBMClassifier,0.96419,0.970174,0.833333,0.925926,0.757576
only_opcode,RandomForestClassifier,0.96419,0.974639,0.827586,0.96,0.727273
weighted_opcode,DecisionTreeClassifier,0.941808,0.848916,0.747082,0.768,0.727273
weighted_opcode,LGBMClassifier,0.965085,0.97565,0.839506,0.918919,0.772727
weighted_opcode,RandomForestClassifier,0.965085,0.979172,0.83682,0.934579,0.757576


In [42]:
s_auc = df_pivot_auc.to_latex(float_format="%.3f")
with open('comparison_table_AUC.tex','w') as fout:
    fout.write(s_auc)

In [43]:
print(s_auc)

\begin{tabular}{llrrrrr}
\toprule
 & metric & accuracy & auc & f1 & precision & recall \\
test & classifier &  &  &  &  &  \\
\midrule
\multirow[t]{3}{*}{frequency_opcode} & DecisionTreeClassifier & 0.930 & 0.832 & 0.705 & 0.705 & 0.705 \\
 & LGBMClassifier & 0.962 & 0.973 & 0.824 & 0.925 & 0.742 \\
 & RandomForestClassifier & 0.962 & 0.974 & 0.817 & 0.959 & 0.712 \\
\cline{1-7}
\multirow[t]{3}{*}{only_opcode} & DecisionTreeClassifier & 0.941 & 0.875 & 0.759 & 0.732 & 0.788 \\
 & LGBMClassifier & 0.964 & 0.970 & 0.833 & 0.926 & 0.758 \\
 & RandomForestClassifier & 0.964 & 0.975 & 0.828 & 0.960 & 0.727 \\
\cline{1-7}
\multirow[t]{3}{*}{weighted_opcode} & DecisionTreeClassifier & 0.942 & 0.849 & 0.747 & 0.768 & 0.727 \\
 & LGBMClassifier & 0.965 & 0.976 & 0.840 & 0.919 & 0.773 \\
 & RandomForestClassifier & 0.965 & 0.979 & 0.837 & 0.935 & 0.758 \\
\cline{1-7}
\bottomrule
\end{tabular}

