In [None]:
import sys

# Show which Python interpreter this kernel is running on.
kernel_python = sys.executable
print(kernel_python)

In [None]:
!{sys.executable} -m pip install watermark

In [None]:
# Set TF_CPP_MIN_LOG_LEVEL=3 in the kernel's environment before the cell that
# imports TensorFlow runs (presumably to quiet TensorFlow's native logging —
# confirm against the TensorFlow documentation).
%env TF_CPP_MIN_LOG_LEVEL=3

In [None]:
# Imports
import sklearn
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import tensorflow as tf
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras import Input
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
import warnings
# NOTE(review): with no category given, this hides every warning raised from
# here on, including deprecation notices from the libraries imported above.
warnings.filterwarnings("ignore")

In [None]:
# Read the raw table and lower-case every header so the names used below
# match regardless of the capitalisation in the CSV.
df_dataset = pd.read_csv("dataset.csv")
df_dataset.columns = [name.lower() for name in df_dataset.columns]

# Columns that are never offered to the model. The leading spaces in the
# first two entries are part of the names being matched.
cols_to_drop = [
    " erc20 most sent token type",
    " erc20_most_rec_token_type",
    "address",
    "index",
    "unnamed: 0"
]

# Distinct-value count per column; a column needs more than one distinct
# value to be kept as a feature.
unique_values = df_dataset.nunique()
varying_columns = unique_values[unique_values > 1].index

# Walk the frame's columns in order, skipping the target ("flag"), the
# excluded columns and any column with at most one distinct value.
excluded = ["flag", *cols_to_drop]
features = [
    name
    for name in df_dataset.columns
    if name not in excluded and name in varying_columns
]

In [None]:
class PipeSteps(BaseEstimator, TransformerMixin):
    """Base class for the DataFrame-based pipeline steps in this notebook.

    On its own it is a pass-through step: ``fit`` learns nothing and
    ``transform`` returns a copy of its input. Subclasses override one or
    both methods.

    Parameters
    ----------
    columns : list, optional
        Column names the step operates on. When omitted, the instance gets
        its own empty list.
    """

    def __init__(self, columns=None):
        # A literal ``[]`` default is created once and shared by every
        # instance built without arguments, so mutating one instance's
        # ``columns`` would leak into the others. Build a fresh list instead.
        self.columns = [] if columns is None else columns

    def fit(self, X, y=None):
        """Learn nothing and return ``self`` so the step can be chained."""
        return self

    def transform(self, X):
        """Return a copy of ``X``, leaving the caller's object untouched."""
        return X.copy()
    
class SelectColumns(PipeSteps):
    """Pipeline step that narrows a frame down to ``self.columns``."""

    def transform(self, X):
        """Copy ``X`` and return the copy indexed by ``self.columns``."""
        working_copy = X.copy()
        selected = working_copy[self.columns]
        return selected
    
class FillData(PipeSteps):
    """Pipeline step that fills missing values with the column mean seen in ``fit``."""

    def fit(self, X, y=None):
        """Store the mean of every configured column of ``X`` in ``self.means``."""
        column_means = {}
        for name in self.columns:
            column_means[name] = X[name].mean()
        self.means = column_means
        return self

    def transform(self, X):
        """Return a copy of ``X`` whose configured columns have NaNs replaced
        by the means stored during ``fit``."""
        filled = X.copy()
        for name in self.columns:
            replacement = self.means[name]
            filled[name] = filled[name].fillna(replacement)
        return filled
    
class StandardizeData(PipeSteps):
    """Pipeline step that runs a ``StandardScaler`` over ``self.columns``."""

    def fit(self, X, y=None):
        """Create a new ``StandardScaler`` and fit it on the configured columns."""
        scaler = StandardScaler()
        self.scaler = scaler
        scaler.fit(X[self.columns])
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the configured columns replaced by the
        fitted scaler's output."""
        scaled = X.copy()
        target_columns = self.columns
        scaled[target_columns] = self.scaler.transform(scaled[target_columns])
        return scaled
    

class GetData(PipeSteps):
    """Final pipeline step: hand back the frame's underlying array."""

    def transform(self, X):
        """Return ``.values`` of a copy of ``X``."""
        snapshot = X.copy()
        return snapshot.values

In [None]:
# Preprocessing chain: column selection -> mean imputation -> standardisation
# -> conversion to a plain array.
pipeline_steps = [
    ("feature_selection", SelectColumns(features)),
    ("fill_missing", FillData(features)),
    ("standard_scaling", StandardizeData(features)),
    ("returnValues", GetData()),
]
process_pipe = Pipeline(pipeline_steps)

# Feature frame and target; the "flag" column is passed through
# to_categorical before the split.
X = df_dataset[features]
y = to_categorical(df_dataset["flag"])

# Hold out 30% of the rows; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

# Fit the preprocessing on the training rows only, then apply the fitted
# steps to the held-out rows.
X_train = process_pipe.fit_transform(X_train)
X_test = process_pipe.transform(X_test)

In [None]:
# Seed the Python, NumPy and backend random generators before any weights are
# created, so that re-running this cell followed by the training cell gives
# results that are as repeatable as the backend allows. The value matches the
# random_state already used for the train/test split.
tf.keras.utils.set_random_seed(42)

# Feed-forward classifier: one input per feature, three ReLU hidden layers
# (len(features) -> 20 -> 5 units) and a 2-unit softmax output.
model = Sequential()
model.add(Input(shape=(len(features),)))
model.add(Dense(len(features), activation="relu"))
model.add(Dense(20, activation="relu"))
model.add(Dense(5, activation="relu"))
model.add(Dense(2, activation="softmax"))

# categorical_crossentropy pairs with the two-column target produced by
# to_categorical.
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])

model.summary()

In [None]:
%%time
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10)

In [None]:
# Run inference once; both metrics below are derived from these class
# probabilities.
test_probs = model.predict(X_test)

# Collapse the one-hot targets and the probability rows to class indices.
# test_predicts stays a plain list of integer labels.
test_labels = np.argmax(y_test, axis=1)
test_predicts = list(np.argmax(test_probs, axis=1))

# scikit-learn metrics take the ground truth first, then the prediction.
acc = metrics.accuracy_score(test_labels, test_predicts)
print(f"Test Accuracy - {acc:,.2%}")

# AUC is computed from the predicted probability of class 1.
auc = metrics.roc_auc_score(test_labels, test_probs[:, 1])
print(f"The AUC on the Test set - {auc:,.2%}")

In [None]:
# The pipeline was fitted on lower-cased column names, so apply the same
# normalisation to the incoming file; otherwise column selection raises a
# KeyError for any header that contains an upper-case letter.
new_data = pd.read_csv("new_data.csv").rename(columns=str.lower)
new_data_processed = process_pipe.transform(new_data)

# Class index with the highest predicted probability, one entry per row.
predict = list(np.argmax(model.predict(new_data_processed), axis=1))

# Only the first row of new_data is reported.
if predict[0] == 0:
    print("According to the model, this transaction does not represent a Fraud.")
else:
    print("According to the model, this transaction may represent a Fraud. Trigger human verification!")