In [12]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, roc_auc_score, recall_score, accuracy_score, precision_score
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
matplotlib.rcParams['figure.figsize'] = (11.0, 8.0)

import warnings
warnings.filterwarnings('ignore')


In [13]:
#Importing Data
df = pd.read_csv('./address_data_combined.csv')
X = df.drop(columns=['Address', 'FLAG'])
y = df['FLAG']
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)


columns = ['Avg min between sent tnx', 'Avg min between received tnx',
       'Time Diff between first and last (Mins)',
       'Unique Received From Addresses', 'min value received',
       'max value received ', 'avg val received', 'min val sent',
       'avg val sent', 'total transactions (including tnx to create contract',
       'total ether received', 'total ether balance']
    
scaler = MinMaxScaler()

In [14]:
# Log for Skewed Data
for c in columns:
    X_train_full[c] = X_train_full[c].apply(lambda x: np.log(x) if x > 0 else 0)
    X_test[c] = X_test[c].apply(lambda x: np.log(x) if x > 0 else 0)

# Scaling
X_train_full = scaler.fit_transform(X_train_full)
X_test = scaler.transform(X_test)

np.isnan(X_train_full)

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [15]:
X_train_full_tensor = tf.convert_to_tensor(X_train_full, dtype=tf.float32)
X_test_tensor = tf.convert_to_tensor(X_test, dtype=tf.float32)
y_train_full_tensor = tf.convert_to_tensor(y_train_full.values, dtype=tf.float32)
y_test_tensor = tf.convert_to_tensor(y_test.values, dtype=tf.float32)

In [16]:
# TensorFlow setup for logistic regression
def build_logistic_regression_model(input_dim):
    model = tf.keras.Sequential([
        tf.keras.layers.InputLayer(input_shape=(input_dim,)),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    return model

# Build the model
model = build_logistic_regression_model(X_train_full_tensor.shape[1])

In [17]:
# Train the model
history = model.fit(X_train_full_tensor, y_train_full_tensor, validation_split=0.2, epochs=100, batch_size=32, verbose=2)

Epoch 1/100
248/248 - 1s - 4ms/step - accuracy: 0.5405 - loss: 0.6830 - val_accuracy: 0.5328 - val_loss: 0.6797
Epoch 2/100
248/248 - 0s - 2ms/step - accuracy: 0.6012 - loss: 0.6670 - val_accuracy: 0.6670 - val_loss: 0.6681
Epoch 3/100
248/248 - 0s - 2ms/step - accuracy: 0.6826 - loss: 0.6571 - val_accuracy: 0.6615 - val_loss: 0.6576
Epoch 4/100
248/248 - 1s - 2ms/step - accuracy: 0.6703 - loss: 0.6474 - val_accuracy: 0.6695 - val_loss: 0.6495
Epoch 5/100
248/248 - 0s - 1ms/step - accuracy: 0.6755 - loss: 0.6386 - val_accuracy: 0.6680 - val_loss: 0.6396
Epoch 6/100
248/248 - 0s - 2ms/step - accuracy: 0.6798 - loss: 0.6300 - val_accuracy: 0.6665 - val_loss: 0.6292
Epoch 7/100
248/248 - 0s - 2ms/step - accuracy: 0.6893 - loss: 0.6216 - val_accuracy: 0.6695 - val_loss: 0.6212
Epoch 8/100
248/248 - 0s - 2ms/step - accuracy: 0.6948 - loss: 0.6139 - val_accuracy: 0.6791 - val_loss: 0.6126
Epoch 9/100
248/248 - 0s - 2ms/step - accuracy: 0.7099 - loss: 0.6062 - val_accuracy: 0.6831 - val_loss:

In [18]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test_tensor, y_test_tensor)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

# Predictions
y_pred_prob = model.predict(X_test_tensor).flatten()
y_pred = (y_pred_prob > 0.5).astype(int)

# Calculate additional metrics
f1 = f1_score(y_test_tensor, y_pred)
roc_auc = roc_auc_score(y_test_tensor, y_pred_prob)
recall = recall_score(y_test_tensor, y_pred)
precision = precision_score(y_test_tensor, y_pred)
accuracy = accuracy_score(y_test_tensor, y_pred)

[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.8230 - loss: 0.4221
Test Loss: 0.41279780864715576, Test Accuracy: 0.8262302875518799
[1m133/133[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [19]:
print(f"F1 Score: {f1}")
print(f"ROC AUC Score: {roc_auc}")
print(f"Recall: {recall}")
print(f"Precision: {precision}")
print(f"Accuracy: {accuracy}")

# Save the model
model.save('fraud-eth-account-detect-model.h5')

# Load the model if needed
loaded_model = tf.keras.models.load_model('fraud-eth-account-detect-model.h5')



F1 Score: 0.8052770448548813
ROC AUC Score: 0.9044443715472387
Recall: 0.7801635991820041
Precision: 0.8320610687022901
Accuracy: 0.8262302801977867


In [21]:
import pandas as pd

# Load transactions from a CSV file
transactions_df = pd.read_csv('./transactions.csv')

# Convert the 'value' column to integers for calculation purposes
# This assumes all values in the 'value' column are numeric and can be converted to integers
transactions_df['value'] = transactions_df['value'].astype(int)

# Convert timestamps from string to datetime
transactions_df['timeStamp'] = pd.to_datetime(transactions_df['timeStamp'], unit='s')

# Calculate time differences in minutes for subsequent transactions
transactions_df.sort_values('timeStamp', inplace=True)
transactions_df['time_diff'] = transactions_df['timeStamp'].diff().dt.total_seconds() / 60

# Filter out sent and received transactions
sent_transactions = transactions_df[transactions_df['from'].str.lower() == '0x9A23A93905a0CEf884ACB6F6488BC5D7b964EF73'.lower()]
received_transactions = transactions_df[transactions_df['to'].str.lower() == '0x9A23A93905a0CEf884ACB6F6488BC5D7b964EF73'.lower()]

# Calculate various metrics with conversions from Wei to Ether
metrics = {
    'Avg min between sent tnx': sent_transactions['time_diff'].mean(),
    'Avg min between received tnx': received_transactions['time_diff'].mean(),
    'Time Diff between first and last (Mins)': (transactions_df['timeStamp'].iloc[-1] - transactions_df['timeStamp'].iloc[0]).total_seconds() / 60,
    'Unique Received From Addresses': received_transactions['from'].nunique(),
    'min value received': received_transactions['value'].min() / 1e18,
    'max value received ': received_transactions['value'].max() / 1e18,
    'avg val received': received_transactions['value'].mean() / 1e18,
    'min val sent': sent_transactions['value'].min() / 1e18,
    'avg val sent': sent_transactions['value'].mean() / 1e18,
    'total transactions (including tnx to create contract': transactions_df.shape[0],
    'total ether received': received_transactions['value'].sum() / 1e18,
    'total ether balance': transactions_df['value'].sum() / 1e18  # Simplistic balance calculation
}

In [22]:
metrics_df = pd.DataFrame([metrics])

In [23]:
for c in metrics_df.columns:
    if metrics_df[c].iloc[0] > 0:
        metrics_df[c] = np.log(metrics_df[c] + 1)

In [24]:
metrics_scaled = scaler.transform(metrics_df)

In [25]:
metrics_scaled_tensor = tf.convert_to_tensor(metrics_scaled, dtype=tf.float32)

In [26]:
# Make predictions
prediction = model.predict(metrics_scaled_tensor)

# Output the prediction
print(f"Prediction: {prediction[0][0]}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
Prediction: 0.1777561902999878


In [27]:
model.output_names = ['output']
model.build(input_shape=(None, X_train_full_tensor.shape[1]))

In [28]:
# conversion based on the expected input shape
input_spec = [tf.TensorSpec([None, X_train_full_tensor.shape[1]], tf.float32, name='input')]

In [29]:
import onnxmltools
from giza_actions.model import GizaModel
from giza_actions.task import task
from giza_actions.action import Action, action
import tf2onnx

In [30]:
# Convert the model to ONNX format
onnx_model, _ = tf2onnx.convert.from_keras(model=model, input_signature=input_spec, opset=13)

2024-06-07 22:09:17.660424: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:02:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-07 22:09:17.660505: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2024-06-07 22:09:17.660710: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-06-07 22:09:17.661624: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:02:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-07 22:09:17.661655: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gp

In [31]:
onnx_file_path = './fraud-eth-account-detect-model-100-eps.onnx'
with open(onnx_file_path, "wb") as f:
    f.write(onnx_model.SerializeToString())

In [32]:
@task(name='Prediction with ONNX 1')
def prediction(eval_numpy):
    model = GizaModel(model_path="./fraud-eth-account-detect-model-100-eps.onnx")
    result = model.predict(
        input_feed={"input": eval_numpy}, verifiable=False
    )
    return result

In [33]:
@action(name='Execution: Prediction with ONNX 1', log_prints=True)
def execution():
    eval_numpy = metrics_scaled_tensor.numpy()
    unverifiable_predictions = prediction(eval_numpy)
    return unverifiable_predictions

In [34]:
unverifiable_predictions = execution()

In [35]:
print(f"unVerifiablePrediction: {unverifiable_predictions[0][0]}")

unVerifiablePrediction: 0.1777561902999878
