In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score, roc_auc_score, recall_score, accuracy_score, precision_score
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
matplotlib.rcParams['figure.figsize'] = (11.0, 8.0)

import warnings
warnings.filterwarnings('ignore')


2024-06-08 12:30:04.453398: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-08 12:30:04.456369: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-06-08 12:30:04.497342: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
#Importing Data
df = pd.read_csv('./address_data_combined.csv')
X = df.drop(columns=['Address', 'FLAG'])
y = df['FLAG']
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)


columns = ['Avg min between sent tnx', 'Avg min between received tnx',
       'Time Diff between first and last (Mins)',
       'Unique Received From Addresses', 'min value received',
       'max value received ', 'avg val received', 'min val sent',
       'avg val sent', 'total transactions (including tnx to create contract',
       'total ether received', 'total ether balance']
    
scaler = MinMaxScaler()

In [5]:
# Log for Skewed Data
for c in columns:
    X_train_full[c] = X_train_full[c].apply(lambda x: np.log(x) if x > 0 else 0)
    X_test[c] = X_test[c].apply(lambda x: np.log(x) if x > 0 else 0)

# Scaling
X_train_full = scaler.fit_transform(X_train_full)
X_test = scaler.transform(X_test)

np.isnan(X_train_full)

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       ...,
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

In [6]:
import xgboost as xgb

In [7]:
# Create the DMatrix from NumPy arrays
dtrain = xgb.DMatrix(X_train_full, label=y_train_full)

# Set up parameters for XGBoost:
# - binary:logistic is used for binary classification and outputs probability.
# - learning_rate and other parameters can be tuned as needed.
params = {
    'objective': 'binary:logistic',
    'learning_rate': 0.1,  # You can adjust this and other parameters
    'max_depth': 3,
    'min_child_weight': 1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'n_estimators': 100
}

# Specify number of rounds (similar to epochs in deep learning)
num_rounds = 100

In [8]:
# Train the model
bst = xgb.train(params, dtrain, num_boost_round=num_rounds)

In [10]:
# Make predictions
dpredict = xgb.DMatrix(X_test)  # Assuming X_test is your test set
y_pred_proba = bst.predict(dpredict)  # These are probabilities
y_pred = (y_pred_proba >= 0.5).astype(int)  # Convert probabilities to binary output

# Evaluate predictions
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(y_test, y_pred)  # Assuming y_test is your true labels
print(f"Accuracy: {accuracy * 100:.2f}%")


Accuracy: 92.02%


In [12]:
import pandas as pd

# Load transactions from a CSV file
transactions_df = pd.read_csv('./transactions.csv')

# Convert the 'value' column to integers for calculation purposes
# This assumes all values in the 'value' column are numeric and can be converted to integers
transactions_df['value'] = transactions_df['value'].astype(int)

# Convert timestamps from string to datetime
transactions_df['timeStamp'] = pd.to_datetime(transactions_df['timeStamp'], unit='s')

# Calculate time differences in minutes for subsequent transactions
transactions_df.sort_values('timeStamp', inplace=True)
transactions_df['time_diff'] = transactions_df['timeStamp'].diff().dt.total_seconds() / 60

# Filter out sent and received transactions
sent_transactions = transactions_df[transactions_df['from'].str.lower() == '0x9A23A93905a0CEf884ACB6F6488BC5D7b964EF73'.lower()]
received_transactions = transactions_df[transactions_df['to'].str.lower() == '0x9A23A93905a0CEf884ACB6F6488BC5D7b964EF73'.lower()]

# Calculate various metrics with conversions from Wei to Ether
metrics = {
    'Avg min between sent tnx': sent_transactions['time_diff'].mean(),
    'Avg min between received tnx': received_transactions['time_diff'].mean(),
    'Time Diff between first and last (Mins)': (transactions_df['timeStamp'].iloc[-1] - transactions_df['timeStamp'].iloc[0]).total_seconds() / 60,
    'Unique Received From Addresses': received_transactions['from'].nunique(),
    'min value received': received_transactions['value'].min() / 1e18,
    'max value received ': received_transactions['value'].max() / 1e18,
    'avg val received': received_transactions['value'].mean() / 1e18,
    'min val sent': sent_transactions['value'].min() / 1e18,
    'avg val sent': sent_transactions['value'].mean() / 1e18,
    'total transactions (including tnx to create contract': transactions_df.shape[0],
    'total ether received': received_transactions['value'].sum() / 1e18,
    'total ether balance': transactions_df['value'].sum() / 1e18  # Simplistic balance calculation
}

In [13]:
metrics_df = pd.DataFrame([metrics])

In [14]:
for c in metrics_df.columns:
    if metrics_df[c].iloc[0] > 0:
        metrics_df[c] = np.log(metrics_df[c] + 1)

In [18]:
metrics_scaled = scaler.transform(metrics_df)

metrics_dnatrix = xgb.DMatrix(metrics_scaled)

In [20]:
# Make predictions
prediction = bst.predict(metrics_dnatrix) 

# Output the prediction
print(f"Prediction: {prediction}")

Prediction: [0.26647422]


In [23]:
from giza.zkcook import serialize_model
import xgboost as xgb


# Adjusting the custom serialization function:
def serialize_model(booster, output_path):
    model_bytes = booster.save_raw()  
    with open(output_path, 'wb') as file:
        file.write(model_bytes)

# Serialize directly without the need for get_booster()
serialize_model(bst, "xgb_fraud_eth_account_detect.json")


In [41]:
# conversion based on the expected input shape
input_spec = [tf.TensorSpec([None, X_train_full_tensor.shape[1]], tf.float32, name='input')]

In [42]:
import onnxmltools
from giza_actions.model import GizaModel
from giza_actions.task import task
from giza_actions.action import Action, action
import tf2onnx

In [43]:
# Convert the model to ONNX format
onnx_model, _ = tf2onnx.convert.from_keras(model=model, input_signature=input_spec, opset=13)

2024-06-08 10:35:56.186696: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:02:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-08 10:35:56.186794: I tensorflow/core/grappler/devices.cc:66] Number of eligible GPUs (core count >= 8, compute capability >= 0.0): 1
2024-06-08 10:35:56.187031: I tensorflow/core/grappler/clusters/single_machine.cc:361] Starting new session
2024-06-08 10:35:56.188219: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:984] could not open file to read NUMA node: /sys/bus/pci/devices/0000:02:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-06-08 10:35:56.188252: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2251] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gp

In [44]:
onnx_file_path = './fraud-eth-account-detect-model-100-eps.onnx'
with open(onnx_file_path, "wb") as f:
    f.write(onnx_model.SerializeToString())

In [45]:
@task(name='Prediction with ONNX 1')
def prediction(eval_numpy):
    model = GizaModel(model_path="./fraud-eth-account-detect-model-100-eps.onnx")
    result = model.predict(
        input_feed={"input": eval_numpy}, verifiable=False
    )
    return result

In [46]:
@action(name='Execution: Prediction with ONNX 1', log_prints=True)
def execution():
    eval_numpy = metrics_scaled_tensor.numpy()
    unverifiable_predictions = prediction(eval_numpy)
    return unverifiable_predictions

In [47]:
unverifiable_predictions = execution()

In [48]:
print(f"unVerifiablePrediction: {unverifiable_predictions[0][0]}")

unVerifiablePrediction: 0.1620265245437622


In [117]:
unverifiable_predictions

array([[0.16202652]], dtype=float32)

In [90]:
metrics_scaled_tensor = tf.convert_to_tensor(metrics_scaled, dtype=tf.float16)

In [132]:
@task(name='Prediction with Cairo 8')
def prediction(data, model_id, version_id):
    # Initialize a GizaModel with model and version id.
    model = GizaModel(
        id=model_id,
        version=version_id
    )
    # Call the predict function. 
    #Set `verifiable` to True, and define the expecting output datatype.
    (result, request_id) = model.predict(
        input_feed={"node_input": data}, 
        verifiable=True,
        output_dtype="arr_fixed_point"
    )
    return result, request_id

In [133]:
@action(name='Execution: Prediction with Cairo 8', log_prints=True)
def execution():
    data = metrics_scaled.astype(np.float32)
    model_id = 738
    version_id = 2
    (result, request_id) = prediction(data, model_id, version_id)
    return result, request_id



In [134]:
(result, request_id) = execution()

HTTPError: 500 Server Error: Internal Server Error for url: https://endpoint-kagwep-738-2-ef0a579e-7i3yxzspbq-ew.a.run.app/cairo_run

In [None]:
(result, request_id)

In [131]:
from giza.agents.model import GizaModel

MODEL_ID = 738  # Update with your model ID
VERSION_ID = 1  # Update with your version ID

def prediction(input, model_id, version_id):
    model = GizaModel(id=model_id, version=version_id)

    (result, proof_id) = model.predict(
        input_feed={'input': input}, verifiable=True
    )

    return result, proof_id

def execution():
    # The input data type should match the model's expected input
    input = metrics_scaled.astype(np.float16)

    (result, proof_id) = prediction(input, MODEL_ID, VERSION_ID)

    print(
        f"Predicted value for input {input.flatten()[0]} is {result[0].flatten()[0]}")

    return result, proof_id


execution()

TypeError: collect_dataclass_fields() got an unexpected keyword argument 'config_wrapper'