In [30]:
import warnings

import joblib
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from tensorflow.keras.models import load_model

from pipeline.features import FeatureEngineering

2024-05-25 17:42:32.807923: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
# Feature extractor from the project's pipeline package; preprocess_data()
# calls fe.extract_features(address) on it.
fe = FeatureEngineering()

In [10]:
# Load class information: the array used to map a predicted class index back
# to its label (indexed as classes[prediction] by the inference functions).
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects, so
# only load this file from a trusted location.
classes = np.load('../models/classes.npy', allow_pickle=True)

In [20]:
# Estimators keyed by name. In this notebook only the keys are consumed:
# models_inference() uses each one to locate ../models/<name>.pkl.
# Disabled entries (not loaded at inference time):
#   LogisticRegression(max_iter=1000, random_state=58)
#   DecisionTreeClassifier(random_state=23)
models = dict(
    RandomForest=RandomForestClassifier(n_estimators=150, random_state=62),
    SVC=SVC(random_state=42),
    NaiveBayes=GaussianNB(),
)

In [31]:
# Load the saved Keras network; nn_inference() reads this module-level `model`.
model = load_model("../models/neural_network_model.keras")

In [23]:
# Feature columns expected by the trained models. preprocess_data() adds any
# of these that are missing from the one-hot encoded row.
training_columns = [
    # plain numeric features
    'length',
    'num_alpha',
    'num_digits',
    # one-hot encoded `prefix`
    'prefix_0x',
    'prefix_1',
    'prefix_3',
    'prefix_X',
    'prefix_bc1',
    'prefix_unknown',
    # one-hot encoded `checksum`
    'checksum_base58check',
    'checksum_keccak-256',
    'checksum_unknown',
    'checksum_x11',
]

In [26]:
def preprocess_data(address, scaler=None):
    """Convert one address string into the scaled feature row the models expect.

    Steps: extract features with ``fe.extract_features``, one-hot encode the
    ``prefix`` and ``checksum`` fields, align the columns with
    ``training_columns``, then scale.

    Parameters
    ----------
    address : str
        The address to featurise.
    scaler : fitted scaler, optional
        An already-fitted scaler exposing ``transform`` (normally the
        ``StandardScaler`` fitted on the training data). Pass it whenever it
        is available: it is the only way to reproduce the training-time
        scaling.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, len(training_columns))``.

    Notes
    -----
    When ``scaler`` is omitted the function keeps its historical behaviour of
    fitting a new ``StandardScaler`` on the single row. With one sample each
    column's mean equals its value and its variance is zero, so the result is
    an all-zero row for *every* address. A ``RuntimeWarning`` is emitted in
    that case.
    """
    # Extract features
    address_features = fe.extract_features(address)
    df = pd.DataFrame([address_features])

    # One-hot encode the categorical fields.
    df = pd.get_dummies(df, columns=['prefix', 'checksum'])

    # Align with the training layout: add absent columns as 0, drop columns
    # the models never saw, and enforce the training column order (appending
    # from a set difference would give an arbitrary order).
    df = df.reindex(columns=training_columns, fill_value=0)

    # Scale features
    if scaler is None:
        warnings.warn(
            "preprocess_data() was called without a fitted scaler; fitting "
            "StandardScaler on a single row yields all-zero features. Pass "
            "the scaler fitted during training via `scaler=`.",
            RuntimeWarning,
            stacklevel=2,
        )
        return StandardScaler().fit_transform(df)
    return scaler.transform(df)

In [27]:
# Function to perform inference
def models_inference(address):
    """Classify ``address`` with every pickled scikit-learn model.

    For each key of ``models`` the estimator stored at
    ``../models/<key>.pkl`` is loaded with joblib and asked to predict on the
    preprocessed address; the predicted index is translated to its label via
    ``classes``.

    Returns a dict mapping model name -> predicted class label.
    """
    features = preprocess_data(address)

    labels_by_model = {}
    for name in models:
        # joblib.load unpickles the file, so the path must be trusted.
        estimator = joblib.load(f"../models/{name}.pkl")
        class_index = estimator.predict(features)[0]
        labels_by_model[name] = classes[class_index]
    return labels_by_model

In [32]:
# Function to make a prediction
def nn_inference(address):
    """Classify ``address`` with the module-level Keras ``model``.

    The address is preprocessed, scored by the network, and the index of the
    highest score in the row is mapped back to its label through ``classes``.

    Returns the predicted class label.
    """
    scores = model.predict(preprocess_data(address))

    # One index per input row; a single address gives a one-element array.
    best_index = np.argmax(scores, axis=1)
    return classes[best_index][0]

In [29]:
# Test address
# Test address: a 0x-prefixed hex string, classified by every pickled model
# listed in `models`.
# NOTE(review): the recorded output labels this 0x address as bitcoin/dash —
# confirm that is the expected result.
test_address = "0x6466f1ab7a0ddea74aa38a4d99da2b98412e680e"
results = models_inference(test_address)
print("Inference results for address:", test_address)
for model_name, predicted_class in results.items():
    print(f"{model_name}: {predicted_class}")

Inference results for address: 0x6466f1ab7a0ddea74aa38a4d99da2b98412e680e
RandomForest: bitcoin
SVC: dash
NaiveBayes: bitcoin


In [33]:
# Classify the same test address with the Keras network and show its label.
result = nn_inference(test_address)
print(result)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 87ms/step
bitcoin
