In [3]:
import certifi
import os
import polars as pl
import pandas as pd
import numpy as np
import torch 
os.environ['SSL_CERT_FILE'] = certifi.where()
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import numpy as np
import logging
from scipy.ndimage import zoom
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.data import Dataset, DataLoader

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
from giza.datasets import DatasetsLoader, DatasetsHub

In [24]:
class NeuralNet(nn.Module):
    def __init__(self, input_size):
        super(NeuralNet, self).__init__()
        self.layer1 = nn.Linear(input_size, 32)  # First hidden layer
        self.layer2 = nn.Linear(32, 16)          # Second hidden layer
        self.output_layer = nn.Linear(16, 1)     # Output layer

    def forward(self, x):
        x = F.relu(self.layer1(x))              # Activation function for the first hidden layer
        x = F.relu(self.layer2(x))              # Activation function for the second hidden layer
        x = torch.sigmoid(self.output_layer(x)) # Sigmoid activation for output layer
        return x

In [25]:
# Load data
df = pd.read_csv('./address_data_combined.csv')
X = df.drop(columns=['Address', 'FLAG'])
y = df['FLAG']

# Define columns to transform
columns = ['Avg min between sent tnx', 'Avg min between received tnx',
           'Time Diff between first and last (Mins)',
           'Unique Received From Addresses', 'min value received',
           'max value received ', 'avg val received', 'min val sent',
           'avg val sent', 'total transactions (including tnx to create contract',
           'total ether received', 'total ether balance']

# Log Transformation for Skewed Data
for c in columns:
    X[c] = X[c].apply(lambda x: np.log(x) if x > 0 else 0)

In [26]:
# Log Transformation for Skewed Data
for c in columns:
    X[c] = X[c].apply(lambda x: np.log(x) if x > 0 else 0)

# Continue with your data preprocessing...
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

scaler = MinMaxScaler()
X_train_full, X_test, y_train_full, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)

X_train_full = scaler.fit_transform(X_train_full)
X_test = scaler.transform(X_test)

In [27]:
print("Type of X_train_full:", type(X_train_full))
print("Shape of X_train_full:", X_train_full.shape)
print("Type of y_train_full:", type(y_train_full))
print("Shape of y_train_full:", y_train_full.shape)

Type of X_train_full: <class 'numpy.ndarray'>
Shape of X_train_full: (10616, 12)
Type of y_train_full: <class 'pandas.core.series.Series'>
Shape of y_train_full: (10616,)


In [28]:
# Convert y_train_full to a NumPy array if it's a Pandas Series
if isinstance(y_train_full, pd.Series):
    y_train_full = y_train_full.values  # Convert Series to NumPy array

# Now y_train_full is a NumPy array, and you can safely convert it to a tensor
train_dataset = TensorDataset(torch.tensor(X_train_full.astype('float32')), torch.tensor(y_train_full.astype('float32')).unsqueeze(1))
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)




In [29]:
# Assuming X_test and y_test have similar type issues
if isinstance(y_test, pd.Series):
    y_test = y_test.values

test_dataset = TensorDataset(torch.tensor(X_test.astype('float32')), torch.tensor(y_test.astype("float32")).unsqueeze(1))
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [30]:
model = NeuralNet(input_size=X_train_full.shape[1])
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss for binary classification
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer


In [31]:
num_epochs = 100

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    for inputs, labels in train_loader:
        optimizer.zero_grad()      # Clear the gradients
        outputs = model(inputs)    # Forward pass: compute the output
        loss = criterion(outputs, labels)  # Compute the loss
        loss.backward()            # Backward pass: compute gradient of the loss with respect to model parameters
        optimizer.step()           # Perform a single optimization step (parameter update)

    if (epoch+1) % 10 == 0:        # Print loss every 10 epochs
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


Epoch [10/100], Loss: 0.3177
Epoch [20/100], Loss: 0.2217
Epoch [30/100], Loss: 0.2127
Epoch [40/100], Loss: 0.3519
Epoch [50/100], Loss: 0.3333
Epoch [60/100], Loss: 0.2311
Epoch [70/100], Loss: 0.2147
Epoch [80/100], Loss: 0.2826
Epoch [90/100], Loss: 0.3579
Epoch [100/100], Loss: 0.5227


In [32]:
model.eval()  # Set the model to evaluation mode
with torch.no_grad():
    correct = 0
    total = 0
    for inputs, labels in test_loader:
        outputs = model(inputs)
        predicted = outputs.round()  # Convert probabilities to 0 or 1
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy: {accuracy:.2f}%')


Accuracy: 88.53%


In [33]:
import torch.onnx


In [36]:
# Create a sample input tensor with the appropriate size and data type
sample_input = torch.randn(1, X_train_full.shape[1], dtype=torch.float32)



In [37]:
sample_input 

tensor([[ 0.2848,  1.2071, -1.1686,  0.0279,  0.9669, -0.7236,  0.6373,  0.8096,
          0.0031,  1.1041,  0.5836, -0.8584]])

In [38]:
torch.onnx.export(
    model,                       
    sample_input,                 
    "fraud_eth__account_detect_model_nn.onnx",                 
    export_params=True,           
    opset_version=10,             
    do_constant_folding=True  
)
