In [34]:
import torch
from torch import nn, Tensor
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import math
import numpy as np
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.preprocessing import StandardScaler



In [12]:
class BinaryClassification(nn.Module):
  def __init__(self, embed_size, device):
    super(BinaryClassification, self).__init__()
    # Number of input features is embed_size.
    self.layer_1 = nn.Linear(embed_size, 64)
    self.layer_2 = nn.Linear(64, 64)
    self.layer_out = nn.Linear(64, 1)

    self.relu = nn.ReLU()
    self.dropout = nn.Dropout(p=0.1)
    self.batchnorm1 = nn.BatchNorm1d(64)
    self.batchnorm2 = nn.BatchNorm1d(64)
    self.device = device

  def forward(self, inputs):
    # print("start binary classification")
    # print(inputs.shape)
    # print(inputs)
    x = self.relu(self.layer_1(inputs))
    x = self.batchnorm1(x)
    x = self.relu(self.layer_2(x))
    x = self.batchnorm2(x)
    x = self.dropout(x)
    x = self.layer_out(x)
    #if math.isnan (x[0][0]):
    #  print(src)

    return x

In [13]:
class Classifier(nn.Module):
  def __init__(self, d_model, seq_len, nhead, dim_feedforward, nlayers, device, dropout = 0.5):
    super(Classifier, self).__init__()
    self.d_model = d_model
    self.seq_len = seq_len
    self.nhead = nhead
    self.dim_feedforward = dim_feedforward
    self.nlayers = nlayers
    self.device = device
    
    self.position_embedding = nn.Embedding(seq_len, d_model)
    encoder_layer = TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout, batch_first=True)
    self.encoder = TransformerEncoder(encoder_layer, nlayers)
    self.binary_classifier = BinaryClassification(seq_len*d_model, device)

  def forward(self, src: Tensor) -> Tensor:

    """
    Args:
        src: Tensor, shape [seq_len, batch_size]
        src_mask: Tensor, shape [seq_len, seq_len]
    Returns:
        output Tensor of shape [seq_len, batch_size, ntoken]
    """
    N, seq_length, embed_size = src.shape
    positions = torch.arange(0, seq_length).expand(N, seq_length).to(self.device)
    src_ = src + self.position_embedding(positions)
    output1 = self.encoder(src_)
    # print(output1.shape)
    # print(output1)
    output = self.binary_classifier(torch.reshape(output1, (N, seq_length*embed_size))) 

    return output, output1

In [14]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


In [15]:
#load test
X_test = np.load("X_test.npy", allow_pickle=True)
y_test = np.load("y_test.npy", allow_pickle=True)

In [16]:
## test data
class TestData(Dataset):

    def __init__(self, X_data, y_data):
        self.X_data = X_data
        self.y_data = y_data

    def __getitem__(self, index):
        return self.X_data[index], self.y_data[index]

    def __len__ (self):
        return len(self.X_data)


test_data = TestData(X_test, y_test)


test_loader = DataLoader(dataset=test_data, batch_size=2)

In [17]:
PATH = './PonziShield_tr_v1.pth'
loaded_model = torch.load(PATH)
loaded_model.eval()

Classifier(
  (position_embedding): Embedding(108, 11)
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-7): 8 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=11, out_features=11, bias=True)
        )
        (linear1): Linear(in_features=11, out_features=8, bias=True)
        (dropout): Dropout(p=0.5, inplace=False)
        (linear2): Linear(in_features=8, out_features=11, bias=True)
        (norm1): LayerNorm((11,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((11,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.5, inplace=False)
        (dropout2): Dropout(p=0.5, inplace=False)
      )
    )
  )
  (binary_classifier): BinaryClassification(
    (layer_1): Linear(in_features=1188, out_features=64, bias=True)
    (layer_2): Linear(in_features=64, out_features=64, bias=True)
    (layer_out): Linear(in_features=64, out_features=1, bias=True)
    

In [21]:
y_pred = []
y_true = []
sigmoid = nn.Sigmoid()

for X_batch, y_batch in test_loader:
        #print("w.requires_grad:",X_batch.requires_grad)
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        print(X_batch.shape)
        print(y_batch.shape)
        results,result_before_bin_classifier = loaded_model(X_batch.float())
        print(results.shape)
        y_batch_pred = torch.round(sigmoid(results))
        y_pred.extend(y_batch_pred.cpu().detach().numpy())
        y_true.extend(y_batch.cpu().detach().numpy())

print(y_pred[0])

torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([2, 108, 11])
torch.Size([2])
torch.Size([2, 1])
torch.Size([1, 108, 11])
torch.Size([1])
torch.Size([1, 1])
[0.]


In [10]:
count_true = 0
for i in range(len(y_pred)):
  if y_true[i]==y_pred[i]:
    count_true+=1
acc = count_true/len(y_pred)
print(len(y_test))
print(acc)

31
0.7741935483870968


In [36]:

def create_tensor_inputs(embedding_dir,contract_address):
    all_data = []
    all_labels = []
    for i in range(len(contract_address)):
        
        # print(filtered_df.loc[i, "address"], filtered_df.loc[i, "label"])
        fileNameToRead = embedding_dir + str(contract_address[i]) + '.csv'
        data = pd.read_csv(fileNameToRead)
        # Extract the relevant data (assuming the label column is named 'label')
        features = data.iloc[:, :11].to_numpy()
        label = data['label'][1]
        # print(features[1], labels[1])
        all_data.append((features))
        all_labels.append(label)
        # print("-----------------------------------------------------------------------")
    data_array = np.array(all_data)
    labels_array = np.array(all_labels)

    # Reshape the array to (301*108, 11) for normalization
    reshaped_data = data_array.reshape((-1, 11))
    # Initialize the StandardScaler
    scaler = StandardScaler()
    # Fit the scaler on the reshaped data and transform it
    normalized_data = scaler.fit_transform(reshaped_data)
    # Reshape the normalized data back to the original shape
    normalized_data_array = normalized_data.reshape(data_array.shape)

    data_tensor = torch.tensor(normalized_data_array, dtype=torch.float32)
    labels_tensor = torch.tensor(labels_array, dtype=torch.float32)

    print(data_tensor.shape)
    print(labels_tensor.shape)
    return data_tensor, labels_tensor

In [37]:
contract_addresses = [
    "0x6e38a457c722c6011b2dfa06d49240e797844d66",
    "0x109c4f2ccc82c4d77bde15f306707320294aea3f",
    "0x793ae8c1b1a160bfc07bfb0d04f85eab1a71f4f2",
    "0x5fe5b7546d1628f7348b023a0393de1fc825a4fd",
    "0xd79b4c6791784184e2755b2fc1659eaab0f80456",
    "0x273930d21e01ee25e4c219b63259d214872220a2",
    "0xd07ce4329b27eb8896c51458468d98a0e4c0394c"
]

create_tensor_inputs('./data/data_set/',contract_addresses)


torch.Size([7, 108, 11])
torch.Size([7])


(tensor([[[-0.2207, -0.2227, -0.1379,  ..., -0.6683,  3.1237, -0.7103],
          [-0.2207, -0.2227, -0.1379,  ..., -0.6683,  1.1492, -0.7103],
          [-0.2207, -0.2227, -0.1379,  ..., -0.6683,  1.1492, -0.7103],
          ...,
          [-0.2207, -0.2227, -0.1379,  ..., -0.6683, -0.8253, -0.7103],
          [-0.2207, -0.2227, -0.1379,  ..., -0.6683, -0.8253, -0.7103],
          [-0.2207, -0.2227, -0.1379,  ..., -0.6683, -0.8253, -0.7103]],
 
         [[ 3.1714,  3.1648,  1.7379,  ...,  1.3607,  3.1237,  1.4198],
          [ 5.2067,  5.1973,  0.0358,  ...,  1.6143,  1.1492,  1.4198],
          [ 2.8806,  2.8745,  0.1921,  ...,  1.1071,  1.1492,  1.4198],
          ...,
          [-0.2207, -0.2227, -0.1379,  ...,  1.1071, -0.8253,  1.4198],
          [-0.2207, -0.2227, -0.1379,  ...,  1.6143, -0.8253,  1.4198],
          [-0.2207, -0.2227, -0.1379,  ..., -0.1610, -0.8253,  1.2068]],
 
         [[-0.2207, -0.2227, -0.1379,  ...,  1.8679,  1.1492,  1.4198],
          [-0.2207, -0.2227,

In [29]:
class TransactionModality(nn.Module):
    def __init__(
        self,
        device,
        inductor=True,
        embedding_dir='./data/data_set/',
        model_path='./PonziShield_tr_v1.pth',
        ):
        super(TransactionModality, self).__init__()

        self.device = device
        self.embedding_dir=embedding_dir
        self.model = torch.load(model_path)

    def forward(self, dapp_addresses, train):

        if train==False:
            # do realtime prediction
            return
            
        
        # create 3d tensor [dapp_count,sequence_length,features]
        data_tensor, labels_tensor= create_tensor_inputs(self.embedding_dir,dapp_addresses)
        data_tensor = data_tensor.to(self.device)
        results,result_before_bin_classifier = self.model(data_tensor.float())

        # results shape = [dapp_count,1], result_before_bin_classifier = [dapp_count,sequence_length,features]
        return results,result_before_bin_classifier 