In [1]:
import pandas as pd

df = pd.read_csv("/kaggle/input/transaction-dataset-csv/transaction_dataset.csv")  

# Preview the dataset
print(df.head())


   Unnamed: 0  Index                                     Address  FLAG  \
0           0      1  0x00009277775ac7d0d59eaad8fee3d10ac6c805e8     0   
1           1      2  0x0002b44ddb1476db43c868bd494422ee4c136fed     0   
2           2      3  0x0002bda54cb772d040f779e88eb453cac0daa244     0   
3           3      4  0x00038e6ba2fd5c09aedb96697c8d7b8fa6632e5e     0   
4           4      5  0x00062d1dd1afb6fb02540ddad9cdebfe568e0d89     0   

   Avg min between sent tnx  Avg min between received tnx  \
0                    844.26                       1093.71   
1                  12709.07                       2958.44   
2                 246194.54                       2434.02   
3                  10219.60                      15785.09   
4                     36.61                      10707.77   

   Time Diff between first and last (Mins)  Sent tnx  Received Tnx  \
0                                704785.63       721            89   
1                               1218216.73      

In [2]:
# Dropping unnecessary columns, but keep the target column (e.g., FLAG)
columns_to_remove = ["Address"]  
df = df.drop(columns=columns_to_remove, axis=1)

print(f"Remaining columns: {list(df.columns)}")


Remaining columns: ['Unnamed: 0', 'Index', 'FLAG', 'Avg min between sent tnx', 'Avg min between received tnx', 'Time Diff between first and last (Mins)', 'Sent tnx', 'Received Tnx', 'Number of Created Contracts', 'Unique Received From Addresses', 'Unique Sent To Addresses', 'min value received', 'max value received ', 'avg val received', 'min val sent', 'max val sent', 'avg val sent', 'min value sent to contract', 'max val sent to contract', 'avg value sent to contract', 'total transactions (including tnx to create contract', 'total Ether sent', 'total ether received', 'total ether sent contracts', 'total ether balance', ' Total ERC20 tnxs', ' ERC20 total Ether received', ' ERC20 total ether sent', ' ERC20 total Ether sent contract', ' ERC20 uniq sent addr', ' ERC20 uniq rec addr', ' ERC20 uniq sent addr.1', ' ERC20 uniq rec contract addr', ' ERC20 avg time between sent tnx', ' ERC20 avg time between rec tnx', ' ERC20 avg time between rec 2 tnx', ' ERC20 avg time between contract tnx',

In [3]:
# handling missing values 
print(df.isnull().sum())

# Option 1: Drop columns with too many missing values
df = df.dropna(axis=1, thresh=int(0.9 * len(df)))  # Remove columns with more than 10% missing values


df = df.fillna(0)  

Unnamed: 0                                                 0
Index                                                      0
FLAG                                                       0
Avg min between sent tnx                                   0
Avg min between received tnx                               0
Time Diff between first and last (Mins)                    0
Sent tnx                                                   0
Received Tnx                                               0
Number of Created Contracts                                0
Unique Received From Addresses                             0
Unique Sent To Addresses                                   0
min value received                                         0
max value received                                         0
avg val received                                           0
min val sent                                               0
max val sent                                               0
avg val sent            

In [4]:
print(f"Dataset shape: {df.shape}")


Dataset shape: (9841, 49)


In [5]:
df = df.drop(['Unnamed: 0', 'Index'], axis=1, errors='ignore')  # Dropping irrelevant columns if present


In [6]:
print(df.dtypes)


FLAG                                                      int64
Avg min between sent tnx                                float64
Avg min between received tnx                            float64
Time Diff between first and last (Mins)                 float64
Sent tnx                                                  int64
Received Tnx                                              int64
Number of Created Contracts                               int64
Unique Received From Addresses                            int64
Unique Sent To Addresses                                  int64
min value received                                      float64
max value received                                      float64
avg val received                                        float64
min val sent                                            float64
max val sent                                            float64
avg val sent                                            float64
min value sent to contract              

In [7]:
print(df['FLAG'].value_counts())


FLAG
0    7662
1    2179
Name: count, dtype: int64


In [8]:
df = pd.get_dummies(df, drop_first=True)


In [9]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
numerical_features = df.select_dtypes(include=['float64', 'int64']).columns
df[numerical_features] = scaler.fit_transform(df[numerical_features])


In [10]:
X = df.drop('FLAG', axis=1)  # Removing target variable from features
y = df['FLAG']  # Target variable


In [11]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [12]:
!pip install reservoirpy numpy pandas scikit-learn matplotlib


Collecting reservoirpy
  Downloading reservoirpy-0.3.12-py3-none-any.whl.metadata (13 kB)
Downloading reservoirpy-0.3.12-py3-none-any.whl (202 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m202.5/202.5 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: reservoirpy
Successfully installed reservoirpy-0.3.12


In [13]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


X = df.drop(columns=["FLAG"]).values  # Features
y = df["FLAG"].values  # Target

# Normalize features to [0, 1] for stability
scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape)


Train shape: (7872, 511)
Test shape: (1969, 511)


In [14]:
from reservoirpy.nodes import Reservoir
from sklearn.linear_model import Ridge

# Define ESN hyperparameters
reservoir_size = 500
spectral_radius = 0.9

# Initialize the ESN reservoir
reservoir = Reservoir(
    units=reservoir_size,
    sr=spectral_radius,
    input_scaling=0.1,
    bias_scaling=0.2
)

# Step 1: Get the reservoir states (hidden states) for the training and test data using the run() method
X_reservoir_train = reservoir.run(X_train)  # Get the hidden states for training
X_reservoir_test = reservoir.run(X_test)  # Get the hidden states for testing

# Step 2: Train the readout layer (Ridge regression) on the training reservoir states
readout = Ridge(alpha=1e-6)  # Initialize Ridge with regularization
readout.fit(X_reservoir_train, y_train)  # Fit Ridge regression on the training data

# Step 3: Make predictions on the test data using the trained readout
predictions = readout.predict(X_reservoir_test)

# Step 4: Evaluate the model (using RMSE here)
rmse = np.sqrt(np.mean((predictions - y_test) ** 2))
print(f"Root Mean Squared Error: {rmse}")


Running Reservoir-0: 100%|██████████| 7872/7872 [00:01<00:00, 6011.45it/s]
Running Reservoir-0: 100%|██████████| 1969/1969 [00:00<00:00, 6063.99it/s]


Root Mean Squared Error: 0.22627681874386854


In [15]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Convert continuous predictions to binary values (for binary classification)
y_pred_binary = (predictions >= 0.5).astype(int)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred_binary)
print(f"Accuracy: {accuracy}")

# Confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred_binary))

# Classification report
print("Classification Report:")
print(classification_report(y_test, y_pred_binary))



Accuracy: 0.9558151345860844
Confusion Matrix:
[[1490   52]
 [  35  392]]
Classification Report:
              precision    recall  f1-score   support

         0.0       0.98      0.97      0.97      1542
         1.0       0.88      0.92      0.90       427

    accuracy                           0.96      1969
   macro avg       0.93      0.94      0.94      1969
weighted avg       0.96      0.96      0.96      1969

