In [15]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Input
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression

In [3]:
# Load the two Elliptic CSV tables from the working directory.
# The class table is read with its header row; the feature table is read with
# header=None, so pandas labels its columns 0..n-1.
classes = pd.read_csv("elliptic_txs_classes.csv")
features = pd.read_csv("elliptic_txs_features.csv", header=None)

In [4]:
# Preprocessing (as per original notebook)
# Encode the raw string labels as integers: 1 = illicit, 0 = licit, -1 = unlabelled.
LABEL_MAP = {'unknown': -1, '1': 1, '2': 0}

# Only map while the column still holds the raw strings. Re-running this cell
# after the column has been converted would otherwise send every integer to NaN
# (none of them is a key of LABEL_MAP) and silently destroy the labels.
if not pd.api.types.is_numeric_dtype(classes['class']):
    classes['class'] = classes['class'].map(LABEL_MAP)

# Series.map yields NaN for any value missing from LABEL_MAP, and NaN passes the
# `!= -1` filter below, so fail loudly rather than train on invalid labels.
if not classes['class'].isin([-1, 0, 1]).all():
    raise ValueError(
        "classes['class'] must contain only 'unknown', '1' or '2' "
        "(encoded as -1, 1, 0)"
    )

filtered_data = classes[classes['class'] != -1]  # keep labelled transactions only

# Drop the transaction ID (first column). Because the features were read with
# header=None the columns are labelled 0..n-1, so dropping label 0 is a no-op on
# a re-run, whereas `iloc[:, 1:]` would strip one more real feature column each
# time the cell is executed.
features = features.drop(columns=[0], errors='ignore')

# NOTE(review): rows are matched by positional index, which assumes both CSVs
# list the transactions in the same order — confirm, or join on the ID column.
features = features.loc[filtered_data.index]
labels = filtered_data['class']


In [5]:
# Train-Test Split
# Hold out 20% of the labelled rows. Stratifying on the labels keeps the class
# ratio the same in both partitions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42,
)


In [6]:
# Normalize Data
# Fit the scaler on the training rows only, then apply that same fitted scaler
# to both partitions so no test-set statistics enter the transformation.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [16]:
# Define MLP Model with explicit input layer
# One input unit per scaled feature column.
input_layer = Input(shape=(X_train_scaled.shape[1],))

# Hidden stack described as (units, dropout rate) pairs; a rate of None means
# the Dense layer is not followed by Dropout.
x = input_layer
for units, drop_rate in [(128, 0.3), (64, 0.3), (32, None), (16, None), (8, None)]:
    x = Dense(units, activation='relu')(x)
    if drop_rate is not None:
        x = Dropout(drop_rate)(x)

# Two-way softmax head: index 0 = licit, index 1 = illicit.
output_layer = Dense(2, activation='softmax')(x)


In [17]:
# Wrap the layer graph (input tensor -> softmax output) in a trainable Keras model.
mlp_model = Model(
    inputs=input_layer,
    outputs=output_layer,
)


In [19]:
# Train MLP (the model must be compiled and called before embeddings are extracted)
# Integer class labels go with the sparse categorical cross-entropy loss.
mlp_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Note: validation_data is the held-out test split, so the val_* numbers logged
# per epoch are computed on the same rows used for the final evaluation.
mlp_model.fit(
    X_train_scaled,
    y_train,
    validation_data=(X_test_scaled, y_test),
    epochs=20,
    batch_size=32,
    verbose=1,
)

# One-sample forward pass so the model has definitely been called before its
# input and intermediate layer outputs are referenced later on.
_ = mlp_model.predict(X_train_scaled[:1])

Epoch 1/20
[1m1165/1165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 3ms/step - accuracy: 0.9358 - loss: 0.1864 - val_accuracy: 0.9720 - val_loss: 0.0950
Epoch 2/20
[1m1165/1165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9708 - loss: 0.0987 - val_accuracy: 0.9735 - val_loss: 0.0856
Epoch 3/20
[1m1165/1165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9729 - loss: 0.0867 - val_accuracy: 0.9758 - val_loss: 0.0804
Epoch 4/20
[1m1165/1165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 0.9747 - loss: 0.0799 - val_accuracy: 0.9768 - val_loss: 0.0762
Epoch 5/20
[1m1165/1165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 4ms/step - accuracy: 0.9753 - loss: 0.0769 - val_accuracy: 0.9791 - val_loss: 0.0716
Epoch 6/20
[1m1165/1165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 3ms/step - accuracy: 0.9789 - loss: 0.0694 - val_accuracy: 0.9785 - val_loss: 0.0718
Epoch 7/20
[1m1

In [20]:
# Extract MLP Embeddings (Intermediate Layer Output)
# layers[-3] is the third layer from the end; with the layer stack defined
# earlier in this notebook that is the 16-unit Dense layer.
embedding_layer = mlp_model.layers[-3]
intermediate_layer_model = tf.keras.Model(
    inputs=mlp_model.input,
    outputs=embedding_layer.output,
)

# Project both partitions through the truncated network.
X_train_embedded = intermediate_layer_model.predict(X_train_scaled)
X_test_embedded = intermediate_layer_model.predict(X_test_scaled)

[1m1165/1165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step
[1m292/292[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [21]:
# Train Logistic Regression on MLP Embeddings
# Default-configured classifier fitted on the training embeddings; fit() returns
# the estimator itself, so construction and fitting are chained.
logreg = LogisticRegression().fit(X_train_embedded, y_train)

# Hard class predictions for the held-out embeddings.
y_pred = logreg.predict(X_test_embedded)

In [22]:
# Evaluate Model
# Apply the four default-configured scorers to the same (truth, prediction) pair.
accuracy, precision, recall, f1 = (
    scorer(y_test, y_pred)
    for scorer in (accuracy_score, precision_score, recall_score, f1_score)
)

# Micro Average F1-Score
# For single-label predictions this coincides with accuracy, as the printed
# results show.
m_f1 = f1_score(y_test, y_pred, average='micro')

In [23]:
# Print Results
# One metric per line, four decimal places each; a single print call with a
# newline separator produces the same output as five separate prints.
print(
    f'Accuracy: {accuracy:.4f}',
    f'Precision: {precision:.4f}',
    f'Recall: {recall:.4f}',
    f'F1 Score: {f1:.4f}',
    f'Micro Average F1 Score: {m_f1:.4f}',
    sep='\n',
)

Accuracy: 0.9795
Precision: 0.9233
Recall: 0.8614
F1 Score: 0.8913
Micro Average F1 Score: 0.9795
