In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, confusion_matrix
DATA_PATH_FEATURES = '../data/elliptic_txs_features.csv'
DATA_PATH_CLASSES = '../data/elliptic_txs_classes.csv'

# Raw class strings in the labels file -> binary target.
# '1' becomes the positive class (reported as fraud), '2' becomes 0 (licit).
LABEL_MAP = {'1': 1, '2': 0}

print("Loading and processing data...")
df_classes = pd.read_csv(DATA_PATH_CLASSES)
# The features file is read without a header row; only the first two columns
# get names, the remaining feature columns keep their integer positions.
df_features = (
    pd.read_csv(DATA_PATH_FEATURES, header=None)
    .rename(columns={0: 'txId', 1: 'time_step'})
)

df_merged = pd.merge(df_features, df_classes, on='txId', how='left')

# Keep only rows carrying a known label. Filtering on membership (rather than
# on != 'unknown') also drops rows whose class is NaN after the left merge or
# holds an unexpected value, so no NaN can leak into the target.
is_labelled = df_merged['class'].isin(list(LABEL_MAP))
df_clean = df_merged[is_labelled].copy()
df_clean['class'] = df_clean['class'].map(LABEL_MAP).astype('int64')

# Identifiers, the time step and the target itself are not model inputs.
X = df_clean.drop(columns=['txId', 'class', 'time_step'])
y = df_clean['class']

# Hold out 30% for testing. The target is heavily imbalanced, so stratify on
# the label to keep the positive-class ratio the same in both partitions;
# an unstratified draw can shift that ratio and skew the reported metrics.
# NOTE(review): this is a random split that ignores time_step ordering. If the
# evaluation should mimic scoring future transactions, consider a time-based
# split instead -- confirm the intended protocol.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Fit the scaler on the training partition only, then reuse its statistics
# for the test partition so no test information leaks into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

print(f"Training shape: {X_train_scaled.shape}")

# Inverse-frequency class weights, scaled so that each class contributes
# total / 2 worth of weight overall.
counts = np.bincount(y_train)
total = len(y_train)

# Per-class inverse frequency (label 0 first, then label 1).
weight_for_0, weight_for_1 = (1.0 / counts[label] for label in (0, 1))

class_weight = {
    label: (inverse_freq * total) / 2.0
    for label, inverse_freq in enumerate((weight_for_0, weight_for_1))
}

print(f"Class Weights: {class_weight}")

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation, dropout and batch shuffling are repeatable when the notebook
# is re-run. (Bit-exact GPU determinism may need further configuration.)
tf.keras.utils.set_random_seed(42)

# Small fully-connected binary classifier: two hidden blocks of
# Dense -> BatchNormalization -> Dropout, then a single sigmoid output unit.
model = Sequential([
    # Declare the input with an explicit Input layer rather than passing
    # input_shape= to the first Dense layer; recent Keras versions emit a
    # UserWarning for the latter when used inside a Sequential model.
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(32, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(1, activation='sigmoid')
])
opt = Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

# Stop once validation loss has not improved for 10 epochs and roll back to
# the best weights seen.
early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
print("\nStarting Training...")

# Training configuration gathered in one place: up to 100 epochs, mini-batches
# of 256, 20% of the training data held out for validation, early stopping,
# and the per-class weights applied to the loss.
fit_options = dict(
    epochs=100,
    batch_size=256,
    validation_split=0.2,
    callbacks=[early_stop],
    class_weight=class_weight,
    verbose=1,
)
history = model.fit(X_train_scaled, y_train, **fit_options)
print("\n--- Deep Learning Evaluation ---")

# Sigmoid outputs for the held-out set, then a hard decision at 0.5.
y_pred_probs = model.predict(X_test_scaled)
y_pred = np.greater(y_pred_probs, 0.5).astype(int)

# Per-class precision / recall / F1 on the test partition.
report = classification_report(
    y_test,
    y_pred,
    target_names=['Licit (0)', 'Fraud (1)'],
)
print(report)

Loading and processing data...
Training shape: (32594, 165)
Class Weights: {0: 0.5537170426746398, 1: 5.154016445287793}


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)



Starting Training...
Epoch 1/100
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 6ms/step - accuracy: 0.6227 - loss: 0.6480 - val_accuracy: 0.8928 - val_loss: 0.3692
Epoch 2/100
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7969 - loss: 0.3703 - val_accuracy: 0.9314 - val_loss: 0.2277
Epoch 3/100
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8666 - loss: 0.2888 - val_accuracy: 0.9370 - val_loss: 0.1887
Epoch 4/100
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8907 - loss: 0.2618 - val_accuracy: 0.9554 - val_loss: 0.1542
Epoch 5/100
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9044 - loss: 0.2463 - val_accuracy: 0.9385 - val_loss: 0.1684
Epoch 6/100
[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9138 - loss: 0.2241 - val_accuracy: 0.9587 - val_loss: 0.1303
