In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from pyod.models.auto_encoder import AutoEncoder
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score

# Load the dataset
df = pd.read_csv('creditcard.csv')

# Scale numerical features
scaler = StandardScaler()
df['scaled_amount'] = scaler.fit_transform(df['Amount'].values.reshape(-1, 1))
df['scaled_time'] = scaler.fit_transform(df['Time'].values.reshape(-1, 1))
df = df.drop(['Time', 'Amount'], axis=1)

# Separate normal and fraudulent transactions
normal_df = df[df['Class'] == 0]
fraud_df = df[df['Class'] == 1]

# Prepare training and testing data
X_train, X_test_normal = train_test_split(normal_df.drop('Class', axis=1), test_size=0.2, random_state=42)
X_test = pd.concat([X_test_normal, fraud_df.drop('Class', axis=1)])
y_test = pd.concat([pd.Series([0] * len(X_test_normal)), pd.Series([1] * len(fraud_df))])

# Instantiate and train the AutoEncoder model with the corrected parameter
clf = AutoEncoder(
    epoch_num=20,
    batch_size=32,
    contamination=len(fraud_df) / len(df)
)
clf.fit(X_train.values)

# Make predictions on the test set
y_test_scores = clf.decision_function(X_test.values)
y_test_pred = clf.predict(X_test.values)

# Evaluate the model's performance
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_test_pred, target_names=['Normal', 'Fraud']))
print(f"\nROC-AUC Score: {roc_auc_score(y_test, y_test_scores):.4f}")

Training: 100%|██████████| 20/20 [02:26<00:00,  7.35s/it]


Confusion Matrix:
[[56759   104]
 [  108   384]]

Classification Report:
              precision    recall  f1-score   support

      Normal       1.00      1.00      1.00     56863
       Fraud       0.79      0.78      0.78       492

    accuracy                           1.00     57355
   macro avg       0.89      0.89      0.89     57355
weighted avg       1.00      1.00      1.00     57355


ROC-AUC Score: 0.9590
