# Complete Fraud Detection Pipeline Demo

This notebook demonstrates the complete fraud detection pipeline covering all three tasks:
1. Data Analysis and Preprocessing
2. Model Building and Training
3. Model Explainability

In [None]:
import sys
sys.path.append('..')

from src.complete_pipeline import CompleteFraudPipeline
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Set style
# Matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*';
# older releases only ship the un-suffixed 'seaborn' name. Pick whichever one
# this installation provides so the notebook does not fail with OSError.
plt.style.use(
    'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
)
# Use the "husl" colour palette for subsequent seaborn plots.
sns.set_palette("husl")

## Initialize Pipeline

In [None]:
# Create the pipeline object; the later cells call its task1/task2/task3
# methods and read its `best_models` attribute.
pipeline = CompleteFraudPipeline()
print("Pipeline initialized successfully")

## Task 1: Data Analysis and Preprocessing

In [None]:
# Run Task 1: hand the three raw CSV locations to the pipeline, which
# returns a pair of objects (presumably the prepared fraud and credit-card
# datasets -- confirm in src.complete_pipeline).
task1_paths = {
    'fraud_path': '../data/raw/Fraud_Data.csv',
    'ip_path': '../data/raw/IpAddress_to_Country.csv',
    'credit_path': '../data/raw/creditcard.csv',
}
fraud_data, credit_data = pipeline.task1_data_analysis_preprocessing(**task1_paths)

# Report the dimensions of each returned object.
print(f"Fraud data shape: {fraud_data.shape}")
print(f"Credit data shape: {credit_data.shape}")

## Task 2: Model Building and Training

In [None]:
# Run Task 2: the pipeline returns one results mapping per dataset.
fraud_results, credit_results = pipeline.task2_model_building_training()


def _top_by_auc_pr(results):
    """Return the key of ``results`` whose entry has the largest 'auc_pr'."""
    return max(results.keys(), key=lambda name: results[name]['auc_pr'])


print("Model training completed!")
# The helper is called inside each f-string so that the selection for a
# dataset is evaluated right before its own line is printed.
print(f"Best fraud model: {_top_by_auc_pr(fraud_results)}")
print(f"Best credit model: {_top_by_auc_pr(credit_results)}")

## Task 3: Model Explainability

In [None]:
# Run Task 3
# The call's return value is not used here; any output comes from the
# method itself (presumably plots/reports -- confirm in src.complete_pipeline).
pipeline.task3_model_explainability()
print("Model explainability analysis completed!")

## Results Summary

In [None]:
# Display results summary: for every entry in `pipeline.best_models`, print
# a header followed by its 'auc_roc' and 'auc_pr' values to four decimals.
for entry in pipeline.best_models.items():
    dataset, model_info = entry
    print(f"\n=== {dataset} Best Model Results ===")
    print(f"AUC-ROC: {model_info['auc_roc']:.4f}")
    print(f"AUC-PR: {model_info['auc_pr']:.4f}")