# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, classification_report, confusion_matrix
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

class CreditCardMLPipeline:
    """Credit card fraud detection pipeline with MLflow experiment tracking.

    The pipeline loads (or synthesizes) a dataset with a binary ``Class``
    target column, splits and standardizes it, trains five scikit-learn
    classifiers, logs each run to MLflow, and prints/plots a comparison.

    The steps are meant to be called in this order (``run_complete_pipeline``
    does exactly that):

    1. ``load_dataset``
    2. ``preprocess_data``
    3. ``initialize_models``
    4. ``train_and_evaluate_models``
    5. ``display_results_summary``
    6. ``plot_model_comparison``
    """

    def __init__(self, experiment_name="credit_card_fraud_detection"):
        """Create the pipeline and select the MLflow experiment.

        Args:
            experiment_name: Name passed to ``mlflow.set_experiment``.
        """
        self.experiment_name = experiment_name
        self.models = {}
        self.results = {}
        self.scaler = StandardScaler()

        # Filled in by load_dataset() / preprocess_data(). Initialised here so
        # that calling a later step too early produces a clear error instead
        # of an AttributeError.
        self.data = None
        self.X_train = self.X_test = None
        self.y_train = self.y_test = None
        self.X_train_scaled = self.X_test_scaled = None

        # Set up MLflow
        mlflow.set_experiment(experiment_name)
        print(f"MLflow experiment '{experiment_name}' is ready!")

    @staticmethod
    def _generate_synthetic_data(n_samples=10000, fraud_rate=0.02, seed=42):
        """Build a synthetic, imbalanced fraud dataset as a DataFrame.

        Columns are ``V1``..``V5`` (standard normal), ``Amount`` (exponential,
        scale 50), ``Time`` (uniform over 48 hours in seconds) and the integer
        label ``Class`` (1 = fraud). Fraud rows get shifted ``V1``/``V2`` values
        and a reduced ``Amount`` so that they are separable.

        A private ``RandomState`` is used instead of ``np.random.seed`` so the
        caller's global NumPy random state is left untouched. The draw order
        matches the original global-seed implementation, so the generated
        feature values are unchanged for the default seed.
        """
        rng = np.random.RandomState(seed)

        # Draw order matters for reproducibility: V1..V5, Amount, Time, then
        # the fraud indices, then the per-row fraud adjustments.
        columns = {
            name: rng.normal(0, 1, n_samples)
            for name in ('V1', 'V2', 'V3', 'V4', 'V5')
        }
        columns['Amount'] = rng.exponential(50, n_samples)
        columns['Time'] = rng.uniform(0, 172800, n_samples)  # 48 hours in seconds

        fraud_indices = rng.choice(
            n_samples, int(fraud_rate * n_samples), replace=False
        )
        labels = np.zeros(n_samples, dtype=int)
        labels[fraud_indices] = 1
        columns['Class'] = labels

        # Kept as a per-row loop (not vectorized) so the sequence of random
        # draws, and therefore the generated data, stays the same.
        for idx in fraud_indices:
            columns['V1'][idx] += rng.normal(2, 0.5)
            columns['V2'][idx] += rng.normal(-1.5, 0.5)
            columns['Amount'][idx] *= rng.uniform(0.1, 0.3)

        return pd.DataFrame(columns)

    @staticmethod
    def _positive_class_scores(model, X):
        """Return per-sample scores for the positive class, or ``None``.

        Uses ``predict_proba`` (second column) when the model has it, falls
        back to ``decision_function``, and returns ``None`` if neither exists.
        """
        if hasattr(model, 'predict_proba'):
            return model.predict_proba(X)[:, 1]
        if hasattr(model, 'decision_function'):
            return model.decision_function(X)
        return None

    def load_dataset(self, dataset_path=None):
        """Load a CSV dataset, or generate a synthetic one.

        Args:
            dataset_path: Path of a CSV file to read with ``pd.read_csv``.
                When falsy, a synthetic dataset of 10000 rows with 2% fraud
                cases is generated instead.

        Returns:
            The loaded DataFrame, which is also stored on ``self.data``.
        """
        if dataset_path:
            # Load custom dataset
            self.data = pd.read_csv(dataset_path)
            print(f"Loaded custom dataset with shape: {self.data.shape}")
        else:
            print("Generating synthetic credit card dataset...")
            self.data = self._generate_synthetic_data()
            print(f"Generated synthetic dataset with shape: {self.data.shape}")
            print(f"Fraud cases: {self.data['Class'].sum()} ({self.data['Class'].mean()*100:.2f}%)")

        return self.data

    def preprocess_data(self):
        """Split the dataset into train/test sets and standardize the features.

        Object-typed feature columns are label-encoded, the data is split
        80/20 with stratification on the target, and a ``StandardScaler`` is
        fitted on the training features only and applied to both sets.

        Returns:
            Tuple ``(X_train_scaled, X_test_scaled, y_train, y_test)``.

        Raises:
            RuntimeError: If no dataset has been loaded yet.
            KeyError: If the dataset has no ``Class`` column.
        """
        if getattr(self, 'data', None) is None:
            raise RuntimeError("No dataset loaded; call load_dataset() first.")
        if 'Class' not in self.data.columns:
            raise KeyError("Dataset must contain a 'Class' target column.")

        print("Preprocessing data...")

        # Separate features and target (drop() returns a new frame, so the
        # in-place column assignments below do not touch self.data).
        X = self.data.drop('Class', axis=1)
        y = self.data['Class']

        # Handle any categorical variables if present
        categorical_columns = [col for col in X.columns if X[col].dtype == 'object']
        for col in categorical_columns:
            X[col] = LabelEncoder().fit_transform(X[col])

        # Split the data
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        # Scale features: fit on the training split only, reuse for the test split.
        self.X_train_scaled = self.scaler.fit_transform(self.X_train)
        self.X_test_scaled = self.scaler.transform(self.X_test)

        print(f"Training set shape: {self.X_train.shape}")
        print(f"Test set shape: {self.X_test.shape}")

        return self.X_train_scaled, self.X_test_scaled, self.y_train, self.y_test

    def initialize_models(self):
        """Create the five classifiers to compare and store them on ``self.models``.

        Returns:
            Dict mapping a display name to an unfitted scikit-learn estimator.
        """
        self.models = {
            'Logistic Regression': LogisticRegression(random_state=42, max_iter=1000),
            'Random Forest': RandomForestClassifier(n_estimators=100, random_state=42),
            'Gradient Boosting': GradientBoostingClassifier(random_state=42),
            'SVM': SVC(probability=True, random_state=42),
            'Neural Network': MLPClassifier(hidden_layer_sizes=(100, 50), random_state=42, max_iter=1000)
        }
        print(f"Initialized {len(self.models)} models for training")
        return self.models

    def train_and_evaluate_models(self):
        """Fit every model, evaluate it on the test split and log to MLflow.

        For each entry of ``self.models`` one MLflow run is created that
        records the estimator parameters, the test metrics (accuracy,
        precision, recall, F1, AUC when scores are available), the 5-fold
        cross-validated F1 mean/std on the training split, and the fitted model.

        Returns:
            ``self.results``: dict keyed by model name; each value holds the
            keys ``accuracy``, ``precision``, ``recall``, ``f1``, ``auc``,
            ``cv_mean``, ``cv_std`` and ``model``.

        Raises:
            RuntimeError: If ``preprocess_data`` has not been run yet.
        """
        if getattr(self, 'X_train_scaled', None) is None:
            raise RuntimeError(
                "Data has not been preprocessed; call preprocess_data() first."
            )

        print("Training and evaluating models...")

        for model_name, model in self.models.items():
            print(f"\nTraining {model_name}...")

            with mlflow.start_run(run_name=model_name):
                # Train model
                model.fit(self.X_train_scaled, self.y_train)

                # Make predictions
                y_pred = model.predict(self.X_test_scaled)
                y_score = self._positive_class_scores(model, self.X_test_scaled)

                # Calculate metrics
                accuracy = accuracy_score(self.y_test, y_pred)
                precision = precision_score(self.y_test, y_pred, zero_division=0)
                recall = recall_score(self.y_test, y_pred, zero_division=0)
                f1 = f1_score(self.y_test, y_pred, zero_division=0)

                # AUC needs continuous scores; skipped when the model has none.
                auc = roc_auc_score(self.y_test, y_score) if y_score is not None else None

                # Cross-validation score. cross_val_score is given the already
                # fitted estimator together with the training split.
                # NOTE(review): the folds reuse features scaled with statistics
                # of the whole training split, so the CV estimate is slightly
                # optimistic; wrap scaler + model in a Pipeline to remove that.
                cv_scores = cross_val_score(model, self.X_train_scaled, self.y_train, cv=5, scoring='f1')
                cv_mean = cv_scores.mean()
                cv_std = cv_scores.std()

                # Store results
                self.results[model_name] = {
                    'accuracy': accuracy,
                    'precision': precision,
                    'recall': recall,
                    'f1': f1,
                    'auc': auc,
                    'cv_mean': cv_mean,
                    'cv_std': cv_std,
                    'model': model
                }

                # Log parameters to MLflow in one batch
                if hasattr(model, 'get_params'):
                    mlflow.log_params(model.get_params())

                # Log metrics to MLflow in one batch
                logged_metrics = {
                    "accuracy": accuracy,
                    "precision": precision,
                    "recall": recall,
                    "f1_score": f1,
                    "cv_mean_f1": cv_mean,
                    "cv_std_f1": cv_std,
                }
                if auc is not None:
                    logged_metrics["auc"] = auc
                mlflow.log_metrics(logged_metrics)

                # Log model
                mlflow.sklearn.log_model(model, "model")

                auc_str = f"{auc:.4f}" if auc is not None else "N/A"
                print(f"{model_name} - Accuracy: {accuracy:.4f}, F1: {f1:.4f}, AUC: {auc_str}")

        return self.results

    def display_results_summary(self):
        """Print a metrics table, the best model and its classification report.

        Models are ranked by test-set F1 score (descending); the first one is
        reported as the best model.

        Returns:
            Tuple ``(results_df_sorted, best_model_name)`` where the DataFrame
            has one row per model, sorted by F1.

        Raises:
            RuntimeError: If there are no results to summarize.
        """
        if not self.results:
            raise RuntimeError(
                "No results available; call train_and_evaluate_models() first."
            )

        print("\n" + "="*80)
        print("MODEL PERFORMANCE SUMMARY")
        print("="*80)

        # Create results DataFrame
        results_df = pd.DataFrame.from_dict(self.results, orient='index')
        results_df = results_df.round(4)

        # Sort by F1 score (important for imbalanced dataset)
        results_df_sorted = results_df.sort_values('f1', ascending=False)

        print(results_df_sorted[['accuracy', 'precision', 'recall', 'f1', 'auc', 'cv_mean']].to_string())

        # Best model
        best_model_name = results_df_sorted.index[0]
        best_model = self.results[best_model_name]['model']
        best_row = results_df_sorted.loc[best_model_name]

        # AUC is None/NaN for models without scores; formatting it with
        # ':.4f' would raise on None, so fall back to "N/A".
        best_auc = best_row['auc']
        auc_str = f"{best_auc:.4f}" if pd.notna(best_auc) else "N/A"

        print(f"\n🏆 BEST MODEL: {best_model_name}")
        print(f"   F1 Score: {best_row['f1']:.4f}")
        print(f"   Accuracy: {best_row['accuracy']:.4f}")
        print(f"   AUC: {auc_str}")

        # Classification report for best model
        y_pred_best = best_model.predict(self.X_test_scaled)
        print(f"\nDetailed Classification Report for {best_model_name}:")
        print(classification_report(self.y_test, y_pred_best))

        return results_df_sorted, best_model_name

    def plot_model_comparison(self):
        """Plot accuracy/precision/recall/F1 per model and log the figure.

        Draws a 2x2 grid of bar charts, writes it to ``model_comparison.png``
        in the current working directory, shows it, and logs the PNG as an
        artifact of a separate MLflow run named ``model_comparison``.

        Raises:
            RuntimeError: If there are no results to plot.
        """
        if not self.results:
            raise RuntimeError(
                "No results available; call train_and_evaluate_models() first."
            )

        # Prepare data for plotting
        model_names = list(self.results.keys())
        metrics = ['accuracy', 'precision', 'recall', 'f1']
        bar_colors = ['skyblue', 'lightgreen', 'lightcoral', 'gold', 'plum']

        fig, axes = plt.subplots(2, 2, figsize=(15, 12))
        fig.suptitle('Model Performance Comparison', fontsize=16, fontweight='bold')

        for ax, metric in zip(axes.flat, metrics):
            values = [self.results[model][metric] for model in model_names]

            bars = ax.bar(model_names, values, color=bar_colors)
            ax.set_title(f'{metric.capitalize()} Comparison', fontweight='bold')
            ax.set_ylabel(metric.capitalize())
            ax.tick_params(axis='x', rotation=45)

            # Add value labels on bars
            for bar, value in zip(bars, values):
                ax.text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.01,
                        f'{value:.3f}', ha='center', va='bottom')

        fig.tight_layout()

        # Save BEFORE showing: plt.show() may release the figure (inline and
        # interactive backends), after which savefig would write a blank image.
        fig.savefig("model_comparison.png", dpi=300, bbox_inches='tight')
        plt.show()

        # Log plot to MLflow
        with mlflow.start_run(run_name="model_comparison"):
            mlflow.log_artifact("model_comparison.png")

        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)

    def run_complete_pipeline(self, dataset_path=None):
        """Run every pipeline step in order.

        Args:
            dataset_path: Optional CSV path forwarded to ``load_dataset``;
                a synthetic dataset is generated when omitted.

        Returns:
            Tuple ``(results_df, best_model_name)`` as returned by
            ``display_results_summary``.
        """
        print("🚀 Starting Credit Card Fraud Detection ML Pipeline")
        print("="*60)

        # Step 1: Load dataset
        self.load_dataset(dataset_path)

        # Step 2: Preprocess data
        self.preprocess_data()

        # Step 3: Initialize models
        self.initialize_models()

        # Step 4: Train and evaluate models
        self.train_and_evaluate_models()

        # Step 5: Display results
        results_df, best_model_name = self.display_results_summary()

        # Step 6: Create visualizations
        self.plot_model_comparison()

        print("\n✅ Pipeline completed successfully!")
        print("🔬 Check MLflow UI with: mlflow ui")
        print(f"📊 Experiment: {self.experiment_name}")

        return results_df, best_model_name

# Usage Example
if __name__ == "__main__":
    # Script entry point: build the pipeline, run every step, report the winner.
    pipeline = CreditCardMLPipeline()

    # dataset_path=None selects the built-in synthetic dataset. To run on a
    # real CSV instead, pass its path, e.g.:
    #   pipeline.run_complete_pipeline("path/to/your/creditcard.csv")
    results, best_model_name = pipeline.run_complete_pipeline(dataset_path=None)

    print(f"\n🎯 Best performing model: {best_model_name}")