# In [4]:
# Core Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_blobs , make_moons
from sklearn.preprocessing import StandardScaler
from sklearn.svm import OneClassSVM
from sklearn.metrics import precision_score, recall_score, f1_score, roc_auc_score
import warnings
warnings.filterwarnings('ignore')

# Plot styling: stock matplotlib theme, seaborn "Set2" colour cycle,
# and larger default figure/font sizes for every figure created below.
plt.style.use('default')
sns.set_palette("Set2")
plt.rcParams.update({
    'figure.figsize': (15, 10),
    'font.size': 11,
})

print("✅ Libraries imported successfully!")

# Output: ✅ Libraries imported successfully!


# In [5]:
# One Class SVM Implementation and Analysis

# Data Generation for One Class SVM
def create_one_class_datasets(random_state=42):
    """
    Create the labelled toy datasets used by the One Class SVM demonstration

    Every dataset is a tuple ``(X, y)`` where the normal samples come first,
    followed by the injected outliers, and ``y`` holds 1 for normal rows and
    -1 for outlier rows.

    Parameters:
    -----------
    random_state : int or None
        Seed for the uniformly scattered outliers of the 'single_cluster' and
        'high_dimensional' datasets. The normal samples always use the fixed
        seeds passed to make_blobs / make_moons below. Pass None to draw
        different outliers on every call.

    Returns:
    --------
    datasets : dict
        Maps 'single_cluster', 'moon_shape', 'gaussian_mixture' and
        'high_dimensional' to their ``(X, y)`` tuple
    """
    # Dedicated generator: the outliers no longer depend on (or disturb) the
    # global NumPy random state, so repeated calls give identical datasets.
    rng = np.random.RandomState(random_state)

    def _with_labels(normal, outliers):
        """Stack normal rows above outlier rows and build the matching 1 / -1 labels."""
        features = np.vstack([normal, outliers])
        labels = np.concatenate([np.ones(len(normal)), -np.ones(len(outliers))])
        return features, labels

    datasets = {}

    # Dataset 1: Single cluster with uniformly scattered outliers
    X1, _ = make_blobs(n_samples=300, centers=1, cluster_std=1.5, random_state=42)
    outliers1 = rng.uniform(low=-8, high=8, size=(30, 2))
    datasets['single_cluster'] = _with_labels(X1, outliers1)

    # Dataset 2: Moon-shaped normal data with hand-placed outliers around the moons
    X2, _ = make_moons(n_samples=200, noise=0.1, random_state=42)
    outliers2 = np.array([[0.5, 1.5], [0.5, -1.5], [-1.5, 0.5], [2.5, 0.5],
                          [1, 2], [1, -2], [-2, 0], [3, 0]])
    datasets['moon_shape'] = _with_labels(X2, outliers2)

    # Dataset 3: Two Gaussian blobs (complex boundary) with outliers between and around them
    X3a, _ = make_blobs(n_samples=100, centers=1, cluster_std=0.8, center_box=(0, 2), random_state=42)
    X3b, _ = make_blobs(n_samples=100, centers=1, cluster_std=0.6, center_box=(3, 5), random_state=43)
    X3_normal = np.vstack([X3a, X3b])
    outliers3 = np.array([[1.5, 3.5], [7, 7], [-3, -3], [8, 1], [-1, 8]])
    datasets['gaussian_mixture'] = _with_labels(X3_normal, outliers3)

    # Dataset 4: 10-dimensional cluster with uniformly scattered outliers
    X4, _ = make_blobs(n_samples=200, centers=1, n_features=10, cluster_std=1.0, random_state=42)
    outliers4 = rng.uniform(low=-5, high=5, size=(20, 10))
    datasets['high_dimensional'] = _with_labels(X4, outliers4)

    return datasets

# One Class SVM Analysis Functions
def optimize_one_class_svm(X, y=None, param_grid=None, cv_folds=3):
    """
    Optimize One Class SVM parameters using an exhaustive grid search

    Each (kernel, gamma, nu) combination is fitted on the normal samples and
    scored. When ``y`` contains both normal (1) and outlier (-1) labels the
    score is the F1 of outlier detection on all of ``X``. Otherwise (no
    labels, or labels with a single class, for which F1 would be 0 for every
    combination) an unsupervised heuristic is used: the spread of the training
    decision scores, damped by how far the training inlier ratio is from 0.9.

    Parameters:
    -----------
    X : array-like
        Data to search on. With ``y`` given, only rows labelled 1 are used for
        fitting; without ``y`` all rows are treated as normal.
    y : array-like, optional
        True labels for evaluation (1=normal, -1=outlier)
    param_grid : dict, optional
        Must provide the keys 'kernel', 'gamma' and 'nu', each a list of
        candidate values. A default grid is used when omitted.
    cv_folds : int
        Currently unused (no cross-validation is performed); kept so existing
        callers that pass it keep working.

    Returns:
    --------
    best_params : dict or None
        Best parameters found (None if every combination failed to fit)
    results : list
        One dict per successfully evaluated combination with its parameters
        and scores
    """
    from itertools import product

    if param_grid is None:
        param_grid = {
            'kernel': ['rbf', 'poly', 'sigmoid'],
            'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
            'nu': [0.01, 0.05, 0.1, 0.15, 0.2]
        }

    X = np.asarray(X)

    # If y is provided, extract normal samples for training
    if y is not None:
        y = np.asarray(y)  # a plain list would make `y == 1` a scalar False
        X_normal = X[y == 1]
        print(f"Using {len(X_normal)} normal samples for training out of {len(X)} total")
        # Outlier is the positive class (1) for the external metrics
        y_true_binary = (y == -1).astype(int)
        has_both_classes = len(np.unique(y_true_binary)) > 1
        if not has_both_classes:
            print("Labels contain a single class; falling back to unsupervised scoring")
    else:
        X_normal = X
        y_true_binary = None
        has_both_classes = False
        print(f"Using all {len(X)} samples for training (assuming all normal)")

    results = []
    best_score = -np.inf
    best_params = None

    print("Optimizing One Class SVM parameters...")
    grid = list(product(param_grid['kernel'], param_grid['gamma'], param_grid['nu']))
    total_combinations = len(grid)
    print(f"Testing {total_combinations} parameter combinations...")

    for combination_count, (kernel, gamma, nu) in enumerate(grid, start=1):
        try:
            # Create and fit One Class SVM
            oc_svm = OneClassSVM(kernel=kernel, gamma=gamma, nu=nu)
            oc_svm.fit(X_normal)

            # Training-set statistics
            y_pred_train = oc_svm.predict(X_normal)
            decision_scores_train = oc_svm.decision_function(X_normal)

            inlier_ratio = np.sum(y_pred_train == 1) / len(y_pred_train)
            score_mean = np.mean(decision_scores_train)
            score_std = np.std(decision_scores_train)

            if has_both_classes:
                y_pred_all = oc_svm.predict(X)
                decision_scores_all = oc_svm.decision_function(X)

                # Convert predictions (-1, 1) to (1, 0): outlier is the positive class
                y_pred_binary = (y_pred_all == -1).astype(int)

                # Lower decision score means more anomalous, hence the sign flip
                auc_score = roc_auc_score(y_true_binary, -decision_scores_all)
                precision = precision_score(y_true_binary, y_pred_binary, zero_division=0)
                recall = recall_score(y_true_binary, y_pred_binary, zero_division=0)
                f1 = f1_score(y_true_binary, y_pred_binary, zero_division=0)
            else:
                auc_score = precision = recall = f1 = 0
        except Exception as e:
            # Best-effort search: report the failing combination and keep going
            print(f"Error with params kernel={kernel}, gamma={gamma}, nu={nu}: {e}")
            continue

        # Combined score for optimization
        if has_both_classes:
            combined_score = f1  # Use F1 score when usable labels are available
        else:
            # Unsupervised scoring: prefer models with reasonable outlier
            # detection (about 90% inliers) and good score separation
            combined_score = score_std * (1 - abs(inlier_ratio - 0.9))

        results.append({
            'kernel': kernel,
            'gamma': gamma,
            'nu': nu,
            'inlier_ratio': inlier_ratio,
            'score_mean': score_mean,
            'score_std': score_std,
            'auc_score': auc_score,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'combined_score': combined_score
        })

        # Strict '>' keeps the first combination among ties
        if combined_score > best_score:
            best_score = combined_score
            best_params = {'kernel': kernel, 'gamma': gamma, 'nu': nu}

        if combination_count % 10 == 0:
            print(f"Progress: {combination_count}/{total_combinations} "
                  f"({combination_count/total_combinations*100:.1f}%)")

    print("\n✅ Optimization complete!")
    print(f"Best parameters: {best_params}")
    print(f"Best score: {best_score:.4f}")

    return best_params, results

def evaluate_one_class_svm(X, y, oc_svm_model):
    """
    Evaluate One Class SVM performance

    Outliers are treated as the positive class. Ratios whose denominator is
    zero, and statistics of an empty group, are reported as 0 instead of NaN.

    Parameters:
    -----------
    X : array-like
        Test data, in the same feature space the model was fitted on
    y : array-like
        True labels (1=normal, -1=outlier)
    oc_svm_model : OneClassSVM
        Trained model; only its ``predict`` and ``decision_function`` methods
        are used

    Returns:
    --------
    metrics : dict
        Confusion counts (plain ints), accuracy / precision / recall / F1 /
        specificity, AUC (0 when only one class is present), mean and std of
        the decision scores per true class, and 'score_separation' (normal
        mean minus outlier mean, 0 unless both classes are present)
    """
    y = np.asarray(y)  # a plain list would make `y == -1` a scalar False

    # Get predictions and decision scores
    y_pred = np.asarray(oc_svm_model.predict(X))
    decision_scores = np.asarray(oc_svm_model.decision_function(X))

    # Convert to binary format for evaluation: 1 for outlier, 0 for normal
    y_pred_binary = (y_pred == -1).astype(int)
    y_true_binary = (y == -1).astype(int)

    def _ratio(numerator, denominator):
        """Return numerator / denominator, or 0.0 when the denominator is zero."""
        return numerator / denominator if denominator > 0 else 0.0

    metrics = {}

    # Basic counts, cast to built-in int so callers filtering on
    # isinstance(value, (int, float)) do not silently drop them
    tp = int(np.sum((y_true_binary == 1) & (y_pred_binary == 1)))  # True outliers detected
    fp = int(np.sum((y_true_binary == 0) & (y_pred_binary == 1)))  # Normal flagged as outlier
    tn = int(np.sum((y_true_binary == 0) & (y_pred_binary == 0)))  # Normal correctly identified
    fn = int(np.sum((y_true_binary == 1) & (y_pred_binary == 0)))  # Outliers missed

    metrics['true_positives'] = tp
    metrics['false_positives'] = fp
    metrics['true_negatives'] = tn
    metrics['false_negatives'] = fn

    # Performance metrics
    precision = _ratio(tp, tp + fp)
    recall = _ratio(tp, tp + fn)
    metrics['accuracy'] = _ratio(tp + tn, tp + fp + tn + fn)
    metrics['precision'] = precision
    metrics['recall'] = recall
    metrics['f1_score'] = _ratio(2 * precision * recall, precision + recall)
    metrics['specificity'] = _ratio(tn, tn + fp)

    # AUC Score: lower decision score means more anomalous, hence the sign flip
    if len(np.unique(y_true_binary)) > 1:
        metrics['auc_score'] = roc_auc_score(y_true_binary, -decision_scores)
    else:
        metrics['auc_score'] = 0

    # Decision function statistics per true class
    normal_scores = decision_scores[y == 1]
    outlier_scores = decision_scores[y == -1]
    has_normal = len(normal_scores) > 0
    has_outliers = len(outlier_scores) > 0

    metrics['normal_score_mean'] = float(np.mean(normal_scores)) if has_normal else 0
    metrics['normal_score_std'] = float(np.std(normal_scores)) if has_normal else 0
    metrics['outlier_score_mean'] = float(np.mean(outlier_scores)) if has_outliers else 0
    metrics['outlier_score_std'] = float(np.std(outlier_scores)) if has_outliers else 0

    # Separation quality is only meaningful when both groups exist
    if has_normal and has_outliers:
        metrics['score_separation'] = metrics['normal_score_mean'] - metrics['outlier_score_mean']
    else:
        metrics['score_separation'] = 0

    return metrics

def plot_one_class_svm_results(X, y, oc_svm_model, title="One Class SVM Results", figsize=(15, 10)):
    """
    Visualize One Class SVM results in a 2x2 figure

    Panels: decision boundary with the true labels, points coloured by
    prediction, histogram of decision scores per true class, and the ROC
    curve (outlier = positive class).

    Parameters:
    -----------
    X : array-like
        2-D data array in the feature space the model was fitted on (first 2
        dimensions used for plotting)
    y : array-like
        True labels (1=normal, -1=outlier)
    oc_svm_model : OneClassSVM
        Trained model
    title : str
        Plot title
    figsize : tuple
        Figure size

    Returns:
    --------
    fig : matplotlib.figure.Figure
        The figure object
    """
    # Imported here because the file's top-level sklearn.metrics import does
    # not include them
    from sklearn.metrics import auc, roc_curve

    X = np.asarray(X)
    y = np.asarray(y)
    has_2d = X.shape[1] >= 2

    # Use first 2 dimensions for visualization
    X_plot = X[:, :2] if has_2d else X

    fig, axes = plt.subplots(2, 2, figsize=figsize)
    ax_boundary, ax_pred = axes[0, 0], axes[0, 1]
    ax_hist, ax_roc = axes[1, 0], axes[1, 1]

    # Get predictions and scores
    y_pred = oc_svm_model.predict(X)
    decision_scores = oc_svm_model.decision_function(X)

    # 1. Decision boundary (needs at least two features)
    if has_2d:
        x_min, x_max = X_plot[:, 0].min() - 1, X_plot[:, 0].max() + 1
        y_min, y_max = X_plot[:, 1].min() - 1, X_plot[:, 1].max() + 1
        # Step size in mesh: 0.02 for small ranges, coarser for wide ranges so
        # the grid stays at roughly 500 steps per axis at most
        h = max(0.02, (x_max - x_min) / 500, (y_max - y_min) / 500)
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

        mesh_points = np.c_[xx.ravel(), yy.ravel()]

        # Pad with zeros if original data has more dimensions: the boundary
        # shown is the slice where every remaining feature equals 0
        if X.shape[1] > 2:
            padding = np.zeros((mesh_points.shape[0], X.shape[1] - 2))
            mesh_points = np.hstack([mesh_points, padding])

        Z = oc_svm_model.decision_function(mesh_points).reshape(xx.shape)
        z_min, z_max = Z.min(), Z.max()

        # contourf needs strictly increasing levels, so each region is drawn
        # only when scores of that sign actually occur on the grid
        if z_min < 0 < z_max:
            ax_boundary.contour(xx, yy, Z, levels=[0], linewidths=2, colors='black')
        if z_min < 0:
            ax_boundary.contourf(xx, yy, Z, levels=[z_min, 0], colors=['red'], alpha=0.3)
        if z_max > 0:
            ax_boundary.contourf(xx, yy, Z, levels=[0, z_max], colors=['blue'], alpha=0.3)

        # Plot data points by true label
        normal_mask = y == 1
        outlier_mask = y == -1

        ax_boundary.scatter(X_plot[normal_mask, 0], X_plot[normal_mask, 1],
                            c='blue', marker='o', s=50, alpha=0.7, label='Normal')
        ax_boundary.scatter(X_plot[outlier_mask, 0], X_plot[outlier_mask, 1],
                            c='red', marker='^', s=50, alpha=0.7, label='Outlier')

        ax_boundary.set_title('Decision Boundary')
        ax_boundary.set_xlabel('Feature 1')
        ax_boundary.set_ylabel('Feature 2')
        ax_boundary.legend()
        ax_boundary.grid(True, alpha=0.3)
    else:
        ax_boundary.text(0.5, 0.5, 'Decision Boundary\n(2D visualization only)',
                         ha='center', va='center', transform=ax_boundary.transAxes)
        ax_boundary.set_title('Decision Boundary (N/A)')

    # 2. Predictions
    if has_2d:
        # Color by prediction
        pred_colors = ['blue' if pred == 1 else 'red' for pred in y_pred]
        ax_pred.scatter(X_plot[:, 0], X_plot[:, 1], c=pred_colors, alpha=0.7, s=50)
        ax_pred.set_title('Predictions (Blue=Normal, Red=Outlier)')
        ax_pred.set_xlabel('Feature 1')
        ax_pred.set_ylabel('Feature 2')
        ax_pred.grid(True, alpha=0.3)
    else:
        # Show prediction distribution; count each class explicitly so a
        # model predicting a single class is still labelled correctly
        n_pred_normal = int(np.sum(y_pred == 1))
        n_pred_outlier = int(np.sum(y_pred == -1))
        ax_pred.bar(['Normal', 'Outlier'], [n_pred_normal, n_pred_outlier])
        ax_pred.set_title('Prediction Distribution')
        ax_pred.set_ylabel('Count')

    # 3. Decision scores distribution per true class
    normal_scores = decision_scores[y == 1]
    outlier_scores = decision_scores[y == -1]

    ax_hist.hist(normal_scores, bins=20, alpha=0.7, label='Normal', color='blue')
    if len(outlier_scores) > 0:
        ax_hist.hist(outlier_scores, bins=20, alpha=0.7, label='Outlier', color='red')
    ax_hist.axvline(x=0, color='black', linestyle='--', label='Decision Threshold')
    ax_hist.set_xlabel('Decision Score')
    ax_hist.set_ylabel('Frequency')
    ax_hist.set_title('Decision Score Distribution')
    ax_hist.legend()
    ax_hist.grid(True, alpha=0.3)

    # 4. ROC Curve (requires both classes in y)
    if len(np.unique(y)) > 1:
        y_true_binary = (y == -1).astype(int)
        # Lower decision score means more anomalous, hence the sign flip
        fpr, tpr, _ = roc_curve(y_true_binary, -decision_scores)
        auc_score = auc(fpr, tpr)

        ax_roc.plot(fpr, tpr, label=f'ROC Curve (AUC = {auc_score:.3f})')
        ax_roc.plot([0, 1], [0, 1], 'k--', label='Random')
        ax_roc.set_xlabel('False Positive Rate')
        ax_roc.set_ylabel('True Positive Rate')
        ax_roc.set_title('ROC Curve')
        ax_roc.legend()
        ax_roc.grid(True, alpha=0.3)
    else:
        ax_roc.text(0.5, 0.5, 'ROC Curve\n(No outliers in data)',
                    ha='center', va='center', transform=ax_roc.transAxes)
        ax_roc.set_title('ROC Curve (N/A)')

    plt.suptitle(title)
    plt.tight_layout()
    return fig

# In [6]:
# Reusable One Class SVM Pipeline
class OneClassSVMPipeline:
    """
    A comprehensive One Class SVM pipeline for anomaly detection

    This class provides a complete workflow for One Class SVM analysis including:
    - Data preprocessing and scaling
    - Parameter optimization (kernel, gamma, nu)
    - Model training on normal data only
    - Anomaly detection and evaluation
    - Comprehensive visualization

    Data conventions: ``fit``, ``optimize_parameters``, ``evaluate_performance``
    and ``plot_results`` pass their data to the model unchanged, so they expect
    data already returned by ``preprocess_data``. ``predict`` is the only
    method that applies the stored scaler itself and therefore expects raw,
    unscaled data.
    """

    # Public ``scoring`` names -> result columns produced by optimize_one_class_svm
    _SCORING_COLUMNS = {
        'f1': 'f1_score',
        'precision': 'precision',
        'recall': 'recall',
        'auc': 'auc_score',
    }

    def __init__(self, random_state=42):
        # random_state and feature_names are stored but not used by any method
        self.random_state = random_state
        self.scaler = StandardScaler()  # replaced (or set to None) by preprocess_data
        self.oc_svm_model = None
        self.optimal_params = None
        self.is_fitted = False
        self.feature_names = None

    def preprocess_data(self, X, scaling_method='standard'):
        """
        Preprocess data for One Class SVM

        Converts ``X`` to a float32 array and fits a fresh scaler on it. The
        fitted scaler is stored on the pipeline for later use by ``predict``.

        Parameters:
        -----------
        X : array-like
            Input data
        scaling_method : str
            'standard', 'minmax', 'robust', or 'none'

        Returns:
        --------
        X_processed : array-like
            Preprocessed data

        Raises:
        -------
        ValueError
            If ``scaling_method`` is not one of the supported names
        """
        if scaling_method not in ('standard', 'minmax', 'robust', 'none'):
            raise ValueError(
                f"Unknown scaling_method '{scaling_method}'. "
                "Use 'standard', 'minmax', 'robust', or 'none'."
            )

        X_processed = np.array(X, dtype=np.float32)

        if scaling_method == 'none':
            # Drop the scaler so predict() does not try to use an unfitted one
            self.scaler = None
            print("⚠️ No scaling applied")
        elif scaling_method == 'standard':
            self.scaler = StandardScaler()
            X_processed = self.scaler.fit_transform(X_processed)
            print("✅ Data standardized using StandardScaler")
        elif scaling_method == 'minmax':
            from sklearn.preprocessing import MinMaxScaler
            self.scaler = MinMaxScaler()
            X_processed = self.scaler.fit_transform(X_processed)
            print("✅ Data normalized using MinMaxScaler")
        else:  # 'robust'
            from sklearn.preprocessing import RobustScaler
            self.scaler = RobustScaler()
            X_processed = self.scaler.fit_transform(X_processed)
            print("✅ Data scaled using RobustScaler")

        print(f"📊 Processed data shape: {X_processed.shape}")
        return X_processed

    def optimize_parameters(self, X_normal, X_test=None, y_test=None, param_grid=None, scoring='f1'):
        """
        Optimize One Class SVM parameters

        Runs ``optimize_one_class_svm`` and stores the chosen parameters in
        ``self.optimal_params`` and the full grid results (as a DataFrame) in
        ``self.optimization_results``.

        Parameters:
        -----------
        X_normal : array-like
            Normal training data only
        X_test : array-like, optional
            Test data for evaluation. It is stacked below ``X_normal``, so it
            should not repeat the rows of ``X_normal``.
        y_test : array-like, optional
            Test labels for evaluation (1=normal, -1=outlier)
        param_grid : dict, optional
            Parameter grid for optimization
        scoring : str
            Scoring method ('f1', 'precision', 'recall', 'auc'). Only takes
            effect when test data containing outliers is provided; otherwise
            the unsupervised score of the grid search is used.

        Returns:
        --------
        optimal_params : dict
            Best parameters found

        Raises:
        -------
        ValueError
            If ``scoring`` is not one of the supported names
        """
        if scoring not in self._SCORING_COLUMNS:
            raise ValueError(
                f"Unknown scoring '{scoring}'. Use one of {sorted(self._SCORING_COLUMNS)}."
            )

        if param_grid is None:
            param_grid = {
                'kernel': ['rbf', 'poly', 'sigmoid'],
                'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
                'nu': [0.01, 0.05, 0.1, 0.15, 0.2, 0.25]
            }

        print(f"🔍 Optimizing One Class SVM parameters...")
        print(f"   - Training on {len(X_normal)} normal samples")
        print(f"   - Scoring method: {scoring}")

        # Combine test data for evaluation if available
        has_labels = X_test is not None and y_test is not None
        if has_labels:
            X_eval = np.vstack([X_normal, X_test])
            y_eval = np.concatenate([np.ones(len(X_normal)), np.asarray(y_test)])
            print(f"   - Evaluating on {len(X_eval)} total samples")
        else:
            X_eval = X_normal
            # No labels at all: passing None selects the unsupervised score
            # (all-ones labels would force F1 = 0 for every combination)
            y_eval = None
            print("   - No test data provided, using unsupervised optimization")

        best_params, results = optimize_one_class_svm(X_eval, y_eval, param_grid)

        # The grid search itself ranks by F1; honour any other requested
        # metric by re-ranking its results. max() keeps the first row among
        # ties, matching the grid search's own tie-breaking.
        if (has_labels and scoring != 'f1' and results
                and len(np.unique(y_eval == -1)) > 1):
            column = self._SCORING_COLUMNS[scoring]
            best_row = max(results, key=lambda row: row[column])
            best_params = {key: best_row[key] for key in ('kernel', 'gamma', 'nu')}
            print(f"   - Selected by {scoring}: {best_params}")

        self.optimal_params = best_params

        # Convert results to DataFrame for analysis
        self.optimization_results = pd.DataFrame(results)

        return best_params

    def fit(self, X_normal, kernel='rbf', gamma='scale', nu=0.1, use_optimized_params=True):
        """
        Fit One Class SVM on normal data

        Parameters:
        -----------
        X_normal : array-like
            Normal training data only (already preprocessed; no scaling is
            applied here)
        kernel : str
            SVM kernel type
        gamma : str or float
            Kernel coefficient
        nu : float
            Upper bound on fraction of training errors and lower bound of support vectors
        use_optimized_params : bool
            Whether to use optimized parameters if available. When True and
            ``optimize_parameters`` has found parameters, ``kernel``,
            ``gamma`` and ``nu`` are ignored.

        Returns:
        --------
        self : OneClassSVMPipeline
            Fitted pipeline
        """
        # Set parameters
        if use_optimized_params and self.optimal_params:
            params = self.optimal_params.copy()
        else:
            params = {'kernel': kernel, 'gamma': gamma, 'nu': nu}

        print(f"🔧 Training One Class SVM with parameters:")
        for key, value in params.items():
            print(f"   - {key}: {value}")

        # Create and fit model
        self.oc_svm_model = OneClassSVM(**params)
        self.oc_svm_model.fit(X_normal)
        self.is_fitted = True

        # Get training statistics
        train_predictions = self.oc_svm_model.predict(X_normal)
        train_scores = self.oc_svm_model.decision_function(X_normal)

        n_support = len(self.oc_svm_model.support_)
        inlier_ratio = np.sum(train_predictions == 1) / len(train_predictions)
        support_vector_ratio = n_support / len(X_normal)

        print(f"✅ Training completed!")
        print(f"   - Training samples: {len(X_normal)}")
        print(f"   - Support vectors: {n_support} ({support_vector_ratio:.2%})")
        print(f"   - Inlier ratio on training: {inlier_ratio:.2%}")
        print(f"   - Decision score range: [{train_scores.min():.3f}, {train_scores.max():.3f}]")

        return self

    def predict(self, X):
        """
        Predict anomalies in new data

        Parameters:
        -----------
        X : array-like
            Raw input data. The scaler fitted by ``preprocess_data`` is
            applied here; if no scaler was fitted (scaling 'none', or
            ``preprocess_data`` never called) the data is used as is.

        Returns:
        --------
        predictions : array-like
            Predictions (1 for normal, -1 for outlier)
        decision_scores : array-like
            Decision function scores

        Raises:
        -------
        ValueError
            If the model has not been fitted
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted. Call fit first.")

        if self.scaler is None:
            X_scaled = X
        else:
            from sklearn.exceptions import NotFittedError
            try:
                X_scaled = self.scaler.transform(X)
            except NotFittedError:
                # preprocess_data was never called, so the model was trained
                # on unscaled data; feed it unscaled data as well
                X_scaled = X

        predictions = self.oc_svm_model.predict(X_scaled)
        decision_scores = self.oc_svm_model.decision_function(X_scaled)

        return predictions, decision_scores

    def evaluate_performance(self, X_test, y_test):
        """
        Evaluate model performance on test data

        Parameters:
        -----------
        X_test : array-like
            Test data (already preprocessed; no scaling is applied here)
        y_test : array-like
            True labels (1=normal, -1=outlier)

        Returns:
        --------
        metrics : dict
            Performance metrics

        Raises:
        -------
        ValueError
            If the model has not been fitted
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted. Call fit first.")

        return evaluate_one_class_svm(X_test, y_test, self.oc_svm_model)

    def plot_results(self, X, y, title="One Class SVM Results", figsize=(15, 10)):
        """
        Plot comprehensive One Class SVM results

        Parameters:
        -----------
        X : array-like
            Data for visualization (already preprocessed)
        y : array-like
            True labels
        title : str
            Plot title
        figsize : tuple
            Figure size

        Returns:
        --------
        fig : matplotlib.figure.Figure
            The figure object

        Raises:
        -------
        ValueError
            If the model has not been fitted
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted. Call fit first.")

        return plot_one_class_svm_results(X, y, self.oc_svm_model, title, figsize)

    def _best_result_row(self, df):
        """Return the results row matching self.optimal_params, or None if there is none."""
        param_names = ('kernel', 'gamma', 'nu')
        if not self.optimal_params or not all(name in df.columns for name in param_names):
            return None

        mask = np.ones(len(df), dtype=bool)
        for name in param_names:
            wanted = self.optimal_params.get(name)
            # Plain Python comparison: the gamma column mixes strings and floats
            mask &= np.array([value == wanted for value in df[name]], dtype=bool)

        matches = df[mask]
        return matches.iloc[0] if len(matches) > 0 else None

    def plot_optimization_results(self, figsize=(15, 8)):
        """
        Plot parameter optimization results

        Parameters:
        -----------
        figsize : tuple
            Figure size

        Returns:
        --------
        fig : matplotlib.figure.Figure
            The figure object

        Raises:
        -------
        ValueError
            If ``optimize_parameters`` has not been run
        """
        df = getattr(self, 'optimization_results', None)
        if df is None:
            raise ValueError("No optimization results available. Run optimize_parameters first.")

        fig, axes = plt.subplots(2, 3, figsize=figsize)

        # 1. F1 Score by kernel
        if 'f1_score' in df.columns:
            kernel_f1 = df.groupby('kernel')['f1_score'].mean()
            axes[0, 0].bar(kernel_f1.index, kernel_f1.values)
            axes[0, 0].set_title('Average F1 Score by Kernel')
            axes[0, 0].set_ylabel('F1 Score')
            axes[0, 0].tick_params(axis='x', rotation=45)

        # 2. Performance by Nu
        if 'f1_score' in df.columns:
            nu_performance = df.groupby('nu').agg({
                'f1_score': 'mean',
                'precision': 'mean',
                'recall': 'mean'
            })

            axes[0, 1].plot(nu_performance.index, nu_performance['f1_score'], 'o-', label='F1')
            axes[0, 1].plot(nu_performance.index, nu_performance['precision'], 's-', label='Precision')
            axes[0, 1].plot(nu_performance.index, nu_performance['recall'], '^-', label='Recall')
            axes[0, 1].set_title('Performance by Nu Parameter')
            axes[0, 1].set_xlabel('Nu')
            axes[0, 1].set_ylabel('Score')
            axes[0, 1].legend()
            axes[0, 1].grid(True, alpha=0.3)

        # 3. AUC Score by Gamma (for RBF kernel)
        if 'auc_score' in df.columns:
            rbf_results = df[df['kernel'] == 'rbf']
            if len(rbf_results) > 0:
                # Group on string labels in grid order: gamma mixes strings
                # ('scale', 'auto') with floats, which cannot be sorted together
                gamma_labels = rbf_results['gamma'].astype(str)
                gamma_auc = rbf_results['auc_score'].groupby(gamma_labels, sort=False).mean()
                positions = range(len(gamma_auc))
                axes[0, 2].plot(positions, gamma_auc.values, 'o-')
                axes[0, 2].set_title('AUC Score by Gamma (RBF Kernel)')
                axes[0, 2].set_xlabel('Gamma (index)')
                axes[0, 2].set_ylabel('AUC Score')
                axes[0, 2].set_xticks(positions)
                axes[0, 2].set_xticklabels([str(g) for g in gamma_auc.index], rotation=45)
                axes[0, 2].grid(True, alpha=0.3)

        # 4. Inlier ratio distribution
        if 'inlier_ratio' in df.columns:
            mean_inlier_ratio = df['inlier_ratio'].mean()
            axes[1, 0].hist(df['inlier_ratio'], bins=20, alpha=0.7)
            axes[1, 0].axvline(mean_inlier_ratio, color='red', linestyle='--',
                               label=f'Mean: {mean_inlier_ratio:.3f}')
            axes[1, 0].set_title('Inlier Ratio Distribution')
            axes[1, 0].set_xlabel('Inlier Ratio')
            axes[1, 0].set_ylabel('Frequency')
            axes[1, 0].legend()

        # 5. Decision score statistics
        if 'score_mean' in df.columns:
            axes[1, 1].scatter(df['score_mean'], df['score_std'], alpha=0.6)
            axes[1, 1].set_title('Decision Score Statistics')
            axes[1, 1].set_xlabel('Mean Decision Score')
            axes[1, 1].set_ylabel('Std Decision Score')
            axes[1, 1].grid(True, alpha=0.3)

        # 6. Best parameters summary
        if self.optimal_params:
            summary_lines = ["Best Parameters:", ""]
            summary_lines += [f"{key}: {value}" for key, value in self.optimal_params.items()]

            if 'f1_score' in df.columns:
                # Report the scores of the parameters actually selected; fall
                # back to the max-F1 row if that row cannot be located
                best_scores = self._best_result_row(df)
                if best_scores is None:
                    best_scores = df.loc[df['f1_score'].idxmax()]
                summary_lines += [
                    "",
                    "Best Scores:",
                    f"F1: {best_scores['f1_score']:.3f}",
                    f"Precision: {best_scores['precision']:.3f}",
                    f"Recall: {best_scores['recall']:.3f}",
                    f"AUC: {best_scores['auc_score']:.3f}",
                ]

            axes[1, 2].text(0.05, 0.95, "\n".join(summary_lines),
                            transform=axes[1, 2].transAxes, verticalalignment='top', fontsize=10,
                            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.8))
            axes[1, 2].set_xlim(0, 1)
            axes[1, 2].set_ylim(0, 1)
            axes[1, 2].axis('off')
            axes[1, 2].set_title('Optimization Summary')

        plt.tight_layout()
        return fig

    def save_model(self, filepath):
        """
        Save the trained model, its scaler and the optimal parameters

        Parameters:
        -----------
        filepath : str
            Path to save the model

        Raises:
        -------
        ValueError
            If the model has not been fitted
        """
        if not self.is_fitted:
            raise ValueError("Model not fitted. Call fit first.")

        import joblib
        model_data = {
            'oc_svm_model': self.oc_svm_model,
            'scaler': self.scaler,
            'optimal_params': self.optimal_params
        }
        joblib.dump(model_data, filepath)
        print(f"💾 Model saved to {filepath}")

    def load_model(self, filepath):
        """
        Load a pre-trained model written by ``save_model``

        Parameters:
        -----------
        filepath : str
            Path to the saved model
        """
        import joblib
        # SECURITY: joblib.load unpickles the file and can execute arbitrary
        # code; only load model files from a trusted source.
        model_data = joblib.load(filepath)
        self.oc_svm_model = model_data['oc_svm_model']
        self.scaler = model_data['scaler']
        self.optimal_params = model_data['optimal_params']
        self.is_fitted = True
        print(f"📂 Model loaded from {filepath}")

# Comprehensive Demonstration
def demonstrate_one_class_svm():
    """
    Comprehensive demonstration of One Class SVM pipeline

    For every dataset from ``create_one_class_datasets`` this scales the data,
    tunes the parameters, trains on the normal samples only, prints the
    evaluation metrics and shows the result and optimization figures.

    Returns:
    --------
    pipeline : OneClassSVMPipeline or None
        The pipeline fitted on the last dataset (None if there were no datasets)
    """
    print("🚀 Starting comprehensive One Class SVM demonstration...")
    print("="*60)

    # Create datasets
    datasets = create_one_class_datasets()

    pipeline = None
    for name, (X, y) in datasets.items():
        n_normal = np.sum(y == 1)
        n_outliers = np.sum(y == -1)

        print(f"\n{'='*60}")
        print(f"📊 ANALYZING DATASET: {name.upper()}")
        print(f"{'='*60}")
        print(f"Data shape: {X.shape}")
        print(f"Normal samples: {n_normal} ({n_normal/len(y)*100:.1f}%)")
        print(f"Outlier samples: {n_outliers} ({n_outliers/len(y)*100:.1f}%)")

        # Initialize pipeline
        pipeline = OneClassSVMPipeline(random_state=42)

        # Preprocess data
        X_processed = pipeline.preprocess_data(X, scaling_method='standard')

        # Split data: use only normal samples for training
        normal_mask = y == 1
        X_normal = X_processed[normal_mask]
        X_test = X_processed  # Test on all data
        y_test = y

        print(f"\nTraining set (normal only): {len(X_normal)} samples")
        print(f"Test set (all data): {len(X_test)} samples")

        # Optimize parameters. optimize_parameters stacks its test data below
        # X_normal, so hand it only the outliers; passing the full test set
        # would count every normal sample twice in the tuning metrics.
        # NOTE: tuning and evaluation both use the same samples here, so the
        # reported metrics are optimistic; this is a demonstration only.
        print("\n🔍 Optimizing parameters...")
        pipeline.optimize_parameters(
            X_normal, X_processed[~normal_mask], y[~normal_mask], scoring='f1'
        )

        # Train model
        print("\n🔧 Training One Class SVM...")
        pipeline.fit(X_normal, use_optimized_params=True)

        # Evaluate performance
        print("\n📊 Evaluating performance...")
        metrics = pipeline.evaluate_performance(X_test, y_test)

        print("\nPerformance Metrics:")
        print("-" * 30)
        for metric_name, value in metrics.items():
            # NumPy integer scalars are not instances of int, so name the
            # NumPy scalar types explicitly to print the count metrics too
            if isinstance(value, (int, float, np.integer, np.floating)):
                print(f"{metric_name.replace('_', ' ').title():<25}: {value:.4f}")

        # Plot results
        print("\n📈 Generating visualizations...")

        # Main results
        pipeline.plot_results(X_processed, y, title=f'One Class SVM: {name}')
        plt.show()

        # Optimization results
        if getattr(pipeline, 'optimization_results', None) is not None:
            optimization_fig = pipeline.plot_optimization_results()
            optimization_fig.suptitle(f'Parameter Optimization: {name}')
            plt.show()

        print(f"\n✅ Analysis of {name} dataset complete!")

    print("\n🎉 All One Class SVM demonstrations completed successfully!")

    return pipeline

# Quick usage example
def quick_one_class_svm_example():
    """
    Quick example of using One Class SVM pipeline

    Builds one Gaussian cluster plus uniformly scattered outliers, trains the
    pipeline on the normal samples with fixed parameters and prints the
    precision, recall, F1 and AUC measured on the full dataset.

    Returns:
    --------
    pipeline : OneClassSVMPipeline
        The fitted pipeline
    """
    print("📊 Quick One Class SVM Example")
    print("="*40)

    # Generate sample data with outliers. A dedicated seeded generator keeps
    # the outliers, and therefore the printed metrics, identical between runs.
    rng = np.random.RandomState(42)
    X_normal, _ = make_blobs(n_samples=200, centers=1, cluster_std=1.0, random_state=42)
    X_outliers = rng.uniform(low=-6, high=6, size=(20, 2))
    X = np.vstack([X_normal, X_outliers])
    y = np.concatenate([np.ones(len(X_normal)), -np.ones(len(X_outliers))])

    # Initialize and run pipeline
    pipeline = OneClassSVMPipeline()

    # Preprocess
    X_processed = pipeline.preprocess_data(X)

    # Train on normal data only
    X_train_normal = X_processed[y == 1]

    # Fit with fixed parameters (no optimization has run, so these are used)
    pipeline.fit(X_train_normal, kernel='rbf', nu=0.1)

    # Evaluate on all samples, including the training normals
    metrics = pipeline.evaluate_performance(X_processed, y)

    print("\n✅ Quick example complete!")
    print(f"Precision: {metrics['precision']:.3f}")
    print(f"Recall: {metrics['recall']:.3f}")
    print(f"F1 Score: {metrics['f1_score']:.3f}")
    print(f"AUC Score: {metrics['auc_score']:.3f}")

    return pipeline