In [1]:
# Brain MRI Image Anomaly Detection Pipeline
# Detects brain tumor anomalies using feature extraction and multiple anomaly detection methods

import os
import cv2
import numpy as np
import pandas as pd
from pathlib import Path
import warnings
import zipfile
import shutil
from urllib.request import urlretrieve

warnings.filterwarnings('ignore')

from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.svm import OneClassSVM
from sklearn.ensemble import IsolationForest, AdaBoostClassifier
from sklearn.neighbors import LocalOutlierFactor, NearestNeighbors
from sklearn.covariance import EllipticEnvelope
from sklearn.cluster import DBSCAN, KMeans
from sklearn.mixture import GaussianMixture
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import (precision_score, recall_score, f1_score, 
                             confusion_matrix, roc_auc_score, roc_curve)

try:
    from tensorflow.keras import layers, models, losses, optimizers
    from tensorflow.keras.callbacks import EarlyStopping
    import tensorflow as tf
    KERAS_AVAILABLE = True
except:
    print("TensorFlow/Keras not available. Deep learning methods will be skipped.")
    KERAS_AVAILABLE = False

2025-11-09 11:42:39.472535: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1762688559.769988      13 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1762688559.854724      13 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

AttributeError: 'MessageFactory' object has no attribute 'GetPrototype'

In [2]:
# SECTION 1: BRAIN MRI DATASET LOADER

class BrainMRIDatasetLoader:
    """Load brain MRI dataset from Kaggle"""
    
    def __init__(self, dataset_root_path):
        """Initialize loader for brain MRI dataset"""
        self.dataset_root = Path(dataset_root_path)
        self.yes_tumor_dir = self.dataset_root / "yes"
        self.no_tumor_dir = self.dataset_root / "no"
        
        self._validate_structure()
    
    def _validate_structure(self):
        """Validate that required directories exist"""
        required_dirs = [self.yes_tumor_dir, self.no_tumor_dir]
        
        for dir_path in required_dirs:
            if not dir_path.exists():
                raise ValueError(f"Required directory not found: {dir_path}")
        
        print(f"✓ Dataset structure validated")
        print(f"  Tumor images: {self.yes_tumor_dir}")
        print(f"  No tumor images: {self.no_tumor_dir}")
    
    def load_tumor_images(self):
        """Load all brain MRI images with tumors"""
        print("\nLoading tumor images...")
        images = []
        paths = []
        
        image_files = sorted(list(self.yes_tumor_dir.glob("*.jpg"))) + \
                     sorted(list(self.yes_tumor_dir.glob("*.jpeg"))) + \
                     sorted(list(self.yes_tumor_dir.glob("*.png")))
        
        for idx, image_file in enumerate(image_files):
            if (idx + 1) % 50 == 0 or idx == 0:
                print(f"  Loading: {idx + 1}/{len(image_files)}")
            
            try:
                img = cv2.imread(str(image_file), cv2.IMREAD_GRAYSCALE)
                if img is not None:
                    img = cv2.resize(img, (256, 256))
                    images.append(img)
                    paths.append(str(image_file))
            except Exception as e:
                print(f"  Error loading {image_file}: {e}")
        
        print(f"Loaded {len(images)} tumor images")
        return images, paths
    
    def load_no_tumor_images(self):
        """Load all brain MRI images without tumors"""
        print("\nLoading non-tumor images...")
        images = []
        paths = []
        
        image_files = sorted(list(self.no_tumor_dir.glob("*.jpg"))) + \
                     sorted(list(self.no_tumor_dir.glob("*.jpeg"))) + \
                     sorted(list(self.no_tumor_dir.glob("*.png")))
        
        for idx, image_file in enumerate(image_files):
            if (idx + 1) % 50 == 0 or idx == 0:
                print(f"  Loading: {idx + 1}/{len(image_files)}")
            
            try:
                img = cv2.imread(str(image_file), cv2.IMREAD_GRAYSCALE)
                if img is not None:
                    img = cv2.resize(img, (256, 256))
                    images.append(img)
                    paths.append(str(image_file))
            except Exception as e:
                print(f"  Error loading {image_file}: {e}")
        
        print(f"Loaded {len(images)} non-tumor images")
        return images, paths
    
    def get_dataset_summary(self):
        """Print dataset summary"""
        tumor_count = len(list(self.yes_tumor_dir.glob("*.jpg"))) + \
                     len(list(self.yes_tumor_dir.glob("*.jpeg"))) + \
                     len(list(self.yes_tumor_dir.glob("*.png")))
        
        no_tumor_count = len(list(self.no_tumor_dir.glob("*.jpg"))) + \
                        len(list(self.no_tumor_dir.glob("*.jpeg"))) + \
                        len(list(self.no_tumor_dir.glob("*.png")))
        
        print("\n" + "="*70)
        print("BRAIN MRI DATASET SUMMARY")
        print("="*70)
        print(f"Tumor images: {tumor_count}")
        print(f"Non-tumor images: {no_tumor_count}")
        print(f"Total images: {tumor_count + no_tumor_count}")
        print("="*70 + "\n")
        
        return {
            'tumor': tumor_count,
            'no_tumor': no_tumor_count,
            'total': tumor_count + no_tumor_count
        }

In [3]:
# SECTION 2: FEATURE EXTRACTION

class ImageFeatureExtractor:
    """Extract multiple types of features from brain MRI images"""
    
    def __init__(self, image_size=(256, 256)):
        self.image_size = image_size
        self.scaler = StandardScaler()
    
    def edge_detection(self, img):
        """Extract edge features using Canny edge detection"""
        edges = cv2.Canny(img, 50, 150)
        edge_features = np.array([
            np.sum(edges) / edges.size,
            np.std(edges),
            np.count_nonzero(edges) / edges.size,
        ])
        return edge_features, edges
    
    def corner_detection_harris(self, img):
        """Extract corner features using Harris corner detection"""
        corners = cv2.cornerHarris(img, 2, 3, 0.04)
        corners = cv2.dilate(corners, None)
        
        corner_features = np.array([
            np.sum(corners) / corners.size,
            np.max(corners),
            np.count_nonzero(corners > 0.01 * corners.max()) / corners.size,
            np.std(corners),
        ])
        return corner_features, corners
    
    def blob_detection(self, img):
        """Extract blob features using SimpleBlobDetector"""
        params = cv2.SimpleBlobDetector_Params()
        params.filterByArea = True
        params.minArea = 10
        params.maxArea = 5000
        params.filterByCircularity = True
        params.minCircularity = 0.1
        detector = cv2.SimpleBlobDetector_create(params)
        
        keypoints = detector.detect(img)
        
        blob_features = np.array([
            len(keypoints),
            np.mean([kp.size for kp in keypoints]) if keypoints else 0,
            np.std([kp.size for kp in keypoints]) if len(keypoints) > 1 else 0,
        ])
        return blob_features, keypoints
    
    def ridge_detection(self, img):
        """Extract ridge features using Sobel derivatives"""
        sobelx = cv2.Sobel(img, cv2.CV_64F, 1, 0, ksize=3)
        sobely = cv2.Sobel(img, cv2.CV_64F, 0, 1, ksize=3)
        
        ridge_strength = np.sqrt(sobelx**2 + sobely**2)
        
        ridge_features = np.array([
            np.mean(ridge_strength),
            np.std(ridge_strength),
            np.percentile(ridge_strength, 95),
        ])
        return ridge_features, ridge_strength
    
    def sift_features(self, img):
        """Extract SIFT keypoints and descriptors"""
        sift = cv2.SIFT_create()
        keypoints, descriptors = sift.detectAndCompute(img, None)
        
        if descriptors is None or len(descriptors) == 0:
            sift_features = np.zeros(128)  # Fixed size
        else:
            # Use mean of descriptors for fixed-size output
            sift_features = np.mean(descriptors, axis=0)
        
        return sift_features, keypoints, descriptors
    
    def surf_features(self, img):
        """Extract SURF keypoints and descriptors"""
        try:
            surf = cv2.xfeatures2d.SURF_create(400)
            keypoints, descriptors = surf.detectAndCompute(img, None)
            
            if descriptors is None or len(descriptors) == 0:
                surf_features = np.zeros(64)  # Fixed size
            else:
                surf_features = np.mean(descriptors, axis=0)
        except:
            surf_features = np.zeros(64)
            keypoints = []
            descriptors = None
        
        return surf_features, keypoints, descriptors
    
    def orb_features(self, img):
        """Extract ORB keypoints and descriptors"""
        orb = cv2.ORB_create(nfeatures=500)
        keypoints, descriptors = orb.detectAndCompute(img, None)
        
        if descriptors is None or len(descriptors) == 0:
            orb_features = np.zeros(32)  # Fixed size
        else:
            descriptors_float = descriptors.astype(np.float32)
            orb_features = np.mean(descriptors_float, axis=0)
        
        return orb_features, keypoints, descriptors
    
    def texture_features(self, img):
        """Extract texture features using Laplacian"""
        log = cv2.Laplacian(img, cv2.CV_64F)
        
        texture_features = np.array([
            np.mean(log),
            np.std(log),
            np.percentile(log, 25),
            np.percentile(log, 75),
        ])
        return texture_features
    
    def histogram_features(self, img):
        """Extract histogram features"""
        hist = cv2.calcHist([img], [0], None, [32], [0, 256])
        hist = hist.flatten() / hist.sum()  # Normalize
        return hist
    
    def extract_all_features(self, img):
        """Extract all features from an image"""
        edge_feat, _ = self.edge_detection(img)
        corner_feat, _ = self.corner_detection_harris(img)
        blob_feat, _ = self.blob_detection(img)
        ridge_feat, _ = self.ridge_detection(img)
        texture_feat = self.texture_features(img)
        
        sift_feat, _, _ = self.sift_features(img)
        surf_feat, _, _ = self.surf_features(img)
        orb_feat, _, _ = self.orb_features(img)
        
        hist_feat = self.histogram_features(img)
        
        all_features = np.concatenate([
            edge_feat, corner_feat, blob_feat, ridge_feat, texture_feat,
            sift_feat, surf_feat, orb_feat, hist_feat
        ])
        
        return all_features.reshape(1, -1)

In [4]:
# SECTION 3: DEEP LEARNING AUTOENCODER

class AutoencoderAnomalyDetector:
    """Autoencoder for Anomaly Detection"""
    
    def __init__(self, input_dim, encoding_dim=32, contamination=0.1):
        self.input_dim = input_dim
        self.encoding_dim = encoding_dim
        self.contamination = contamination
        self.model = None
        self.scaler = StandardScaler()
        self.threshold = None
    
    def build_model(self):
        if not KERAS_AVAILABLE:
            raise ImportError("TensorFlow/Keras not installed")
        
        # Encoder
        input_layer = layers.Input(shape=(self.input_dim,))
        
        encoded = layers.Dense(128, activation='relu')(input_layer)
        encoded = layers.Dropout(0.2)(encoded)
        encoded = layers.Dense(64, activation='relu')(encoded)
        encoded = layers.Dropout(0.2)(encoded)
        encoded = layers.Dense(self.encoding_dim, activation='relu')(encoded)
        
        # Decoder
        decoded = layers.Dense(64, activation='relu')(encoded)
        decoded = layers.Dropout(0.2)(decoded)
        decoded = layers.Dense(128, activation='relu')(decoded)
        decoded = layers.Dropout(0.2)(decoded)
        decoded = layers.Dense(self.input_dim, activation='sigmoid')(decoded)
        
        # Autoencoder model
        self.model = models.Model(input_layer, decoded)
        
        # Compile
        self.model.compile(
            optimizer=optimizers.Adam(learning_rate=0.001),
            loss=losses.MeanSquaredError()
        )
        
        return self
    
    def fit(self, X_train, epochs=50, batch_size=32, validation_split=0.1, verbose=1):
        if self.model is None:
            self.build_model()
        
        # Normalize data
        X_train_scaled = self.scaler.fit_transform(X_train)
        
        # Early stopping callback
        early_stop = EarlyStopping(
            monitor='val_loss',
            patience=5,
            restore_best_weights=True
        )
        
        # Train
        history = self.model.fit(
            X_train_scaled, X_train_scaled,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            callbacks=[early_stop],
            verbose=verbose
        )
        
        # Set threshold based on training data
        train_predictions = self.model.predict(X_train_scaled, verbose=0)
        train_mse = np.mean((X_train_scaled - train_predictions) ** 2, axis=1)
        self.threshold = np.percentile(train_mse, (1 - self.contamination) * 100)
        
        return history
    
    def predict(self, X_test):
        """
        Returns:
            predictions: -1 (anomaly) or 1 (normal)
            reconstruction_errors: MSE for each sample
        """
        if self.model is None:
            raise ValueError("Model not fitted yet")
        
        # Normalize
        X_test_scaled = self.scaler.transform(X_test)
        
        # Get reconstructions
        reconstructions = self.model.predict(X_test_scaled, verbose=0)
        
        # Compute reconstruction error (MSE)
        reconstruction_errors = np.mean(
            (X_test_scaled - reconstructions) ** 2,
            axis=1
        )
        
        # Classify as anomaly if error > threshold
        predictions = np.where(
            reconstruction_errors > self.threshold,
            -1,  # anomaly
            1    # normal
        )
        
        return predictions, reconstruction_errors

In [5]:
# SECTION 4: ANOMALY DETECTION METHODS

class AnomalyDetectionMethods:
    """Comprehensive anomaly detection using multiple algorithms"""
    
    def __init__(self):
        self.models = {}
        self.results = {}
        self.scaler = StandardScaler()
    
    def knn_anomaly_detection(self, X_train, X_test, n_neighbors=5, percentile=95):
        """Detect anomalies using K-Nearest Neighbors"""
        model = NearestNeighbors(n_neighbors=n_neighbors)
        model.fit(X_train)
        distances, _ = model.kneighbors(X_test)
        anomaly_scores = distances[:, -1]
        threshold = np.percentile(distances[:, -1], percentile)
        predictions = (anomaly_scores > threshold).astype(int)
        predictions = np.where(predictions == 1, -1, 1)
        return model, predictions, anomaly_scores
    
    def pca_anomaly_detection(self, X_train, X_test, variance_explained=0.95, percentile=95):
        """Detect anomalies using PCA reconstruction error"""
        pca = PCA(n_components=variance_explained)
        X_train_pca = pca.fit_transform(X_train)
        X_test_pca = pca.transform(X_test)
        
        X_train_reconstructed = pca.inverse_transform(X_train_pca)
        X_test_reconstructed = pca.inverse_transform(X_test_pca)
        
        train_errors = np.mean((X_train - X_train_reconstructed) ** 2, axis=1)
        test_errors = np.mean((X_test - X_test_reconstructed) ** 2, axis=1)
        
        threshold = np.percentile(train_errors, percentile)
        predictions = (test_errors > threshold).astype(int)
        predictions = np.where(predictions == 1, -1, 1)
        
        return pca, predictions, test_errors
    
    def isolation_forest_detection(self, X_train, X_test, contamination=0.1):
        """Detect anomalies using Isolation Forest"""
        model = IsolationForest(contamination=contamination, random_state=42)
        model.fit(X_train)
        predictions = model.predict(X_test)
        scores = model.score_samples(X_test)
        return model, predictions, scores
    
    def local_outlier_factor_detection(self, X_train, X_test, n_neighbors=20, contamination=0.1, novelty=True):
        """Detect anomalies using Local Outlier Factor"""
        model = LocalOutlierFactor(n_neighbors=n_neighbors, contamination=contamination, novelty=novelty)
        X_combined = np.vstack([X_train, X_test])
        model.fit(X_combined)
        predictions = model.predict(X_test)
        scores = model.negative_outlier_factor_
        return model, predictions, scores[-len(X_test):]
    
    def one_class_svm_detection(self, X_train, X_test, nu=0.1):
        """Detect anomalies using One-Class SVM"""
        model = OneClassSVM(kernel='rbf', nu=nu)
        model.fit(X_train)
        predictions = model.predict(X_test)
        scores = model.decision_function(X_test)
        return model, predictions, scores
    
    def elliptic_envelope_detection(self, X_train, X_test, contamination=0.1):
        """Detect anomalies using Elliptic Envelope"""
        model = EllipticEnvelope(contamination=contamination, random_state=42)
        model.fit(X_train)
        predictions = model.predict(X_test)
        scores = model.decision_function(X_test)
        return model, predictions, scores
    
    def dbscan_anomaly_detection(self, X_train, X_test, eps=0.5, min_samples=5):
        """Detect anomalies using DBSCAN"""
        X_combined = np.vstack([X_train, X_test])
        model = DBSCAN(eps=eps, min_samples=min_samples)
        labels = model.fit_predict(X_combined)
        predictions = labels[-len(X_test):]
        predictions = np.where(predictions == -1, -1, 1)
        return model, predictions, labels[-len(X_test):]
    
    def gaussian_mixture_anomaly_detection(self, X_train, X_test, n_components=10, percentile=95):
        """Detect anomalies using Gaussian Mixture Model"""
        model = GaussianMixture(n_components=n_components, random_state=42)
        model.fit(X_train)
        train_scores = -model.score_samples(X_train)
        test_scores = -model.score_samples(X_test)
        threshold = np.percentile(train_scores, percentile)
        predictions = (test_scores > threshold).astype(int)
        predictions = np.where(predictions == 1, -1, 1)
        return model, predictions, test_scores
    
    def kmeans_anomaly_detection(self, X_train, X_test, n_clusters=5, contamination=0.1):
        """Detect anomalies using K-Means"""
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)
        
        model = KMeans(n_clusters=n_clusters, init='k-means++', random_state=42)
        model.fit(X_train_scaled)
        
        train_labels = model.predict(X_train_scaled)
        train_distances = np.sqrt(np.sum((X_train_scaled - model.cluster_centers_[train_labels])**2, axis=1))
        
        threshold = np.percentile(train_distances, (1 - contamination) * 100)
        
        test_labels = model.predict(X_test_scaled)
        test_distances = np.sqrt(np.sum((X_test_scaled - model.cluster_centers_[test_labels])**2, axis=1))
        
        predictions = np.where(test_distances > threshold, -1, 1)
        return model, predictions, test_distances
    
    def adaboost_detection(self, X_train, y_train, X_test,
                          n_estimators=100, learning_rate=0.5):
        """Detect anomalies using AdaBoost"""
        X_train_scaled = self.scaler.fit_transform(X_train)
        X_test_scaled = self.scaler.transform(X_test)
        
        # Create and train model
        base_estimator = DecisionTreeClassifier(max_depth=1, random_state=42)
        model = AdaBoostClassifier(
            estimator=base_estimator,
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            random_state=42
        )
        model.fit(X_train_scaled, y_train)
        
        # Predict
        probabilities = model.predict_proba(X_test_scaled)[:, 1]
        predictions = np.where(probabilities > 0.5, -1, 1)
        
        return model, predictions, probabilities
    
    def autoencoder_detection(self, X_train, X_test, encoding_dim=32,
                             epochs=50, batch_size=32,
                             contamination=0.1):
        """Detect anomalies using Autoencoder"""
        input_dim = X_train.shape[1]
        
        # Create detector
        detector = AutoencoderAnomalyDetector(
            input_dim=input_dim,
            encoding_dim=encoding_dim,
            contamination=contamination
        )
        
        # Train
        detector.fit(X_train, epochs=epochs, batch_size=batch_size, verbose=0)
        
        # Predict
        predictions, reconstruction_errors = detector.predict(X_test)
        return detector, predictions, reconstruction_errors

In [6]:
# SECTION 5: MAIN PIPELINE

class BrainMRIAnomalyDetectionPipeline:
    """Complete pipeline for Brain MRI anomaly detection"""
    
    def __init__(self, dataset_path):
        self.dataset_path = dataset_path
        self.loader = BrainMRIDatasetLoader(dataset_path)
        self.extractor = ImageFeatureExtractor()
        self.anomaly_detector = AnomalyDetectionMethods()
        self.results_df = None
    
    def load_and_prepare_dataset(self, train_ratio=0.7, limit_no_tumor=None):
        """Load brain MRI dataset and prepare for training"""
        print("Loading brain MRI dataset...")
        
        # Get dataset summary
        summary = self.loader.get_dataset_summary()
        
        # Load tumor images (anomalies)
        tumor_images, tumor_paths = self.loader.load_tumor_images()
        tumor_labels = np.ones(len(tumor_images))  # 1 for tumor
        
        # Load non-tumor images (normal)
        no_tumor_images, no_tumor_paths = self.loader.load_no_tumor_images()
        no_tumor_labels = np.zeros(len(no_tumor_images))  # 0 for normal
        
        # Limit non-tumor if needed
        if limit_no_tumor and len(no_tumor_images) > limit_no_tumor:
            indices = np.random.choice(len(no_tumor_images), limit_no_tumor, replace=False)
            no_tumor_images = [no_tumor_images[i] for i in indices]
            no_tumor_paths = [no_tumor_paths[i] for i in indices]
            no_tumor_labels = no_tumor_labels[indices]
        
        print(f"\nDataset loaded:")
        print(f"  Tumor images: {len(tumor_images)}")
        print(f"  Non-tumor images: {len(no_tumor_images)}")
        
        # Extract features
        print("\nExtracting features...")
        all_images = tumor_images + no_tumor_images
        all_labels = np.concatenate([tumor_labels, no_tumor_labels])
        all_paths = tumor_paths + no_tumor_paths
        
        features_list = []
        for idx, img in enumerate(all_images):
            if (idx + 1) % 50 == 0 or idx == 0:
                print(f"  Extracting: {idx + 1}/{len(all_images)}")
            
            try:
                features = self.extractor.extract_all_features(img)
                features_list.append(features)
            except Exception as e:
                print(f"  Error extracting features from image {idx}: {e}")
                # Add zeros if feature extraction fails
                features_list.append(np.zeros((1, 256)))  # Approximate feature size
        
        X = np.vstack(features_list)
        y = all_labels
        
        # Split into train and test
        # Training: mostly normal images
        normal_indices = np.where(y == 0)[0]
        tumor_indices = np.where(y == 1)[0]
        
        n_train_normal = int(len(normal_indices) * train_ratio)
        train_indices = np.concatenate([
            normal_indices[:n_train_normal],
            np.random.choice(tumor_indices, size=min(len(tumor_indices)//3, 5), replace=False)
        ])
        
        test_indices = np.concatenate([
            normal_indices[n_train_normal:],
            tumor_indices[len(tumor_indices)//3:]
        ])
        
        X_train = X[train_indices]
        X_test = X[test_indices]
        y_train = y[train_indices]
        y_test = y[test_indices]
        
        # Normalize
        self.anomaly_detector.scaler.fit(X_train)
        X_train = self.anomaly_detector.scaler.transform(X_train)
        X_test = self.anomaly_detector.scaler.transform(X_test)
        
        print(f"\nDataset prepared:")
        print(f"  Training: {len(X_train)} samples")
        print(f"  Testing: {len(X_test)} samples")
        print(f"  Features: {X.shape[1]}")
        
        return X_train, X_test, y_train, y_test, all_paths
    
    def run_all_methods(self, X_train, X_test, y_train, y_test):
        """Run all anomaly detection methods"""
        print("\n" + "="*70)
        print("Running Anomaly Detection Methods")
        print("="*70)
        
        results = {
            'Method': [],
            'Precision': [],
            'Recall': [],
            'F1-Score': [],
            'ROC-AUC': []
        }
        
        methods = [
            ('K-Nearest Neighbors', self._run_knn),
            ('PCA', self._run_pca),
            ('Isolation Forest', self._run_isolation_forest),
            ('Local Outlier Factor', self._run_lof),
            ('One-Class SVM', self._run_ocsvm),
            ('Elliptic Envelope', self._run_elliptic),
            ('DBSCAN', self._run_dbscan),
            ('Gaussian Mixture Model', self._run_gmm),
            ('K-Means', self._run_kmeans),
        ]
        
        for method_name, method_func in methods:
            print(f"\n{method_name}...")
            try:
                metrics = method_func(X_train, X_test, y_test)
                self._print_results(method_name, metrics)
                results = self._append_results(results, method_name, metrics)
            except Exception as e:
                print(f"  Error: {e}")
        
        # Deep learning method
        if KERAS_AVAILABLE:
            methods_dl = [('Autoencoder', self._run_autoencoder)]
            for method_name, method_func in methods_dl:
                print(f"\n{method_name}...")
                try:
                    metrics = method_func(X_train, X_test, y_test)
                    self._print_results(method_name, metrics)
                    results = self._append_results(results, method_name, metrics)
                except Exception as e:
                    print(f"  Error: {e}")
        
        # Supervised method
        methods_supervised = [('AdaBoost', self._run_adaboost)]
        for method_name, method_func in methods_supervised:
            print(f"\n{method_name}...")
            try:
                metrics = method_func(X_train, y_train, X_test, y_test)
                self._print_results(method_name, metrics)
                results = self._append_results(results, method_name, metrics)
            except Exception as e:
                print(f"  Error: {e}")
        
        self.results_df = pd.DataFrame(results)
        return self.results_df
    
    def _run_knn(self, X_train, X_test, y_test):
        """Run KNN method"""
        model, predictions, scores = self.anomaly_detector.knn_anomaly_detection(X_train, X_test)
        return self._compute_metrics(y_test, predictions, scores)
    
    def _run_pca(self, X_train, X_test, y_test):
        """Run PCA method"""
        model, predictions, scores = self.anomaly_detector.pca_anomaly_detection(X_train, X_test)
        return self._compute_metrics(y_test, predictions, scores)
    
    def _run_isolation_forest(self, X_train, X_test, y_test):
        """Run Isolation Forest method"""
        model, predictions, scores = self.anomaly_detector.isolation_forest_detection(X_train, X_test)
        return self._compute_metrics(y_test, predictions, scores)
    
    def _run_lof(self, X_train, X_test, y_test):
        """Run LOF method"""
        model, predictions, scores = self.anomaly_detector.local_outlier_factor_detection(X_train, X_test)
        return self._compute_metrics(y_test, predictions, scores)
    
    def _run_ocsvm(self, X_train, X_test, y_test):
        """Run One-Class SVM method"""
        model, predictions, scores = self.anomaly_detector.one_class_svm_detection(X_train, X_test)
        return self._compute_metrics(y_test, predictions, scores)
    
    def _run_elliptic(self, X_train, X_test, y_test):
        """Run Elliptic Envelope method"""
        model, predictions, scores = self.anomaly_detector.elliptic_envelope_detection(X_train, X_test)
        return self._compute_metrics(y_test, predictions, scores)
    
    def _run_dbscan(self, X_train, X_test, y_test):
        """Run DBSCAN method"""
        model, predictions, scores = self.anomaly_detector.dbscan_anomaly_detection(X_train, X_test)
        return self._compute_metrics(y_test, predictions, scores)
    
    def _run_gmm(self, X_train, X_test, y_test):
        """Run Gaussian Mixture Model method"""
        model, predictions, scores = self.anomaly_detector.gaussian_mixture_anomaly_detection(X_train, X_test)
        return self._compute_metrics(y_test, predictions, scores)
    
    def _run_kmeans(self, X_train, X_test, y_test):
        """Run K-Means method"""
        model, predictions, scores = self.anomaly_detector.kmeans_anomaly_detection(X_train, X_test)
        return self._compute_metrics(y_test, predictions, scores)
    
    def _run_autoencoder(self, X_train, X_test, y_test):
        """Run Autoencoder method"""
        model, predictions, scores = self.anomaly_detector.autoencoder_detection(X_train, X_test)
        return self._compute_metrics(y_test, predictions, scores)
    
    def _run_adaboost(self, X_train, y_train, X_test, y_test):
        """Run AdaBoost method"""
        model, predictions, scores = self.anomaly_detector.adaboost_detection(X_train, y_train, X_test)
        return self._compute_metrics(y_test, predictions, scores)
    
    def _compute_metrics(self, y_true, predictions, scores):
        """Compute evaluation metrics"""
        y_pred_binary = np.where(predictions == -1, 1, 0)
        
        precision = precision_score(y_true, y_pred_binary, zero_division=0)
        recall = recall_score(y_true, y_pred_binary, zero_division=0)
        f1 = f1_score(y_true, y_pred_binary, zero_division=0)
        
        try:
            roc_auc = roc_auc_score(y_true, scores)
        except:
            roc_auc = 0.0
        
        return {
            'precision': precision,
            'recall': recall,
            'f1': f1,
            'roc_auc': roc_auc
        }
    
    def _print_results(self, method_name, metrics):
        """Print results for a method"""
        print(f"  Precision: {metrics['precision']:.4f}")
        print(f"  Recall: {metrics['recall']:.4f}")
        print(f"  F1-Score: {metrics['f1']:.4f}")
        print(f"  ROC-AUC: {metrics['roc_auc']:.4f}")
    
    def _append_results(self, results, method_name, metrics):
        """Append results to dictionary"""
        results['Method'].append(method_name)
        results['Precision'].append(metrics['precision'])
        results['Recall'].append(metrics['recall'])
        results['F1-Score'].append(metrics['f1'])
        results['ROC-AUC'].append(metrics['roc_auc'])
        return results
    
    def save_results(self, output_file='brain_mri_anomaly_results.csv'):
        """Save results to CSV"""
        if self.results_df is not None:
            self.results_df.to_csv(output_file, index=False)
            print(f"\nResults saved to {output_file}")
            print("\nResults Summary:")
            print(self.results_df.to_string(index=False))

In [7]:
if __name__ == "__main__":
    # Configuration
    DATASET_ROOT = "/kaggle/input/brain-mri-images-for-brain-tumor-detection"
    LIMIT_NO_TUMOR = None  # Set to a number to limit non-tumor images
    
    try:
        pipeline = BrainMRIAnomalyDetectionPipeline(DATASET_ROOT)
        
        # Load and prepare dataset
        X_train, X_test, y_train, y_test, paths = pipeline.load_and_prepare_dataset(
            train_ratio=0.7,
            limit_no_tumor=LIMIT_NO_TUMOR
        )
        
        # Run all anomaly detection methods
        results = pipeline.run_all_methods(X_train, X_test, y_train, y_test)
        
        # Save results
        pipeline.save_results("brain_mri_anomaly_results.csv")
        
        # Print summary
        print("\nANALYSIS COMPLETE")
        best_idx = results['F1-Score'].idxmax()
        print(f"Best method: {results.loc[best_idx, 'Method']}")
        print(f"Best F1-Score: {results.loc[best_idx, 'F1-Score']:.4f}")
        print(f"Best ROC-AUC: {results.loc[best_idx, 'ROC-AUC']:.4f}")
        
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()

✓ Dataset structure validated
  Tumor images: /kaggle/input/brain-mri-images-for-brain-tumor-detection/yes
  No tumor images: /kaggle/input/brain-mri-images-for-brain-tumor-detection/no
Loading brain MRI dataset...

BRAIN MRI DATASET SUMMARY
Tumor images: 87
Non-tumor images: 92
Total images: 179


Loading tumor images...
  Loading: 1/87
  Loading: 50/87
Loaded 87 tumor images

Loading non-tumor images...
  Loading: 1/92
  Loading: 50/92
Loaded 92 non-tumor images

Dataset loaded:
  Tumor images: 87
  Non-tumor images: 92

Extracting features...
  Extracting: 1/179
  Extracting: 50/179
  Extracting: 100/179
  Extracting: 150/179

Dataset prepared:
  Training: 69 samples
  Testing: 86 samples
  Features: 273

Running Anomaly Detection Methods

K-Nearest Neighbors...
  Precision: 0.4000
  Recall: 0.0345
  F1-Score: 0.0635
  ROC-AUC: 0.5308

PCA...
  Precision: 0.6842
  Recall: 0.8966
  F1-Score: 0.7761
  ROC-AUC: 0.4698

Isolation Forest...
  Precision: 0.6000
  Recall: 0.1552
  F1-Score

2025-11-09 11:43:19.147920: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


  Precision: 0.6538
  Recall: 0.2931
  F1-Score: 0.4048
  ROC-AUC: 0.3571

AdaBoost...
  Precision: 1.0000
  Recall: 0.1724
  F1-Score: 0.2941
  ROC-AUC: 0.7937

Results saved to brain_mri_anomaly_results.csv

Results Summary:
                Method  Precision   Recall  F1-Score  ROC-AUC
   K-Nearest Neighbors   0.400000 0.034483  0.063492 0.530788
                   PCA   0.684211 0.896552  0.776119 0.469828
      Isolation Forest   0.600000 0.155172  0.246575 0.636700
  Local Outlier Factor   0.363636 0.068966  0.115942 0.586823
         One-Class SVM   0.562500 0.465517  0.509434 0.719212
     Elliptic Envelope   0.680556 0.844828  0.753846 0.526478
                DBSCAN   0.658537 0.931034  0.771429 0.534483
Gaussian Mixture Model   0.675325 0.896552  0.770370 0.528325
               K-Means   0.722222 0.448276  0.553191 0.503079
           Autoencoder   0.653846 0.293103  0.404762 0.357143
              AdaBoost   1.000000 0.172414  0.294118 0.793719

ANALYSIS COMPLETE
Best metho