<a href="https://www.kaggle.com/code/shashankroy568/patchcore6?scriptVersionId=262854896" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/mvtec-anomaly-detection/mvtec_anomaly_detection/readme.txt
/kaggle/input/mvtec-anomaly-detection/mvtec_anomaly_detection/license.txt
/kaggle/input/mvtec-anomaly-detection/mvtec_anomaly_detection/wood/readme.txt
/kaggle/input/mvtec-anomaly-detection/mvtec_anomaly_detection/wood/license.txt
/kaggle/input/mvtec-anomaly-detection/mvtec_anomaly_detection/wood/ground_truth/hole/000_mask.png
/kaggle/input/mvtec-anomaly-detection/mvtec_anomaly_detection/wood/ground_truth/hole/003_mask.png
/kaggle/input/mvtec-anomaly-detection/mvtec_anomaly_detection/wood/ground_truth/hole/004_mask.png
/kaggle/input/mvtec-anomaly-detection/mvtec_anomaly_detection/wood/ground_truth/hole/005_mask.png
/kaggle/input/mvtec-anomaly-detection/mvtec_anomaly_detection/wood/ground_truth/hole/001_mask.png
/kaggle/input/mvtec-anomaly-detection/mvtec_anomaly_detection/wood/ground_truth/hole/006_mask.png
/kaggle/input/mvtec-anomaly-detection/mvtec_anomaly_detection/wood/ground_truth/hole/008_mask.png
/kaggle/in

In [2]:
# MVTec Multi-Category PatchCore Anomaly Detection - FIXED VARIABLE SCOPE
# Designed for Kaggle GPU P100 environment
import os
import sys
import warnings
import subprocess
warnings.filterwarnings('ignore')

print("🔧 Starting PatchCore environment setup...")

def install_package(package_name, import_name=None, extra_args=""):
    """Install package with proper error handling"""
    try:
        # Try importing first
        if import_name:
            __import__(import_name)
            print(f"✅ {package_name} already available")
            return True
        
        print(f"Installing {package_name}...")
        cmd = f"pip install {package_name} {extra_args}"
        result = subprocess.run(cmd.split(), capture_output=True, text=True)
        
        if result.returncode == 0:
            print(f"✅ {package_name} installed successfully")
            return True
        else:
            print(f"⚠️ Warning installing {package_name}: {result.stderr}")
            return False
            
    except Exception as e:
        print(f"❌ Error with {package_name}: {e}")
        return False

# Step 1: Install dependencies with better error handling
print("📦 Installing dependencies...")

# Core dependencies first
install_package("python-dotenv", None, "--quiet")
install_package("opencv-python", "cv2", "--quiet")
install_package("Pillow", "PIL", "--quiet --upgrade")
install_package("scikit-learn", "sklearn", "--quiet")

# Try FAISS installation (optional)
faiss_available = False
print("\n🔧 Attempting FAISS installation (optional for performance)...")
faiss_strategies = [
    ("faiss-gpu", "--quiet"),
    ("faiss-cpu", "--quiet"),
    ("faiss", "--quiet")
]

for package, args in faiss_strategies:
    if install_package(package, None, args):
        try:
            import faiss
            faiss_available = True
            print(f"✅ FAISS successfully imported!")
            break
        except ImportError:
            continue

if not faiss_available:
    print("⚠️ FAISS not available - will use sklearn for nearest neighbors")

# Try anomalib installation
print("\n🔧 Installing anomalib...")
anomalib_installed = False
strategies = [
    ("anomalib", "--quiet --no-deps --upgrade"),
    ("anomalib", "--quiet --force-reinstall"),
    ("anomalib==1.0.1", "--quiet")
]

for package, args in strategies:
    print(f"Trying: pip install {package} {args}")
    if install_package(package, None, args):
        try:
            import anomalib
            print(f"✅ Anomalib successfully imported!")
            anomalib_installed = True
            break
        except Exception as e:
            print(f"⚠️ Installation succeeded but import failed: {e}")
            continue

# Step 2: Import required libraries with graceful FAISS handling
print("\n📚 Importing libraries...")

# Global FAISS availability flag
FAISS_AVAILABLE = False

try:
    import kagglehub
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from pathlib import Path
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from PIL import Image
    import cv2
    from glob import glob
    import json
    from sklearn.metrics import roc_auc_score, roc_curve
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.neighbors import NearestNeighbors
    import torchvision.transforms as transforms
    import torchvision.models as models
    from torch.utils.data import Dataset, DataLoader
    import time
    from scipy import spatial
    
    # Try importing FAISS (optional)
    try:
        import faiss
        FAISS_AVAILABLE = True
        print("✅ FAISS imported successfully")
    except ImportError:
        FAISS_AVAILABLE = False
        print("⚠️ FAISS not available, using sklearn fallback")
    
    print("✅ All required libraries imported")
    
except ImportError as e:
    print(f"❌ Required library import error: {e}")
    print("Please install missing dependencies")
    FAISS_AVAILABLE = False

# Try anomalib imports
ANOMALIB_VERSION = None
anomalib_components = {}

print("🔍 Detecting anomalib configuration...")

try:
    from anomalib import TaskType
    from anomalib.data.image.mvtec import MVTecDataModule
    from anomalib.models.image.patchcore import Patchcore
    from anomalib.engine import Engine
    
    ANOMALIB_VERSION = "v1.0+"
    anomalib_components = {
        'datamodule_class': MVTecDataModule,
        'model_class': Patchcore,
        'engine_class': Engine
    }
    print("✅ Anomalib v1.0+ PatchCore API detected")
    
except ImportError:
    try:
        from anomalib.data import MVTec
        from anomalib.models.patchcore import Patchcore
        
        ANOMALIB_VERSION = "v0.7+"
        anomalib_components = {
            'datamodule_class': MVTec,
            'model_class': Patchcore
        }
        print("✅ Anomalib v0.7+ PatchCore API detected")
        
    except ImportError:
        print("⚠️ No anomalib PatchCore API detected - will use manual implementation")
        ANOMALIB_VERSION = "manual"

# Step 3: Manual PatchCore implementation with FIXED variable scope
class ManualPatchCore:
    """Manual PatchCore implementation with sklearn fallback"""
    
    def __init__(self, backbone='wide_resnet50_2', layers=['layer2', 'layer3'], 
                 coreset_sampling_ratio=0.1, num_neighbors=9):
        self.backbone_name = backbone
        self.layer_names = layers
        self.coreset_ratio = coreset_sampling_ratio
        self.num_neighbors = num_neighbors
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        # Store FAISS availability as instance variable
        self.faiss_available = FAISS_AVAILABLE
        
        print(f"🧠 Manual PatchCore initialized:")
        print(f"   - Backbone: {backbone}")
        print(f"   - Layers: {layers}")
        print(f"   - Coreset ratio: {coreset_sampling_ratio}")
        print(f"   - FAISS available: {self.faiss_available}")
        print(f"   - Device: {self.device}")
        
        # Load pretrained backbone
        self.backbone = self._load_backbone()
        self.feature_extractor = {}
        self._register_hooks()
        
        # Memory bank for storing features
        self.memory_bank = []
        self.coreset_idx = None
        self.faiss_index = None
        self.nn_model = None
        
    def _load_backbone(self):
        """Load pretrained backbone"""
        try:
            if self.backbone_name == 'wide_resnet50_2':
                backbone = models.wide_resnet50_2(pretrained=True)
            elif self.backbone_name == 'resnet50':
                backbone = models.resnet50(pretrained=True)
            elif self.backbone_name == 'resnet18':
                backbone = models.resnet18(pretrained=True)
            else:
                print(f"⚠️ Unknown backbone {self.backbone_name}, using ResNet18")
                backbone = models.resnet18(pretrained=True)
        except Exception as e:
            print(f"⚠️ Failed to load {self.backbone_name}, using ResNet18: {e}")
            backbone = models.resnet18(pretrained=True)
        
        backbone.eval()
        backbone.to(self.device)
        return backbone
        
    def _register_hooks(self):
        """Register hooks for feature extraction"""
        def hook_fn(name):
            def hook(module, input, output):
                self.feature_extractor[name] = output
            return hook
        
        # Register hooks for specified layers
        hooks_registered = 0
        for name, module in self.backbone.named_modules():
            if name in self.layer_names:
                module.register_forward_hook(hook_fn(name))
                print(f"   ✅ Registered hook for layer: {name}")
                hooks_registered += 1
        
        if hooks_registered == 0:
            print("⚠️ No hooks registered, falling back to default layers")
            # Fallback to common layer names
            fallback_layers = ['layer1', 'layer2', 'layer3']
            self.layer_names = []
            for name, module in self.backbone.named_modules():
                if name in fallback_layers:
                    self.layer_names.append(name)
                    module.register_forward_hook(hook_fn(name))
                    print(f"   ✅ Registered fallback hook for layer: {name}")
    
    def _extract_features(self, images):
        """Extract patch features from images"""
        self.feature_extractor.clear()
        
        with torch.no_grad():
            _ = self.backbone(images)
        
        # Aggregate features from multiple layers
        features = []
        for layer_name in self.layer_names:
            if layer_name in self.feature_extractor:
                feat = self.feature_extractor[layer_name]
                # Adaptive average pooling to make features spatial
                feat = F.adaptive_avg_pool2d(feat, (28, 28))
                # Reshape to patches: (batch, channels, h, w) -> (batch, h*w, channels)
                b, c, h, w = feat.shape
                feat = feat.permute(0, 2, 3, 1).reshape(b, h*w, c)
                features.append(feat)
        
        if features:
            # Concatenate features from all layers
            concatenated_features = torch.cat(features, dim=2)  # Concat along channel dimension
            return concatenated_features
        else:
            print("⚠️ No features extracted")
            return None
    
    def _random_coreset_selection(self, features, ratio):
        """Random coreset selection (faster than greedy)"""
        print(f"🔄 Performing random coreset selection (ratio: {ratio})...")
        
        # Flatten all features
        all_features = features.reshape(-1, features.shape[-1])
        n_samples = all_features.shape[0]
        n_select = max(1, int(n_samples * ratio))
        
        print(f"   - Total features: {n_samples}")
        print(f"   - Selecting: {n_select} features")
        
        # Random selection for efficiency
        selected_indices = np.random.choice(n_samples, n_select, replace=False)
        
        print(f"   ✅ Selected {len(selected_indices)} coreset features")
        return torch.tensor(selected_indices)
    
    def fit(self, train_loader):
        """Fit PatchCore on training data"""
        print("🔄 Training PatchCore...")
        
        all_features = []
        
        for batch_idx, (images, _) in enumerate(train_loader):
            if batch_idx % 10 == 0:
                print(f"Processing batch {batch_idx}...")
            
            images = images.to(self.device)
            features = self._extract_features(images)
            
            if features is not None:
                all_features.append(features.cpu())
        
        if all_features:
            # Concatenate all features
            all_features = torch.cat(all_features, dim=0)
            print(f"✅ Extracted features shape: {all_features.shape}")
            
            # Perform coreset selection (using random for efficiency)
            self.coreset_idx = self._random_coreset_selection(all_features, self.coreset_ratio)
            
            # Build memory bank from coreset
            flattened_features = all_features.reshape(-1, all_features.shape[-1])
            self.memory_bank = flattened_features[self.coreset_idx].numpy()
            
            print(f"✅ Memory bank created with {self.memory_bank.shape[0]} features")
            
            # Build index for efficient nearest neighbor search
            if self.faiss_available:
                try:
                    self.faiss_index = faiss.IndexFlatL2(self.memory_bank.shape[1])
                    self.faiss_index.add(self.memory_bank.astype(np.float32))
                    print("✅ FAISS index built successfully")
                except Exception as e:
                    print(f"⚠️ FAISS index failed: {e}, using sklearn")
                    self.faiss_available = False
            
            if not self.faiss_available:
                self.nn_model = NearestNeighbors(n_neighbors=1, metric='euclidean', algorithm='auto')
                self.nn_model.fit(self.memory_bank)
                print("✅ Sklearn NearestNeighbors index built")
            
            return True
        else:
            print("❌ No features extracted during training")
            return False
    
    def predict(self, test_loader):
        """Predict anomalies on test data"""
        print("🔍 Predicting with PatchCore...")
        
        predictions = []
        true_labels = []
        
        for batch_idx, (images, labels) in enumerate(test_loader):
            images = images.to(self.device)
            features = self._extract_features(images)
            
            true_labels.extend(labels.numpy())
            
            if features is not None:
                # Calculate anomaly scores
                batch_scores = []
                
                for i in range(features.shape[0]):
                    # Get features for single image
                    img_features = features[i].reshape(-1, features.shape[-1]).cpu().numpy()
                    
                    # Find distances to nearest neighbors
                    if self.faiss_available and self.faiss_index is not None:
                        # Use FAISS
                        distances, _ = self.faiss_index.search(img_features.astype(np.float32), 1)
                        distances = distances.flatten()
                    else:
                        # Use sklearn
                        distances, _ = self.nn_model.kneighbors(img_features)
                        distances = distances[:, 0]  # Take first neighbor distance
                    
                    # Image-level anomaly score is maximum patch distance
                    anomaly_score = np.max(distances)
                    batch_scores.append(anomaly_score)
                
                predictions.extend(batch_scores)
            else:
                predictions.extend([0.5] * len(labels))
        
        return np.array(predictions), np.array(true_labels)

# Step 4: Dataset exploration
def explore_dataset_structure():
    """Thoroughly explore the MVTec dataset structure"""
    print("\n📂 Downloading and exploring MVTec dataset...")
    
    try:
        # Download dataset
        dataset_path = kagglehub.dataset_download("shashankroy568/mvtec-anomaly-detection")
        print(f"✅ Dataset downloaded to: {dataset_path}")
        
        root_path = Path(dataset_path)
        
        # Look for MVTec categories
        mvtec_categories = [
            'bottle', 'cable', 'capsule', 'carpet', 'grid',
            'hazelnut', 'leather', 'metal_nut', 'pill', 'screw',
            'tile', 'toothbrush', 'transistor', 'wood', 'zipper'
        ]
        
        print(f"\n🔍 Searching for MVTec categories...")
        
        # Recursive search for categories
        found_categories = {}
        
        for root, dirs, files in os.walk(root_path):
            for dir_name in dirs:
                if dir_name in mvtec_categories:
                    category_path = Path(root) / dir_name
                    found_categories[dir_name] = category_path
                    print(f"  ✅ Found {dir_name} at: {category_path}")
        
        if found_categories:
            print(f"\n🎯 Found {len(found_categories)} MVTec categories!")
            return root_path, found_categories
        
        else:
            print("❌ No MVTec categories found in expected locations")
            return root_path, {}
    
    except Exception as e:
        print(f"❌ Dataset exploration error: {e}")
        return None, {}

# Step 5: Dataset class
class MVTecManualDataset(Dataset):
    """Manual MVTec dataset"""
    
    def __init__(self, root_path, category, split='train', transform=None):
        self.root_path = Path(root_path)
        self.category = category
        self.split = split
        self.transform = transform or transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.CenterCrop((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                              std=[0.229, 0.224, 0.225])
        ])
        
        self.samples = self._load_samples()
        print(f"📊 {category}: Loaded {len(self.samples)} {split} samples")
    
    def _load_samples(self):
        """Load all samples for the dataset"""
        samples = []
        
        # Find category path
        possible_paths = [
            self.root_path / self.category,
            self.root_path / "mvtec_anomaly_detection" / self.category,
            self.root_path / "MVTec" / self.category,
            self.root_path / "mvtec" / self.category
        ]
        
        category_path = None
        for path in possible_paths:
            if path.exists():
                category_path = path
                break
        
        if not category_path:
            print(f"❌ Category path not found for {self.category}")
            return samples
        
        if self.split == 'train':
            # Training: Only normal samples from train/good
            good_path = category_path / 'train' / 'good'
            if good_path.exists():
                for ext in ['*.png', '*.jpg', '*.jpeg', '*.bmp']:
                    for img_path in good_path.rglob(ext):
                        samples.append((str(img_path), 0))  # Label 0 = Normal
        else:  # test split
            # Test: Both normal and anomaly samples
            test_path = category_path / 'test'
            if test_path.exists():
                # Load normal test samples from test/good
                good_test_path = test_path / 'good'
                if good_test_path.exists():
                    for ext in ['*.png', '*.jpg', '*.jpeg', '*.bmp']:
                        for img_path in good_test_path.rglob(ext):
                            samples.append((str(img_path), 0))  # Label 0 = Normal
                
                # Load anomaly samples from test/defect_type folders
                for defect_dir in test_path.iterdir():
                    if defect_dir.is_dir() and defect_dir.name != 'good':
                        for ext in ['*.png', '*.jpg', '*.jpeg', '*.bmp']:
                            for img_path in defect_dir.rglob(ext):
                                samples.append((str(img_path), 1))  # Label 1 = Anomaly
        
        # Count samples by label
        normal_count = len([s for s in samples if s[1] == 0])
        anomaly_count = len([s for s in samples if s[1] == 1])
        
        print(f"   ✅ {self.category} {self.split}: {normal_count} normal, {anomaly_count} anomaly")
        
        return samples
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        img_path, label = self.samples[idx]
        
        try:
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image, label
        except Exception as e:
            print(f"⚠️ Error loading {img_path}: {e}")
            # Return dummy data
            dummy_image = torch.zeros((3, 224, 224))
            return dummy_image, label

# Step 6: Single category training function
def train_single_category_patchcore(root_path, category, results_dict):
    """Train PatchCore on a single category"""
    
    print(f"\n" + "="*50)
    print(f"🚀 TRAINING PATCHCORE: {category.upper()}")
    print(f"="*50)
    
    start_time = time.time()
    
    try:
        # Create datasets
        train_dataset = MVTecManualDataset(root_path, category, 'train')
        test_dataset = MVTecManualDataset(root_path, category, 'test')
        
        # Safety checks
        if len(train_dataset) == 0:
            print(f"❌ No training samples found for {category}")
            results_dict[category] = {'status': 'failed', 'reason': 'no_train_data'}
            return
            
        if len(test_dataset) == 0:
            print(f"❌ No test samples found for {category}")
            results_dict[category] = {'status': 'failed', 'reason': 'no_test_data'}
            return
        
        # Check test labels
        test_labels = [test_dataset.samples[i][1] for i in range(len(test_dataset))]
        unique_labels = set(test_labels)
        
        if len(unique_labels) < 2:
            print(f"⚠️ Warning: {category} test set only has {unique_labels} labels")
            results_dict[category] = {'status': 'failed', 'reason': 'insufficient_labels'}
            return
        
        # Create data loaders
        train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2)
        test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=2)
        
        print(f"✅ {category}: Dataset ready - {len(train_dataset)} train, {len(test_dataset)} test")
        
        # Train model - Use manual PatchCore (more reliable)
        print(f"🧠 Training manual PatchCore for {category}...")
        
        # Choose backbone based on available models
        try:
            # Try wide_resnet50_2 first
            model = ManualPatchCore(
                backbone='wide_resnet50_2',
                layers=['layer2', 'layer3'],
                coreset_sampling_ratio=0.1,
                num_neighbors=9
            )
        except Exception as e:
            print(f"⚠️ Wide ResNet failed, using ResNet18: {e}")
            model = ManualPatchCore(
                backbone='resnet18',
                layers=['layer1', 'layer2'],
                coreset_sampling_ratio=0.1,
                num_neighbors=9
            )
        
        training_success = model.fit(train_loader)
        
        if not training_success:
            results_dict[category] = {'status': 'failed', 'reason': 'training_failed'}
            return
        
        # Test model
        predictions, true_labels = model.predict(test_loader)
        
        # Calculate metrics
        if len(predictions) == len(true_labels) and len(predictions) > 0:
            unique_test_labels = set(true_labels)
            
            if len(unique_test_labels) >= 2:
                auc_score = roc_auc_score(true_labels, predictions)
            else:
                results_dict[category] = {'status': 'failed', 'reason': 'insufficient_test_labels'}
                return
        else:
            results_dict[category] = {'status': 'failed', 'reason': 'prediction_mismatch'}
            return
        
        # Calculate additional metrics
        normal_indices = true_labels == 0
        anomaly_indices = true_labels == 1
        
        normal_scores = predictions[normal_indices] if normal_indices.any() else np.array([])
        anomaly_scores = predictions[anomaly_indices] if anomaly_indices.any() else np.array([])
        
        training_time = time.time() - start_time
        
        # Store results
        results_dict[category] = {
            'status': 'success',
            'auc_score': auc_score,
            'train_samples': len(train_dataset),
            'test_samples': len(test_dataset),
            'normal_test_samples': len([l for l in test_labels if l == 0]),
            'anomaly_test_samples': len([l for l in test_labels if l == 1]),
            'normal_mean_score': float(normal_scores.mean()) if len(normal_scores) > 0 else 0,
            'anomaly_mean_score': float(anomaly_scores.mean()) if len(anomaly_scores) > 0 else 0,
            'training_time': training_time,
            'model_type': 'manual_patchcore',
            'backbone': model.backbone_name,
            'faiss_used': model.faiss_available
        }
        
        print(f"✅ {category}: AUC Score = {auc_score:.4f}")
        print(f"📊 {category}: Normal scores = {normal_scores.mean():.2f}, Anomaly scores = {anomaly_scores.mean():.2f}")
        print(f"⏱️  {category}: Training time = {training_time:.1f}s")
        print(f"🔧 {category}: Backbone = {model.backbone_name}, FAISS = {model.faiss_available}")
            
    except Exception as e:
        print(f"❌ {category}: Training failed - {e}")
        import traceback
        traceback.print_exc()
        results_dict[category] = {'status': 'failed', 'reason': str(e)}

# Step 7: Multi-category PatchCore pipeline
def run_multi_category_patchcore_pipeline():
    """Train PatchCore on all target categories"""
    
    print("🚀 Starting Multi-Category PatchCore Pipeline")
    print("=" * 70)
    
    # Explore dataset
    root_path, categories = explore_dataset_structure()
    
    if not root_path or not categories:
        print("❌ Cannot proceed without dataset")
        return
    
    # Target categories for warehouse research
    target_categories = ['bottle', 'metal_nut', 'capsule', 'cable']
    available_targets = [cat for cat in target_categories if cat in categories]
    
    if not available_targets:
        print("❌ No target categories found in dataset")
        return
    
    print(f"\n🎯 Will train PatchCore on {len(available_targets)} categories: {available_targets}")
    
    # Train each category
    results = {}
    total_start_time = time.time()
    
    for i, category in enumerate(available_targets, 1):
        print(f"\n🔄 Progress: {i}/{len(available_targets)} categories")
        train_single_category_patchcore(root_path, category, results)
    
    total_time = time.time() - total_start_time
    
    # Generate comprehensive results summary
    print(f"\n" + "="*70)
    print("📋 MULTI-CATEGORY PATCHCORE RESULTS")
    print("="*70)
    
    successful_trainings = [cat for cat, res in results.items() if res.get('status') == 'success']
    failed_trainings = [cat for cat, res in results.items() if res.get('status') == 'failed']
    
    print(f"✅ Successful: {len(successful_trainings)}/{len(available_targets)} categories")
    print(f"❌ Failed: {len(failed_trainings)}/{len(available_targets)} categories")
    print(f"⏱️  Total time: {total_time:.1f}s")
    print(f"🔧 FAISS available: {FAISS_AVAILABLE}")
    
    # Detailed results table
    if successful_trainings:
        print(f"\n📊 DETAILED PATCHCORE RESULTS:")
        print("-" * 110)
        print(f"{'Category':<12} {'AUC Score':<10} {'Train':<7} {'Test':<6} {'Normal':<8} {'Anomaly':<8} {'Time':<6} {'Backbone':<12} {'FAISS':<6}")
        print("-" * 110)
        
        for category in successful_trainings:
            res = results[category]
            backbone = res.get('backbone', 'unknown')
            faiss_used = res.get('faiss_used', False)
            print(f"{category:<12} {res['auc_score']:<10.4f} {res['train_samples']:<7} {res['test_samples']:<6} "
                  f"{res['normal_test_samples']:<8} {res['anomaly_test_samples']:<8} {res['training_time']:<6.1f}s "
                  f"{backbone:<12} {'Yes' if faiss_used else 'No':<6}")
        
        # Calculate average AUC
        avg_auc = np.mean([results[cat]['auc_score'] for cat in successful_trainings])
        print("-" * 110)
        print(f"{'AVERAGE':<12} {avg_auc:<10.4f}")
        print("-" * 110)
    
    # Failed categories details
    if failed_trainings:
        print(f"\n❌ FAILED CATEGORIES:")
        for category in failed_trainings:
            reason = results[category].get('reason', 'unknown')
            print(f"   {category}: {reason}")
    
    # Research summary
    print(f"\n🎓 PATCHCORE RESEARCH SUMMARY:")
    print(f"   Dataset: MVTec Anomaly Detection")
    print(f"   Model: PatchCore (Manual Implementation)")
    print(f"   Categories: {len(successful_trainings)} warehouse-relevant products")
    print(f"   Performance: {avg_auc:.4f} average AUC score" if successful_trainings else "   Performance: No successful trainings")
    print(f"   Device: {torch.device('cuda' if torch.cuda.is_available() else 'cpu')}")
    print(f"   FAISS: {'Available' if FAISS_AVAILABLE else 'Not available (using sklearn)'}")
    
    # Export results to CSV for research paper
    if successful_trainings:
        results_df = pd.DataFrame([
            {
                'category': category,
                'model': 'PatchCore',
                'auc_score': results[category]['auc_score'],
                'train_samples': results[category]['train_samples'],
                'test_samples': results[category]['test_samples'],
                'normal_test_samples': results[category]['normal_test_samples'],
                'anomaly_test_samples': results[category]['anomaly_test_samples'],
                'normal_mean_score': results[category]['normal_mean_score'],
                'anomaly_mean_score': results[category]['anomaly_mean_score'],
                'training_time': results[category]['training_time'],
                'backbone': results[category].get('backbone', 'unknown'),
                'faiss_used': results[category].get('faiss_used', False)
            }
            for category in successful_trainings
        ])
        
        results_df.to_csv('mvtec_patchcore_results.csv', index=False)
        print(f"\n💾 PatchCore results exported to: mvtec_patchcore_results.csv")
    
    print(f"\n🎉 Multi-category PatchCore training pipeline completed!")
    return results

# Execute the multi-category PatchCore pipeline
if __name__ == "__main__":
    results = run_multi_category_patchcore_pipeline()


🔧 Starting PatchCore environment setup...
📦 Installing dependencies...
Installing python-dotenv...
✅ python-dotenv installed successfully
✅ opencv-python already available
✅ Pillow already available
✅ scikit-learn already available

🔧 Attempting FAISS installation (optional for performance)...
Installing faiss-gpu...
ERROR: No matching distribution found for faiss-gpu

Installing faiss-cpu...
✅ faiss-cpu installed successfully
✅ FAISS successfully imported!

🔧 Installing anomalib...
Trying: pip install anomalib --quiet --no-deps --upgrade
Installing anomalib...
✅ anomalib installed successfully
✅ Anomalib successfully imported!

📚 Importing libraries...
✅ FAISS imported successfully
✅ All required libraries imported
🔍 Detecting anomalib configuration...
⚠️ No anomalib PatchCore API detected - will use manual implementation
🚀 Starting Multi-Category PatchCore Pipeline

📂 Downloading and exploring MVTec dataset...
✅ Dataset downloaded to: /kaggle/input/mvtec-anomaly-detection

🔍 Searchin

Downloading: "https://download.pytorch.org/models/wide_resnet50_2-95faca4d.pth" to /root/.cache/torch/hub/checkpoints/wide_resnet50_2-95faca4d.pth
100%|██████████| 132M/132M [00:01<00:00, 130MB/s]


   ✅ Registered hook for layer: layer2
   ✅ Registered hook for layer: layer3
🔄 Training PatchCore...
Processing batch 0...
Processing batch 10...
Processing batch 20...
✅ Extracted features shape: torch.Size([209, 784, 1536])
🔄 Performing random coreset selection (ratio: 0.1)...
   - Total features: 163856
   - Selecting: 16385 features
   ✅ Selected 16385 coreset features
✅ Memory bank created with 16385 features
✅ FAISS index built successfully
🔍 Predicting with PatchCore...
✅ bottle: AUC Score = 0.9921
📊 bottle: Normal scores = 17.25, Anomaly scores = 40.62
⏱️  bottle: Training time = 23.7s
🔧 bottle: Backbone = wide_resnet50_2, FAISS = True

🔄 Progress: 2/4 categories

🚀 TRAINING PATCHCORE: METAL_NUT
   ✅ metal_nut train: 220 normal, 0 anomaly
📊 metal_nut: Loaded 220 train samples
   ✅ metal_nut test: 22 normal, 93 anomaly
📊 metal_nut: Loaded 115 test samples
✅ metal_nut: Dataset ready - 220 train, 115 test
🧠 Training manual PatchCore for metal_nut...
🧠 Manual PatchCore initialized

In [3]:
# MVTec Multi-Category PatchCore Anomaly Detection - FIXED VARIABLE SCOPE + PRECISION & RECALL
# Designed for Kaggle GPU P100 environment
import os
import sys
import warnings
import subprocess
warnings.filterwarnings('ignore')

print("🔧 Starting PatchCore environment setup...")

def install_package(package_name, import_name=None, extra_args=""):
    """Install package with proper error handling"""
    try:
        # Try importing first
        if import_name:
            __import__(import_name)
            print(f"✅ {package_name} already available")
            return True
        
        print(f"Installing {package_name}...")
        cmd = f"pip install {package_name} {extra_args}"
        result = subprocess.run(cmd.split(), capture_output=True, text=True)
        
        if result.returncode == 0:
            print(f"✅ {package_name} installed successfully")
            return True
        else:
            print(f"⚠️ Warning installing {package_name}: {result.stderr}")
            return False
            
    except Exception as e:
        print(f"❌ Error with {package_name}: {e}")
        return False

# Step 1: Install dependencies with better error handling
print("📦 Installing dependencies...")

# Core dependencies first
install_package("python-dotenv", None, "--quiet")
install_package("opencv-python", "cv2", "--quiet")
install_package("Pillow", "PIL", "--quiet --upgrade")
install_package("scikit-learn", "sklearn", "--quiet")

# Try FAISS installation (optional)
faiss_available = False
print("\n🔧 Attempting FAISS installation (optional for performance)...")
faiss_strategies = [
    ("faiss-gpu", "--quiet"),
    ("faiss-cpu", "--quiet"),
    ("faiss", "--quiet")
]

for package, args in faiss_strategies:
    if install_package(package, None, args):
        try:
            import faiss
            faiss_available = True
            print(f"✅ FAISS successfully imported!")
            break
        except ImportError:
            continue

if not faiss_available:
    print("⚠️ FAISS not available - will use sklearn for nearest neighbors")

# Try anomalib installation
print("\n🔧 Installing anomalib...")
anomalib_installed = False
strategies = [
    ("anomalib", "--quiet --no-deps --upgrade"),
    ("anomalib", "--quiet --force-reinstall"),
    ("anomalib==1.0.1", "--quiet")
]

for package, args in strategies:
    print(f"Trying: pip install {package} {args}")
    if install_package(package, None, args):
        try:
            import anomalib
            print(f"✅ Anomalib successfully imported!")
            anomalib_installed = True
            break
        except Exception as e:
            print(f"⚠️ Installation succeeded but import failed: {e}")
            continue

# Step 2: Import required libraries with graceful FAISS handling
print("\n📚 Importing libraries...")

# Global FAISS availability flag
FAISS_AVAILABLE = False

try:
    import kagglehub
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from pathlib import Path
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from PIL import Image
    import cv2
    from glob import glob
    import json
    from sklearn.metrics import roc_auc_score, roc_curve, precision_score, recall_score, f1_score, confusion_matrix, precision_recall_curve
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.neighbors import NearestNeighbors
    import torchvision.transforms as transforms
    import torchvision.models as models
    from torch.utils.data import Dataset, DataLoader
    import time
    from scipy import spatial
    
    # Try importing FAISS (optional)
    try:
        import faiss
        FAISS_AVAILABLE = True
        print("✅ FAISS imported successfully")
    except ImportError:
        FAISS_AVAILABLE = False
        print("⚠️ FAISS not available, using sklearn fallback")
    
    print("✅ All required libraries imported")
    
except ImportError as e:
    print(f"❌ Required library import error: {e}")
    print("Please install missing dependencies")
    FAISS_AVAILABLE = False

# Try anomalib imports
ANOMALIB_VERSION = None
anomalib_components = {}

print("🔍 Detecting anomalib configuration...")

try:
    from anomalib import TaskType
    from anomalib.data.image.mvtec import MVTecDataModule
    from anomalib.models.image.patchcore import Patchcore
    from anomalib.engine import Engine
    
    ANOMALIB_VERSION = "v1.0+"
    anomalib_components = {
        'datamodule_class': MVTecDataModule,
        'model_class': Patchcore,
        'engine_class': Engine
    }
    print("✅ Anomalib v1.0+ PatchCore API detected")
    
except ImportError:
    try:
        from anomalib.data import MVTec
        from anomalib.models.patchcore import Patchcore
        
        ANOMALIB_VERSION = "v0.7+"
        anomalib_components = {
            'datamodule_class': MVTec,
            'model_class': Patchcore
        }
        print("✅ Anomalib v0.7+ PatchCore API detected")
        
    except ImportError:
        print("⚠️ No anomalib PatchCore API detected - will use manual implementation")
        ANOMALIB_VERSION = "manual"

# NEW: Helper function to calculate optimal threshold
def calculate_optimal_threshold(y_true, y_scores):
    """Calculate optimal threshold using Youden's J statistic"""
    fpr, tpr, thresholds = roc_curve(y_true, y_scores)
    j_scores = tpr - fpr  # Youden's J statistic
    best_threshold_idx = np.argmax(j_scores)
    best_threshold = thresholds[best_threshold_idx]
    return best_threshold

# NEW: Helper function to calculate precision, recall, F1
def calculate_precision_recall_f1(y_true, y_scores, threshold=None):
    """Calculate precision, recall, and F1 score"""
    if threshold is None:
        # Use optimal threshold based on Youden's J statistic
        threshold = calculate_optimal_threshold(y_true, y_scores)
    
    # Convert scores to binary predictions
    y_pred = (y_scores >= threshold).astype(int)
    
    # Calculate metrics
    precision = precision_score(y_true, y_pred, zero_division=0)
    recall = recall_score(y_true, y_pred, zero_division=0)
    f1 = f1_score(y_true, y_pred, zero_division=0)
    
    # Additional metrics from confusion matrix
    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0
    accuracy = (tp + tn) / (tp + tn + fp + fn) if (tp + tn + fp + fn) > 0 else 0
    
    return {
        'threshold': threshold,
        'precision': precision,
        'recall': recall,
        'f1_score': f1,
        'specificity': specificity,
        'accuracy': accuracy,
        'true_positives': int(tp),
        'true_negatives': int(tn),
        'false_positives': int(fp),
        'false_negatives': int(fn)
    }

# Step 3: Manual PatchCore implementation with FIXED variable scope
class ManualPatchCore:
    """Manual PatchCore implementation with sklearn fallback"""
    
    def __init__(self, backbone='wide_resnet50_2', layers=['layer2', 'layer3'], 
                 coreset_sampling_ratio=0.1, num_neighbors=9):
        self.backbone_name = backbone
        self.layer_names = layers
        self.coreset_ratio = coreset_sampling_ratio
        self.num_neighbors = num_neighbors
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        
        # Store FAISS availability as instance variable
        self.faiss_available = FAISS_AVAILABLE
        
        print(f"🧠 Manual PatchCore initialized:")
        print(f"   - Backbone: {backbone}")
        print(f"   - Layers: {layers}")
        print(f"   - Coreset ratio: {coreset_sampling_ratio}")
        print(f"   - FAISS available: {self.faiss_available}")
        print(f"   - Device: {self.device}")
        
        # Load pretrained backbone
        self.backbone = self._load_backbone()
        self.feature_extractor = {}
        self._register_hooks()
        
        # Memory bank for storing features
        self.memory_bank = []
        self.coreset_idx = None
        self.faiss_index = None
        self.nn_model = None
        
    def _load_backbone(self):
        """Load pretrained backbone"""
        try:
            if self.backbone_name == 'wide_resnet50_2':
                backbone = models.wide_resnet50_2(pretrained=True)
            elif self.backbone_name == 'resnet50':
                backbone = models.resnet50(pretrained=True)
            elif self.backbone_name == 'resnet18':
                backbone = models.resnet18(pretrained=True)
            else:
                print(f"⚠️ Unknown backbone {self.backbone_name}, using ResNet18")
                backbone = models.resnet18(pretrained=True)
        except Exception as e:
            print(f"⚠️ Failed to load {self.backbone_name}, using ResNet18: {e}")
            backbone = models.resnet18(pretrained=True)
        
        backbone.eval()
        backbone.to(self.device)
        return backbone
        
    def _register_hooks(self):
        """Register hooks for feature extraction"""
        def hook_fn(name):
            def hook(module, input, output):
                self.feature_extractor[name] = output
            return hook
        
        # Register hooks for specified layers
        hooks_registered = 0
        for name, module in self.backbone.named_modules():
            if name in self.layer_names:
                module.register_forward_hook(hook_fn(name))
                print(f"   ✅ Registered hook for layer: {name}")
                hooks_registered += 1
        
        if hooks_registered == 0:
            print("⚠️ No hooks registered, falling back to default layers")
            # Fallback to common layer names
            fallback_layers = ['layer1', 'layer2', 'layer3']
            self.layer_names = []
            for name, module in self.backbone.named_modules():
                if name in fallback_layers:
                    self.layer_names.append(name)
                    module.register_forward_hook(hook_fn(name))
                    print(f"   ✅ Registered fallback hook for layer: {name}")
    
    def _extract_features(self, images):
        """Extract patch features from images"""
        self.feature_extractor.clear()
        
        with torch.no_grad():
            _ = self.backbone(images)
        
        # Aggregate features from multiple layers
        features = []
        for layer_name in self.layer_names:
            if layer_name in self.feature_extractor:
                feat = self.feature_extractor[layer_name]
                # Adaptive average pooling to make features spatial
                feat = F.adaptive_avg_pool2d(feat, (28, 28))
                # Reshape to patches: (batch, channels, h, w) -> (batch, h*w, channels)
                b, c, h, w = feat.shape
                feat = feat.permute(0, 2, 3, 1).reshape(b, h*w, c)
                features.append(feat)
        
        if features:
            # Concatenate features from all layers
            concatenated_features = torch.cat(features, dim=2)  # Concat along channel dimension
            return concatenated_features
        else:
            print("⚠️ No features extracted")
            return None
    
    def _random_coreset_selection(self, features, ratio):
        """Random coreset selection (faster than greedy)"""
        print(f"🔄 Performing random coreset selection (ratio: {ratio})...")
        
        # Flatten all features
        all_features = features.reshape(-1, features.shape[-1])
        n_samples = all_features.shape[0]
        n_select = max(1, int(n_samples * ratio))
        
        print(f"   - Total features: {n_samples}")
        print(f"   - Selecting: {n_select} features")
        
        # Random selection for efficiency
        selected_indices = np.random.choice(n_samples, n_select, replace=False)
        
        print(f"   ✅ Selected {len(selected_indices)} coreset features")
        return torch.tensor(selected_indices)
    
    def fit(self, train_loader):
        """Fit PatchCore on training data"""
        print("🔄 Training PatchCore...")
        
        all_features = []
        
        for batch_idx, (images, _) in enumerate(train_loader):
            if batch_idx % 10 == 0:
                print(f"Processing batch {batch_idx}...")
            
            images = images.to(self.device)
            features = self._extract_features(images)
            
            if features is not None:
                all_features.append(features.cpu())
        
        if all_features:
            # Concatenate all features
            all_features = torch.cat(all_features, dim=0)
            print(f"✅ Extracted features shape: {all_features.shape}")
            
            # Perform coreset selection (using random for efficiency)
            self.coreset_idx = self._random_coreset_selection(all_features, self.coreset_ratio)
            
            # Build memory bank from coreset
            flattened_features = all_features.reshape(-1, all_features.shape[-1])
            self.memory_bank = flattened_features[self.coreset_idx].numpy()
            
            print(f"✅ Memory bank created with {self.memory_bank.shape[0]} features")
            
            # Build index for efficient nearest neighbor search
            if self.faiss_available:
                try:
                    self.faiss_index = faiss.IndexFlatL2(self.memory_bank.shape[1])
                    self.faiss_index.add(self.memory_bank.astype(np.float32))
                    print("✅ FAISS index built successfully")
                except Exception as e:
                    print(f"⚠️ FAISS index failed: {e}, using sklearn")
                    self.faiss_available = False
            
            if not self.faiss_available:
                self.nn_model = NearestNeighbors(n_neighbors=1, metric='euclidean', algorithm='auto')
                self.nn_model.fit(self.memory_bank)
                print("✅ Sklearn NearestNeighbors index built")
            
            return True
        else:
            print("❌ No features extracted during training")
            return False
    
    def predict(self, test_loader):
        """Predict anomalies on test data"""
        print("🔍 Predicting with PatchCore...")
        
        predictions = []
        true_labels = []
        
        for batch_idx, (images, labels) in enumerate(test_loader):
            images = images.to(self.device)
            features = self._extract_features(images)
            
            true_labels.extend(labels.numpy())
            
            if features is not None:
                # Calculate anomaly scores
                batch_scores = []
                
                for i in range(features.shape[0]):
                    # Get features for single image
                    img_features = features[i].reshape(-1, features.shape[-1]).cpu().numpy()
                    
                    # Find distances to nearest neighbors
                    if self.faiss_available and self.faiss_index is not None:
                        # Use FAISS
                        distances, _ = self.faiss_index.search(img_features.astype(np.float32), 1)
                        distances = distances.flatten()
                    else:
                        # Use sklearn
                        distances, _ = self.nn_model.kneighbors(img_features)
                        distances = distances[:, 0]  # Take first neighbor distance
                    
                    # Image-level anomaly score is maximum patch distance
                    anomaly_score = np.max(distances)
                    batch_scores.append(anomaly_score)
                
                predictions.extend(batch_scores)
            else:
                predictions.extend([0.5] * len(labels))
        
        return np.array(predictions), np.array(true_labels)

# Step 4: Dataset exploration
def explore_dataset_structure():
    """Thoroughly explore the MVTec dataset structure"""
    print("\n📂 Downloading and exploring MVTec dataset...")
    
    try:
        # Download dataset
        dataset_path = kagglehub.dataset_download("shashankroy568/mvtec-anomaly-detection")
        print(f"✅ Dataset downloaded to: {dataset_path}")
        
        root_path = Path(dataset_path)
        
        # Look for MVTec categories
        mvtec_categories = [
            'bottle', 'cable', 'capsule', 'carpet', 'grid',
            'hazelnut', 'leather', 'metal_nut', 'pill', 'screw',
            'tile', 'toothbrush', 'transistor', 'wood', 'zipper'
        ]
        
        print(f"\n🔍 Searching for MVTec categories...")
        
        # Recursive search for categories
        found_categories = {}
        
        for root, dirs, files in os.walk(root_path):
            for dir_name in dirs:
                if dir_name in mvtec_categories:
                    category_path = Path(root) / dir_name
                    found_categories[dir_name] = category_path
                    print(f"  ✅ Found {dir_name} at: {category_path}")
        
        if found_categories:
            print(f"\n🎯 Found {len(found_categories)} MVTec categories!")
            return root_path, found_categories
        
        else:
            print("❌ No MVTec categories found in expected locations")
            return root_path, {}
    
    except Exception as e:
        print(f"❌ Dataset exploration error: {e}")
        return None, {}

# Step 5: Dataset class
class MVTecManualDataset(Dataset):
    """Manual MVTec dataset"""
    
    def __init__(self, root_path, category, split='train', transform=None):
        self.root_path = Path(root_path)
        self.category = category
        self.split = split
        self.transform = transform or transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.CenterCrop((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], 
                              std=[0.229, 0.224, 0.225])
        ])
        
        self.samples = self._load_samples()
        print(f"📊 {category}: Loaded {len(self.samples)} {split} samples")
    
    def _load_samples(self):
        """Load all samples for the dataset"""
        samples = []
        
        # Find category path
        possible_paths = [
            self.root_path / self.category,
            self.root_path / "mvtec_anomaly_detection" / self.category,
            self.root_path / "MVTec" / self.category,
            self.root_path / "mvtec" / self.category
        ]
        
        category_path = None
        for path in possible_paths:
            if path.exists():
                category_path = path
                break
        
        if not category_path:
            print(f"❌ Category path not found for {self.category}")
            return samples
        
        if self.split == 'train':
            # Training: Only normal samples from train/good
            good_path = category_path / 'train' / 'good'
            if good_path.exists():
                for ext in ['*.png', '*.jpg', '*.jpeg', '*.bmp']:
                    for img_path in good_path.rglob(ext):
                        samples.append((str(img_path), 0))  # Label 0 = Normal
        else:  # test split
            # Test: Both normal and anomaly samples
            test_path = category_path / 'test'
            if test_path.exists():
                # Load normal test samples from test/good
                good_test_path = test_path / 'good'
                if good_test_path.exists():
                    for ext in ['*.png', '*.jpg', '*.jpeg', '*.bmp']:
                        for img_path in good_test_path.rglob(ext):
                            samples.append((str(img_path), 0))  # Label 0 = Normal
                
                # Load anomaly samples from test/defect_type folders
                for defect_dir in test_path.iterdir():
                    if defect_dir.is_dir() and defect_dir.name != 'good':
                        for ext in ['*.png', '*.jpg', '*.jpeg', '*.bmp']:
                            for img_path in defect_dir.rglob(ext):
                                samples.append((str(img_path), 1))  # Label 1 = Anomaly
        
        # Count samples by label
        normal_count = len([s for s in samples if s[1] == 0])
        anomaly_count = len([s for s in samples if s[1] == 1])
        
        print(f"   ✅ {self.category} {self.split}: {normal_count} normal, {anomaly_count} anomaly")
        
        return samples
    
    def __len__(self):
        return len(self.samples)
    
    def __getitem__(self, idx):
        img_path, label = self.samples[idx]
        
        try:
            image = Image.open(img_path).convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image, label
        except Exception as e:
            print(f"⚠️ Error loading {img_path}: {e}")
            # Return dummy data
            dummy_image = torch.zeros((3, 224, 224))
            return dummy_image, label

# Step 6: Single category training function with ENHANCED METRICS
def train_single_category_patchcore(root_path, category, results_dict):
    """Train PatchCore on a single category"""
    
    print(f"\n" + "="*50)
    print(f"🚀 TRAINING PATCHCORE: {category.upper()}")
    print(f"="*50)
    
    start_time = time.time()
    
    try:
        # Create datasets
        train_dataset = MVTecManualDataset(root_path, category, 'train')
        test_dataset = MVTecManualDataset(root_path, category, 'test')
        
        # Safety checks
        if len(train_dataset) == 0:
            print(f"❌ No training samples found for {category}")
            results_dict[category] = {'status': 'failed', 'reason': 'no_train_data'}
            return
            
        if len(test_dataset) == 0:
            print(f"❌ No test samples found for {category}")
            results_dict[category] = {'status': 'failed', 'reason': 'no_test_data'}
            return
        
        # Check test labels
        test_labels = [test_dataset.samples[i][1] for i in range(len(test_dataset))]
        unique_labels = set(test_labels)
        
        if len(unique_labels) < 2:
            print(f"⚠️ Warning: {category} test set only has {unique_labels} labels")
            results_dict[category] = {'status': 'failed', 'reason': 'insufficient_labels'}
            return
        
        # Create data loaders
        train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, num_workers=2)
        test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, num_workers=2)
        
        print(f"✅ {category}: Dataset ready - {len(train_dataset)} train, {len(test_dataset)} test")
        
        # Train model - Use manual PatchCore (more reliable)
        print(f"🧠 Training manual PatchCore for {category}...")
        
        # Choose backbone based on available models
        try:
            # Try wide_resnet50_2 first
            model = ManualPatchCore(
                backbone='wide_resnet50_2',
                layers=['layer2', 'layer3'],
                coreset_sampling_ratio=0.1,
                num_neighbors=9
            )
        except Exception as e:
            print(f"⚠️ Wide ResNet failed, using ResNet18: {e}")
            model = ManualPatchCore(
                backbone='resnet18',
                layers=['layer1', 'layer2'],
                coreset_sampling_ratio=0.1,
                num_neighbors=9
            )
        
        training_success = model.fit(train_loader)
        
        if not training_success:
            results_dict[category] = {'status': 'failed', 'reason': 'training_failed'}
            return
        
        # Test model
        predictions, true_labels = model.predict(test_loader)
        
        # Calculate metrics
        if len(predictions) == len(true_labels) and len(predictions) > 0:
            unique_test_labels = set(true_labels)
            
            if len(unique_test_labels) >= 2:
                auc_score = roc_auc_score(true_labels, predictions)
                # NEW: Calculate precision, recall, F1
                prec_recall_metrics = calculate_precision_recall_f1(true_labels, predictions)
            else:
                results_dict[category] = {'status': 'failed', 'reason': 'insufficient_test_labels'}
                return
        else:
            results_dict[category] = {'status': 'failed', 'reason': 'prediction_mismatch'}
            return
        
        # Calculate additional metrics
        normal_indices = true_labels == 0
        anomaly_indices = true_labels == 1
        
        normal_scores = predictions[normal_indices] if normal_indices.any() else np.array([])
        anomaly_scores = predictions[anomaly_indices] if anomaly_indices.any() else np.array([])
        
        training_time = time.time() - start_time
        
        # Store results with ENHANCED METRICS
        results_dict[category] = {
            'status': 'success',
            'auc_score': auc_score,
            'precision': prec_recall_metrics['precision'],  # NEW
            'recall': prec_recall_metrics['recall'],        # NEW
            'f1_score': prec_recall_metrics['f1_score'],    # NEW
            'specificity': prec_recall_metrics['specificity'], # NEW
            'accuracy': prec_recall_metrics['accuracy'],    # NEW
            'threshold': prec_recall_metrics['threshold'],  # NEW
            'true_positives': prec_recall_metrics['true_positives'],   # NEW
            'true_negatives': prec_recall_metrics['true_negatives'],   # NEW
            'false_positives': prec_recall_metrics['false_positives'], # NEW
            'false_negatives': prec_recall_metrics['false_negatives'], # NEW
            'train_samples': len(train_dataset),
            'test_samples': len(test_dataset),
            'normal_test_samples': len([l for l in test_labels if l == 0]),
            'anomaly_test_samples': len([l for l in test_labels if l == 1]),
            'normal_mean_score': float(normal_scores.mean()) if len(normal_scores) > 0 else 0,
            'anomaly_mean_score': float(anomaly_scores.mean()) if len(anomaly_scores) > 0 else 0,
            'training_time': training_time,
            'model_type': 'manual_patchcore',
            'backbone': model.backbone_name,
            'faiss_used': model.faiss_available
        }
        
        print(f"✅ {category}: AUC Score = {auc_score:.4f}")
        print(f"📊 {category}: Precision = {prec_recall_metrics['precision']:.4f}, Recall = {prec_recall_metrics['recall']:.4f}, F1 = {prec_recall_metrics['f1_score']:.4f}")
        print(f"📊 {category}: Normal scores = {normal_scores.mean():.2f}, Anomaly scores = {anomaly_scores.mean():.2f}")
        print(f"⏱️  {category}: Training time = {training_time:.1f}s")
        print(f"🔧 {category}: Backbone = {model.backbone_name}, FAISS = {model.faiss_available}")
            
    except Exception as e:
        print(f"❌ {category}: Training failed - {e}")
        import traceback
        traceback.print_exc()
        results_dict[category] = {'status': 'failed', 'reason': str(e)}

# Step 7: Multi-category PatchCore pipeline with ENHANCED REPORTING
def run_multi_category_patchcore_pipeline():
    """Train PatchCore on all target categories"""
    
    print("🚀 Starting Multi-Category PatchCore Pipeline")
    print("=" * 70)
    
    # Explore dataset
    root_path, categories = explore_dataset_structure()
    
    if not root_path or not categories:
        print("❌ Cannot proceed without dataset")
        return
    
    # Target categories for warehouse research
    target_categories = ['bottle', 'metal_nut', 'capsule', 'cable', 'screw', 'pill', 'transistor', 'hazelnut']
    available_targets = [cat for cat in target_categories if cat in categories]
    
    if not available_targets:
        print("❌ No target categories found in dataset")
        return
    
    print(f"\n🎯 Will train PatchCore on {len(available_targets)} categories: {available_targets}")
    
    # Train each category
    results = {}
    total_start_time = time.time()
    
    for i, category in enumerate(available_targets, 1):
        print(f"\n🔄 Progress: {i}/{len(available_targets)} categories")
        train_single_category_patchcore(root_path, category, results)
    
    total_time = time.time() - total_start_time
    
    # Generate comprehensive results summary
    print(f"\n" + "="*70)
    print("📋 MULTI-CATEGORY PATCHCORE RESULTS WITH PRECISION & RECALL")
    print("="*70)
    
    successful_trainings = [cat for cat, res in results.items() if res.get('status') == 'success']
    failed_trainings = [cat for cat, res in results.items() if res.get('status') == 'failed']
    
    print(f"✅ Successful: {len(successful_trainings)}/{len(available_targets)} categories")
    print(f"❌ Failed: {len(failed_trainings)}/{len(available_targets)} categories")
    print(f"⏱️  Total time: {total_time:.1f}s")
    print(f"🔧 FAISS available: {FAISS_AVAILABLE}")
    
    # Enhanced detailed results table with precision & recall
    if successful_trainings:
        print(f"\n📊 DETAILED PATCHCORE RESULTS WITH PRECISION & RECALL:")
        print("-" * 140)
        print(f"{'Category':<12} {'AUC':<8} {'Precision':<9} {'Recall':<8} {'F1':<8} {'Acc':<8} {'Train':<6} {'Test':<5} {'Time':<6} {'Backbone':<12}")
        print("-" * 140)
        
        for category in successful_trainings:
            res = results[category]
            backbone = res.get('backbone', 'unknown')[:11]  # Truncate for display
            print(f"{category:<12} {res['auc_score']:<8.4f} {res['precision']:<9.4f} {res['recall']:<8.4f} "
                  f"{res['f1_score']:<8.4f} {res['accuracy']:<8.4f} {res['train_samples']:<6} {res['test_samples']:<5} "
                  f"{res['training_time']:<6.1f}s {backbone:<12}")
        
        # Calculate averages
        avg_auc = np.mean([results[cat]['auc_score'] for cat in successful_trainings])
        avg_precision = np.mean([results[cat]['precision'] for cat in successful_trainings])
        avg_recall = np.mean([results[cat]['recall'] for cat in successful_trainings])
        avg_f1 = np.mean([results[cat]['f1_score'] for cat in successful_trainings])
        avg_accuracy = np.mean([results[cat]['accuracy'] for cat in successful_trainings])
        
        print("-" * 140)
        print(f"{'AVERAGE':<12} {avg_auc:<8.4f} {avg_precision:<9.4f} {avg_recall:<8.4f} "
              f"{avg_f1:<8.4f} {avg_accuracy:<8.4f}")
        print("-" * 140)
    
    # Failed categories details
    if failed_trainings:
        print(f"\n❌ FAILED CATEGORIES:")
        for category in failed_trainings:
            reason = results[category].get('reason', 'unknown')
            print(f"   {category}: {reason}")
    
    # Research summary
    print(f"\n🎓 PATCHCORE RESEARCH SUMMARY:")
    print(f"   Dataset: MVTec Anomaly Detection")
    print(f"   Model: PatchCore (Manual Implementation)")
    print(f"   Categories: {len(successful_trainings)} warehouse-relevant products")
    if successful_trainings:
        print(f"   Performance:")
        print(f"     - Average AUC: {avg_auc:.4f}")
        print(f"     - Average Precision: {avg_precision:.4f}")
        print(f"     - Average Recall: {avg_recall:.4f}")
        print(f"     - Average F1-Score: {avg_f1:.4f}")
        print(f"     - Average Accuracy: {avg_accuracy:.4f}")
    else:
        print(f"   Performance: No successful trainings")
    print(f"   Device: {torch.device('cuda' if torch.cuda.is_available() else 'cpu')}")
    print(f"   FAISS: {'Available' if FAISS_AVAILABLE else 'Not available (using sklearn)'}")
    
    # Export results to CSV for research paper with enhanced metrics
    if successful_trainings:
        results_df = pd.DataFrame([
            {
                'category': category,
                'model': 'PatchCore',
                'auc_score': results[category]['auc_score'],
                'precision': results[category]['precision'],
                'recall': results[category]['recall'],
                'f1_score': results[category]['f1_score'],
                'specificity': results[category]['specificity'],
                'accuracy': results[category]['accuracy'],
                'threshold': results[category]['threshold'],
                'true_positives': results[category]['true_positives'],
                'true_negatives': results[category]['true_negatives'],
                'false_positives': results[category]['false_positives'],
                'false_negatives': results[category]['false_negatives'],
                'train_samples': results[category]['train_samples'],
                'test_samples': results[category]['test_samples'],
                'normal_test_samples': results[category]['normal_test_samples'],
                'anomaly_test_samples': results[category]['anomaly_test_samples'],
                'normal_mean_score': results[category]['normal_mean_score'],
                'anomaly_mean_score': results[category]['anomaly_mean_score'],
                'training_time': results[category]['training_time'],
                'backbone': results[category].get('backbone', 'unknown'),
                'faiss_used': results[category].get('faiss_used', False)
            }
            for category in successful_trainings
        ])
        
        results_df.to_csv('mvtec_patchcore_results_with_precision_recall.csv', index=False)
        print(f"\n💾 Enhanced PatchCore results exported to: mvtec_patchcore_results_with_precision_recall.csv")
        
        # Show confusion matrix summary
        print(f"\n🔍 CONFUSION MATRIX SUMMARY:")
        print("-" * 80)
        print(f"{'Category':<12} {'TP':<5} {'TN':<5} {'FP':<5} {'FN':<5} {'Threshold':<10}")
        print("-" * 80)
        for category in successful_trainings:
            res = results[category]
            print(f"{category:<12} {res['true_positives']:<5} {res['true_negatives']:<5} "
                  f"{res['false_positives']:<5} {res['false_negatives']:<5} {res['threshold']:<10.4f}")
        print("-" * 80)
    
    print(f"\n🎉 Multi-category PatchCore training pipeline with Precision & Recall completed!")
    return results

# Execute the multi-category PatchCore pipeline
if __name__ == "__main__":
    results = run_multi_category_patchcore_pipeline()

🔧 Starting PatchCore environment setup...
📦 Installing dependencies...
Installing python-dotenv...
✅ python-dotenv installed successfully
✅ opencv-python already available
✅ Pillow already available
✅ scikit-learn already available

🔧 Attempting FAISS installation (optional for performance)...
Installing faiss-gpu...
ERROR: No matching distribution found for faiss-gpu

Installing faiss-cpu...
✅ faiss-cpu installed successfully
✅ FAISS successfully imported!

🔧 Installing anomalib...
Trying: pip install anomalib --quiet --no-deps --upgrade
Installing anomalib...
✅ anomalib installed successfully
✅ Anomalib successfully imported!

📚 Importing libraries...
✅ FAISS imported successfully
✅ All required libraries imported
🔍 Detecting anomalib configuration...
⚠️ No anomalib PatchCore API detected - will use manual implementation
🚀 Starting Multi-Category PatchCore Pipeline

📂 Downloading and exploring MVTec dataset...
✅ Dataset downloaded to: /kaggle/input/mvtec-anomaly-detection

🔍 Searchin