# In [1]:
import os
import numpy as np
import cv2
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json
import time
import warnings
warnings.filterwarnings('ignore')

# PyTorch CUDA support
#
# Defines two module-level names used throughout the file:
#   GPU_AVAILABLE - True when a CUDA device can be used
#   device        - the torch.device every tensor is placed on
try:
    import torch
except ImportError:
    # PyTorch is a hard requirement. `torch` is unbound in this branch, so no
    # torch.device can be created here; report the problem and stop.
    GPU_AVAILABLE = False
    device = None
    print("ERROR: PyTorch not available. Please install with: pip install torch torchvision")
    raise SystemExit(1) from None

if torch.cuda.is_available():
    GPU_AVAILABLE = True
    device = torch.device('cuda:0')
    print("=" * 70)
    print("GPU INITIALIZATION")
    print("=" * 70)
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"CUDA Version: {torch.version.cuda}")
    print("CUDA enabled - All processing will run on GPU!")
    print("=" * 70)
else:
    GPU_AVAILABLE = False
    device = torch.device('cpu')
    print("WARNING: CUDA not available. Using CPU.")

# ============================================================================
# 1. DATASET PREPARATION AND LOADING
# ============================================================================

class DatasetLoader:
    """Load grayscale images from one sub-folder per category.

    Images that fail a basic quality check are skipped. After
    ``load_dataset`` the ``images`` and ``labels`` attributes hold torch
    tensors placed on the module-level ``device``.
    """

    # Lower-case file extensions that are treated as images.
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, dataset_path):
        """Remember the dataset root; nothing is read from disk yet."""
        self.dataset_path = dataset_path
        self.categories = ['Normal', 'Pneumonia_bacterial', 'Pneumonia_viral']
        self.images = []
        self.labels = []
        self.label_map = {'Normal': 0, 'Pneumonia_bacterial': 1, 'Pneumonia_viral': 2}

    @staticmethod
    def _is_image_file(filename):
        """Return True when *filename* has an image extension (any letter case)."""
        return filename.lower().endswith(DatasetLoader.IMAGE_EXTENSIONS)

    @staticmethod
    def _assess_image(img):
        """Return ``(is_valid, reason)`` for an already decoded grayscale image."""
        if img is None:
            # cv2.imread signals an unreadable file by returning None.
            return False, "Corrupted"

        if img.shape[0] < 64 or img.shape[1] < 64:
            return False, "Low resolution"

        mean_intensity = np.mean(img)
        if mean_intensity < 10 or mean_intensity > 245:
            # Almost entirely black or white.
            return False, "Poor contrast"

        return True, "OK"

    def _read_and_check(self, img_path):
        """Decode *img_path* once and return ``(image, is_valid, reason)``."""
        try:
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            is_valid, reason = self._assess_image(img)
        except Exception as e:
            # Best effort: any decoding/inspection failure rejects the file.
            return None, False, str(e)
        return img, is_valid, reason

    def check_image_quality(self, img_path):
        """Check if image is corrupted or low quality.

        Returns:
            ``(True, "OK")`` for a usable image, otherwise ``(False, reason)``.
        """
        _, is_valid, reason = self._read_and_check(img_path)
        return is_valid, reason

    def load_dataset(self, target_size=(256, 256)):
        """Load every valid image, resize it and move the data to ``device``.

        Args:
            target_size: size passed to ``cv2.resize`` for every image.

        Returns:
            ``(images, labels)`` tensors of dtype float32 and int64; they are
            also stored on ``self.images`` / ``self.labels``.
        """
        print("=" * 70)
        print("DATASET LOADING AND QUALITY CHECK")
        print("=" * 70)

        stats = {cat: {'total': 0, 'loaded': 0, 'rejected': 0} for cat in self.categories}

        # Collect into fresh local lists so the method can be called again
        # after self.images / self.labels have been replaced by tensors.
        images = []
        labels = []

        for category in self.categories:
            cat_path = os.path.join(self.dataset_path, category)
            if not os.path.isdir(cat_path):
                print(f"Warning: {category} folder not found!")
                continue

            # Sorted so the sample order does not depend on the file system.
            files = sorted(f for f in os.listdir(cat_path) if self._is_image_file(f))
            stats[category]['total'] = len(files)

            print(f"\nProcessing {category}...")
            for filename in files:
                img_path = os.path.join(cat_path, filename)
                # Decode once; the same array is both checked and resized.
                img, is_valid, _ = self._read_and_check(img_path)

                if not is_valid:
                    stats[category]['rejected'] += 1
                    continue

                images.append(cv2.resize(img, target_size))
                labels.append(self.label_map[category])
                stats[category]['loaded'] += 1

        print("\n" + "=" * 70)
        print("DATASET STATISTICS")
        print("=" * 70)
        for cat in self.categories:
            print(f"{cat}:")
            print(f"  Total: {stats[cat]['total']}")
            print(f"  Loaded: {stats[cat]['loaded']}")
            print(f"  Rejected: {stats[cat]['rejected']}")

        # Convert to numpy first, then transfer to the target device.
        if images:
            images_np = np.array(images, dtype=np.float32)
        else:
            # Keep a 3-D shape even when nothing was loaded.
            width, height = target_size
            images_np = np.empty((0, height, width), dtype=np.float32)
        labels_np = np.array(labels, dtype=np.int64)

        print(f"\nTransferring data to GPU...")
        self.images = torch.from_numpy(images_np).to(device)
        self.labels = torch.from_numpy(labels_np).to(device)

        print(f"Final Dataset Shape: {self.images.shape}")
        print(f"Labels Shape: {self.labels.shape}")
        print(f"Data location: {self.images.device}")

        return self.images, self.labels

# ============================================================================
# 2. DATA PREPROCESSING (GPU-Optimized)
# ============================================================================

class DataPreprocessor:
    """Augment, filter and normalise batches of images held as torch tensors.

    CLAHE and rotation run through OpenCV on the CPU and move their result to
    the module-level ``device``; flipping, noise, outlier removal and
    normalisation are pure torch operations.
    """

    def __init__(self):
        # Global mean / std computed by normalize_images().
        self.mean = None
        self.std = None

    def apply_clahe_batch(self, images):
        """Apply CLAHE to batch of images.

        Each image is cast to uint8 for OpenCV, so pixel values are expected
        in the 0-255 range. Returns a uint8 tensor on ``device``.
        """
        # One CLAHE object is enough for the whole batch.
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        results = [clahe.apply(img.cpu().numpy().astype(np.uint8)) for img in images]
        return torch.from_numpy(np.stack(results)).to(device)

    def add_gaussian_noise_gpu(self, images, mean=0, sigma=10):
        """Add Gaussian noise (``mean``, ``sigma``) and clamp to [0, 255]."""
        noise = torch.randn_like(images) * sigma + mean
        noisy_img = images + noise
        return torch.clamp(noisy_img, 0, 255)

    def rotate_image_batch(self, images, angle):
        """Rotate every image by ``angle`` degrees around its centre.

        Returns a uint8 tensor on ``device`` with the same spatial size.
        """
        # All images in a batch tensor share one size, so the affine matrix
        # is computed once.
        h, w = images.shape[-2], images.shape[-1]
        center = (w // 2, h // 2)
        M = cv2.getRotationMatrix2D(center, angle, 1.0)

        results = [
            cv2.warpAffine(img.cpu().numpy().astype(np.uint8), M, (w, h))
            for img in images
        ]
        return torch.from_numpy(np.stack(results)).to(device)

    def flip_image_gpu(self, images, direction='horizontal'):
        """Flip a (N, H, W) batch horizontally, or vertically for any other direction."""
        if direction == 'horizontal':
            return torch.flip(images, dims=[2])
        else:
            return torch.flip(images, dims=[1])

    def _apply_in_batches(self, images, transform, batch_size=100):
        """Run ``transform`` over ``images`` in chunks and concatenate the results."""
        n_images = len(images)
        chunks = []
        for start in range(0, n_images, batch_size):
            if start % 500 == 0:
                print(f"  Processing {start}/{n_images}...")
            chunks.append(transform(images[start:start + batch_size]))
        # Cast back explicitly: the OpenCV helpers return uint8, and the final
        # concatenation should not depend on implicit dtype promotion.
        return torch.cat(chunks, dim=0).to(images.dtype)

    def augment_data(self, images, labels, augmentation_factor=2):
        """Extend the dataset with transformed copies of every image.

        A CLAHE-enhanced copy is always added. ``augmentation_factor`` >= 2
        adds a 15 degree rotation, >= 3 a horizontal flip and >= 4 Gaussian
        noise (sigma 5).

        Returns:
            ``(images, labels)`` with the original samples first, followed by
            each augmented copy in the order listed above.
        """
        print("\n" + "=" * 70)
        print("DATA AUGMENTATION (GPU-Accelerated)")
        print("=" * 70)

        n_original = len(images)
        if n_original == 0:
            # Nothing to transform; concatenating empty chunk lists would fail.
            print("No images to augment.")
            return images, labels

        augmented_images = [images]
        augmented_labels = [labels]

        # CLAHE enhancement
        print("Applying CLAHE enhancement...")
        augmented_images.append(self._apply_in_batches(images, self.apply_clahe_batch))
        augmented_labels.append(labels)

        if augmentation_factor >= 2:
            print("Applying rotation augmentation...")
            rotated = self._apply_in_batches(
                images, lambda batch: self.rotate_image_batch(batch, 15)
            )
            augmented_images.append(rotated)
            augmented_labels.append(labels)

        if augmentation_factor >= 3:
            print("Applying flip augmentation...")
            augmented_images.append(self.flip_image_gpu(images, 'horizontal'))
            augmented_labels.append(labels)

        if augmentation_factor >= 4:
            print("Applying noise augmentation...")
            augmented_images.append(self.add_gaussian_noise_gpu(images, sigma=5))
            augmented_labels.append(labels)

        # Concatenate all augmented data
        final_images = torch.cat(augmented_images, dim=0)
        final_labels = torch.cat(augmented_labels, dim=0)

        print(f"\nOriginal dataset size: {n_original}")
        print(f"Augmented dataset size: {len(final_images)}")
        print(f"Data stored on: {final_images.device}")

        return final_images, final_labels

    def remove_outliers_iqr(self, images, labels):
        """Drop images whose mean intensity lies outside the 1.5*IQR fences.

        Returns:
            ``(images, labels)`` restricted to the retained samples.
        """
        print("\n" + "=" * 70)
        print("OUTLIER REMOVAL (IQR Method - GPU)")
        print("=" * 70)

        if len(images) == 0:
            # torch.quantile rejects empty input; there is nothing to filter.
            print("Original samples: 0")
            print("Outliers removed: 0")
            print("Remaining samples: 0")
            return images, labels

        # Mean intensity per image; reshape also accepts non-contiguous input.
        mean_intensities = torch.mean(images.reshape(len(images), -1), dim=1)

        Q1 = torch.quantile(mean_intensities, 0.25)
        Q3 = torch.quantile(mean_intensities, 0.75)
        IQR = Q3 - Q1

        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR

        mask = (mean_intensities >= lower_bound) & (mean_intensities <= upper_bound)

        print(f"Original samples: {len(images)}")
        print(f"Outliers removed: {(~mask).sum().item()}")
        print(f"Remaining samples: {mask.sum().item()}")

        return images[mask], labels[mask]

    def normalize_images(self, images):
        """Z-score normalise with one global mean/std over all pixels.

        The statistics are stored on ``self.mean`` / ``self.std``.
        """
        print("\n" + "=" * 70)
        print("IMAGE NORMALIZATION (Z-Score - GPU)")
        print("=" * 70)

        # Global statistics
        self.mean = torch.mean(images)
        self.std = torch.std(images)

        print(f"Global Mean: {self.mean.item():.2f}")
        print(f"Global Std: {self.std.item():.2f}")

        # The epsilon keeps a constant image set from dividing by zero.
        normalized = (images - self.mean) / (self.std + 1e-8)

        return normalized

# ============================================================================
# 3. RADIOMIC FEATURE EXTRACTION (GPU-Optimized)
# ============================================================================

class RadiomicFeatureExtractor:
    """Extract statistical, GLCM texture and Gabor filter features per image.

    Feature tensors are float32 and live on the module-level ``device``. The
    GLCM co-occurrence counting itself is done with NumPy on the CPU.
    """

    # (row, col) unit step per supported angle; any other angle uses (1, -1).
    _ANGLE_STEPS = {0: (0, 1), 45: (1, 1), 90: (1, 0)}

    def __init__(self):
        pass

    def extract_statistical_features_gpu(self, image):
        """Return a tensor ``[mean, variance, std]`` of the image intensities."""
        # Stacking the 0-dim results avoids one host round trip per feature.
        stats = torch.stack([torch.mean(image), torch.var(image), torch.std(image)])
        return stats.to(device=device, dtype=torch.float32)

    @staticmethod
    def _cooccurrence_counts(quantized, levels, d_row, d_col):
        """Count grey-level pairs ``(p[i, j], p[i + d_row, j + d_col])``.

        Args:
            quantized: 2-D integer array with values in ``[0, levels)``.
            levels: number of grey levels.
            d_row, d_col: neighbour offset; pairs whose neighbour falls
                outside the image are ignored.

        Returns:
            ``(levels, levels)`` float64 array of raw (non-symmetric) counts.
        """
        rows, cols = quantized.shape
        # Source pixels whose neighbour is still inside the image.
        r0, r1 = max(0, -d_row), min(rows, rows - d_row)
        c0, c1 = max(0, -d_col), min(cols, cols - d_col)
        if r0 >= r1 or c0 >= c1:
            return np.zeros((levels, levels), dtype=np.float64)

        src = quantized[r0:r1, c0:c1].ravel()
        dst = quantized[r0 + d_row:r1 + d_row, c0 + d_col:c1 + d_col].ravel()
        # Encode each pair as one integer so a single bincount tallies them.
        counts = np.bincount(src * levels + dst, minlength=levels * levels)
        return counts.reshape(levels, levels).astype(np.float64)

    def compute_glcm_gpu(self, image, distance=1, angle=0):
        """Compute a normalised 16-level grey-level co-occurrence matrix.

        Args:
            image: 2-D tensor with intensities in the 0-255 range.
            distance: pixel distance between the two pixels of a pair.
            angle: 0, 45 or 90 select the offsets (0, d), (d, d) and (d, 0)
                in (row, col) order; any other value selects (d, -d).

        Returns:
            ``(16, 16)`` float64 tensor on ``device`` whose entries sum to 1
            (all zeros when no pixel pair exists).
        """
        levels = 16

        # Quantize to `levels` grey levels.
        image_quantized = (image / (256 / levels)).long()
        image_quantized = torch.clamp(image_quantized, 0, levels - 1)

        # Transfer to CPU for co-occurrence computation
        img_cpu = image_quantized.cpu().numpy()

        step_row, step_col = self._ANGLE_STEPS.get(angle, (1, -1))
        glcm = self._cooccurrence_counts(
            img_cpu, levels, step_row * distance, step_col * distance
        )

        glcm = glcm / (np.sum(glcm) + 1e-10)

        return torch.from_numpy(glcm).to(device)

    def extract_texture_features_gpu(self, image):
        """Return a tensor ``[contrast, homogeneity]`` from the angle-0 GLCM."""
        glcm = self.compute_glcm_gpu(image, distance=1, angle=0)

        i_idx = torch.arange(glcm.shape[0], device=device).view(-1, 1).float()
        j_idx = torch.arange(glcm.shape[1], device=device).view(1, -1).float()
        squared_diff = (i_idx - j_idx) ** 2

        contrast = torch.sum(glcm * squared_diff)
        homogeneity = torch.sum(glcm / (1 + squared_diff))

        features = torch.stack([contrast, homogeneity])
        return features.to(device=device, dtype=torch.float32)

    def gabor_kernel_gpu(self, ksize, sigma, theta, lambd, gamma, psi):
        """Create a real-valued Gabor kernel on ``device``.

        Args:
            ksize: kernel size; the grid spans ``-ksize//2 .. ksize//2``.
            sigma: standard deviation of the Gaussian envelope along x.
            theta: orientation in radians.
            lambd: wavelength of the cosine carrier.
            gamma: aspect ratio (sigma_y = sigma / gamma).
            psi: phase offset of the carrier.
        """
        sigma_x = sigma
        sigma_y = sigma / gamma

        half = ksize // 2

        y = torch.arange(-half, half + 1, device=device).view(-1, 1).float()
        x = torch.arange(-half, half + 1, device=device).view(1, -1).float()

        # Rotate coordinates; the trigonometric terms are evaluated once.
        theta_t = torch.tensor(theta, device=device)
        cos_t = torch.cos(theta_t)
        sin_t = torch.sin(theta_t)
        x_theta = x * cos_t + y * sin_t
        y_theta = -x * sin_t + y * cos_t

        # Gaussian envelope
        exp_part = torch.exp(-0.5 * (x_theta**2 / sigma_x**2 + y_theta**2 / sigma_y**2))

        # Sinusoidal carrier
        cos_part = torch.cos(2 * np.pi * x_theta / lambd + psi)

        return exp_part * cos_part

    def _default_gabor_kernel(self):
        """Build the fixed Gabor kernel used for the filter features."""
        return self.gabor_kernel_gpu(ksize=21, sigma=3, theta=0, lambd=10, gamma=0.5, psi=0)

    def extract_filter_features_gpu(self, image, kernel=None):
        """Return a tensor ``[mean |response|, std response]`` of a Gabor filter.

        Args:
            image: 2-D float tensor.
            kernel: optional precomputed square kernel; when omitted the
                default kernel is built for this call.
        """
        if kernel is None:
            kernel = self._default_gabor_kernel()

        # Apply filter using conv2d
        img_4d = image.unsqueeze(0).unsqueeze(0)  # [1, 1, H, W]
        kernel_4d = kernel.unsqueeze(0).unsqueeze(0)  # [1, 1, K, K]

        filtered = torch.nn.functional.conv2d(
            img_4d, kernel_4d, padding=kernel.shape[-1] // 2
        )
        filtered = filtered.squeeze()

        features = torch.stack([torch.mean(torch.abs(filtered)), torch.std(filtered)])
        return features.to(device=device, dtype=torch.float32)

    def extract_all_features(self, images):
        """Extract the 7 features of every image.

        Each image is first rescaled to the 0-255 range using its own minimum
        and maximum.

        Returns:
            ``(n_images, 7)`` tensor: mean, variance, std, GLCM contrast,
            GLCM homogeneity, Gabor mean response, Gabor std response.
        """
        print("\n" + "=" * 70)
        print("RADIOMIC FEATURE EXTRACTION (GPU-Accelerated)")
        print("=" * 70)
        print("Feature Categories:")
        print("  1. Statistical: Mean, Variance, Std")
        print("  2. Texture: GLCM Contrast, Homogeneity")
        print("  3. Filter-based: Gabor Mean Response, Gabor Std Response")
        print("=" * 70)

        all_features = []
        n_images = len(images)

        # The Gabor kernel does not depend on the image: build it once.
        gabor_kernel = self._default_gabor_kernel()

        for idx in range(n_images):
            if idx % 100 == 0:
                print(f"Extracting features from image {idx}/{n_images}...")

            img = images[idx]

            # Rescale to [0, 255]; the values stay floating point.
            img_min = torch.min(img)
            img_max = torch.max(img)
            img_scaled = ((img - img_min) / (img_max - img_min + 1e-8) * 255)

            stat_features = self.extract_statistical_features_gpu(img_scaled)
            texture_features = self.extract_texture_features_gpu(img_scaled)
            filter_features = self.extract_filter_features_gpu(img_scaled, gabor_kernel)

            combined = torch.cat([stat_features, texture_features, filter_features])
            all_features.append(combined)

        # Stack all features into one (n_images, n_features) tensor
        all_features = torch.stack(all_features)

        print(f"\nTotal features extracted per image: {all_features.shape[1]}")
        print(f"All features stored on: {all_features.device}")

        return all_features

# ============================================================================
# 4. FEATURE FUSION AND SCALING (GPU)
# ============================================================================

class FeatureFusion:
    """Z-score scaling of a feature matrix with reusable statistics."""

    def __init__(self):
        # Per-feature statistics learned by scale_features(fit=True).
        self.mean = None
        self.std = None

    def scale_features(self, features, fit=True):
        """Standardise each feature column.

        Args:
            features: ``(n_samples, n_features)`` float tensor.
            fit: when True the per-feature mean and std are (re)computed from
                ``features``; when False the previously fitted values are
                reused.

        Returns:
            The scaled tensor, same shape as ``features``.

        Raises:
            RuntimeError: if ``fit`` is False and nothing was fitted yet.
        """
        print("\n" + "=" * 70)
        print("FEATURE SCALING (GPU - Z-Score)")
        print("=" * 70)

        if fit:
            self.mean = torch.mean(features, dim=0)
            self.std = torch.std(features, dim=0)
            print("Fitted scaling parameters on GPU")
        elif self.mean is None or self.std is None:
            raise RuntimeError(
                "scale_features(fit=False) called before the scaler was fitted"
            )

        # A constant feature has std 0 and a single-sample fit has an
        # undefined (NaN) std; both are replaced by 1 so the division below
        # leaves such features centred instead of producing inf/NaN.
        unusable = (self.std == 0) | ~torch.isfinite(self.std)
        self.std = torch.where(unusable, torch.ones_like(self.std), self.std)

        scaled = (features - self.mean) / self.std

        print(f"Scaled features shape: {scaled.shape}")
        print(f"Mean: {torch.mean(scaled).item():.4f}")
        print(f"Std: {torch.std(scaled).item():.4f}")

        return scaled

# ============================================================================
# 5. DIMENSIONALITY REDUCTION (PCA on GPU)
# ============================================================================

class PCA:
    """Principal component analysis via eigen-decomposition of the covariance.

    Attributes set by ``fit``:
        mean: per-feature mean used for centring.
        components: ``(n_features, k)`` matrix whose columns are the leading
            eigenvectors.
        explained_variance: the ``k`` matching eigenvalues.
        n_components_: ``k``, the number of components actually kept (at most
            the number of features).
    """

    def __init__(self, n_components=50):
        self.n_components = n_components
        self.n_components_ = None
        self.components = None
        self.mean = None
        self.explained_variance = None

    def fit(self, X):
        """Learn the principal axes of ``X`` (``(n_samples, n_features)``).

        Returns:
            ``self``.
        """
        print("\n" + "=" * 70)
        print(f"DIMENSIONALITY REDUCTION (PCA on GPU - {self.n_components} components)")
        print("=" * 70)

        # Center data
        self.mean = torch.mean(X, dim=0)
        X_centered = X - self.mean

        # Sample covariance; with a single sample the divisor would be 0.
        dof = max(X.shape[0] - 1, 1)
        cov_matrix = torch.mm(X_centered.T, X_centered) / dof

        # eigh returns eigenvalues in ascending order.
        eigenvalues, eigenvectors = torch.linalg.eigh(cov_matrix)

        # Sort in descending order
        idx = torch.argsort(eigenvalues, descending=True)
        # Round-off can make near-zero eigenvalues slightly negative.
        eigenvalues = torch.clamp(eigenvalues[idx], min=0)
        eigenvectors = eigenvectors[:, idx]

        # Select top components; slicing caps this at the feature count.
        self.components = eigenvectors[:, :self.n_components]
        self.explained_variance = eigenvalues[:self.n_components]
        self.n_components_ = self.components.shape[1]

        # Share of the total variance kept; guarded against constant data.
        total_var = torch.sum(eigenvalues)
        if total_var.item() > 0:
            explained_total = (torch.sum(self.explained_variance) / total_var).item()
        else:
            explained_total = 0.0

        print(f"Original dimension: {X.shape[1]}")
        print(f"Reduced dimension: {self.n_components_}")
        print(f"Total variance explained: {explained_total:.4f}")
        print(f"Computation done on: GPU")

        return self

    def transform(self, X):
        """Project ``X`` onto the fitted components.

        Raises:
            RuntimeError: if ``fit`` has not been called.
        """
        if self.components is None or self.mean is None:
            raise RuntimeError("PCA.transform called before fit")
        X_centered = X - self.mean
        return torch.mm(X_centered, self.components)

    def fit_transform(self, X):
        """Fit on ``X`` and return its projection."""
        self.fit(X)
        return self.transform(X)

# ============================================================================
# 6. DATA SPLITTING (GPU)
# ============================================================================

class DataSplitter:
    """Seeded shuffling and train/test splitting of tensor datasets."""

    @staticmethod
    def shuffle_data(X, y, seed=42):
        """Return ``X`` and ``y`` reordered by the same random permutation.

        Note: this calls ``torch.manual_seed(seed)``, which reseeds torch's
        global random number generator.

        Raises:
            ValueError: if ``X`` and ``y`` differ in length.
        """
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        torch.manual_seed(seed)
        # Build the permutation on the data's own device so indexing works
        # wherever the tensors live.
        indices = torch.randperm(len(X), device=X.device)
        return X[indices], y[indices.to(y.device)]

    @staticmethod
    def train_test_split(X, y, train_ratio=0.8, seed=42):
        """Shuffle, then split into train and test parts.

        Args:
            X, y: tensors with the same first dimension.
            train_ratio: fraction of samples (between 0 and 1) for training;
                the count is truncated with ``int``.
            seed: seed passed to ``shuffle_data``.

        Returns:
            ``(X_train, X_test, y_train, y_test)``.

        Raises:
            ValueError: if ``train_ratio`` is outside ``[0, 1]``.
        """
        if not 0 <= train_ratio <= 1:
            raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio}")

        X_shuffled, y_shuffled = DataSplitter.shuffle_data(X, y, seed)

        n_train = int(len(X) * train_ratio)

        return X_shuffled[:n_train], X_shuffled[n_train:], y_shuffled[:n_train], y_shuffled[n_train:]

# ============================================================================
# 7. SVM CLASSIFIER (Full GPU Implementation)
# ============================================================================

class SVM:
    """Linear one-vs-rest SVM trained by full-batch sub-gradient descent.

    Every class gets a binary classifier minimising
    ``mean(hinge) + (1 / C) * ||w||^2``. Tensors are created on the device of
    the training data.
    """

    def __init__(self, learning_rate=0.001, n_iterations=1000, C=1.0):
        self.lr = learning_rate
        self.n_iter = n_iterations
        self.C = C
        self.lambda_param = 1.0 / C
        self.weights = []           # one weight vector per class
        self.biases = []            # one bias per class
        self.classes = None         # sorted unique labels seen by fit()
        self.loss_history = []      # loss every 10 iterations, all classifiers appended
        self.accuracy_history = []  # binary accuracy sampled alongside the loss

    @staticmethod
    def _decision(X, w, b):
        """Return the signed scores ``X @ w + b`` as a 1-D tensor."""
        # mv keeps the result 1-D even for a single sample.
        return torch.mv(X, w) + b

    def compute_hinge_loss(self, X, y, w, b):
        """Return mean hinge loss plus the L2 penalty for labels ``y`` in {-1, +1}."""
        hinge_loss = torch.clamp(1 - y * self._decision(X, w, b), min=0)
        return torch.mean(hinge_loss) + self.lambda_param * torch.sum(w ** 2)

    def compute_accuracy_binary(self, X, y, w, b):
        """Return the fraction of samples whose predicted sign equals ``y``."""
        predictions = torch.sign(self._decision(X, w, b))
        # A score of exactly 0 counts as the positive class.
        predictions[predictions == 0] = 1
        return (predictions == y).float().mean().item()

    def fit_binary(self, X, y):
        """Train one binary classifier.

        Args:
            X: ``(n_samples, n_features)`` float tensor.
            y: labels in {-1, +1} with the dtype of ``X``.

        Returns:
            ``(w, b)`` - weight vector and bias tensor.
        """
        n_samples, n_features = X.shape

        w = torch.randn(n_features, device=X.device, dtype=X.dtype) * 0.01
        b = torch.tensor(0.0, device=X.device, dtype=X.dtype)

        for iteration in range(self.n_iter):
            # Samples inside the margin or misclassified drive the gradient.
            violating = (y * self._decision(X, w, b)) < 1

            dw = 2 * self.lambda_param * w
            db = torch.tensor(0.0, device=X.device, dtype=X.dtype)

            if violating.any():
                y_violating = y[violating]
                dw -= torch.mv(X[violating].T, y_violating) / n_samples
                db -= y_violating.sum() / n_samples

            w -= self.lr * dw
            b -= self.lr * db

            if iteration % 10 == 0:
                loss = self.compute_hinge_loss(X, y, w, b).item()
                acc = self.compute_accuracy_binary(X, y, w, b)
                self.loss_history.append(loss)
                self.accuracy_history.append(acc)

                if iteration % 100 == 0:
                    print(f"  Iteration {iteration}, Loss: {loss:.4f}, Accuracy: {acc:.4f}")

        return w, b

    def fit(self, X, y):
        """Train one classifier per distinct label in ``y``.

        Any previously learned classifiers and histories are discarded.
        """
        print("\n" + "-" * 70)
        print("TRAINING: SVM on GPU (One-vs-Rest)")
        print("-" * 70)

        # Start from a clean state so refitting does not keep stale
        # classifiers in front of the new ones.
        self.weights = []
        self.biases = []
        self.loss_history = []
        self.accuracy_history = []

        self.classes = torch.unique(y)
        n_classes = len(self.classes)

        print(f"Training {n_classes} binary classifiers on GPU...")

        for class_label in self.classes:
            print(f"\nTraining classifier for class {int(class_label.item())}...")
            # +1 for the current class, -1 for all others.
            is_class = (y == class_label).to(device=X.device, dtype=X.dtype)
            y_binary = is_class * 2 - 1
            w, b = self.fit_binary(X, y_binary)
            self.weights.append(w)
            self.biases.append(b)

    def _decision_values(self, X):
        """Return the ``(n_samples, n_classes)`` matrix of classifier scores."""
        if self.classes is None or not self.weights:
            raise RuntimeError("SVM must be fitted before predicting")
        columns = [self._decision(X, w, b) for w, b in zip(self.weights, self.biases)]
        return torch.stack(columns, dim=1)

    def predict_proba(self, X):
        """Return a softmax over the per-class scores.

        These are normalised scores, not calibrated probabilities.
        """
        return torch.softmax(self._decision_values(X), dim=1)

    def predict(self, X):
        """Return, for every sample, the label with the highest score."""
        predictions = torch.argmax(self._decision_values(X), dim=1)
        return self.classes[predictions]

# ============================================================================
# 8. MODEL EVALUATION
# ============================================================================

class ModelEvaluator:
    """Classification metrics and diagnostic plots.

    Label arguments may be torch tensors, NumPy arrays or plain sequences.
    """

    @staticmethod
    def _to_numpy(values):
        """Return ``values`` as a NumPy array, moving tensors to the CPU first."""
        if isinstance(values, torch.Tensor):
            return values.detach().cpu().numpy()
        return np.asarray(values)

    @staticmethod
    def confusion_matrix(y_true, y_pred, n_classes=3):
        """Return the ``(n_classes, n_classes)`` count matrix.

        Rows are true labels, columns predicted labels.
        """
        true_idx = ModelEvaluator._to_numpy(y_true).astype(int).ravel()
        pred_idx = ModelEvaluator._to_numpy(y_pred).astype(int).ravel()

        cm = np.zeros((n_classes, n_classes), dtype=int)
        # add.at accumulates correctly when the same cell occurs repeatedly.
        np.add.at(cm, (true_idx, pred_idx), 1)
        return cm

    @staticmethod
    def accuracy(y_true, y_pred):
        """Return the fraction of positions where both label sets agree."""
        if isinstance(y_true, torch.Tensor) and isinstance(y_pred, torch.Tensor):
            return (y_true == y_pred).float().mean().item()
        # Convert first: comparing two plain lists with == yields one bool,
        # not an element-wise result.
        true_arr = ModelEvaluator._to_numpy(y_true)
        pred_arr = ModelEvaluator._to_numpy(y_pred)
        return float(np.mean(true_arr == pred_arr))

    @staticmethod
    def precision_recall_f1(y_true, y_pred, n_classes=3):
        """Return per-class ``(precision, recall, f1)`` arrays.

        A metric whose denominator is zero is reported as 0.
        """
        cm = ModelEvaluator.confusion_matrix(y_true, y_pred, n_classes)

        tp = np.diag(cm).astype(float)
        predicted = cm.sum(axis=0)  # tp + fp per class
        actual = cm.sum(axis=1)     # tp + fn per class

        precision = np.divide(tp, predicted, out=np.zeros(n_classes), where=predicted > 0)
        recall = np.divide(tp, actual, out=np.zeros(n_classes), where=actual > 0)

        denom = precision + recall
        f1 = np.divide(2 * precision * recall, denom, out=np.zeros(n_classes), where=denom > 0)

        return precision, recall, f1

    @staticmethod
    def plot_confusion_matrix(cm, class_names, filename):
        """Save an annotated heatmap of ``cm`` to ``filename``."""
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                   xticklabels=class_names, yticklabels=class_names)
        plt.title('Confusion Matrix')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.tight_layout()
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()

    @staticmethod
    def plot_roc_curve(y_true, y_proba, n_classes, class_names, filename):
        """Save one-vs-rest ROC curves (with AUC in the legend) to ``filename``.

        Args:
            y_true: integer class labels.
            y_proba: ``(n_samples, n_classes)`` scores in ``[0, 1]``.
        """
        y_true = ModelEvaluator._to_numpy(y_true)
        y_proba = ModelEvaluator._to_numpy(y_proba)

        # np.trapz was renamed to np.trapezoid in NumPy 2.0.
        trapezoid = getattr(np, "trapezoid", None) or np.trapz

        plt.figure(figsize=(12, 8))

        thresholds = np.linspace(0, 1, 100)

        for i in range(n_classes):
            positives = (y_true == i)
            negatives = ~positives
            y_scores = y_proba[:, i]

            # Row t holds the predictions obtained with thresholds[t].
            predicted_pos = y_scores[np.newaxis, :] >= thresholds[:, np.newaxis]
            tp = (predicted_pos & positives).sum(axis=1)
            fp = (predicted_pos & negatives).sum(axis=1)

            n_pos = positives.sum()
            n_neg = negatives.sum()
            tpr_array = tp / n_pos if n_pos > 0 else np.zeros(len(thresholds))
            fpr_array = fp / n_neg if n_neg > 0 else np.zeros(len(thresholds))

            # Order by FPR, breaking ties by TPR, so the curve is monotone
            # and the area does not depend on how ties happen to be sorted.
            sorted_idx = np.lexsort((tpr_array, fpr_array))
            fpr_sorted = fpr_array[sorted_idx]
            tpr_sorted = tpr_array[sorted_idx]

            auc = trapezoid(tpr_sorted, fpr_sorted)

            plt.plot(fpr_sorted, tpr_sorted, label=f'{class_names[i]} (AUC = {auc:.3f})', linewidth=2)

        plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curves - Multiclass Classification')
        plt.legend(loc="lower right")
        plt.grid(alpha=0.3)
        plt.tight_layout()
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()

    @staticmethod
    def plot_training_curves(loss_history, accuracy_history, filename):
        """Save side-by-side loss and accuracy curves to ``filename``."""
        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))

        ax1.plot(loss_history, linewidth=2, color='red')
        ax1.set_xlabel('Iteration (x10)')
        ax1.set_ylabel('Loss')
        ax1.set_title('Training Loss over Iterations')
        ax1.grid(alpha=0.3)

        ax2.plot(accuracy_history, linewidth=2, color='blue')
        ax2.set_xlabel('Iteration (x10)')
        ax2.set_ylabel('Accuracy')
        ax2.set_title('Training Accuracy over Iterations')
        ax2.grid(alpha=0.3)

        plt.tight_layout()
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()

# ============================================================================
# 9. MAIN PIPELINE (Full CUDA with PyTorch)
# ============================================================================

def _report_gpu_allocated(label):
    """Print PyTorch's currently allocated CUDA memory in GB, prefixed by *label*.

    Prints nothing when ``GPU_AVAILABLE`` is false.
    """
    if GPU_AVAILABLE:
        print(f"{label}: {torch.cuda.memory_allocated() / 1e9:.2f} GB allocated")


def main(dataset_path=r'C:\Users\Wolf\PAGANI\chest_xray', output_dir="output_svm_cuda"):
    """Run the full classification pipeline and write its artefacts to disk.

    Steps, in order: load the dataset, augment it, remove outliers, normalise,
    extract radiomic features, scale them, reduce them with PCA, split into
    train/test sets, train the SVM, evaluate it, then save the confusion
    matrix, ROC curves, training curves and a JSON results summary.

    Args:
        dataset_path: Root directory handed to ``DatasetLoader``. Defaults to
            the location the script was originally written against.
        output_dir: Directory (created if missing) that receives the plots and
            ``results_summary_cuda.json``.
    """
    os.makedirs(output_dir, exist_ok=True)

    print("\n" + "=" * 70)
    print("CUDA-ACCELERATED LUNG DISEASE CLASSIFICATION")
    print("PyTorch CUDA - All Processing on GPU")
    print("=" * 70)

    # Monitor GPU memory
    if GPU_AVAILABLE:
        torch.cuda.empty_cache()
        _report_gpu_allocated("\nInitial GPU Memory")
        print(f"Initial GPU Memory: {torch.cuda.memory_reserved() / 1e9:.2f} GB reserved")

    # ========================================================================
    # STEP 1: Load Dataset (Transfer to GPU)
    # ========================================================================
    loader = DatasetLoader(dataset_path)
    images, labels = loader.load_dataset(target_size=(256, 256))
    _report_gpu_allocated("GPU Memory after loading")

    # ========================================================================
    # STEP 2: Preprocessing (GPU)
    # ========================================================================
    # NOTE(review): augmentation, scaling and PCA below all run on the complete
    # dataset before the train/test split, so test-set information may leak into
    # the fitted scaler/PCA (and, presumably, augmented variants of one image can
    # land on both sides of the split) - confirm whether this is intended.
    preprocessor = DataPreprocessor()

    images_aug, labels_aug = preprocessor.augment_data(images, labels, augmentation_factor=2)
    _report_gpu_allocated("GPU Memory after augmentation")

    images_clean, labels_clean = preprocessor.remove_outliers_iqr(images_aug, labels_aug)
    images_norm = preprocessor.normalize_images(images_clean)
    _report_gpu_allocated("GPU Memory after preprocessing")

    # ========================================================================
    # STEP 3: Feature Extraction (GPU)
    # ========================================================================
    feature_extractor = RadiomicFeatureExtractor()
    features = feature_extractor.extract_all_features(images_norm)
    _report_gpu_allocated("GPU Memory after feature extraction")

    # ========================================================================
    # STEP 4: Feature Fusion (GPU)
    # ========================================================================
    fusion = FeatureFusion()
    features_scaled = fusion.scale_features(features, fit=True)

    # ========================================================================
    # STEP 5: Dimensionality Reduction (GPU)
    # ========================================================================
    # Never request more components than there are feature columns.
    pca = PCA(n_components=min(7, features_scaled.shape[1]))
    features_pca = pca.fit_transform(features_scaled)
    _report_gpu_allocated("GPU Memory after PCA")

    # ========================================================================
    # STEP 6: Train-Test Split and Model Training (GPU)
    # ========================================================================
    class_names = ['Normal', 'Bacterial Pneumonia', 'Viral Pneumonia']
    train_ratio = 0.8

    # Round instead of truncating: (1 - 0.8) * 100 evaluates to
    # 19.999999999999996, which int() would report as 19%.
    train_pct = round(train_ratio * 100)
    test_pct = 100 - train_pct

    print("\n" + "=" * 70)
    print(f"TRAINING WITH {train_pct}% TRAIN / {test_pct}% TEST SPLIT")
    print("=" * 70)

    X_train, X_test, y_train, y_test = DataSplitter.train_test_split(
        features_pca, labels_clean, train_ratio=train_ratio
    )

    print(f"\nTrain set: {len(X_train)} samples (GPU)")
    print(f"Test set: {len(X_test)} samples (GPU)")
    _report_gpu_allocated("GPU Memory before training")

    # ====================================================================
    # Train SVM on GPU
    # ====================================================================
    # CUDA work is queued asynchronously, so drain the queue on both sides of
    # the timed region; otherwise pending kernels can fall outside the interval.
    if GPU_AVAILABLE:
        torch.cuda.synchronize()
    start_time = time.perf_counter()

    svm_model = SVM(learning_rate=0.001, n_iterations=500, C=1.0)
    svm_model.fit(X_train, y_train)

    if GPU_AVAILABLE:
        torch.cuda.synchronize()
    train_time = time.perf_counter() - start_time

    _report_gpu_allocated("\nGPU Memory after training")

    # Predictions (on GPU)
    y_pred = svm_model.predict(X_test)
    y_proba = svm_model.predict_proba(X_test)

    # Evaluation
    cm = ModelEvaluator.confusion_matrix(y_test, y_pred)
    accuracy = ModelEvaluator.accuracy(y_test, y_pred)
    precision, recall, f1 = ModelEvaluator.precision_recall_f1(y_test, y_pred)

    print("\n" + "=" * 70)
    print("RESULTS (CUDA-Accelerated with PyTorch)")
    print("=" * 70)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Training Time: {train_time:.2f}s")
    if GPU_AVAILABLE:
        print(f"Processing: 100% on {torch.cuda.get_device_name(0)}")

    for i, class_name in enumerate(class_names):
        print(f"\n{class_name}:")
        print(f"  Precision: {precision[i]:.4f}")
        print(f"  Recall: {recall[i]:.4f}")
        print(f"  F1-Score: {f1[i]:.4f}")

    # Save visualizations
    cm_filename = os.path.join(output_dir, f'confusion_matrix_CUDA_{train_pct}.png')
    ModelEvaluator.plot_confusion_matrix(cm, class_names, cm_filename)

    roc_filename = os.path.join(output_dir, f'roc_curve_CUDA_{train_pct}.png')
    # Derive the class count from the label list so the two cannot drift apart.
    ModelEvaluator.plot_roc_curve(y_test, y_proba, len(class_names), class_names, roc_filename)

    training_curves_filename = os.path.join(output_dir, f'training_curves_CUDA_{train_pct}.png')
    ModelEvaluator.plot_training_curves(svm_model.loss_history, svm_model.accuracy_history, training_curves_filename)

    # Save results
    results = {
        'model': 'SVM (PyTorch CUDA-Accelerated)',
        'gpu': torch.cuda.get_device_name(0) if GPU_AVAILABLE else 'CPU',
        'train_ratio': train_ratio,
        'accuracy': float(accuracy),
        'precision': precision.tolist(),
        'recall': recall.tolist(),
        'f1_score': f1.tolist(),
        'training_time': float(train_time),
        'confusion_matrix': cm.tolist(),
        # Fall back to 0.0 when the model recorded no history entries.
        'final_loss': float(svm_model.loss_history[-1]) if svm_model.loss_history else 0.0,
        'final_train_accuracy': float(svm_model.accuracy_history[-1]) if svm_model.accuracy_history else 0.0
    }

    results_file = os.path.join(output_dir, 'results_summary_cuda.json')
    with open(results_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=4)

    print(f"\nResults saved to: {results_file}")
    print(f"Visualizations saved to: {output_dir}/")

    # Final GPU memory status
    if GPU_AVAILABLE:
        _report_gpu_allocated("\nFinal GPU Memory")
        print(f"Peak GPU Memory: {torch.cuda.max_memory_allocated() / 1e9:.2f} GB")

    print("\n" + "=" * 70)
    print("CUDA-ACCELERATED PIPELINE COMPLETED SUCCESSFULLY!")
    print("=" * 70)


# Run the pipeline only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()

GPU INITIALIZATION
GPU Device: NVIDIA GeForce RTX 3060
GPU Memory: 12.88 GB
CUDA Version: 12.1
CUDA enabled - All processing will run on GPU!

CUDA-ACCELERATED LUNG DISEASE CLASSIFICATION
PyTorch CUDA - All Processing on GPU

Initial GPU Memory: 0.00 GB allocated
Initial GPU Memory: 0.00 GB reserved
DATASET LOADING AND QUALITY CHECK

Processing Normal...

Processing Pneumonia_bacterial...

Processing Pneumonia_viral...

DATASET STATISTICS
Normal:
  Total: 1583
  Loaded: 1583
  Rejected: 0
Pneumonia_bacterial:
  Total: 2780
  Loaded: 2780
  Rejected: 0
Pneumonia_viral:
  Total: 1493
  Loaded: 1493
  Rejected: 0

Transferring data to GPU...
Final Dataset Shape: torch.Size([5856, 256, 256])
Labels Shape: torch.Size([5856])
Data location: cuda:0
GPU Memory after loading: 1.54 GB allocated

DATA AUGMENTATION (GPU-Accelerated)
Applying CLAHE enhancement...
  Processing 0/5856...
  Processing 500/5856...
  Processing 1000/5856...
  Processing 1500/5856...
  Processing 2000/5856...
  Processin