In [1]:
import os
import numpy as np
import cv2
from collections import defaultdict
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import json
import time
import warnings
warnings.filterwarnings('ignore')

# PyTorch CUDA support
# Picks the compute device once, at import time. GPU_AVAILABLE and device are
# module-level globals that the classes and main() below read.
try:
    import torch
except ImportError:
    # torch is unbound in this branch, so it must not be referenced here:
    # the earlier torch.device('cpu') call raised NameError before the
    # installation hint could be printed.
    GPU_AVAILABLE = False
    device = None
    print("ERROR: PyTorch not available. Please install with: pip install torch torchvision")
    # exit() is injected by the site module and is not guaranteed to exist;
    # raising SystemExit gives the same exit status everywhere.
    raise SystemExit(1)

if torch.cuda.is_available():
    GPU_AVAILABLE = True
    device = torch.device('cuda:0')
    print("=" * 70)
    print("GPU INITIALIZATION")
    print("=" * 70)
    print(f"GPU Device: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"CUDA Version: {torch.version.cuda}")
    print("CUDA enabled - All processing will run on GPU!")
    print("=" * 70)
else:
    GPU_AVAILABLE = False
    device = torch.device('cpu')
    print("WARNING: CUDA not available. Using CPU.")

# ============================================================================
# 1. DATASET PREPARATION AND LOADING
# ============================================================================
class DatasetLoader:
    """Load grayscale images from per-class folders and move them to ``device``.

    ``dataset_path`` is expected to contain one sub-folder per entry of
    ``self.categories``. Every image that passes the quality check is resized
    and stacked into one float32 tensor; labels come from ``self.label_map``.
    """

    # File extensions accepted as images (compared case-insensitively).
    IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

    def __init__(self, dataset_path):
        self.dataset_path = dataset_path
        self.categories = ['Normal', 'Pneumonia_bacterial', 'Pneumonia_viral']
        # Populated by load_dataset() with the tensors it returns.
        self.images = []
        self.labels = []
        self.label_map = {'Normal': 0, 'Pneumonia_bacterial': 1, 'Pneumonia_viral': 2}

    def _read_checked(self, img_path):
        """Decode ``img_path`` as grayscale and apply the quality checks.

        Returns:
            ``(image, reason)``: the decoded array and ``"OK"`` when the image
            is accepted, otherwise ``None`` and a short rejection reason.
        """
        try:
            img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if img is None:
                return None, "Corrupted"
            if img.shape[0] < 64 or img.shape[1] < 64:
                return None, "Low resolution"
            mean_intensity = np.mean(img)
            if mean_intensity < 10 or mean_intensity > 245:
                return None, "Poor contrast"
            return img, "OK"
        except Exception as e:
            # Deliberately best-effort: any failure while reading one file
            # rejects that file instead of aborting the whole load.
            return None, str(e)

    def check_image_quality(self, img_path):
        """Return ``(is_valid, reason)`` for the image stored at ``img_path``."""
        img, reason = self._read_checked(img_path)
        return img is not None, reason

    def load_dataset(self, target_size=(256, 256), max_samples_per_class=None):
        """Read, quality-check and resize every image, then move the data to ``device``.

        Args:
            target_size: size passed to ``cv2.resize`` for every accepted image.
            max_samples_per_class: when not ``None``, only the first this-many
                file names (in sorted order) of each class are processed.

        Returns:
            ``(images, labels)``: a float32 image tensor and an int64 label
            tensor, both on ``device``. They are also stored on the instance.
        """
        print("=" * 70)
        print("DATASET LOADING AND QUALITY CHECK")
        print("=" * 70)

        stats = {cat: {'total': 0, 'loaded': 0, 'rejected': 0} for cat in self.categories}

        # Local accumulators: self.images becomes a tensor at the end, so
        # appending to it would break a second call on the same loader.
        images = []
        labels = []

        for category in self.categories:
            cat_path = os.path.join(self.dataset_path, category)
            if not os.path.isdir(cat_path):
                print(f"Warning: {category} folder not found!")
                continue

            # Sorted so that max_samples_per_class selects the same files on
            # every run (os.listdir order is arbitrary); lower() also accepts
            # upper-case extensions such as ".JPG".
            files = sorted(
                f for f in os.listdir(cat_path)
                if f.lower().endswith(self.IMAGE_EXTENSIONS)
            )
            stats[category]['total'] = len(files)

            if max_samples_per_class is not None:
                files = files[:max_samples_per_class]

            print(f"\nProcessing {category}...")
            for filename in files:
                # One decode per file: the checked image is reused directly.
                img, _reason = self._read_checked(os.path.join(cat_path, filename))
                if img is None:
                    stats[category]['rejected'] += 1
                    continue
                images.append(cv2.resize(img, target_size))
                labels.append(self.label_map[category])
                stats[category]['loaded'] += 1

        print("\n" + "=" * 70)
        print("DATASET STATISTICS")
        print("=" * 70)
        for cat in self.categories:
            print(f"{cat}:")
            print(f"  Total: {stats[cat]['total']}")
            print(f"  Loaded: {stats[cat]['loaded']}")
            print(f"  Rejected: {stats[cat]['rejected']}")

        if images:
            images_np = np.array(images, dtype=np.float32)
        else:
            # Keep a 3-D shape even when nothing was loaded; cv2.resize takes
            # (width, height) and produces arrays shaped (height, width).
            width, height = target_size
            images_np = np.empty((0, height, width), dtype=np.float32)
        labels_np = np.array(labels, dtype=np.int64)

        print(f"\nTransferring data to GPU...")
        self.images = torch.from_numpy(images_np).to(device)
        self.labels = torch.from_numpy(labels_np).to(device)

        print(f"Final Dataset Shape: {self.images.shape}")
        print(f"Labels Shape: {self.labels.shape}")
        print(f"Data location: {self.images.device}")

        return self.images, self.labels

# ============================================================================
# 2. DATA PREPROCESSING (GPU-Optimized)
# ============================================================================
class DataPreprocessor:
    """Augmentation, IQR outlier removal and z-score normalisation of image stacks.

    Images are handled as one tensor indexed ``(image, row, column)``. The
    OpenCV based steps (CLAHE, rotation) copy each image to host memory and
    move the result back to ``device``; the remaining steps are tensor ops.
    """

    def __init__(self):
        # Global statistics recorded by normalize_images().
        self.mean = None
        self.std = None

    @staticmethod
    def _to_uint8_numpy(image):
        """Copy one image tensor to a host uint8 array, saturating to [0, 255]."""
        # Clip before casting: a bare astype(np.uint8) wraps out-of-range
        # values instead of saturating them.
        return np.clip(image.cpu().numpy(), 0, 255).astype(np.uint8)

    def apply_clahe_batch(self, images):
        """Apply CLAHE (clip limit 2.0, 8x8 tiles) to each image.

        Returns:
            A uint8 tensor on ``device`` with the same shape as ``images``.
        """
        if len(images) == 0:
            return torch.empty(tuple(images.shape), dtype=torch.uint8, device=device)
        # One CLAHE object serves the whole batch.
        clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
        results = [clahe.apply(self._to_uint8_numpy(img)) for img in images]
        return torch.from_numpy(np.stack(results)).to(device)

    def add_gaussian_noise_gpu(self, images, mean=0, sigma=10):
        """Add Gaussian noise N(mean, sigma^2) and clamp the result to [0, 255]."""
        noise = torch.randn_like(images) * sigma + mean
        return torch.clamp(images + noise, 0, 255)

    def rotate_image_batch(self, images, angle):
        """Rotate each image by ``angle`` degrees about its centre.

        Returns:
            A uint8 tensor on ``device`` with the same shape as ``images``.
        """
        if len(images) == 0:
            return torch.empty(tuple(images.shape), dtype=torch.uint8, device=device)
        # All images in the stack share one size, so one matrix is enough.
        h, w = images.shape[-2:]
        M = cv2.getRotationMatrix2D((w // 2, h // 2), angle, 1.0)
        results = [cv2.warpAffine(self._to_uint8_numpy(img), M, (w, h)) for img in images]
        return torch.from_numpy(np.stack(results)).to(device)

    def flip_image_gpu(self, images, direction='horizontal'):
        """Mirror the stack along dim 2 for ``'horizontal'``, otherwise along dim 1."""
        if direction == 'horizontal':
            return torch.flip(images, dims=[2])
        return torch.flip(images, dims=[1])

    def _augment_in_batches(self, images, transform, batch_size=50):
        """Apply ``transform`` to consecutive slices of ``images`` and concatenate."""
        n_images = len(images)
        chunks = []
        for start in range(0, n_images, batch_size):
            if start % 500 == 0:
                print(f" Processing {start}/{n_images}...")
            chunks.append(transform(images[start:start + batch_size]))
            if GPU_AVAILABLE:
                torch.cuda.empty_cache()
        return torch.cat(chunks, dim=0)

    def augment_data(self, images, labels, augmentation_factor=0):
        """Append augmented copies of ``images`` to the originals.

        Stages are cumulative: a factor of at least 1 adds CLAHE, 2 adds a
        15 degree rotation, 3 adds a horizontal flip and 4 adds Gaussian noise
        (sigma 5). Every stage works on the original images and contributes one
        extra copy of ``labels``.

        Returns:
            ``(images, labels)`` holding the originals followed by one block
            per applied stage. With a factor of 0, or with no images, the
            inputs are returned unchanged.
        """
        print("\n" + "=" * 70)
        print("DATA AUGMENTATION (GPU-Accelerated)")
        print("=" * 70)

        n_original = len(images)

        if augmentation_factor == 0:
            print("No augmentation applied - using original data only")
            return images, labels

        if n_original == 0:
            # Nothing to augment; also avoids the division by zero below.
            print("No images to augment - returning input unchanged")
            return images, labels

        stages = [
            ("Applying CLAHE enhancement...", self.apply_clahe_batch),
            ("Applying rotation augmentation...",
             lambda batch: self.rotate_image_batch(batch, 15)),
            ("Applying flip augmentation...",
             lambda batch: self.flip_image_gpu(batch, 'horizontal')),
            ("Applying noise augmentation...",
             lambda batch: self.add_gaussian_noise_gpu(batch, sigma=5)),
        ]

        parts = [images]
        for level, (message, transform) in enumerate(stages, start=1):
            if augmentation_factor < level:
                break
            print(message)
            augmented = self._augment_in_batches(images, transform, batch_size=50)
            # The OpenCV stages return uint8; match the originals' dtype so
            # the concatenation below has a single, explicit dtype.
            parts.append(augmented.to(images.dtype))
            del augmented
            if GPU_AVAILABLE:
                torch.cuda.empty_cache()

        final_images = torch.cat(parts, dim=0)
        final_labels = torch.cat([labels] * len(parts), dim=0)
        del parts
        if GPU_AVAILABLE:
            torch.cuda.empty_cache()

        print(f"\nOriginal dataset size: {n_original}")
        print(f"Augmented dataset size: {len(final_images)}")
        print(f"Augmentation multiplier: {len(final_images) / n_original:.1f}x")
        print(f"Data stored on: {final_images.device}")

        return final_images, final_labels

    def remove_outliers_iqr(self, images, labels):
        """Drop images whose mean intensity lies outside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]``.

        Returns:
            ``(clean_images, clean_labels)`` restricted to the kept samples.
            Empty input is returned unchanged.
        """
        print("\n" + "=" * 70)
        print("OUTLIER REMOVAL (IQR Method - GPU)")
        print("=" * 70)

        if len(images) == 0:
            # torch.quantile rejects empty input.
            print("Original samples: 0")
            print("Outliers removed: 0")
            print("Remaining samples: 0")
            return images, labels

        # One mean per image; reshape (unlike view) also accepts
        # non-contiguous input.
        mean_intensities = images.reshape(len(images), -1).mean(dim=1)

        Q1 = torch.quantile(mean_intensities, 0.25)
        Q3 = torch.quantile(mean_intensities, 0.75)
        IQR = Q3 - Q1

        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR

        mask = (mean_intensities >= lower_bound) & (mean_intensities <= upper_bound)
        indices = torch.where(mask)[0]

        print(f"Original samples: {len(images)}")
        print(f"Outliers removed: {(~mask).sum().item()}")
        print(f"Remaining samples: {len(indices)}")

        clean_images = torch.index_select(images, 0, indices)
        clean_labels = torch.index_select(labels, 0, indices)

        return clean_images, clean_labels

    def normalize_images(self, images):
        """Z-score normalise with one global mean and std over all pixels.

        The statistics are stored in ``self.mean`` and ``self.std``.
        """
        print("\n" + "=" * 70)
        print("IMAGE NORMALIZATION (Z-Score - GPU)")
        print("=" * 70)

        self.mean = torch.mean(images)
        self.std = torch.std(images)

        print(f"Global Mean: {self.mean.item():.2f}")
        print(f"Global Std: {self.std.item():.2f}")

        # The epsilon keeps a constant-valued stack from dividing by zero.
        return (images - self.mean) / (self.std + 1e-8)

# ============================================================================
# 3. RADIOMIC FEATURE EXTRACTION (GPU-Optimized)
# ============================================================================
class RadiomicFeatureExtractor:
    """Extract seven hand-crafted features per image.

    Three statistical values (mean, variance, std), two GLCM texture values
    (contrast, homogeneity) and two Gabor filter responses (mean absolute
    response, std). Each feature tensor is created on the device of the image
    it was computed from, so no host round-trip is needed per feature.
    """

    # Number of values produced per image by extract_all_features().
    N_FEATURES = 7

    def __init__(self):
        pass

    @staticmethod
    def _as_feature_vector(values):
        """Stack 0-d tensors into one detached vector of the default float dtype."""
        return torch.stack(values).detach().to(torch.get_default_dtype())

    def extract_statistical_features_gpu(self, image):
        """Return ``[mean, variance, std]`` of ``image`` as a length-3 tensor."""
        return self._as_feature_vector(
            [torch.mean(image), torch.var(image), torch.std(image)]
        )

    def compute_glcm_gpu(self, image, distance=1, angle=0):
        """Compute a normalised 16-level grey-level co-occurrence matrix.

        Intensities are quantised with ``image / 16`` (truncated, then clamped
        to 0..15). Each pixel is paired with the neighbour at a
        ``(row, column)`` offset of ``(0, d)`` for angle 0, ``(d, d)`` for 45,
        ``(d, 0)`` for 90 and ``(d, -d)`` for any other angle.

        Returns:
            A ``(16, 16)`` float64 tensor on ``image.device``. It sums to 1, or
            is all zeros when no pixel has its neighbour inside the image.
        """
        levels = 16
        quantized = torch.clamp((image / (256 / levels)).long(), 0, levels - 1)

        rows, cols = quantized.shape
        if angle == 0:
            d_row, d_col = 0, distance
        elif angle == 45:
            d_row, d_col = distance, distance
        elif angle == 90:
            d_row, d_col = distance, 0
        else:
            d_row, d_col = distance, -distance

        # Range of reference pixels whose neighbour is still inside the image.
        r0, r1 = max(0, -d_row), min(rows, rows - d_row)
        c0, c1 = max(0, -d_col), min(cols, cols - d_col)

        counts = torch.zeros(levels * levels, dtype=torch.float64, device=image.device)
        if r1 > r0 and c1 > c0:
            reference = quantized[r0:r1, c0:c1]
            neighbour = quantized[r0 + d_row:r1 + d_row, c0 + d_col:c1 + d_col]
            # Encode each (reference, neighbour) pair as one integer so that a
            # single bincount replaces the per-pixel Python loop.
            pair_codes = (reference * levels + neighbour).reshape(-1)
            counts = torch.bincount(pair_codes, minlength=levels * levels).to(torch.float64)

        glcm = counts.view(levels, levels)
        return glcm / (glcm.sum() + 1e-10)

    def extract_texture_features_gpu(self, image):
        """Return ``[contrast, homogeneity]`` of the distance-1, angle-0 GLCM."""
        glcm = self.compute_glcm_gpu(image, distance=1, angle=0)
        i_idx = torch.arange(glcm.shape[0], device=glcm.device).view(-1, 1).float()
        j_idx = torch.arange(glcm.shape[1], device=glcm.device).view(1, -1).float()
        squared_diff = (i_idx - j_idx) ** 2
        contrast = torch.sum(glcm * squared_diff)
        homogeneity = torch.sum(glcm / (1 + squared_diff))
        return self._as_feature_vector([contrast, homogeneity])

    def gabor_kernel_gpu(self, ksize, sigma, theta, lambd, gamma, psi, target_device=None):
        """Build a Gabor kernel covering ``-ksize//2 .. ksize//2`` on both axes.

        Args:
            ksize, sigma, theta, lambd, gamma, psi: kernel extent, Gaussian
                width, orientation (radians), wavelength, aspect ratio and
                phase offset.
            target_device: device for the kernel; defaults to the module-level
                ``device``.
        """
        if target_device is None:
            target_device = device
        sigma_x = sigma
        sigma_y = sigma / gamma
        half = ksize // 2
        y = torch.arange(-half, half + 1, device=target_device).view(-1, 1).float()
        x = torch.arange(-half, half + 1, device=target_device).view(1, -1).float()
        # The rotation terms are computed once instead of building four
        # scalar tensors.
        angle = torch.tensor(float(theta), device=target_device)
        cos_t, sin_t = torch.cos(angle), torch.sin(angle)
        x_theta = x * cos_t + y * sin_t
        y_theta = -x * sin_t + y * cos_t
        exp_part = torch.exp(-0.5 * (x_theta**2 / sigma_x**2 + y_theta**2 / sigma_y**2))
        cos_part = torch.cos(2 * np.pi * x_theta / lambd + psi)
        return exp_part * cos_part

    def _default_gabor_kernel(self, target_device):
        """Kernel used for the filter features: 21x21, sigma 3, theta 0, lambda 10."""
        return self.gabor_kernel_gpu(21, 3, 0, 10, 0.5, 0, target_device=target_device)

    def extract_filter_features_gpu(self, image, kernel=None):
        """Return ``[mean |response|, std response]`` of the Gabor-filtered image.

        Args:
            image: a single 2-D image tensor.
            kernel: optional pre-built kernel; when omitted the default Gabor
                kernel is created on ``image.device``. Passing it lets a caller
                build the kernel once for many images.
        """
        if kernel is None:
            kernel = self._default_gabor_kernel(image.device)
        padding = (kernel.shape[0] // 2, kernel.shape[1] // 2)
        filtered = torch.nn.functional.conv2d(
            image.unsqueeze(0).unsqueeze(0),
            kernel.unsqueeze(0).unsqueeze(0),
            padding=padding,
        ).squeeze()
        return self._as_feature_vector(
            [torch.mean(torch.abs(filtered)), torch.std(filtered)]
        )

    def extract_all_features(self, images):
        """Extract the seven features for every image.

        Each image is min-max rescaled to [0, 255] before the extractors run.

        Returns:
            A tensor with one row per image and seven columns; ``(0, 7)`` when
            ``images`` is empty.
        """
        print("\n" + "=" * 70)
        print("RADIOMIC FEATURE EXTRACTION (GPU-Accelerated)")
        print("=" * 70)
        print("Feature Categories:")
        print(" 1. Statistical: Mean, Variance, Std")
        print(" 2. Texture: GLCM Contrast, Homogeneity")
        print(" 3. Filter-based: Gabor Mean Response, Gabor Std Response")
        print("=" * 70)

        n_images = len(images)
        if n_images == 0:
            # torch.cat on an empty list raises; return an empty feature table.
            return torch.empty((0, self.N_FEATURES), device=getattr(images, "device", None))

        batch_size = 50
        # The filter parameters are fixed, so the kernel is built once here.
        kernel = self._default_gabor_kernel(images[0].device)

        all_features = []
        for start_idx in range(0, n_images, batch_size):
            end_idx = min(start_idx + batch_size, n_images)
            if start_idx % 500 == 0:
                print(f"Extracting features from image {start_idx}/{n_images}...")

            batch_features = []
            for idx in range(start_idx, end_idx):
                img = images[idx]
                img_min = torch.min(img)
                img_max = torch.max(img)
                # Still floating point; only the value range is 0..255.
                img_scaled = (img - img_min) / (img_max - img_min + 1e-8) * 255

                batch_features.append(torch.cat([
                    self.extract_statistical_features_gpu(img_scaled),
                    self.extract_texture_features_gpu(img_scaled),
                    self.extract_filter_features_gpu(img_scaled, kernel=kernel),
                ]))

            all_features.append(torch.stack(batch_features))
            if GPU_AVAILABLE:
                torch.cuda.empty_cache()

        all_features = torch.cat(all_features, dim=0)

        print(f"\nTotal features extracted per image: {all_features.shape[1]}")
        print(f"All features stored on: {all_features.device}")

        return all_features

# ============================================================================
# 4. FEATURE FUSION AND SCALING (GPU)
# ============================================================================
class FeatureFusion:
    """Column-wise z-score scaling of a feature matrix."""

    def __init__(self):
        # Per-column statistics, set by scale_features(fit=True).
        self.mean = None
        self.std = None

    def scale_features(self, features, fit=True):
        """Standardise ``features`` column by column.

        Args:
            features: 2-D tensor with one row per sample.
            fit: when true, compute and store the column means and stds from
                ``features``; when false, reuse the stored ones.

        Returns:
            ``(features - mean) / std``. Columns whose std is zero or undefined
            are divided by 1, so they come out as zeros rather than NaN/inf.

        Raises:
            RuntimeError: if ``fit`` is false and no parameters were fitted.
        """
        print("\n" + "=" * 70)
        print("FEATURE SCALING (GPU - Z-Score)")
        print("=" * 70)

        if fit:
            self.mean = torch.mean(features, dim=0)
            std = torch.std(features, dim=0)
            # std is 0 for constant columns and NaN when only one sample is
            # given; both would otherwise poison the division below.
            unusable = torch.isnan(std) | (std == 0)
            self.std = torch.where(unusable, torch.ones_like(std), std)
            print("Fitted scaling parameters on GPU")
        elif self.mean is None or self.std is None:
            raise RuntimeError(
                "scale_features(fit=False) called before the scaler was fitted"
            )

        scaled = (features - self.mean) / self.std

        print(f"Scaled features shape: {scaled.shape}")
        print(f"Mean: {torch.mean(scaled).item():.4f}")
        print(f"Std: {torch.std(scaled).item():.4f}")

        return scaled

# ============================================================================
# 5. DIMENSIONALITY REDUCTION (PCA on GPU)
# ============================================================================
class PCA:
    """Principal component analysis via eigendecomposition of the covariance matrix."""

    def __init__(self, n_components=50):
        self.n_components = n_components
        # Set by fit(): projection matrix, column means and kept eigenvalues.
        self.components = None
        self.mean = None
        self.explained_variance = None

    def fit(self, X):
        """Fit the projection on ``X`` (one row per sample).

        At most ``X.shape[1]`` components are kept, even when ``n_components``
        asks for more.

        Returns:
            ``self``.

        Raises:
            ValueError: if ``n_components`` is smaller than 1.
        """
        print("\n" + "=" * 70)
        print(f"DIMENSIONALITY REDUCTION (PCA on GPU - {self.n_components} components)")
        print("=" * 70)

        if self.n_components < 1:
            raise ValueError(f"n_components must be >= 1, got {self.n_components}")

        n_samples, n_features = X.shape
        self.mean = torch.mean(X, dim=0)
        X_centered = X - self.mean
        # max(..., 1) keeps a single-sample fit from dividing by zero.
        cov_matrix = torch.mm(X_centered.T, X_centered) / max(n_samples - 1, 1)

        eigenvalues, eigenvectors = torch.linalg.eigh(cov_matrix)
        # eigh returns eigenvalues in ascending order; reverse to get the
        # largest variance first. Tiny negative values are round-off only.
        eigenvalues = torch.flip(eigenvalues, dims=[0]).clamp(min=0)
        eigenvectors = torch.flip(eigenvectors, dims=[1])

        n_kept = min(self.n_components, n_features)
        self.components = eigenvectors[:, :n_kept]
        self.explained_variance = eigenvalues[:n_kept]

        total_var = torch.sum(eigenvalues)
        # Guard the ratio against an all-constant X (zero total variance).
        explained_ratio = self.explained_variance / total_var.clamp(
            min=torch.finfo(eigenvalues.dtype).tiny
        )

        print(f"Original dimension: {n_features}")
        print(f"Reduced dimension: {n_kept}")
        print(f"Total variance explained: {explained_ratio.sum().item():.4f}")
        print(f"Computation done on: {X.device}")

        return self

    def transform(self, X):
        """Project ``X`` onto the fitted components.

        Raises:
            RuntimeError: if called before ``fit``.
        """
        if self.components is None or self.mean is None:
            raise RuntimeError("PCA.transform called before fit")
        return torch.mm(X - self.mean, self.components)

    def fit_transform(self, X):
        """Fit on ``X`` and return its projection."""
        return self.fit(X).transform(X)

# ============================================================================
# 6. DATA SPLITTING (GPU)
# ============================================================================
class DataSplitter:
    """Seeded shuffling and train/test splitting of paired tensors."""

    @staticmethod
    def shuffle_data(X, y, seed=42):
        """Return ``X`` and ``y`` reordered by one shared random permutation.

        Note: this seeds torch's global random generator with ``seed``.

        Raises:
            ValueError: if ``X`` and ``y`` differ in length.
        """
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        torch.manual_seed(seed)
        # Build the permutation on the data's own device so the indexing below
        # never mixes devices.
        indices = torch.randperm(len(X), device=X.device)
        return X[indices], y[indices.to(y.device)]

    @staticmethod
    def train_test_split(X, y, train_ratio=0.7, seed=42):
        """Shuffle, then split into ``(X_train, X_test, y_train, y_test)``.

        The first ``int(len(X) * train_ratio)`` shuffled samples form the
        training set and the rest the test set.

        Raises:
            ValueError: if ``train_ratio`` is outside ``[0, 1]`` or the inputs
                differ in length.
        """
        if not 0.0 <= train_ratio <= 1.0:
            raise ValueError(f"train_ratio must be within [0, 1], got {train_ratio}")
        X_shuffled, y_shuffled = DataSplitter.shuffle_data(X, y, seed)
        n_train = int(len(X) * train_ratio)
        return (
            X_shuffled[:n_train],
            X_shuffled[n_train:],
            y_shuffled[:n_train],
            y_shuffled[n_train:],
        )

# ============================================================================
# 7. LOGISTIC REGRESSION CLASSIFIER (Full GPU Implementation) - FIXED
# ============================================================================
class LogisticRegressionGPU:
    """Multiclass (softmax) logistic regression trained by full-batch gradient descent.

    Parameters are created on the device and with the dtype of the training
    data, so the model runs wherever ``X`` lives.
    """

    def __init__(self, learning_rate=0.01, n_iterations=2000, regularization=0.01):
        self.lr = learning_rate
        self.n_iters = n_iterations
        self.l2 = regularization
        self.weights = None
        self.bias = None
        self.n_classes = None
        # One dict per class (index, sample count, prior) from the last fit.
        self.training_history = []

    def fit(self, X, y):
        """Train on ``X`` (samples x features) with integer class labels ``y``.

        The number of classes is ``max(y) + 1`` so that every label has its own
        output column even when some class is absent from ``y``.

        Returns:
            ``self``.

        Raises:
            ValueError: if ``X`` contains no samples.
        """
        print("\n" + "-" * 70)
        print("TRAINING: Logistic Regression on GPU (Multiclass)")
        print("-" * 70)

        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("Cannot fit logistic regression on an empty dataset")

        y = y.to(device=X.device, dtype=torch.long)
        # Counting unique labels breaks the one-hot indexing whenever the
        # labels are not exactly 0..k-1 (e.g. one class missing).
        self.n_classes = int(y.max().item()) + 1

        # Initialize parameters
        self.weights = torch.randn(
            n_features, self.n_classes, device=X.device, dtype=X.dtype
        ) * 0.01
        self.bias = torch.zeros(self.n_classes, device=X.device, dtype=X.dtype)

        # One-hot encode labels
        y_onehot = torch.nn.functional.one_hot(y, num_classes=self.n_classes).to(X.dtype)

        with torch.no_grad():
            for i in range(self.n_iters):
                linear = torch.mm(X, self.weights) + self.bias
                # log_softmax is the numerically stable form of log(softmax(.)).
                log_probs = torch.log_softmax(linear, dim=1)
                y_pred = log_probs.exp()
                residual = y_pred - y_onehot

                # Gradients of cross-entropy + L2
                grad_w = torch.mm(X.t(), residual) / n_samples + 2 * self.l2 * self.weights
                grad_b = torch.sum(residual, dim=0) / n_samples

                if i % 500 == 0:
                    # The loss is only needed for reporting, so it is computed
                    # on logging iterations only (with the pre-update weights).
                    loss = -torch.mean(torch.sum(y_onehot * log_probs, dim=1))
                    loss = loss + self.l2 * torch.sum(self.weights ** 2)
                    acc = (torch.argmax(y_pred, dim=1) == y).float().mean().item()
                    print(f"  Iteration {i}: Loss = {loss.item():.4f}, Accuracy = {acc:.4f}")

                # Update
                self.weights -= self.lr * grad_w
                self.bias -= self.lr * grad_b

        # Store class distribution (reset so repeated fits do not accumulate).
        counts = torch.bincount(y, minlength=self.n_classes).tolist()
        self.training_history = [
            {'class': c, 'n_samples': count, 'prior': count / n_samples}
            for c, count in enumerate(counts)
        ]

        print("Logistic Regression training completed on GPU!")
        return self

    def predict_proba(self, X):
        """Return the softmax class probabilities, one row per sample.

        Raises:
            RuntimeError: if the model has not been fitted.
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("LogisticRegressionGPU used before fit")
        linear = torch.mm(X, self.weights) + self.bias
        return torch.softmax(linear, dim=1)

    def predict(self, X):
        """Return the most probable class index for each sample."""
        return torch.argmax(self.predict_proba(X), dim=1)

# ============================================================================
# 8. MODEL EVALUATION
# ============================================================================
class ModelEvaluator:
    """Classification metrics and plots. Inputs may be tensors, arrays or sequences."""

    @staticmethod
    def _to_numpy(values):
        """Return ``values`` as a NumPy array, copying tensors to host memory."""
        if isinstance(values, torch.Tensor):
            return values.detach().cpu().numpy()
        return np.asarray(values)

    @staticmethod
    def _paired_labels(y_true, y_pred):
        """Convert both label collections to flat arrays and check they match in size."""
        y_true = ModelEvaluator._to_numpy(y_true).ravel()
        y_pred = ModelEvaluator._to_numpy(y_pred).ravel()
        if y_true.shape != y_pred.shape:
            raise ValueError(
                f"y_true and y_pred differ in size: {y_true.size} vs {y_pred.size}"
            )
        return y_true, y_pred

    @staticmethod
    def confusion_matrix(y_true, y_pred, n_classes=3):
        """Return an ``(n_classes, n_classes)`` int matrix; rows are true labels.

        Raises:
            ValueError: if the two label collections differ in size.
        """
        y_true, y_pred = ModelEvaluator._paired_labels(y_true, y_pred)
        cm = np.zeros((n_classes, n_classes), dtype=int)
        # add.at accumulates repeated (true, pred) pairs correctly.
        np.add.at(cm, (y_true.astype(int), y_pred.astype(int)), 1)
        return cm

    @staticmethod
    def accuracy(y_true, y_pred):
        """Return the fraction of positions where ``y_true`` equals ``y_pred``.

        Raises:
            ValueError: if the two label collections differ in size.
        """
        # Converting first makes plain lists compare element-wise; comparing
        # two lists with == yields a single bool.
        y_true, y_pred = ModelEvaluator._paired_labels(y_true, y_pred)
        return float(np.mean(y_true == y_pred))

    @staticmethod
    def precision_recall_f1(y_true, y_pred, n_classes=3):
        """Return per-class ``(precision, recall, f1)`` arrays.

        A metric whose denominator is zero is reported as 0.
        """
        cm = ModelEvaluator.confusion_matrix(y_true, y_pred, n_classes)
        tp = np.diag(cm).astype(float)
        predicted = cm.sum(axis=0).astype(float)  # tp + fp for each class
        actual = cm.sum(axis=1).astype(float)     # tp + fn for each class

        precision = np.divide(tp, predicted, out=np.zeros(n_classes), where=predicted > 0)
        recall = np.divide(tp, actual, out=np.zeros(n_classes), where=actual > 0)
        pr_sum = precision + recall
        f1 = np.divide(2 * precision * recall, pr_sum, out=np.zeros(n_classes), where=pr_sum > 0)
        return precision, recall, f1

    @staticmethod
    def plot_confusion_matrix(cm, class_names, filename):
        """Save an annotated heatmap of ``cm`` to ``filename``."""
        plt.figure(figsize=(10, 8))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names, yticklabels=class_names)
        plt.title('Confusion Matrix')
        plt.ylabel('True Label')
        plt.xlabel('Predicted Label')
        plt.tight_layout()
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()

    @staticmethod
    def _roc_points(is_positive, scores, n_thresholds=100):
        """Return ``(fpr, tpr, auc)`` for one class, with points sorted by FPR then TPR."""
        thresholds = np.linspace(0, 1, n_thresholds)
        # Row t holds the binary predictions at thresholds[t].
        predicted = scores[None, :] >= thresholds[:, None]
        n_pos = int(is_positive.sum())
        n_neg = int((~is_positive).sum())
        tp = (predicted & is_positive[None, :]).sum(axis=1)
        fp = (predicted & ~is_positive[None, :]).sum(axis=1)
        tpr = tp / n_pos if n_pos > 0 else np.zeros(n_thresholds)
        fpr = fp / n_neg if n_neg > 0 else np.zeros(n_thresholds)

        # Break FPR ties by TPR so the plotted curve never zig-zags.
        order = np.lexsort((tpr, fpr))
        fpr, tpr = fpr[order], tpr[order]
        # NumPy 2 renamed trapz to trapezoid; support both.
        integrate = getattr(np, "trapezoid", None) or np.trapz
        return fpr, tpr, float(integrate(tpr, fpr))

    @staticmethod
    def plot_roc_curve(y_true, y_proba, n_classes, class_names, filename):
        """Save one-vs-rest ROC curves (100 thresholds in [0, 1]) to ``filename``.

        ``y_proba`` holds one column of scores per class.
        """
        y_true = ModelEvaluator._to_numpy(y_true)
        y_proba = ModelEvaluator._to_numpy(y_proba)

        plt.figure(figsize=(12, 8))
        for i in range(n_classes):
            fpr, tpr, auc = ModelEvaluator._roc_points(y_true == i, y_proba[:, i])
            plt.plot(fpr, tpr, label=f'{class_names[i]} (AUC = {auc:.3f})', linewidth=2)

        plt.plot([0, 1], [0, 1], 'k--', label='Random Classifier')
        plt.xlim([0.0, 1.0])
        plt.ylim([0.0, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curves - Multiclass Classification')
        plt.legend(loc="lower right")
        plt.grid(alpha=0.3)
        plt.tight_layout()
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()

    @staticmethod
    def plot_class_distribution(training_history, filename):
        """Save bar charts of per-class sample counts and priors to ``filename``.

        ``training_history`` is a list of dicts with the keys ``'class'``,
        ``'n_samples'`` and ``'prior'``.
        """
        classes = [h['class'] for h in training_history]
        n_samples = [h['n_samples'] for h in training_history]
        priors = [h['prior'] for h in training_history]
        colors = ['blue', 'orange', 'green']

        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
        panels = [
            (ax1, n_samples, 'Number of Samples', 'Training Samples per Class'),
            (ax2, priors, 'Prior Probability', 'Class Prior Probabilities'),
        ]
        for ax, heights, y_label, title in panels:
            ax.bar(classes, heights, color=colors)
            ax.set_xlabel('Class')
            ax.set_ylabel(y_label)
            ax.set_title(title)
            ax.set_xticks(classes)
            ax.grid(alpha=0.3)

        plt.tight_layout()
        plt.savefig(filename, dpi=300, bbox_inches='tight')
        plt.close()

# ============================================================================
# 9. MAIN PIPELINE (Full CUDA with PyTorch)
# ============================================================================
def main(dataset_path=None, output_dir="output_lr_cuda"):
    """Run the full pipeline: load, augment, clean, featurise, train and evaluate.

    Args:
        dataset_path: folder holding one sub-folder per class. Defaults to the
            path this script was originally written for.
        output_dir: folder that receives the plots and the JSON summary; it is
            created when missing.

    Raises:
        RuntimeError: if no image could be loaded from ``dataset_path``.
    """
    if dataset_path is None:
        dataset_path = r'C:\Users\Wolf\PAGANI\chest_xray'
    os.makedirs(output_dir, exist_ok=True)

    print("\n" + "=" * 70)
    print("CUDA-ACCELERATED LUNG DISEASE CLASSIFICATION")
    print("PyTorch CUDA - All Processing on GPU - LOGISTIC REGRESSION")
    print("=" * 70)

    if GPU_AVAILABLE:
        torch.cuda.empty_cache()
        print(f"\nInitial GPU Memory: {torch.cuda.memory_allocated() / 1e9:.2f} GB allocated")
        print(f"Initial GPU Memory: {torch.cuda.memory_reserved() / 1e9:.2f} GB reserved")

    loader = DatasetLoader(dataset_path)
    images, labels = loader.load_dataset(target_size=(256, 256), max_samples_per_class=None)

    if len(images) == 0:
        # Fail here with a clear message instead of deep inside the pipeline.
        raise RuntimeError(f"No images could be loaded from {dataset_path!r}")

    if GPU_AVAILABLE:
        print(f"GPU Memory after loading: {torch.cuda.memory_allocated() / 1e9:.2f} GB allocated")

    preprocessor = DataPreprocessor()

    # Factor 2 = originals + CLAHE copies + rotated copies.
    images_aug, labels_aug = preprocessor.augment_data(images, labels, augmentation_factor=2)

    # Each intermediate is dropped as soon as the next stage has consumed it
    # so that its memory can be reused.
    del images, labels
    if GPU_AVAILABLE:
        torch.cuda.empty_cache()
        print(f"GPU Memory after augmentation: {torch.cuda.memory_allocated() / 1e9:.2f} GB allocated")

    images_clean, labels_clean = preprocessor.remove_outliers_iqr(images_aug, labels_aug)
    del images_aug, labels_aug
    if GPU_AVAILABLE:
        torch.cuda.empty_cache()

    images_norm = preprocessor.normalize_images(images_clean)
    del images_clean
    if GPU_AVAILABLE:
        torch.cuda.empty_cache()
        print(f"GPU Memory after preprocessing: {torch.cuda.memory_allocated() / 1e9:.2f} GB allocated")

    feature_extractor = RadiomicFeatureExtractor()
    features = feature_extractor.extract_all_features(images_norm)
    del images_norm
    if GPU_AVAILABLE:
        torch.cuda.empty_cache()
        print(f"GPU Memory after feature extraction: {torch.cuda.memory_allocated() / 1e9:.2f} GB allocated")

    fusion = FeatureFusion()
    features_scaled = fusion.scale_features(features, fit=True)
    del features
    if GPU_AVAILABLE:
        torch.cuda.empty_cache()

    pca = PCA(n_components=min(7, features_scaled.shape[1]))
    features_pca = pca.fit_transform(features_scaled)
    del features_scaled
    if GPU_AVAILABLE:
        torch.cuda.empty_cache()
        print(f"GPU Memory after PCA: {torch.cuda.memory_allocated() / 1e9:.2f} GB allocated")

    class_names = ['Normal', 'Bacterial Pneumonia', 'Viral Pneumonia']
    n_classes = len(class_names)
    train_ratio = 0.7
    # round() rather than int(): float error in (1 - ratio) * 100 can land just
    # below a whole number and truncate to one percent less.
    train_pct = int(round(train_ratio * 100))
    test_pct = 100 - train_pct

    print("\n" + "=" * 70)
    print(f"TRAINING WITH {train_pct}% TRAIN / {test_pct}% TEST SPLIT")
    print("=" * 70)

    X_train, X_test, y_train, y_test = DataSplitter.train_test_split(
        features_pca, labels_clean, train_ratio=train_ratio
    )

    print(f"\nTrain set: {len(X_train)} samples (GPU)")
    print(f"Test set: {len(X_test)} samples (GPU)")

    if GPU_AVAILABLE:
        print(f"GPU Memory before training: {torch.cuda.memory_allocated() / 1e9:.2f} GB allocated")

    start_time = time.time()

    lr_model = LogisticRegressionGPU(learning_rate=0.01, n_iterations=2000, regularization=0.01)
    lr_model.fit(X_train, y_train)

    if GPU_AVAILABLE:
        # CUDA kernels run asynchronously; wait for them before reading the clock.
        torch.cuda.synchronize()
    train_time = time.time() - start_time

    if GPU_AVAILABLE:
        print(f"\nGPU Memory after training: {torch.cuda.memory_allocated() / 1e9:.2f} GB allocated")

    y_pred = lr_model.predict(X_test)
    y_proba = lr_model.predict_proba(X_test)

    cm = ModelEvaluator.confusion_matrix(y_test, y_pred, n_classes)
    accuracy = ModelEvaluator.accuracy(y_test, y_pred)
    precision, recall, f1 = ModelEvaluator.precision_recall_f1(y_test, y_pred, n_classes)

    print(f"\n" + "=" * 70)
    print("RESULTS (CUDA-Accelerated with PyTorch)")
    print("=" * 70)
    print(f"Accuracy: {accuracy:.4f}")
    print(f"Training Time: {train_time:.2f}s")
    if GPU_AVAILABLE:
        print(f"Processing: 100% on {torch.cuda.get_device_name(0)}")

    for i, class_name in enumerate(class_names):
        print(f"\n{class_name}:")
        print(f"  Precision: {precision[i]:.4f}")
        print(f"  Recall: {recall[i]:.4f}")
        print(f"  F1-Score: {f1[i]:.4f}")

    cm_filename = os.path.join(output_dir, f'confusion_matrix_LR_CUDA_{train_pct}.png')
    ModelEvaluator.plot_confusion_matrix(cm, class_names, cm_filename)

    roc_filename = os.path.join(output_dir, f'roc_curve_LR_CUDA_{train_pct}.png')
    ModelEvaluator.plot_roc_curve(y_test, y_proba, n_classes, class_names, roc_filename)

    class_dist_filename = os.path.join(output_dir, f'class_distribution_LR_CUDA_{train_pct}.png')
    ModelEvaluator.plot_class_distribution(lr_model.training_history, class_dist_filename)

    results = {
        'model': 'Logistic Regression (PyTorch CUDA-Accelerated)',
        'gpu': torch.cuda.get_device_name(0) if GPU_AVAILABLE else 'CPU',
        'train_ratio': train_ratio,
        'accuracy': float(accuracy),
        'precision': precision.tolist(),
        'recall': recall.tolist(),
        'f1_score': f1.tolist(),
        'training_time': float(train_time),
        'confusion_matrix': cm.tolist()
    }

    results_file = os.path.join(output_dir, 'results_summary_lr_cuda.json')
    with open(results_file, 'w', encoding='utf-8') as f:
        json.dump(results, f, indent=4)

    print(f"\nResults saved to: {results_file}")
    print(f"Visualizations saved to: {output_dir}/")

    if GPU_AVAILABLE:
        print(f"\nFinal GPU Memory: {torch.cuda.memory_allocated() / 1e9:.2f} GB allocated")
        print(f"Peak GPU Memory: {torch.cuda.max_memory_allocated() / 1e9:.2f} GB")

    print("\n" + "=" * 70)
    print("CUDA-ACCELERATED PIPELINE COMPLETED SUCCESSFULLY!")
    print("=" * 70)


if __name__ == "__main__":
    main()

GPU INITIALIZATION
GPU Device: NVIDIA GeForce RTX 3060
GPU Memory: 12.88 GB
CUDA Version: 12.1
CUDA enabled - All processing will run on GPU!

CUDA-ACCELERATED LUNG DISEASE CLASSIFICATION
PyTorch CUDA - All Processing on GPU - LOGISTIC REGRESSION

Initial GPU Memory: 0.00 GB allocated
Initial GPU Memory: 0.00 GB reserved
DATASET LOADING AND QUALITY CHECK

Processing Normal...

Processing Pneumonia_bacterial...

Processing Pneumonia_viral...

DATASET STATISTICS
Normal:
  Total: 1583
  Loaded: 1583
  Rejected: 0
Pneumonia_bacterial:
  Total: 2780
  Loaded: 2780
  Rejected: 0
Pneumonia_viral:
  Total: 1493
  Loaded: 1493
  Rejected: 0

Transferring data to GPU...
Final Dataset Shape: torch.Size([5856, 256, 256])
Labels Shape: torch.Size([5856])
Data location: cuda:0
GPU Memory after loading: 1.54 GB allocated

DATA AUGMENTATION (GPU-Accelerated)
Applying CLAHE enhancement...
 Processing 0/5856...
 Processing 500/5856...
 Processing 1000/5856...
 Processing 1500/5856...
 Processing 2000/58