# Optimized Skin Lesion Detection
## Using Quantum-Inspired Semi-Supervised Segmentation & Secure Federated Learning

This notebook implements a pipeline for skin lesion disease detection that includes:
1.  **Data Loading**: Loading the HAM10000 dataset (with a synthetic-data fallback when it is not found).
2.  **Quantum-Inspired Segmentation**: Using Quantum-behaved Particle Swarm Optimization (QPSO) for optimal multi-level thresholding to segment lesions.
3.  **Semi-Supervised/Federated Learning**: Simulating a secure federated learning loop with Differential Privacy.

### 1. Imports and Setup
We'll use TensorFlow for the model and standard libraries for processing.

In [1]:
import os
import glob
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import seaborn as sns

# Configuration
# Side length (pixels) that images are resized to; also the model input size.
IMG_SIZE = 128
# Mini-batch size handed to each client's local training call.
BATCH_SIZE = 32
NUM_CLIENTS = 3  # Number of federated clients to simulate
ROUNDS = 5       # Number of federated rounds
DP_NOISE_SCALE = 0.01 # Std-dev of the Gaussian noise added to client weights

# Set random seeds for reproducibility
# NumPy's global RNG drives the synthetic data, the QPSO search and the DP noise.
np.random.seed(42)
tf.random.set_seed(42)

print("Libraries imported and configuration set.")

Libraries imported and configuration set.


### 2. Data Loading
We attempt to load the HAM10000 dataset. 
**Note:** Please ensure the dataset is extracted in a `data` folder or update the `DATA_PATH` variable below.
The expected structure is:
- `dataset_path/metadata.csv`
- `dataset_path/images/*.jpg`

If data is not found, we create synthetic data for demonstration purposes so the secure FL pipeline can be verified.

In [None]:
import concurrent.futures
import time

def load_data(data_path, sample_size=None):
    """
    Load HAM10000 image paths and labels, or fall back to synthetic data.

    Real data is used when ``data_path`` contains an ``images`` folder and a
    metadata CSV named ``metadata.csv`` or ``HAM10000_metadata.csv`` (the
    first one found wins). The CSV must provide ``image_id`` and ``dx``
    columns; rows whose image has no matching ``images/<image_id>.jpg`` file
    are dropped.

    Args:
        data_path: Directory expected to hold the metadata CSV and ``images``.
        sample_size: Optional cap on the number of rows. When set and smaller
            than the dataset, rows are sampled with ``random_state=42``.

    Returns:
        A tuple ``(images, labels, num_classes)``. For real data ``images`` is
        an array of file paths; for synthetic data it is a float32 array of
        shape ``(100, IMG_SIZE, IMG_SIZE, 3)`` with 7 random classes.
    """
    image_folder = os.path.join(data_path, "images")

    # Accept both the documented file name and the name the path-discovery
    # code probes for, so a found dataset is not silently replaced by noise.
    metadata_path = None
    for name in ("metadata.csv", "HAM10000_metadata.csv"):
        candidate = os.path.join(data_path, name)
        if os.path.exists(candidate):
            metadata_path = candidate
            break

    if metadata_path is None or not os.path.exists(image_folder):
        print(f"Data not found at {data_path}. Generating synthetic data for demonstration.")
        # Generate random noise images
        num_samples = 100
        synthetic_images = np.random.rand(num_samples, IMG_SIZE, IMG_SIZE, 3).astype(np.float32)
        synthetic_labels = np.random.randint(0, 7, num_samples)
        return synthetic_images, synthetic_labels, 7

    print(f"Loading real data from {data_path}...")
    df = pd.read_csv(metadata_path)

    # Map image IDs (file name without extension) to full paths
    all_image_paths = {os.path.splitext(os.path.basename(x))[0]: x
                       for x in glob.glob(os.path.join(image_folder, '*.jpg'))}

    df['path'] = df['image_id'].map(all_image_paths.get)
    df = df.dropna(subset=['path'])  # Drop rows whose image file is missing

    if sample_size and sample_size < len(df):
        print(f"Subsampling dataset to {sample_size} samples for speed...")
        df = df.sample(sample_size, random_state=42)

    # Build the categorical once; codes and class count come from the same object.
    diagnosis = pd.Categorical(df['dx'])
    image_paths = df['path'].tolist()
    return np.array(image_paths), np.array(diagnosis.codes), len(diagnosis.categories)

def preprocess_image(path_or_array):
    """
    Read and normalize an image from a path, or pass an array through.

    Args:
        path_or_array: Either a file path (``str``) or already-loaded image
            data (the synthetic-data case).

    Returns:
        For a path: an RGB float32 array resized to ``IMG_SIZE`` x ``IMG_SIZE``
        with values scaled by 1/255. For anything else: the input unchanged.

    Raises:
        ValueError: If the path cannot be decoded by ``cv2.imread``.
    """
    if not isinstance(path_or_array, str):
        # Already an array (synthetic case)
        return path_or_array

    img = cv2.imread(path_or_array)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising;
        # fail here with the file name instead of deep inside cvtColor.
        raise ValueError(f"Could not read image file: {path_or_array}")

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads images as BGR
    img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
    return (img / 255.0).astype(np.float32)

# --- Execution ---
# Default location; replaced below if a known dataset location is found.
DATA_PATH = "./"

# 1. MOUNT GOOGLE DRIVE (If running on Colab)
try:
    from google.colab import drive
    drive.mount('/content/drive')
    DRIVE_PATH = "/content/drive/MyDrive/HAM10000"
    if not os.path.exists(DRIVE_PATH):
        print(f"Drive mounted, but dataset not found at {DRIVE_PATH}")
    else:
        DATA_PATH = DRIVE_PATH
        print(f"Success: Found data in Google Drive at {DATA_PATH}")
except ImportError:
    # google.colab is only importable inside a Colab runtime.
    print("Google Colab not detected. Using local machine resources.")

# 2. Local Fallback checks
if DATA_PATH == "./":
    gdrive_desktop = r"G:\My Drive\HAM10000"
    project_dir = r"c:\Users\shakti singh\OneDrive\Desktop\Derm project"
    # (path to probe, DATA_PATH to adopt, optional message); first hit wins.
    fallback_candidates = (
        (gdrive_desktop, gdrive_desktop, "Success: Found data in Google Drive for Desktop (G:)"),
        ("HAM10000_metadata.csv", ".", None),  # Check current root
        ("data", "data", None),                # Check 'data' folder
        (project_dir, project_dir, None),
    )
    for probe, chosen, message in fallback_candidates:
        if os.path.exists(probe):
            DATA_PATH = chosen
            if message is not None:
                print(message)
            break

print(f"Final Data Path used: {DATA_PATH}")

# Cap the number of images for quicker experiments (e.g., 2000). Use None for the full dataset.
SAMPLE_LIMIT = 2000
X_raw, y_raw, NUM_CLASSES = load_data(DATA_PATH, sample_size=SAMPLE_LIMIT)

# Labels are used as returned in both the real and the synthetic case.
y_data = y_raw

if not isinstance(X_raw[0], str):
    # Synthetic case: load_data already returned image arrays.
    X_data = X_raw
else:
    # Real case: X_raw holds file paths that still have to be decoded.
    print(f"Loading {len(X_raw)} images into memory. Starting parallel processing...")
    start_time = time.time()

    # Threads overlap the file reads, which are I/O bound.
    with concurrent.futures.ThreadPoolExecutor() as executor:
        X_data = np.array(list(executor.map(preprocess_image, X_raw)))

    print(f"Loading complete in {time.time() - start_time:.2f} seconds.")

print(f"Data shape: {X_data.shape}, Labels shape: {y_data.shape}")

# Split for Federated Clients
# Minimum number of samples any client receives.
data_per_client = len(X_data) // NUM_CLIENTS

# np.array_split hands any remainder to the first partitions, so no sample is
# dropped when len(X_data) is not divisible by NUM_CLIENTS. X and y are split
# with the same section count, which keeps images and labels aligned.
client_data_partitions = list(zip(np.array_split(X_data, NUM_CLIENTS),
                                  np.array_split(y_data, NUM_CLIENTS)))

for i, (client_X, _) in enumerate(client_data_partitions):
    print(f"Client {i+1} data: {len(client_X)} samples")

Google Colab not detected. Using local machine resources.
Success: Found data in Google Drive for Desktop (G:)
Final Data Path used: G:\My Drive\HAM10000
Loading real data from G:\My Drive\HAM10000...
Subsampling dataset to 2000 samples for speed...
Loading 2000 images into memory. Starting parallel processing...


### 3. Quantum-Inspired Semi-Supervised Segmentation
We realize the "Quantum-inspired" aspect by implementing **Quantum-behaved Particle Swarm Optimization (QPSO)**.
Classic PSO updates velocity and position. QPSO removes velocity and uses a probabilistic update based on a quantum delta potential well, which often converges faster for complex optimization problems like multi-level thresholding (finding the best grayscale thresholds to separate lesion from skin).

Here, we simulate using QPSO to find an optimal threshold for separating the lesion. In a full semi-supervised pipeline, these segmented masks would train a segmentation network (e.g., U-Net). For brevity, we apply the segmentation as a preprocessing mask to highlight the lesion.

In [None]:
def otsu_criterion(threshold, image):
    """
    Negated between-class variance (Otsu's criterion) for one threshold.

    The threshold is truncated to an integer and splits the 2-D ``image`` into
    pixels below it and pixels at or above it. Because the optimizer
    minimizes, the variance is returned with its sign flipped; a split that
    leaves either side empty scores 0.
    """
    cut = int(threshold)
    rows, cols = image.shape
    n_pixels = rows * cols

    below = image < cut          # Background/Skin side of the split
    at_or_above = image >= cut   # Foreground side of the split

    frac_below = np.sum(below) / n_pixels
    frac_above = np.sum(at_or_above) / n_pixels

    # Degenerate split: everything landed on one side.
    if frac_below == 0 or frac_above == 0:
        return 0

    # Both classes are non-empty here, so both means are well defined.
    gap = np.mean(image[below]) - np.mean(image[at_or_above])

    # Between-class variance, negated so that smaller is better.
    return -(frac_below * frac_above * gap ** 2)

def qpso_segmentation_optimizer(image, num_particles=20, max_iter=10, beta=0.5):
    """
    Search for a grayscale threshold with Quantum-behaved PSO.

    Each particle is a candidate threshold in [0, 255], scored with
    ``otsu_criterion`` (lower is better). Random numbers come from NumPy's
    global RNG, so results are reproducible only under a fixed seed.

    Args:
        image: RGB image; it is multiplied by 255 and cast to uint8, so values
            are presumably expected in [0, 1] (verify against the caller).
        num_particles: Number of candidate thresholds in the swarm.
        max_iter: Number of swarm update iterations.
        beta: Contraction-expansion coefficient scaling each particle's jump.

    Returns:
        The best threshold found, truncated to ``int``.
    """
    gray = cv2.cvtColor((image * 255).astype(np.uint8), cv2.COLOR_RGB2GRAY)

    # Initialize particles (thresholds between 0 and 255)
    particles = np.random.uniform(0, 255, num_particles)
    pbest = particles.copy()
    pbest_val = np.array([otsu_criterion(p, gray) for p in particles])

    best_idx = np.argmin(pbest_val)
    gbest = pbest[best_idx]
    gbest_val = pbest_val[best_idx]

    # np.random.rand() samples [0, 1); clamping keeps 1/u finite when it
    # returns exactly 0.0 (which would otherwise raise ZeroDivisionError).
    smallest_u = np.finfo(float).tiny

    for _ in range(max_iter):
        mbest = np.mean(pbest)  # Mean of all personal bests for this iteration

        for i in range(num_particles):
            # Draw order (phi, u, direction) is kept fixed so seeded runs
            # reproduce the same trajectory.
            phi = np.random.rand()
            u = max(np.random.rand(), smallest_u)

            # Local attractor: random blend of personal and global best
            p = (phi * pbest[i] + (1 - phi) * gbest)

            # Jump length, applied in a randomly chosen direction
            step = beta * abs(mbest - particles[i]) * np.log(1 / u)
            if np.random.rand() < 0.5:
                particles[i] = p + step
            else:
                particles[i] = p - step

            # Clip to valid grayscale range
            particles[i] = np.clip(particles[i], 0, 255)

            # Keep the particle's best position so far
            val = otsu_criterion(particles[i], gray)
            if val < pbest_val[i]:
                pbest[i] = particles[i]
                pbest_val[i] = val

        # Global best is refreshed once per iteration, after all particles moved
        current_gbest_idx = np.argmin(pbest_val)
        if pbest_val[current_gbest_idx] < gbest_val:
            gbest = pbest[current_gbest_idx]
            gbest_val = pbest_val[current_gbest_idx]

    return int(gbest)

def apply_quantum_segmentation(image):
    """
    Mask an image with the threshold chosen by the QPSO optimizer.

    Returns a tuple ``(masked_img, mask)``: ``mask`` is the binary result of
    thresholding the grayscale image at the optimized value, and
    ``masked_img`` is the input with pixels outside the mask zeroed.
    """
    # Run the optimizer first; it draws from the global NumPy RNG.
    threshold = qpso_segmentation_optimizer(image)

    as_uint8 = (image * 255).astype(np.uint8)
    gray = cv2.cvtColor(as_uint8, cv2.COLOR_RGB2GRAY)
    mask = cv2.threshold(gray, threshold, 255, cv2.THRESH_BINARY)[1]

    # AND-ing the image with itself under the mask keeps only masked pixels.
    return cv2.bitwise_and(image, image, mask=mask), mask

# Visual sanity check on the first image
if len(X_data) > 0:
    sample_img = X_data[0]
    segmented_img, mask = apply_quantum_segmentation(sample_img)

    # (image, colormap, title) for each of the three side-by-side panels
    demo_panels = (
        (sample_img, None, "Original"),
        (mask, 'gray', "QPSO Mask"),
        (segmented_img, None, "Segmented"),
    )

    plt.figure(figsize=(12, 4))
    for slot, (panel_img, panel_cmap, panel_title) in enumerate(demo_panels, start=1):
        plt.subplot(1, 3, slot)
        plt.imshow(panel_img, cmap=panel_cmap)
        plt.title(panel_title)
    plt.show()

### 4. Setup Secure Federated Learning Loop
We define a simulation where:
1.  **Server**: Initializes global model, distributes weights, aggregates updates.
2.  **Client**: Downloads weights, trains locally on private data, adds **Differential Privacy (DP)** noise to updates (Secure aspect), and uploads to server.

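For the "secure" aspect, each client adds Gaussian noise to its locally trained model weights before sending them to the server, simulating Local Differential Privacy. This is a simplified illustration: without bounding (clipping) each update, the added noise does not by itself yield a formal differential-privacy guarantee.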

In [None]:
def create_model(input_shape, num_classes):
    """
    Build and compile the small CNN shared by the server and every client.

    Two conv/max-pool stages feed a 64-unit dense layer and a softmax output
    with ``num_classes`` units. The model is compiled with Adam and sparse
    categorical cross-entropy, so labels are integer class codes.
    """
    feature_extractor = [
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
    ]
    classifier_head = [
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax'),
    ]

    model = models.Sequential(feature_extractor + classifier_head)
    model.compile(
        loss='sparse_categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return model

class FederatedClient:
    """One simulated federated participant holding private data and a local model."""

    def __init__(self, client_id, data, num_classes):
        """Store the client's ``(X, y)`` partition and build its local model."""
        features, targets = data
        self.client_id = client_id
        self.X_train = features
        self.y_train = targets
        # Drop the leading sample axis to get the per-image input shape.
        self.input_shape = features.shape[1:]
        self.num_classes = num_classes
        self.model = create_model(self.input_shape, self.num_classes)

    def set_weights(self, weights):
        """Overwrite the local model's weights (e.g. with the global weights)."""
        self.model.set_weights(weights)

    def train(self, epochs=1, batch_size=32, dp_noise=0.0):
        """
        Fit the local model and return ``(weights, num_samples)``.

        When ``dp_noise`` is positive, zero-mean Gaussian noise with that
        standard deviation is added to every weight array before it is
        returned (simulating Local DP); the model itself keeps its
        un-noised weights.
        """
        self.model.fit(
            self.X_train,
            self.y_train,
            epochs=epochs,
            batch_size=batch_size,
            verbose=0,
        )

        outgoing = self.model.get_weights()

        # --- SECURE COMPONENT: Differential Privacy ---
        if dp_noise > 0:
            outgoing = [
                layer_w + np.random.normal(0, dp_noise, layer_w.shape)
                for layer_w in outgoing
            ]

        return outgoing, len(self.X_train)

    def evaluate(self):
        """Return ``(loss, accuracy)`` of the local model on its own training data."""
        local_loss, local_acc = self.model.evaluate(self.X_train, self.y_train, verbose=0)
        return local_loss, local_acc

# --- Federated Averaging (FedAvg) ---
def federated_average(weights_results):
    """
    Aggregate client weights with a sample-count-weighted average (FedAvg).

    Args:
        weights_results: Sequence of ``(weights, num_samples)`` pairs, where
            ``weights`` is a list of arrays (one per model weight tensor) and
            ``num_samples`` is that client's number of training samples.

    Returns:
        A new list of arrays holding the weighted average of each tensor. The
        arrays take their shape and dtype from the first client's weights.

    Raises:
        ValueError: If no client results are given, if the total sample count
            is not positive, or if clients report different numbers of
            weight tensors.
    """
    if not weights_results:
        raise ValueError("federated_average requires at least one client result.")

    total_samples = sum(n for _, n in weights_results)
    if total_samples <= 0:
        raise ValueError("Total number of client samples must be positive.")

    # Accumulators shaped like the first client's weights
    new_weights = [np.zeros_like(w) for w in weights_results[0][0]]

    for weights, num_samples in weights_results:
        if len(weights) != len(new_weights):
            raise ValueError("All clients must report the same number of weight tensors.")
        # Each client contributes in proportion to its share of the data.
        scaling_factor = num_samples / total_samples
        for accumulator, layer_weights in zip(new_weights, weights):
            accumulator += layer_weights * scaling_factor

    return new_weights

### 5. Running the Federated Loop
We initialize the clients, loop through rounds, and aggregate the secure (noisy) updates.

In [None]:
# Server-side model; its weights are what every round starts from
global_model = create_model((IMG_SIZE, IMG_SIZE, 3), NUM_CLASSES)
global_weights = global_model.get_weights()

# One client per data partition, numbered from 0
clients = [
    FederatedClient(client_index, partition, NUM_CLASSES)
    for client_index, partition in enumerate(client_data_partitions)
]

# Per-round metrics of the aggregated model
history = dict(accuracy=[], loss=[])

# Client 1's data doubles as a proxy validation set for the demo.
# In reality, you'd use a held-out test set.
proxy_X, proxy_y = client_data_partitions[0]

print(f"Starting Federated Learning for {ROUNDS} rounds with {NUM_CLIENTS} clients...")
print(f"Secure Mode: DP Noise Scale = {DP_NOISE_SCALE}")

for round_num in range(ROUNDS):
    print(f"\n--- Round {round_num + 1} ---")

    client_weights_results = []

    for client in clients:
        # 1. Hand the current global weights to this client
        client.set_weights(global_weights)

        # 2. Local training; the returned weights already carry the DP noise
        update = client.train(epochs=2, batch_size=BATCH_SIZE, dp_noise=DP_NOISE_SCALE)
        client_weights_results.append(update)

        # Optional: report how the client's own model does on its own data
        l, a = client.evaluate()
        print(f"Client {client.client_id} - Loss: {l:.4f}, Acc: {a:.4f}")

    # 3. Aggregate the (noisy) client updates into new global weights
    global_weights = federated_average(client_weights_results)

    # Load the aggregate into the server model so it can be evaluated
    global_model.set_weights(global_weights)

    val_loss, val_acc = global_model.evaluate(proxy_X, proxy_y, verbose=0)
    history['accuracy'].append(val_acc)
    history['loss'].append(val_loss)
    print(f"Global Model - Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")

# Plot per-round accuracy and loss of the global model side by side
# (history key, panel title, y-axis label, extra line styling)
result_panels = (
    ('accuracy', 'Global Model Accuracy per Round', 'Accuracy', {}),
    ('loss', 'Global Model Loss per Round', 'Loss', {'color': 'orange'}),
)

plt.figure(figsize=(10, 4))
for position, (metric, panel_title, y_label, line_style) in enumerate(result_panels, start=1):
    plt.subplot(1, 2, position)
    plt.plot(history[metric], marker='o', **line_style)
    plt.title(panel_title)
    plt.xlabel('Round')
    plt.ylabel(y_label)

plt.tight_layout()
plt.show()

print("Federated Learning (Secure & Optimized) Simulation Complete.")