<a href="https://colab.research.google.com/github/Akanksha-cell-max/Advanced-Artificial-Intelligence/blob/main/Untitled7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
import cv2
from skimage.util import random_noise

# Set random seeds for reproducibility (NumPy's global RNG and TensorFlow)
np.random.seed(42)
tf.random.set_seed(42)

# Configuration
IMG_HEIGHT = 128  # height, in pixels, of generated images and model input
IMG_WIDTH = 128  # width, in pixels, of generated images and model input
BATCH_SIZE = 32
EPOCHS = 50  # maximum number of epochs passed to fit()
LEARNING_RATE = 0.001  # initial Adam learning rate
NUM_SAMPLES = 2000  # Number of synthetic images to generate

# Data paths
CSV_FILE = '/content/sampleSubmission.csv'  # NOTE(review): not referenced by the visible code - confirm it is still needed
OUTPUT_DIR = 'denoised_output'  # models, plots and sample images are written here

# Create output directory (runs at import time; no error if it already exists)
os.makedirs(OUTPUT_DIR, exist_ok=True)


class SyntheticDataGenerator:
    """Generate synthetic clean/noisy grayscale image pairs for training.

    Clean images are white canvases with dark lines, filled rectangles and
    dots drawn at random positions; noisy counterparts are produced by
    :meth:`add_noise`. Images are float32 arrays with values in [0, 1].
    """

    # Noise models understood by ``add_noise``.
    NOISE_TYPES = ('gaussian', 'salt_pepper', 'speckle', 'combined')

    def __init__(self, img_height=128, img_width=128):
        """Store the size (in pixels) of the images to generate."""
        self.img_height = img_height
        self.img_width = img_width

    def add_noise(self, image, noise_type='gaussian'):
        """Return a noisy float32 copy of *image*, clipped to [0, 1].

        Args:
            image: Array of pixel intensities, expected in [0, 1].
            noise_type: One of ``NOISE_TYPES``.

        Returns:
            A new float32 array with the same shape as *image*.

        Raises:
            ValueError: If *noise_type* is not a known noise model.
        """
        if noise_type == 'gaussian':
            # Additive zero-mean Gaussian noise, sigma = 0.1
            noisy_image = image + np.random.normal(0, 0.1, image.shape)

        elif noise_type == 'salt_pepper':
            # NOTE(review): random_noise may draw from its own generator
            # rather than the seeded global NumPy RNG, depending on the
            # scikit-image version - confirm if exact reproducibility matters.
            noisy_image = random_noise(image, mode='s&p', amount=0.05)

        elif noise_type == 'speckle':
            # Multiplicative noise: image * (1 + 0.2 * N(0, 1))
            noisy_image = image + image * np.random.randn(*image.shape) * 0.2

        elif noise_type == 'combined':
            # Light Gaussian noise, then salt & pepper, then a small blur
            noisy_image = image + np.random.normal(0, 0.05, image.shape)
            noisy_image = random_noise(noisy_image, mode='s&p', amount=0.02)
            # np.random.choice yields a NumPy integer; hand OpenCV a plain int
            # so the kernel size parses regardless of the binding version.
            kernel_size = int(np.random.choice([3, 5]))
            noisy_image = cv2.GaussianBlur(noisy_image, (kernel_size, kernel_size), 0)

        else:
            # Previously an unknown type silently returned the clean image,
            # which would feed un-noised "noisy" samples into training.
            raise ValueError(
                f"Unknown noise_type {noise_type!r}; expected one of {self.NOISE_TYPES}"
            )

        return np.clip(noisy_image, 0, 1).astype(np.float32)

    def generate_clean_image(self):
        """Generate one clean synthetic image (document-like).

        Returns:
            A float32 array of shape ``(img_height, img_width)``.
        """
        # White background
        img = np.ones((self.img_height, self.img_width), dtype=np.float32)

        # Horizontal dark strokes standing in for lines of text
        num_lines = np.random.randint(5, 15)
        for _ in range(num_lines):
            y = np.random.randint(10, self.img_height - 10)
            x_start = np.random.randint(10, 40)
            x_end = np.random.randint(self.img_width - 40, self.img_width - 10)
            thickness = np.random.randint(1, 3)
            color = np.random.uniform(0.0, 0.3)
            cv2.line(img, (x_start, y), (x_end, y), color, thickness)

        # Filled rectangles (like text blocks); thickness -1 means filled
        num_rects = np.random.randint(3, 8)
        for _ in range(num_rects):
            x1 = np.random.randint(5, self.img_width // 2)
            y1 = np.random.randint(5, self.img_height // 2)
            x2 = x1 + np.random.randint(20, 60)
            y2 = y1 + np.random.randint(10, 30)
            color = np.random.uniform(0.0, 0.4)
            cv2.rectangle(img, (x1, y1), (x2, y2), color, -1)

        # Small filled circles (like dots)
        num_circles = np.random.randint(5, 15)
        for _ in range(num_circles):
            x = np.random.randint(0, self.img_width)
            y = np.random.randint(0, self.img_height)
            radius = np.random.randint(1, 4)
            color = np.random.uniform(0.0, 0.3)
            cv2.circle(img, (x, y), radius, color, -1)

        # Slight blur softens the hard edges for realism
        return cv2.GaussianBlur(img, (3, 3), 0)

    def generate_dataset(self, num_samples):
        """Generate *num_samples* (noisy, clean) image pairs.

        Each pair uses a noise model picked at random from ``NOISE_TYPES``.

        Args:
            num_samples: Number of pairs to generate (0 is allowed).

        Returns:
            ``(noisy_images, clean_images)``, each a float32 array of shape
            ``(num_samples, img_height, img_width, 1)``.
        """
        # Preallocate the output so the shape is right even for zero samples
        # and no intermediate Python lists need to be copied at the end.
        shape = (num_samples, self.img_height, self.img_width, 1)
        clean_images = np.empty(shape, dtype=np.float32)
        noisy_images = np.empty(shape, dtype=np.float32)

        print(f"Generating {num_samples} synthetic image pairs...")
        for i in range(num_samples):
            if i % 100 == 0:
                print(f"Generated {i}/{num_samples} images")

            # Same RNG call order as before: clean image, noise type, noise
            clean_img = self.generate_clean_image()
            noise_type = np.random.choice(self.NOISE_TYPES)
            noisy_img = self.add_noise(clean_img, noise_type)

            clean_images[i, ..., 0] = clean_img
            noisy_images[i, ..., 0] = noisy_img

        print(f"Successfully generated {num_samples} image pairs")

        return noisy_images, clean_images


class DenoisingAutoencoder:
    """Convolutional autoencoder for image denoising.

    The encoder halves the spatial size three times ('same'-padded max
    pooling) and the decoder doubles it three times, so the output only
    matches the input when height and width are multiples of 8.
    """

    # Conv filters per encoder stage; the decoder mirrors them in reverse.
    _ENCODER_FILTERS = (32, 64, 128)

    def __init__(self, input_shape=(128, 128, 1)):
        """Build the (uncompiled) model for inputs of *input_shape* (H, W, C).

        Raises:
            ValueError: If a known height or width is not divisible by 8.
        """
        self.input_shape = input_shape
        self.model = self.build_model()

    @staticmethod
    def _check_spatial_dims(input_shape):
        """Raise ValueError if a known spatial dim is not a multiple of 8."""
        factor = 2 ** len(DenoisingAutoencoder._ENCODER_FILTERS)
        for dim in tuple(input_shape)[:2]:
            # None means "variable size" and cannot be checked up front
            if dim is not None and dim % factor != 0:
                raise ValueError(
                    f"input height and width must be multiples of {factor} so "
                    f"the decoder output matches the input; got {tuple(input_shape)}"
                )

    def build_model(self):
        """Build the autoencoder architecture.

        Returns:
            A Keras model named 'denoising_autoencoder' mapping an image of
            ``self.input_shape`` to a single-channel sigmoid output.

        Raises:
            ValueError: If a known height or width is not divisible by 8.
        """
        # Fail fast: pooling rounds odd sizes up, so the decoder would
        # otherwise emit a larger image than the training target.
        self._check_spatial_dims(self.input_shape)

        inputs = layers.Input(shape=self.input_shape)
        x = inputs

        # Encoder: Conv -> BatchNorm -> 2x downsample per stage
        for filters in self._ENCODER_FILTERS:
            x = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
            x = layers.BatchNormalization()(x)
            x = layers.MaxPooling2D((2, 2), padding='same')(x)

        # Decoder: Conv -> BatchNorm -> 2x upsample per stage
        for filters in reversed(self._ENCODER_FILTERS):
            x = layers.Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
            x = layers.BatchNormalization()(x)
            x = layers.UpSampling2D((2, 2))(x)

        # Output layer: sigmoid keeps pixel values in [0, 1]
        decoded = layers.Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x)

        return models.Model(inputs, decoded, name='denoising_autoencoder')

    def compile_model(self, learning_rate=0.001):
        """Compile the model with Adam, MSE loss and an MAE metric."""
        self.model.compile(
            optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
            loss='mse',
            metrics=['mae']
        )

    def summary(self):
        """Print the model summary; returns whatever ``model.summary()`` returns."""
        return self.model.summary()


def plot_training_history(history):
    """Plot training vs. validation loss and MAE side by side.

    Reads the 'loss', 'val_loss', 'mae' and 'val_mae' series from
    ``history.history``, saves the figure as 'training_history.png' inside
    OUTPUT_DIR and then shows it.
    """
    # (history key, display name, y-axis label) for each panel, left to right
    panels = (
        ('loss', 'Loss', 'Loss (MSE)'),
        ('mae', 'MAE', 'MAE'),
    )

    plt.figure(figsize=(14, 5))

    for position, (key, name, y_label) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        plt.plot(history.history[key], label=f'Training {name}', linewidth=2)
        plt.plot(history.history[f'val_{key}'], label=f'Validation {name}', linewidth=2)
        plt.title(f'Model {name} Over Epochs', fontsize=14, fontweight='bold')
        plt.xlabel('Epoch', fontsize=12)
        plt.ylabel(y_label, fontsize=12)
        plt.legend(fontsize=10)
        plt.grid(True, alpha=0.3)

    plt.tight_layout()
    figure_path = os.path.join(OUTPUT_DIR, 'training_history.png')
    plt.savefig(figure_path, dpi=300, bbox_inches='tight')
    plt.show()


def visualize_results(noisy_images, clean_images, denoised_images, num_samples=5):
    """Show noisy, denoised and clean images side by side, one row per sample.

    Args:
        noisy_images: Model inputs, indexable by sample.
        clean_images: Ground-truth images, indexable by sample.
        denoised_images: Model outputs, indexable by sample.
        num_samples: Maximum number of rows to draw; clamped to the number of
            samples actually available in all three collections.

    The figure is saved as 'denoising_results.png' inside OUTPUT_DIR and then
    shown. Nothing is drawn when there are no samples to show.
    """
    # Clamp like save_sample_images does, instead of raising IndexError when
    # fewer than num_samples images are available.
    count = min(num_samples, len(noisy_images), len(clean_images), len(denoised_images))
    if count <= 0:
        return

    # Column order within each row
    columns = (
        ('Noisy Input', noisy_images),
        ('Denoised Output', denoised_images),
        ('Clean Ground Truth', clean_images),
    )

    plt.figure(figsize=(15, 3 * count))

    for row in range(count):
        for col, (title, batch) in enumerate(columns, start=1):
            plt.subplot(count, 3, row * 3 + col)
            # Fixed vmin/vmax keeps brightness comparable across panels
            plt.imshow(batch[row].squeeze(), cmap='gray', vmin=0, vmax=1)
            plt.title(title, fontsize=12, fontweight='bold')
            plt.axis('off')

    plt.tight_layout()
    plt.savefig(os.path.join(OUTPUT_DIR, 'denoising_results.png'), dpi=300, bbox_inches='tight')
    plt.show()


def calculate_psnr(original, denoised):
    """Calculate Peak Signal-to-Noise Ratio"""
    mse = np.mean((original - denoised) ** 2)
    if mse == 0:
        return float('inf')
    max_pixel = 1.0
    psnr = 20 * np.log10(max_pixel / np.sqrt(mse))
    return psnr


def calculate_ssim(original, denoised):
    """Return the Structural Similarity Index of two images (data range 1.0).

    Both inputs are squeezed first, dropping any size-1 axes.
    """
    # Imported lazily, so scikit-image's metrics module loads only on use
    from skimage.metrics import structural_similarity

    reference = original.squeeze()
    candidate = denoised.squeeze()
    return structural_similarity(reference, candidate, data_range=1.0)


def evaluate_model(clean_images, denoised_images):
    """Compute per-image PSNR, SSIM and MSE and print their averages.

    Returns:
        ``(psnr_values, ssim_values, mse_values)``: three lists holding one
        entry per image in *clean_images*.
    """
    psnr_values, ssim_values, mse_values = [], [], []

    for idx, reference in enumerate(clean_images):
        restored = denoised_images[idx]
        psnr_values.append(calculate_psnr(reference, restored))
        ssim_values.append(calculate_ssim(reference, restored))
        mse_values.append(np.mean((reference - restored) ** 2))

    rule = '=' * 60
    print(f"\n{rule}")
    print("EVALUATION METRICS")
    print(rule)
    print(f"Average PSNR: {np.mean(psnr_values):.2f} dB (Higher is better)")
    print(f"Average SSIM: {np.mean(ssim_values):.4f} (Range: 0-1, closer to 1 is better)")
    print(f"Average MSE:  {np.mean(mse_values):.6f} (Lower is better)")
    print(f"{rule}\n")

    return psnr_values, ssim_values, mse_values


def save_sample_images(noisy_images, denoised_images, clean_images, num_samples=10):
    """Save individual sample images as 8-bit PNGs for inspection.

    For each sample index ``i`` three files are written to
    ``OUTPUT_DIR/samples``: ``sample_{i}_noisy.png``,
    ``sample_{i}_denoised.png`` and ``sample_{i}_clean.png``.

    Args:
        noisy_images: Model inputs with values in [0, 1].
        denoised_images: Model outputs with values in [0, 1].
        clean_images: Ground-truth images with values in [0, 1].
        num_samples: Maximum number of samples to save; clamped to the number
            available in all three collections.
    """
    samples_dir = os.path.join(OUTPUT_DIR, 'samples')
    os.makedirs(samples_dir, exist_ok=True)

    batches = (
        ('noisy', noisy_images),
        ('denoised', denoised_images),
        ('clean', clean_images),
    )
    # Bound by every collection (not just the noisy one) and never go negative
    count = max(0, min(num_samples, *(len(batch) for _, batch in batches)))

    for i in range(count):
        for suffix, batch in batches:
            # Clip before the cast so out-of-range values cannot wrap around
            pixels = (np.clip(batch[i].squeeze(), 0, 1) * 255).astype(np.uint8)
            cv2.imwrite(os.path.join(samples_dir, f'sample_{i}_{suffix}.png'), pixels)

    # Report how many samples were actually written, not how many were requested
    print(f"✓ Saved {count} sample images to '{samples_dir}/'")


def main():
    """Run the full pipeline: generate data, train, evaluate and save results.

    Steps: generate NUM_SAMPLES synthetic pairs, hold out 15% for validation,
    build and compile the autoencoder, train it with checkpointing, early
    stopping and learning-rate reduction, report metrics on the validation
    set, then write plots, sample images and the final model to OUTPUT_DIR.

    Returns:
        ``(autoencoder, history)``: the trained DenoisingAutoencoder wrapper
        and the Keras History object returned by ``fit``.
    """

    print("\n" + "="*70)
    print(" " * 15 + "IMAGE DENOISING AUTOENCODER")
    print("="*70 + "\n")

    # 1. Generate Synthetic Data
    print("[STEP 1/6] Generating synthetic dataset...")
    print("-" * 70)
    data_generator = SyntheticDataGenerator(IMG_HEIGHT, IMG_WIDTH)
    noisy_images, clean_images = data_generator.generate_dataset(NUM_SAMPLES)

    print(f"\n✓ Dataset generated successfully!")
    print(f"  - Noisy images shape: {noisy_images.shape}")
    print(f"  - Clean images shape: {clean_images.shape}")

    # 2. Split data (noisy images are the inputs, clean images the targets)
    print(f"\n[STEP 2/6] Splitting dataset...")
    print("-" * 70)
    X_train, X_val, y_train, y_val = train_test_split(
        noisy_images, clean_images, test_size=0.15, random_state=42
    )

    print(f"✓ Data split completed!")
    print(f"  - Training samples: {len(X_train)}")
    print(f"  - Validation samples: {len(X_val)}")

    # 3. Build model
    print(f"\n[STEP 3/6] Building autoencoder model...")
    print("-" * 70)
    autoencoder = DenoisingAutoencoder(input_shape=(IMG_HEIGHT, IMG_WIDTH, 1))
    autoencoder.compile_model(learning_rate=LEARNING_RATE)

    print("\n✓ Model built successfully!")
    print("\nModel Architecture Summary:")
    autoencoder.summary()

    # 4. Train model
    print(f"\n[STEP 4/6] Training model...")
    print("-" * 70)
    print(f"Configuration:")
    print(f"  - Epochs: {EPOCHS}")
    print(f"  - Batch Size: {BATCH_SIZE}")
    print(f"  - Learning Rate: {LEARNING_RATE}")
    print(f"  - Optimizer: Adam")
    print(f"  - Loss Function: MSE")
    print("-" * 70 + "\n")

    # Callbacks, all driven by validation loss
    callbacks = [
        # Keep the best model seen so far on disk
        keras.callbacks.ModelCheckpoint(
            os.path.join(OUTPUT_DIR, 'best_denoising_model.keras'),
            save_best_only=True,
            monitor='val_loss',
            verbose=1
        ),
        # Stop after 15 epochs without improvement and roll back to the best weights
        keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=15,
            restore_best_weights=True,
            verbose=1
        ),
        # Halve the learning rate after 7 stagnant epochs
        keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=7,
            min_lr=1e-7,
            verbose=1
        )
    ]

    history = autoencoder.model.fit(
        X_train, y_train,
        validation_data=(X_val, y_val),
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        callbacks=callbacks,
        verbose=1
    )

    # 5. Evaluate model
    print(f"\n[STEP 5/6] Evaluating model performance...")
    print("-" * 70)

    # Plot training history
    plot_training_history(history)

    # Generate predictions
    print("Generating predictions on validation set...")
    denoised_val = autoencoder.model.predict(X_val, verbose=0)

    # Calculate metrics
    evaluate_model(y_val, denoised_val)

    # 6. Visualize and save results
    print(f"[STEP 6/6] Saving results...")
    print("-" * 70)

    # Visualize results
    visualize_results(X_val, y_val, denoised_val, num_samples=5)

    # Save sample images
    save_sample_images(X_val, denoised_val, y_val, num_samples=10)

    # Save final model
    final_model_path = os.path.join(OUTPUT_DIR, 'final_denoising_model.keras')
    autoencoder.model.save(final_model_path)

    # Save model architecture diagram. This is best-effort: the diagram needs
    # optional tooling, so a failure here must not abort the run. Catch
    # Exception rather than using a bare except so Ctrl-C / SystemExit still
    # propagate, and report the actual reason.
    try:
        keras.utils.plot_model(
            autoencoder.model,
            to_file=os.path.join(OUTPUT_DIR, 'model_architecture.png'),
            show_shapes=True,
            show_layer_names=True,
            dpi=150
        )
        print(f"✓ Model architecture diagram saved")
    except Exception as exc:
        print(f"  (Model architecture diagram not saved - graphviz may not be installed: {exc})")

    print(f"\n{'='*70}")
    print("TRAINING COMPLETED SUCCESSFULLY!")
    print(f"{'='*70}")
    print(f"\nAll outputs saved to '{OUTPUT_DIR}/' directory:")
    print(f"  ✓ best_denoising_model.keras - Best model checkpoint")
    print(f"  ✓ final_denoising_model.keras - Final trained model")
    print(f"  ✓ training_history.png - Training/validation curves")
    print(f"  ✓ denoising_results.png - Visual comparison results")
    print(f"  ✓ samples/ - Individual sample images")
    print(f"\n{'='*70}\n")

    return autoencoder, history


def denoise_custom_image(model_path, image_path, output_path=None):
    """Denoise one image file with a saved model and save/show the result.

    The image is read as grayscale, resized to IMG_WIDTH x IMG_HEIGHT, scaled
    to [0, 1], passed through the model, and the output is resized back to
    the original dimensions.

    Args:
        model_path: Path of a saved Keras model.
        image_path: Path of the noisy input image.
        output_path: Where to write the denoised image. Defaults to
            'denoised_custom.png' inside OUTPUT_DIR. A side-by-side
            comparison figure is written next to it as
            '<name>_comparison.png'.

    Returns:
        None. If the input image cannot be read, an error is printed and the
        function returns early.
    """

    print(f"\nDenoising image: {image_path}")

    # Load model
    model = keras.models.load_model(model_path)

    # Load and preprocess image; cv2.imread signals failure by returning None
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print(f"Error: Could not load image from {image_path}")
        return

    original_height, original_width = img.shape
    img_resized = cv2.resize(img, (IMG_WIDTH, IMG_HEIGHT))  # cv2 takes (width, height)
    img_normalized = img_resized.astype('float32') / 255.0
    # Add batch and channel axes: (H, W) -> (1, H, W, 1)
    img_input = img_normalized[np.newaxis, ..., np.newaxis]

    # Denoise
    print("Processing...")
    denoised = model.predict(img_input, verbose=0)
    denoised_img = (denoised[0].squeeze() * 255).astype(np.uint8)

    # Resize back to original dimensions
    denoised_img = cv2.resize(denoised_img, (original_width, original_height))

    # Save result
    if output_path is None:
        output_path = os.path.join(OUTPUT_DIR, 'denoised_custom.png')

    # imwrite reports failure through its return value; keep it so success
    # is not claimed for a file that was never written.
    saved = cv2.imwrite(output_path, denoised_img)

    # Visualize
    plt.figure(figsize=(14, 5))

    plt.subplot(1, 2, 1)
    plt.imshow(img, cmap='gray')
    plt.title('Original Noisy Image', fontsize=14, fontweight='bold')
    plt.axis('off')

    plt.subplot(1, 2, 2)
    plt.imshow(denoised_img, cmap='gray')
    plt.title('Denoised Output', fontsize=14, fontweight='bold')
    plt.axis('off')

    plt.tight_layout()
    # Derive the comparison name from the file stem. A plain
    # str.replace('.png', ...) left non-.png paths unchanged, so the figure
    # overwrote the denoised image that had just been saved.
    stem, _ = os.path.splitext(output_path)
    comparison_path = f"{stem}_comparison.png"
    plt.savefig(comparison_path, dpi=300, bbox_inches='tight')
    plt.show()

    if saved:
        print(f"✓ Denoised image saved to: {output_path}")
    else:
        print(f"Error: Could not write denoised image to {output_path}")
    print(f"✓ Comparison saved to: {comparison_path}")


if __name__ == "__main__":
    # Train the model; the returned wrapper and Keras History object are bound
    # at module level when this file is run directly
    autoencoder, history = main()

    # Example: Denoise a custom image (uncomment and modify path to use)
    # denoise_custom_image(
    #     os.path.join(OUTPUT_DIR, 'final_denoising_model.keras'),
    #     'path/to/your/noisy/image.png',
    #     'path/to/output/denoised.png'
    # )


               IMAGE DENOISING AUTOENCODER

[STEP 1/6] Generating synthetic dataset...
----------------------------------------------------------------------
Generating 2000 synthetic image pairs...
Generated 0/2000 images
Generated 100/2000 images
Generated 200/2000 images
Generated 300/2000 images
Generated 400/2000 images
Generated 500/2000 images
Generated 600/2000 images
Generated 700/2000 images
Generated 800/2000 images
Generated 900/2000 images
Generated 1000/2000 images
Generated 1100/2000 images
Generated 1200/2000 images
Generated 1300/2000 images
Generated 1400/2000 images
Generated 1500/2000 images
Generated 1600/2000 images
Generated 1700/2000 images
Generated 1800/2000 images
Generated 1900/2000 images
Successfully generated 2000 image pairs

✓ Dataset generated successfully!
  - Noisy images shape: (2000, 128, 128, 1)
  - Clean images shape: (2000, 128, 128, 1)

[STEP 2/6] Splitting dataset...
----------------------------------------------------------------------
✓ Data split completed!


[STEP 4/6] Training model...
----------------------------------------------------------------------
Configuration:
  - Epochs: 50
  - Batch Size: 32
  - Learning Rate: 0.001
  - Optimizer: Adam
  - Loss Function: MSE
----------------------------------------------------------------------

Epoch 1/50
54/54 ━━━━━━━━━━━━━━━━━━━━ 0s 4s/step - loss: 0.0613 - mae: 0.1738
Epoch 1: val_loss improved from inf to 0.11164, saving model to denoised_output/best_denoising_model.keras
54/54 ━━━━━━━━━━━━━━━━━━━━ 209s 4s/step - loss: 0.0608 - mae: 0.1729 - val_loss: 0.1116 - val_mae: 0.2419 - learning_rate: 0.0010
Epoch 2/50
54/54 ━━━━━━━━━━━━━━━━━━━━ 0s 3s/step - loss: 0.0122 - mae: 0.0780
Epoch 2: val_loss did not improve from 0.11164
54/54 ━━━━━━━━━━━━━━━━━━━━ 188s 3s/step - loss: 0.0122 - mae: 0.0779 - val_loss: 0.1158 - val_mae: 0.2141 - learning_rate: 0.0010
Epoc