In [None]:
# Essential imports for CNN exploration
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import cv2
from skimage import data, filters, feature
import seaborn as sns
from scipy import ndimage, signal
import warnings
# Install a blanket "ignore" filter so library warnings do not clutter cell output.
# NOTE(review): this also hides deprecation warnings raised by every later cell —
# confirm that is intended for this tutorial.
warnings.filterwarnings('ignore')

# Set style for better visualizations
# NOTE(review): the 'seaborn-v0_8' style name presumably only exists in recent
# Matplotlib releases — verify against the environment this notebook targets.
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# Report the library versions in use so outputs can be tied to an environment.
print("🚀 Ready to explore CNNs!")
print(f"TensorFlow version: {tf.__version__}")
print(f"OpenCV version: {cv2.__version__}")

# Set random seeds for reproducibility
# Both NumPy's global generator and TensorFlow's global seed get the same value.
np.random.seed(42)
tf.random.set_seed(42)


In [None]:
# Let's implement convolution from scratch to understand the operation
def conv2d_manual(image, kernel, stride=1, padding=0):
    """
    Manual implementation of 2D convolution

    As in CNN layers, the kernel is slid over the image *without* being
    flipped (strictly speaking this is cross-correlation). Every output
    value is the sum of the element-wise product of the kernel and the
    image window underneath it.

    Args:
        image: Input image (2D numpy array or nested sequence)
        kernel: Convolution kernel/filter (2D numpy array or nested sequence)
        stride: Step between neighbouring windows, used for both axes (>= 1)
        padding: Number of zero rows/columns added on every side (>= 0)

    Returns:
        Convolved feature map as a float64 array of shape
        ((H + 2*padding - kH) // stride + 1, (W + 2*padding - kW) // stride + 1)

    Raises:
        ValueError: If image or kernel is not 2D, stride < 1, padding < 0,
            or the kernel is empty or does not fit inside the padded image.
    """
    image = np.asarray(image)
    kernel = np.asarray(kernel)

    # Fail early with a clear message instead of an unpacking error,
    # a ZeroDivisionError, or NumPy's "negative dimensions" error.
    if image.ndim != 2 or kernel.ndim != 2:
        raise ValueError(
            f"image and kernel must both be 2D, got shapes {image.shape} and {kernel.shape}"
        )
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")
    if padding < 0:
        raise ValueError(f"padding must be >= 0, got {padding}")

    # Add padding if specified (zeros on all four sides)
    if padding > 0:
        image = np.pad(image, padding, mode='constant', constant_values=0)

    # Get dimensions (of the padded image)
    img_h, img_w = image.shape
    kernel_h, kernel_w = kernel.shape

    if kernel.size == 0 or kernel_h > img_h or kernel_w > img_w:
        raise ValueError(
            f"kernel of shape {kernel.shape} must be non-empty and fit inside "
            f"the padded image of shape {image.shape}"
        )

    # Calculate output dimensions
    out_h = (img_h - kernel_h) // stride + 1
    out_w = (img_w - kernel_w) // stride + 1

    # Initialize output
    output = np.zeros((out_h, out_w))

    # Perform convolution: one output cell per kernel placement
    for i in range(out_h):
        h_start = i * stride
        for j in range(out_w):
            w_start = j * stride

            # Region of the image currently covered by the kernel
            window = image[h_start:h_start + kernel_h, w_start:w_start + kernel_w]

            # Element-wise multiplication and sum
            output[i, j] = np.sum(window * kernel)

    return output

# Build a small random "image" so every kernel has something to respond to
np.random.seed(42)
sample_image = np.random.rand(8, 8)

# Hand-crafted 3x3 kernels, keyed by the label used in the plot titles
kernels = {
    'Edge Detection (Sobel X)': np.array([
        [-1, 0, 1],
        [-2, 0, 2],
        [-1, 0, 1],
    ]),
    'Edge Detection (Sobel Y)': np.array([
        [-1, -2, -1],
        [0, 0, 0],
        [1, 2, 1],
    ]),
    'Blur (Average)': np.ones((3, 3)) / 9,
    'Sharpen': np.array([
        [0, -1, 0],
        [-1, 5, -1],
        [0, -1, 0],
    ]),
    'Identity': np.array([
        [0, 0, 0],
        [0, 1, 0],
        [0, 0, 0],
    ]),
}

print(
    "🔍 Manual Convolution Demonstration:",
    "=" * 50,
    f"Original image shape: {sample_image.shape}",
    sep="\n",
)

# 2x3 grid: the untouched image first, then one panel per kernel
fig, axes = plt.subplots(2, 3, figsize=(15, 10))
fig.suptitle('Convolution with Different Kernels', fontsize=16, fontweight='bold')

axes[0, 0].imshow(sample_image, cmap='gray')
axes[0, 0].set_title('Original Image')
axes[0, 0].axis('off')

for idx, (name, kernel) in enumerate(list(kernels.items())[:5]):
    # Slot 0 holds the original image, so kernel number idx goes to flat slot idx + 1
    row, col = divmod(idx + 1, 3)

    # padding=1 keeps the 3x3 result the same size as the input
    convolved = conv2d_manual(sample_image, kernel, padding=1)

    axes[row, col].imshow(convolved, cmap='gray')
    axes[row, col].set_title(f'{name}\nOutput: {convolved.shape}')
    axes[row, col].axis('off')

    print(f"{name}: {sample_image.shape} -> {convolved.shape}")

plt.tight_layout()
plt.show()

print(
    "\n📊 Key Observations:",
    "• Convolution creates feature maps that highlight specific patterns",
    "• Different kernels detect different features (edges, blur, etc.)",
    "• Output size depends on input size, kernel size, stride, and padding",
    "• Each kernel acts as a feature detector",
    sep="\n",
)


In [None]:
# Implement pooling operations from scratch
def max_pool2d(feature_map, pool_size=2, stride=2):
    """
    Max pooling written out by hand.

    A square window of side ``pool_size`` is moved across the feature map
    in steps of ``stride``; each output cell holds the largest value seen
    in its window. Rows/columns that do not fill a complete window at the
    bottom/right edge are dropped.

    Args:
        feature_map: Input feature map (2D array)
        pool_size: Side length of the square pooling window
        stride: Step between consecutive windows (both axes)

    Returns:
        Pooled feature map, a float64 array of shape
        ((H - pool_size) // stride + 1, (W - pool_size) // stride + 1)
    """
    rows, cols = feature_map.shape
    pooled_shape = (
        (rows - pool_size) // stride + 1,
        (cols - pool_size) // stride + 1,
    )
    pooled = np.zeros(pooled_shape)

    # Visit every output cell in row-major order
    for r, c in np.ndindex(*pooled_shape):
        top = r * stride
        left = c * stride
        window = feature_map[top:top + pool_size, left:left + pool_size]

        # Keep only the strongest activation in the window
        pooled[r, c] = np.max(window)

    return pooled

def avg_pool2d(feature_map, pool_size=2, stride=2):
    """
    Average pooling written out by hand.

    A square window of side ``pool_size`` is moved across the feature map
    in steps of ``stride``; each output cell holds the mean of the values
    in its window. Rows/columns that do not fill a complete window at the
    bottom/right edge are dropped.

    Args:
        feature_map: Input feature map (2D array)
        pool_size: Side length of the square pooling window
        stride: Step between consecutive windows (both axes)

    Returns:
        Pooled feature map, a float64 array of shape
        ((H - pool_size) // stride + 1, (W - pool_size) // stride + 1)
    """
    n_rows, n_cols = feature_map.shape
    pooled_rows = (n_rows - pool_size) // stride + 1
    pooled_cols = (n_cols - pool_size) // stride + 1
    pooled = np.zeros((pooled_rows, pooled_cols))

    # Work out the row and column extent of every window up front
    row_windows = [slice(r * stride, r * stride + pool_size) for r in range(pooled_rows)]
    col_windows = [slice(c * stride, c * stride + pool_size) for c in range(pooled_cols)]

    for r, row_span in enumerate(row_windows):
        for c, col_span in enumerate(col_windows):
            # Summarise the window by its mean value
            pooled[r, c] = np.mean(feature_map[row_span, col_span])

    return pooled

# Build a small CNN with the Keras Sequential API (ready-made tf.keras layers, not hand-written ops)
def create_simple_cnn(input_shape, num_classes):
    """
    Create a simple CNN architecture

    The network stacks three Conv2D -> ReLU -> MaxPooling2D blocks
    (32, 64 and 128 filters, 3x3 kernels, 2x2 pooling), then flattens and
    classifies with Dense(512) -> ReLU -> Dropout(0.5) -> softmax.
    Layer names (conv1, relu1, pool1, ..., output) are fixed so individual
    layers can be looked up by name.

    Args:
        input_shape: Shape of input images (height, width, channels)
        num_classes: Number of output classes

    Returns:
        Uncompiled Keras Sequential model. The caller is responsible for
        calling ``model.compile(...)`` before training or evaluation.
    """
    model = keras.Sequential([
        # Declare the input explicitly instead of passing `input_shape=` to the
        # first layer; the model is still built immediately, so summary() works.
        keras.Input(shape=input_shape),

        # First convolutional block
        layers.Conv2D(32, (3, 3), name='conv1'),
        layers.Activation('relu', name='relu1'),
        layers.MaxPooling2D((2, 2), name='pool1'),

        # Second convolutional block
        layers.Conv2D(64, (3, 3), name='conv2'),
        layers.Activation('relu', name='relu2'),
        layers.MaxPooling2D((2, 2), name='pool2'),

        # Third convolutional block
        layers.Conv2D(128, (3, 3), name='conv3'),
        layers.Activation('relu', name='relu3'),
        layers.MaxPooling2D((2, 2), name='pool3'),

        # Flatten and dense layers
        layers.Flatten(name='flatten'),
        layers.Dense(512, name='dense1'),
        layers.Activation('relu', name='relu4'),
        # Dropout is only active during training
        layers.Dropout(0.5, name='dropout'),
        layers.Dense(num_classes, activation='softmax', name='output')
    ])

    return model

# Demonstrate pooling operations
# Hand-written 8x8 feature map so the effect of each pooling window is easy to follow
test_feature_map = np.array(
    [
        [1, 3, 2, 4, 1, 2, 3, 1],
        [2, 8, 7, 1, 4, 5, 2, 3],
        [3, 2, 9, 6, 2, 1, 4, 2],
        [1, 4, 3, 8, 3, 6, 1, 4],
        [4, 1, 2, 5, 7, 2, 8, 1],
        [2, 6, 4, 1, 2, 9, 3, 5],
        [3, 2, 7, 8, 1, 4, 6, 2],
        [1, 5, 2, 3, 4, 1, 2, 7],
    ],
    dtype=float,
)

# Non-overlapping 2x2 windows for both pooling flavours
max_pooled = max_pool2d(test_feature_map, pool_size=2, stride=2)
avg_pooled = avg_pool2d(test_feature_map, pool_size=2, stride=2)


def _show_feature_map(ax, values, title):
    """Draw one heat-map panel with labelled axes and its own colorbar; return the image."""
    image = ax.imshow(values, cmap='viridis', interpolation='nearest')
    ax.set_title(title)
    ax.set_xlabel('Width')
    ax.set_ylabel('Height')
    plt.colorbar(image, ax=ax)
    return image


# Side-by-side comparison: input, max pooled, average pooled
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
fig.suptitle('Pooling Operations Comparison', fontsize=16, fontweight='bold')

im1 = _show_feature_map(
    axes[0], test_feature_map, f'Original Feature Map\n{test_feature_map.shape}'
)

# Thin white lines on the cell borders make individual values easy to pick out
for i in range(test_feature_map.shape[0] + 1):
    axes[0].axhline(i - 0.5, color='white', linewidth=0.5)
for j in range(test_feature_map.shape[1] + 1):
    axes[0].axvline(j - 0.5, color='white', linewidth=0.5)

im2 = _show_feature_map(axes[1], max_pooled, f'Max Pooled\n{max_pooled.shape}')
im3 = _show_feature_map(axes[2], avg_pooled, f'Average Pooled\n{avg_pooled.shape}')

plt.tight_layout()
plt.show()

# Instantiate the network for CIFAR-10 like input and attach the training configuration
model = create_simple_cnn(input_shape=(32, 32, 3), num_classes=10)  # CIFAR-10 like input
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

print("\n🏗️ CNN Architecture Summary:", "=" * 50, sep="\n")
model.summary()

# Recap how much the pooling step shrank the hand-written feature map
print(
    "\n📊 Pooling Results:",
    f"Original size: {test_feature_map.shape}",
    f"Max pooled size: {max_pooled.shape}",
    f"Average pooled size: {avg_pooled.shape}",
    f"Size reduction: {test_feature_map.size} -> {max_pooled.size} ({max_pooled.size/test_feature_map.size:.1%})",
    sep="\n",
)

print(
    "\n🔍 Key Insights:",
    "• Max pooling preserves strongest activations (important features)",
    "• Average pooling provides smoother representation",
    "• Both reduce spatial dimensions while preserving important information",
    "• Pooling provides translation invariance",
    sep="\n",
)
