# Computer Vision

## Learning Objectives
By the end of this lesson, you will be able to:
- Understand how computers process images
- Build image classification models
- Use convolutional neural networks (CNNs)
- Apply pre-trained models to real problems

## Core Concepts
- **Pixel**: One of the individual dots that make up an image (like a tiny colored square)
- **CNN**: Neural network designed specifically for images
- **Feature Map**: The output of applying a learned filter to an image, showing where a pattern (edge, shape) was detected
- **Transfer Learning**: Using pre-trained models for new tasks
- **Classification**: Determining what object is in an image

## 1. Understanding Images as Data

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim
import warnings
warnings.filterwarnings('ignore')

print("👁️ COMPUTER VISION FUNDAMENTALS")

# Understanding images as numbers
print("📸 IMAGES AS DATA:")

# A hand-drawn 8x8 binary image: 1 = bright pixel, 0 = dark pixel
simple_image = np.array([
    [0, 0, 1, 1, 1, 1, 0, 0],
    [0, 1, 1, 0, 0, 1, 1, 0],
    [1, 1, 0, 0, 0, 0, 1, 1],
    [1, 0, 0, 0, 0, 0, 0, 1],
    [1, 0, 0, 0, 0, 0, 0, 1],
    [1, 1, 0, 0, 0, 0, 1, 1],
    [0, 1, 1, 0, 0, 1, 1, 0],
    [0, 0, 1, 1, 1, 1, 0, 0]
])

print("Image as numbers:")
print(simple_image)
print(f"Shape: {simple_image.shape} (8 rows, 8 columns)")

# Draw the same array twice: once plain, once with each value overlaid
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
for panel, caption in ((ax1, 'As Image'), (ax2, 'With Pixel Values')):
    panel.imshow(simple_image, cmap='gray')
    panel.set_title(caption)
ax1.axis('off')

# imshow puts columns on the x axis and rows on the y axis, hence (j, i)
for (i, j), value in np.ndenumerate(simple_image):
    ax2.text(j, i, value, ha='center', va='center', color='red')

plt.tight_layout()
plt.show()

# Color images have 3 channels (Red, Green, Blue)
print("\n🌈 COLOR IMAGES:")
color_pixel = np.array([255, 0, 0])  # Pure red
red_value, green_value, blue_value = color_pixel
print(f"Red pixel: R={red_value}, G={green_value}, B={blue_value}")

# Random 3x3 RGB image: one integer in [0, 255] per pixel and channel
color_image = np.random.randint(0, 256, size=(3, 3, 3))
print(f"Color image shape: {color_image.shape} (height, width, colors)")

# Generate synthetic image dataset for classification
print("\n🎯 CREATING IMAGE DATASET:")

def create_simple_shapes(n_samples=100, image_size=16):
    """Create flattened square images of alternating circles and squares.

    Even-indexed samples are a filled circle (label 0) and odd-indexed
    samples are a filled square (label 1). Both shapes are centred and
    scaled to the image: the circle has radius ``image_size // 4`` and the
    square leaves a margin of ``image_size // 4`` on every side.

    Args:
        n_samples: Number of images to generate.
        image_size: Side length of each image in pixels (default 16).

    Returns:
        A tuple ``(images, labels)`` where ``images`` is a float array of
        shape ``(n_samples, image_size * image_size)`` holding 0/1 pixels
        and ``labels`` is an integer array of shape ``(n_samples,)``.
    """
    center = image_size // 2
    radius = image_size // 4

    # Both shapes are identical for every sample, so render each one once.
    rows, cols = np.ogrid[:image_size, :image_size]
    circle = np.zeros((image_size, image_size))
    circle[(cols - center) ** 2 + (rows - center) ** 2 <= radius ** 2] = 1

    square = np.zeros((image_size, image_size))
    square[radius:image_size - radius, radius:image_size - radius] = 1

    # Row 0 = circle, row 1 = square; fancy indexing by label picks the
    # right template and keeps a (0, pixels) shape when n_samples is 0.
    templates = np.stack([circle.ravel(), square.ravel()])
    labels = np.arange(max(n_samples, 0)) % 2
    images = templates[labels]

    return images, labels

# Build the dataset and report its size
X_images, y_shapes = create_simple_shapes(200)
print(f"Created {len(X_images)} images")
print(f"Image shape: {X_images[0].shape} pixels")
print("Classes: 0=Circle, 1=Square")

# Preview the first eight samples on a 2x4 grid (row-major order)
fig, axes = plt.subplots(2, 4, figsize=(12, 6))
for ax, flat_pixels, label in zip(axes.flat, X_images, y_shapes):
    # Samples are stored flattened; restore the 16x16 layout for display
    image = flat_pixels.reshape(16, 16)
    shape = "Square" if label != 0 else "Circle"

    ax.imshow(image, cmap='gray')
    ax.set_title(shape)
    ax.axis('off')

plt.tight_layout()
plt.show()

for takeaway in (
    "Images are just arrays of numbers",
    "Grayscale: 2D array (height × width)",
    "Color: 3D array (height × width × channels)",
    "Each pixel value represents brightness (0-255)",
):
    print(f"✅ {takeaway}")

## 2. Convolutional Neural Networks (CNNs)

## 3. Image Classification Project

# Practice Exercises

In [None]:
# Build a Convolutional Neural Network
print("🧠 BUILDING A CNN FOR IMAGE CLASSIFICATION")

# Conv2d expects (batch, channels, height, width); the flattened 256-pixel
# rows become single-channel 16x16 images
X_tensor = torch.tensor(X_images, dtype=torch.float32).reshape(-1, 1, 16, 16)
y_tensor = torch.tensor(y_shapes, dtype=torch.long)

# Hold out 20% of the samples for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X_tensor, y_tensor, test_size=0.2, random_state=42
)

for split_name, split_data in (("Training", X_train), ("Test", X_test)):
    print(f"{split_name} data: {split_data.shape}")

class SimpleCNN(nn.Module):
    """Small CNN that maps 1x16x16 images to two class scores.

    Two 3x3 convolutions (padding 1, so spatial size is kept), each followed
    by ReLU and 2x2 max pooling, feed a two-layer classifier with dropout.
    The first linear layer is sized for 16 channels of 4x4 features, i.e.
    a 16x16 input halved twice.
    """

    def __init__(self):
        super().__init__()
        # Convolutional layers
        self.conv1 = nn.Conv2d(1, 8, kernel_size=3, padding=1)  # 1 input channel, 8 output channels
        self.conv2 = nn.Conv2d(8, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)  # Reduce size by half

        # Dense layers
        self.fc1 = nn.Linear(16 * 4 * 4, 32)  # After pooling: 16x16 -> 8x8 -> 4x4
        self.fc2 = nn.Linear(32, 2)  # 2 classes (circle, square)

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)

    def forward(self, x):
        """Return raw class scores of shape (batch, 2).

        Args:
            x: Float tensor of shape (batch, 1, 16, 16).

        Raises:
            RuntimeError: If the spatial size is not 16x16, because the
                flattened features no longer match ``fc1``.
        """
        # Convolutional layers
        x = self.pool(self.relu(self.conv1(x)))  # 16x16 -> 8x8
        x = self.pool(self.relu(self.conv2(x)))  # 8x8 -> 4x4

        # Flatten per sample. Keeping the batch dimension fixed (instead of
        # view(-1, 256)) makes a wrong input size fail in fc1 rather than
        # being silently reinterpreted as extra batch rows.
        x = x.flatten(1)

        # Dense layers
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)

        return x

# Model, loss and optimizer
model = SimpleCNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print(f"Model architecture: {model}")

# Full-batch training: every epoch is one gradient step on all of X_train
print("\n🔥 TRAINING CNN:")
losses, accuracies = [], []

for epoch in range(30):
    # Forward pass
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backward pass
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Training accuracy from the scores computed before this update
    predicted = outputs.detach().argmax(dim=1)
    accuracy = (predicted == y_train).float().mean()

    losses.append(loss.item())
    accuracies.append(accuracy.item())

    if epoch % 10 == 0:
        print(f"Epoch {epoch}: Loss = {loss.item():.4f}, Accuracy = {accuracy:.3f}")

# Evaluate on the held-out split with dropout disabled and no gradients
model.eval()
with torch.no_grad():
    test_outputs = model(X_test)
    test_predictions = test_outputs.argmax(dim=1)
    test_accuracy = (test_predictions == y_test).float().mean()

print(f"\nTest accuracy: {test_accuracy:.3f} ({test_accuracy*100:.1f}%)")

# Visualize results on a 2x3 grid: two training curves plus four samples
fig, axes = plt.subplots(2, 3, figsize=(15, 8))

# Training progress
axes[0, 0].plot(losses)
axes[0, 0].set_title('Training Loss')
axes[0, 0].set_xlabel('Epoch')

axes[0, 1].plot(accuracies)
axes[0, 1].set_title('Training Accuracy')
axes[0, 1].set_xlabel('Epoch')

# Sample predictions go in the four panels not used by the curves.
# The panels are listed explicitly: the previous row/col arithmetic
# produced row index 2, which does not exist on a 2-row grid.
sample_axes = [axes[0, 2], axes[1, 0], axes[1, 1], axes[1, 2]]
for i, ax in enumerate(sample_axes):
    if i >= len(X_test):
        # Fewer test samples than panels: leave the panel blank
        ax.axis('off')
        continue

    image = X_test[i].squeeze().numpy()
    actual = y_test[i].item()
    predicted = test_predictions[i].item()

    actual_label = "Circle" if actual == 0 else "Square"
    pred_label = "Circle" if predicted == 0 else "Square"
    # Green title for a correct prediction, red for a mistake
    color = 'green' if actual == predicted else 'red'

    ax.imshow(image, cmap='gray')
    ax.set_title(f'Actual: {actual_label}\nPredicted: {pred_label}', color=color)
    ax.axis('off')

plt.tight_layout()
plt.show()

# Practice Exercises: (heading, task) pairs printed as a blank line,
# the heading, then the task
print("\n📚 PRACTICE EXERCISES:")

practice_exercises = (
    ("Exercise 1: Add Triangle Shape",
     "Modify the dataset to include triangles (3 classes total)"),
    ("Exercise 2: Real Image Classification",
     "Try the CIFAR-10 dataset with 10 real object classes"),
    ("Exercise 3: Data Augmentation",
     "Apply rotations, flips, and zoom to increase dataset size"),
    ("Exercise 4: Transfer Learning",
     "Use a pre-trained model (ResNet, VGG) for better accuracy"),
    ("Exercise 5: Object Detection",
     "Find and locate multiple objects in the same image"),
)
for heading, task in practice_exercises:
    print(f"\n{heading}")
    print(task)

# Key insights
print("\n💡 KEY INSIGHTS:")
for insight in (
    "CNNs use convolution to detect features (edges, shapes)",
    "Pooling reduces image size while keeping important info",
    "Multiple layers learn increasingly complex patterns",
    "Data augmentation helps prevent overfitting",
    "Transfer learning saves time and improves accuracy",
):
    print(f"✅ {insight}")

print("\n🚀 NEXT STEPS:")
next_steps = (
    "Try larger, real image datasets (CIFAR-10, ImageNet)",
    "Use pre-trained models (ResNet, EfficientNet)",
    "Learn object detection and segmentation",
    "Experiment with GANs for image generation",
    "Apply computer vision to real business problems",
)
for number, step in enumerate(next_steps, start=1):
    print(f"{number}. {step}")

## Real-time Object Detection

Build an object detection system that can process images and video streams.

In [None]:
"""
COMPLETE COMPUTER VISION TUTORIAL
=================================

This tutorial teaches you computer vision from the ground up.
We'll work with images step by step and build real applications.
"""

# =============================================================================
# PART 1: UNDERSTANDING IMAGES AS DATA
# =============================================================================

print("PART 1: HOW COMPUTERS SEE IMAGES")
print("=" * 50)

# Create a simple image to understand the basics
import numpy as np
import matplotlib.pyplot as plt

# Create a simple 8x8 grayscale image (like a tiny digit)
simple_image = np.array([
    [0, 0, 1, 1, 1, 1, 0, 0],
    [0, 1, 1, 1, 1, 1, 1, 0],
    [1, 1, 1, 0, 0, 1, 1, 1],
    [1, 1, 0, 0, 0, 0, 1, 1],
    [1, 1, 0, 0, 0, 0, 1, 1],
    [1, 1, 1, 0, 0, 1, 1, 1],
    [0, 1, 1, 1, 1, 1, 1, 0],
    [0, 0, 1, 1, 1, 1, 0, 0]
])

print("Simple 8x8 image as numbers:")
print(simple_image)
print(f"Image shape: {simple_image.shape}")
print(f"Data type: {simple_image.dtype}")
print(f"Min value: {simple_image.min()}, Max value: {simple_image.max()}")

# Visualize the image
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: the image with every pixel value written on top
ax1.imshow(simple_image, cmap='gray')
ax1.set_title('Image as Pixels')
ax1.set_xlabel('Column (x)')
ax1.set_ylabel('Row (y)')

# Add text annotations showing pixel values. With cmap='gray' a 0 is drawn
# black and a 1 is drawn white, so the label colour must contrast with the
# pixel: red on black, black on white (white on white would be invisible).
for (i, j), value in np.ndenumerate(simple_image):
    ax1.text(j, i, str(value), ha='center', va='center',
             color='red' if value == 0 else 'black', fontsize=8)

# Right panel: the same image with a colorbar mapping shade to value
im = ax2.imshow(simple_image, cmap='gray')
ax2.set_title('Same Image (Grayscale)')
plt.colorbar(im, ax=ax2)

plt.tight_layout()
plt.show()

print("\nKey Insights:")
print("• Images are just 2D arrays of numbers")
print("• Each number represents pixel intensity (0=black, 1=white)")
print("• Computer vision = finding patterns in these numbers")

# =============================================================================
# PART 2: COLOR IMAGES (RGB)
# =============================================================================

print("\n\nPART 2: COLOR IMAGES - RED, GREEN, BLUE CHANNELS")
print("=" * 50)

# 4x4 pixels, 3 color channels, all black to start with
color_image = np.zeros((4, 4, 3), dtype=np.uint8)

# Fill the top two rows, one (R, G, B) triple per pixel
color_image[0] = [
    [255, 0, 0],      # Red
    [0, 255, 0],      # Green
    [0, 0, 255],      # Blue
    [255, 255, 0],    # Yellow (Red + Green)
]
color_image[1] = [
    [255, 0, 255],    # Magenta (Red + Blue)
    [0, 255, 255],    # Cyan (Green + Blue)
    [255, 255, 255],  # White (All colors)
    [128, 128, 128],  # Gray
]

print(f"Color image shape: {color_image.shape}")
top_left_pixel = color_image[0, 0]
for channel_index, channel_name in enumerate(("Red", "Green", "Blue")):
    print(f"{channel_name} channel (top-left pixel): {top_left_pixel[channel_index]}")

# One image per channel: keep that channel, leave the other two at zero
single_channel_images = []
for keep in range(3):
    isolated = np.zeros_like(color_image)
    isolated[:, :, keep] = color_image[:, :, keep]
    single_channel_images.append(isolated)
red_channel, green_channel, blue_channel = single_channel_images

# Visualize color channels: original first, then R, G, B in row-major order
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
panel_images = [color_image, red_channel, green_channel, blue_channel]
panel_titles = [
    'Original Color Image',
    'Red Channel Only',
    'Green Channel Only',
    'Blue Channel Only',
]
for ax, panel_image, panel_title in zip(axes.flat, panel_images, panel_titles):
    ax.imshow(panel_image)
    ax.set_title(panel_title)
    ax.set_xticks([])
    ax.set_yticks([])

plt.tight_layout()
plt.show()

# =============================================================================
# PART 3: IMAGE OPERATIONS AND FILTERS
# =============================================================================

print("\n\nPART 3: IMAGE FILTERS - DETECTING FEATURES", "=" * 50, sep="\n")

# 12x12 black canvas with a grid of one-pixel-wide white lines
test_image = np.zeros((12, 12))
test_image[:, [2, 5, 8]] = 1   # Vertical lines at columns 2, 5 and 8
test_image[[3, 7], :] = 1      # Horizontal lines at rows 3 and 7

print("Test image with vertical and horizontal lines:")
plt.figure(figsize=(8, 6))
plt.imshow(test_image, cmap='gray')
plt.title('Test Image with Lines')
plt.colorbar()
plt.show()

# Define filters (kernels) for edge detection
def apply_filter(image, kernel):
    """Slide a 2-D kernel over a 2-D image and return the filter response.

    The image is zero-padded by half the kernel size on each side so the
    output has the same shape as the input. Each output pixel is the sum of
    the element-wise product of the kernel and the image region under it
    (the kernel is not flipped).

    Args:
        image: 2-D array-like of pixel values.
        kernel: 2-D array-like of filter weights; may be non-square.

    Returns:
        Array with the same shape as ``image``. Its dtype is the promotion
        of the image and kernel dtypes, so a float kernel on an integer
        image is not truncated and negative responses on an unsigned image
        do not wrap around.
    """
    image = np.asarray(image)
    kernel = np.asarray(kernel)
    kernel_h, kernel_w = kernel.shape
    pad_h, pad_w = kernel_h // 2, kernel_w // 2

    # Add padding to handle edges
    padded_image = np.pad(
        image, ((pad_h, pad_h), (pad_w, pad_w)), mode='constant', constant_values=0
    )
    filtered_image = np.zeros(
        image.shape, dtype=np.result_type(image.dtype, kernel.dtype)
    )

    # Apply filter to each position
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            # Extract region under kernel
            region = padded_image[i:i + kernel_h, j:j + kernel_w]
            # Apply kernel (element-wise multiply and sum)
            filtered_image[i, j] = np.sum(region * kernel)

    return filtered_image

# Edge detection kernels: each one subtracts one side of a pixel's
# neighbourhood from the opposite side
vertical_edge_kernel = np.array([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]])
horizontal_edge_kernel = np.array([[-1, -1, -1], [0, 0, 0], [1, 1, 1]])

# Apply filters
vertical_edges = apply_filter(test_image, vertical_edge_kernel)
horizontal_edges = apply_filter(test_image, horizontal_edge_kernel)

# Combined edges: magnitude of the two directional responses
combined_edges = np.sqrt(vertical_edges**2 + horizontal_edges**2)

# Visualize results, one panel per image in row-major order
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
edge_panels = (
    (test_image, 'Original Image'),
    (vertical_edges, 'Vertical Edge Detection'),
    (horizontal_edges, 'Horizontal Edge Detection'),
    (combined_edges, 'Combined Edge Detection'),
)
for ax, (panel_image, panel_title) in zip(axes.flat, edge_panels):
    ax.imshow(panel_image, cmap='gray')
    ax.set_title(panel_title)
    ax.set_xticks([])
    ax.set_yticks([])

plt.tight_layout()
plt.show()

for line in (
    "Filter/Kernel Explanation:",
    "• Filters are small matrices that slide over the image",
    "• They detect specific features (edges, corners, textures)",
    "• Convolutional Neural Networks learn these filters automatically",
):
    print(line)

# =============================================================================
# PART 4: BUILDING A SIMPLE CONVOLUTIONAL NEURAL NETWORK
# =============================================================================

# Section banner: title line followed by a 50-character rule
print("\n\nPART 4: CONVOLUTIONAL NEURAL NETWORK (CNN)", "=" * 50, sep="\n")

# Create a simple CNN for image classification
class SimpleCNN(nn.Module):
    """CNN classifier for single-channel 28x28 images.

    Two conv + ReLU + max-pool stages halve the spatial size twice
    (28 -> 14 -> 7), then two fully connected layers with dropout in
    between produce ``num_classes`` raw scores per image.
    """

    def __init__(self, num_classes=10):
        super().__init__()

        # Feature extractors: 3x3 convolutions, padding keeps the size
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)

        # 2x2 max pooling halves height and width
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head: 32 channels * 7x7 positions = 1568 inputs
        flattened_features = 32 * 7 * 7
        self.fc1 = nn.Linear(flattened_features, 128)
        self.fc2 = nn.Linear(128, num_classes)

        # Activation and regularisation shared by the layers above
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.5)

    def forward(self, x):
        """Return raw class scores of shape (batch, num_classes)."""
        # Stage 1: 28x28 -> 14x14, stage 2: 14x14 -> 7x7
        for conv in (self.conv1, self.conv2):
            x = self.pool(self.relu(conv(x)))

        # One row of features per image
        batch_size = x.size(0)
        x = x.view(batch_size, -1)

        hidden = self.dropout(self.relu(self.fc1(x)))
        return self.fc2(hidden)

# Create and test the CNN
cnn = SimpleCNN(num_classes=10)
print("CNN Architecture:")
print(cnn)

# Test with fake image data (like MNIST: 28x28 grayscale images)
batch_size = 4
fake_images = torch.randn(batch_size, 1, 28, 28)  # 4 images, 1 channel, 28x28

print(f"\nInput shape: {fake_images.shape}")

# Forward pass through the network (eval mode disables dropout)
cnn.eval()
with torch.no_grad():
    output = cnn(fake_images)

print(f"Output shape: {output.shape}")
print(f"Output (raw scores): {output[0]}")  # First image's scores

# Convert to probabilities. torch.softmax is used because the notebook
# imports torch and torch.nn but never torch.nn.functional as F.
probabilities = torch.softmax(output, dim=1)
predicted_classes = torch.argmax(probabilities, dim=1)

print(f"Probabilities for first image: {probabilities[0]}")
print(f"Predicted class for first image: {predicted_classes[0].item()}")

# =============================================================================
# PART 5: UNDERSTANDING CONVOLUTION STEP BY STEP
# =============================================================================

# Section banner: title line followed by a 50-character rule
print("\n\nPART 5: UNDERSTANDING CONVOLUTION OPERATION", "=" * 50, sep="\n")

def visualize_convolution(image, kernel, title="Convolution"):
    """Plot an image, a kernel and the filtered result side by side.

    Args:
        image: 2-D array to filter.
        kernel: 2-D array of filter weights.
        title: Label appended to the output panel's title.

    Returns:
        The filtered image produced by ``apply_filter(image, kernel)``.
    """
    # Apply convolution
    result = apply_filter(image, kernel)

    # Create visualization
    fig, axes = plt.subplots(1, 3, figsize=(15, 4))

    # Original image
    axes[0].imshow(image, cmap='gray')
    axes[0].set_title('Input Image')
    axes[0].grid(True, alpha=0.3)

    # Kernel, on a colour scale symmetric around zero. The limit is at
    # least 1 and grows with the largest weight, so kernels with weights
    # outside [-1, 1] are not saturated.
    color_limit = max(1.0, float(np.abs(kernel).max()))
    im1 = axes[1].imshow(kernel, cmap='RdBu', vmin=-color_limit, vmax=color_limit)
    axes[1].set_title('Filter/Kernel')
    plt.colorbar(im1, ax=axes[1])

    # Add values to kernel visualization (x = column, y = row)
    for (i, j), weight in np.ndenumerate(kernel):
        axes[1].text(j, i, f'{weight:.1f}', ha='center', va='center')

    # Result
    im2 = axes[2].imshow(result, cmap='gray')
    axes[2].set_title(f'Output: {title}')
    plt.colorbar(im2, ax=axes[2])

    for ax in axes:
        ax.set_xticks([])
        ax.set_yticks([])

    plt.tight_layout()
    plt.show()

    return result

# Filter bank to compare: display name -> 3x3 kernel
filters = {
    'Vertical Edges': np.array([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]]),
    'Horizontal Edges': np.array([[-1, -1, -1], [0, 0, 0], [1, 1, 1]]),
    'Blur': np.array([[1, 1, 1], [1, 1, 1], [1, 1, 1]]) / 9,
    'Sharpen': np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]]),
}

# 16x16 test image made of three white rectangles on black
interesting_image = np.zeros((16, 16))
white_regions = (
    np.s_[3:6, 2:14],    # Horizontal bar
    np.s_[2:14, 8:11],   # Vertical bar
    np.s_[10:13, 3:7],   # Small rectangle
)
for region in white_regions:
    interesting_image[region] = 1

print("Testing different filters on the same image:")
for filter_name in filters:
    kernel = filters[filter_name]
    result = visualize_convolution(interesting_image, kernel, filter_name)

# =============================================================================
# PART 6: FEATURE MAPS AND WHAT CNNs LEARN
# =============================================================================

# Section banner: title line followed by a 50-character rule
print("\n\nPART 6: WHAT CNNs LEARN - FEATURE MAPS", "=" * 50, sep="\n")

# Simulate what different layers of a CNN might detect
def create_feature_examples():
    """Create four 8x8 binary test patterns for probing filters.

    Returns:
        Dict mapping a pattern name to an 8x8 float array of 0/1 pixels,
        in this order: 'Vertical Line', 'Corner', 'Cross', 'Circle'.
    """
    size = 8
    center = 4

    # Vertical line down column 4
    vertical_line = np.zeros((size, size))
    vertical_line[:, center] = 1

    # Corner: a vertical stroke and a horizontal stroke meeting at (5, 2)
    corner = np.zeros((size, size))
    corner[2:6, 2] = 1
    corner[5, 2:6] = 1

    # Cross: full row and full column through the centre
    cross = np.zeros((size, size))
    cross[:, center] = 1
    cross[center, :] = 1

    # Circle (approximate): pixels whose distance from the centre is
    # between 2 and 3, compared on squared distances to stay in integers
    rows, cols = np.ogrid[:size, :size]
    distance_sq = (rows - center) ** 2 + (cols - center) ** 2
    circle = ((distance_sq >= 4) & (distance_sq <= 9)).astype(float)

    return {
        'Vertical Line': vertical_line,
        'Corner': corner,
        'Cross': cross,
        'Circle': circle,
    }

test_patterns = create_feature_examples()

# Kernel used for the responses below. create_feature_examples() returns
# only the patterns, so the filter has to exist at module level here.
edge_detector = np.array([[-1, 0, 1], [-1, 0, 1], [-1, 0, 1]])

# Visualize what different "neurons" detect.
# Layout: each pattern takes two adjacent panels (pattern, response),
# two patterns per row on a 2x4 grid.
fig, axes = plt.subplots(2, 4, figsize=(16, 8))

for idx, (pattern_name, pattern) in enumerate(test_patterns.items()):
    row = idx // 2
    col = (idx % 2) * 2

    # Show original pattern
    axes[row, col].imshow(pattern, cmap='gray')
    axes[row, col].set_title(f'{pattern_name}')

    # Show edge detection response
    edge_response = apply_filter(pattern, edge_detector)
    axes[row, col + 1].imshow(edge_response, cmap='RdBu')
    axes[row, col + 1].set_title(f'{pattern_name} - Edge Response')

for ax in axes.flat:
    ax.set_xticks([])
    ax.set_yticks([])

plt.tight_layout()
plt.show()

print("CNN Feature Hierarchy:")
print("• Layer 1: Detects simple features (edges, corners)")
print("• Layer 2: Combines simple features into patterns (shapes)")
print("• Layer 3: Combines patterns into parts (eyes, wheels)")
print("• Layer 4: Combines parts into objects (faces, cars)")

# =============================================================================
# PART 7: TRANSFER LEARNING - USING PRE-TRAINED MODELS
# =============================================================================

# Section banner: title line followed by a 50-character rule
print("\n\nPART 7: TRANSFER LEARNING CONCEPT", "=" * 50, sep="\n")

# Transfer learning: use models trained on large datasets for your task
for line in (
    "Transfer Learning Process:",
    "1. Start with pre-trained model (trained on millions of images)",
    "2. Remove the final classification layer",
    "3. Add your own classification layer for your specific task",
    "4. Fine-tune on your data",
):
    print(line)
# Simulate transfer learning architecture
class TransferLearningModel(nn.Module):
    """Toy transfer-learning setup: frozen conv backbone + trainable head.

    The backbone stands in for a pre-trained network (here it is freshly
    initialised); its parameters are excluded from training. Only the
    classifier head, which outputs ``num_classes_new_task`` scores, keeps
    ``requires_grad=True``.
    """

    def __init__(self, num_classes_new_task=5):
        super().__init__()

        # Backbone for 3-channel images: two conv/ReLU/pool stages, then
        # global average pooling down to one value per channel
        backbone_layers = [
            nn.Conv2d(3, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.AdaptiveAvgPool2d((1, 1)),
        ]
        self.feature_extractor = nn.Sequential(*backbone_layers)

        # Head for the new task: 128 pooled features -> class scores
        head_layers = [
            nn.Flatten(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(64, num_classes_new_task),
        ]
        self.classifier = nn.Sequential(*head_layers)

        # Freeze the backbone so the optimizer leaves these weights alone
        for backbone_param in self.feature_extractor.parameters():
            backbone_param.requires_grad = False

    def forward(self, x):
        """Return class scores of shape (batch, num_classes_new_task)."""
        return self.classifier(self.feature_extractor(x))

# Create transfer learning model
transfer_model = TransferLearningModel(num_classes_new_task=5)

# Tally parameter counts, split by whether the parameter will be trained
total_params = 0
trainable_params = 0
for param in transfer_model.parameters():
    total_params += param.numel()
    if param.requires_grad:
        trainable_params += param.numel()
frozen_params = total_params - trainable_params
trainable_share = trainable_params / total_params

print("\nTransfer Learning Model:")
print(f"Total parameters: {total_params:,}")
print(f"Trainable parameters: {trainable_params:,}")
print(f"Frozen parameters: {frozen_params:,}")
print(f"Training efficiency: Only {trainable_share:.1%} of parameters need training!")

# Run a random batch through the model to confirm the output shape
sample_rgb_images = torch.randn(2, 3, 32, 32)  # 2 RGB images, 32x32
output = transfer_model(sample_rgb_images)
for tensor_name, tensor in (("Input", sample_rgb_images), ("Output", output)):
    print(f"{tensor_name} shape: {tensor.shape}")

# =============================================================================
# PART 8: OBJECT DETECTION CONCEPT
# =============================================================================

# Section banner: title line followed by a 50-character rule
print("\n\nPART 8: OBJECT DETECTION - FINDING OBJECTS IN IMAGES", "=" * 50, sep="\n")

# Object detection = Classification + Localization
for line in (
    "Object Detection Tasks:",
    "1. Classification: What objects are in the image?",
    "2. Localization: Where are these objects located?",
    "3. Output: Bounding boxes + class labels",
):
    print(line)

# Simulate object detection on a simple image
def simulate_object_detection():
    """Draw a toy scene with three objects and their bounding boxes.

    The scene is a 20x20 array with three bright rectangles. The
    "detections" are hard-coded, not produced by a model.

    Returns:
        List of dicts with keys 'class', 'confidence' and 'bbox'. Each
        bbox is ``[row, col, height, width]`` in array-index order, matching
        the slices used to paint the objects into the scene.
    """
    # Create a simple scene
    scene = np.zeros((20, 20))

    # Add objects (indexed as scene[rows, cols])
    scene[5:8, 5:8] = 0.7    # Object 1 (car)
    scene[12:15, 10:13] = 0.9  # Object 2 (person)
    scene[2:4, 15:17] = 0.5   # Object 3 (bike)

    # Simulate detection results
    detections = [
        {'class': 'car', 'confidence': 0.95, 'bbox': [5, 5, 3, 3]},
        {'class': 'person', 'confidence': 0.87, 'bbox': [12, 10, 3, 3]},
        {'class': 'bike', 'confidence': 0.72, 'bbox': [2, 15, 2, 2]}
    ]

    # Visualize
    plt.figure(figsize=(10, 5))

    plt.subplot(1, 2, 1)
    plt.imshow(scene, cmap='gray')
    plt.title('Input Image')
    plt.grid(True, alpha=0.3)

    plt.subplot(1, 2, 2)
    plt.imshow(scene, cmap='gray')

    # Draw bounding boxes
    colors = ['red', 'blue', 'green']
    for color, detection in zip(colors, detections):
        # bbox is in (row, col) order, but matplotlib wants (x, y) =
        # (col, row); unpacking it as x, y would transpose every box
        # whose row and column differ.
        row, col, height, width = detection['bbox']
        confidence = detection['confidence']
        class_name = detection['class']

        # Draw rectangle; pixel (row, col) spans col-0.5..col+0.5 in x
        rect = plt.Rectangle((col - 0.5, row - 0.5), width, height, linewidth=2,
                             edgecolor=color, facecolor='none')
        plt.gca().add_patch(rect)

        # Add label just above the box
        plt.text(col, row - 1, f"{class_name} {confidence:.2f}",
                 color=color, fontweight='bold', fontsize=8)

    plt.title('Object Detection Results')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    return detections

# Run the simulation, then list each detection on its own line
detections = simulate_object_detection()

print("Detection Results:")
for found in detections:
    label, score, box = found['class'], found['confidence'], found['bbox']
    print(f"• {label}: {score:.2%} confidence at {box}")

# =============================================================================
# SUMMARY AND NEXT STEPS
# =============================================================================

# Closing banner: title line followed by a 60-character rule
print("\n\nSUMMARY: COMPUTER VISION CONCEPTS MASTERED", "=" * 60, sep="\n")

# Recap text, printed verbatim
summary_text = """
✅ Fundamental Concepts:
   • Images as numerical data (pixels, channels)
   • Color spaces (RGB) and grayscale conversion
   • Image filtering and feature detection
   • Convolution operation and kernels
   • Feature maps and hierarchical learning

✅ Neural Network Architectures:
   • Convolutional Neural Networks (CNNs)
   • Pooling layers and dimensionality reduction
   • Transfer learning and pre-trained models
   • Feature extraction vs fine-tuning

✅ Practical Applications:
   • Image classification (categorizing images)
   • Object detection (finding and locating objects)
   • Feature detection (edges, corners, textures)
   • Transfer learning for custom tasks

✅ Technical Skills:
   • Building CNNs with PyTorch
   • Applying image filters and transformations
   • Understanding feature hierarchies
   • Implementing transfer learning

🎯 Next Steps:
   • Work with real datasets (MNIST, CIFAR-10, ImageNet)
   • Explore advanced architectures (ResNet, EfficientNet, YOLO)
   • Learn about data augmentation techniques
   • Build end-to-end computer vision applications
"""
print(summary_text)

for closing_line in (
    "🎉 Congratulations! You now understand how computers see and interpret images!",
    "You're ready to build real computer vision applications!",
):
    print(closing_line)