# Implementation: Zero Convolution

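**Goal**: Initialize a new layer so that it initially contributes nothing: its output is all zeros, so adding it to an existing feature map leaves that feature map unchanged (the residual sum acts as an identity).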

In [None]:
import torch
import torch.nn as nn

class ZeroConv2d(nn.Module):
    """A 1x1 convolution whose parameters all start at zero.

    Because both the kernel and the bias are zeroed at construction time,
    a freshly built layer maps any finite input to an all-zero tensor with
    ``out_channels`` channels. The parameters remain ordinary trainable
    ``nn.Conv2d`` parameters, so an optimizer can move them away from zero.

    Args:
        in_channels: Number of channels in the input feature map.
        out_channels: Number of channels in the produced feature map.
    """

    def __init__(self, in_channels: int, out_channels: int) -> None:
        super().__init__()
        self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)

        # The wrapped conv owns exactly two parameters (weight and bias);
        # overwrite each of them in place with zeros.
        for param in self.conv.parameters():
            nn.init.zeros_(param)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the wrapped 1x1 convolution to ``x`` and return the result."""
        projected = self.conv(x)
        return projected

# 1. Setup
# Two random feature maps of the same shape: one stands in for the features
# of the existing model, the other for the features of the new control branch.
feature_shape = (1, 32, 64, 64)
original_features = torch.randn(feature_shape)
control_features = torch.randn(feature_shape)

# Channel count (32) matches dimension 1 of the feature maps above.
zero_layer = ZeroConv2d(32, 32)

# 2. Forward
# The control branch is merged residually: Output = Original + Zero(Control).
# A freshly constructed zero layer emits only zeros, so at this point the sum
# equals the original features and the existing model's signal is untouched
# at step 0.
control_contribution = zero_layer(control_features)
output = torch.add(original_features, control_contribution)

# Total absolute deviation of the merged output from the original features.
total_abs_difference = torch.sum(torch.abs(output - original_features)).item()
print(f"Difference from Original: {total_abs_difference}")
print("Training will slowly make the ZeroConv non-zero.")

## Conclusion
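Zero Convolution is the secret sauce that allows new layers to be attached to a massive pre-trained model and fine-tuned without 'Catastrophic Forgetting': because the new branch contributes exactly zero at step 0, the model's original outputs are preserved when training starts, and the branch's influence grows only as its weights are learned. (In practice this is usually combined with keeping the pre-trained weights frozen.)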