In [1]:
import torch 
import torch.nn as nn 
import torch.optim as optim 
import torch.nn.functional as F 


In [2]:
class CustomCNN(nn.Module):
    """Small two-convolution CNN classifier for 28x28 images.

    Layout: Conv3x3(16) -> ReLU -> MaxPool2 -> Conv3x3(32) -> ReLU -> MaxPool2
            -> Linear(32*7*7, 128) -> ReLU -> Linear(128, num_classes)

    Args:
        in_channel: Number of channels of the input images (1 for grayscale).
        num_classes: Number of output logits.

    The first fully connected layer is sized for 28x28 inputs: both 3x3
    convolutions use padding=1 (spatial size preserved) and each of the two
    2x2 max-pools halves it, giving 28 -> 14 -> 7.
    """

    def __init__(self, in_channel=1, num_classes=10):  # grayscale: 1 (in_channel)
        super(CustomCNN, self).__init__()
        # Honour the `in_channel` argument instead of hard-coding a single channel.
        self.convolution1 = nn.Conv2d(in_channels=in_channel, out_channels=16, kernel_size=3, padding=1)
        self.convolution2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.fully_conn1 = nn.Linear(32 * 7 * 7, 128)   # Assumption: img_dim: 28x28 (standard)
        self.fully_conn2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return raw class logits of shape (batch, num_classes) for a batch of images."""
        x = F.relu(self.convolution1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.convolution2(x))
        x = F.max_pool2d(x, 2)
        # Flatten everything except the batch dimension. `view` is a Tensor
        # method; torch.nn.functional has no `view`, so `F.view` raised
        # AttributeError on every forward pass.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fully_conn1(x))
        x = self.fully_conn2(x)
        return x

In [3]:
def _normal_weights(w):
    if isinstance(w, nn.Conv2d) or isinstance(w,nn.Linear):
        nn.init.normal_(w.weight, mean=0.0, std=1.0)
        if w.bias is not None:
            nn.init.constant_(w.bias,0) 

In [4]:

# Build the CNN with its default arguments (in_channel=1, num_classes=10).
model = CustomCNN()

# Module.apply runs _normal_weights on every submodule and on the model itself.
# It returns the module, which is why the architecture is echoed as cell output.
model.apply(_normal_weights)

CustomCNN(
  (convolution1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (convolution2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fully_conn1): Linear(in_features=1568, out_features=128, bias=True)
  (fully_conn2): Linear(in_features=128, out_features=10, bias=True)
)

In [5]:
# Sanity check: the sample mean/std of the first conv layer's weights should be
# near the mean=0.0 / std=1.0 requested in _normal_weights. The layer has only
# 16*1*3*3 = 144 weights, so some deviation from the exact values is expected.
print(model.convolution1.weight.mean(), model.convolution1.weight.std()) 

tensor(0.0788, grad_fn=<MeanBackward0>) tensor(1.0611, grad_fn=<StdBackward0>)


In [6]:
# Peek at the first 10 flattened weights and biases of convolution1.
# The biases should all be zero because _normal_weights sets them to 0.
print("Weights of convplution 1:\n",model.convolution1.weight.view(-1)[:10])
print("Bias of convolution 1:\n",model.convolution1.bias.view(-1)[:10]) 

Weights of convplution 1:
 tensor([ 1.2457, -0.7043,  0.5459, -0.9097,  1.4759, -1.4047,  0.6211,  1.0741,
        -0.5830, -0.1025], grad_fn=<SliceBackward0>)
Bias of convolution 1:
 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SliceBackward0>)


In [7]:
# Same inspection for the second conv layer: first 10 flattened weights and
# biases (biases are expected to be zero, as set by _normal_weights).
print("Weights of convplution 2:\n",model.convolution2.weight.view(-1)[:10])
print("Bias of convolution 2:\n",model.convolution2.bias.view(-1)[:10]) 

Weights of convplution 2:
 tensor([ 0.2568, -0.9365,  0.3769, -0.0768, -0.1548, -1.0791,  0.4776, -0.2012,
         0.5727, -1.0997], grad_fn=<SliceBackward0>)
Bias of convolution 2:
 tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], grad_fn=<SliceBackward0>)


In [8]:
# Placeholder cell: executes nothing. TODO: remove before sharing the notebook.
pass

In [4]:
import torch 
import torch.nn as nn 

### Implementation of VGG16 Architecture using custom normal weight initialization 

In [5]:
class VGG16(nn.Module):
    """VGG16-style image classifier with a custom weight initialisation.

    The feature extractor has 13 3x3 convolutions (padding=1, each followed by
    ReLU) arranged in five blocks of 64, 128, 256, 512 and 512 filters, with a
    2x2 max-pool closing every block. An adaptive average pool then fixes the
    spatial size to 7x7 so the three-layer classifier always receives
    512*7*7 features.

    Args:
        num_classes: Number of output logits produced by the last linear layer.
        dropout: Dropout probability used after the first two classifier layers.

    Input is expected to have 3 channels (the first convolution is Conv2d(3, 64)).
    """

    def __init__(self, num_classes=1000, dropout=0.5):
        super(VGG16, self).__init__()
        self.features = nn.Sequential(
            # Block 1: 64 filters
            nn.Conv2d(3, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 2: 128 filters
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 3: 256 filters
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 4: 512 filters
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),

            # Block 5: 512 filters
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(512, 512, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Fixes the feature-map size to 7x7 regardless of the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        self.classifier = nn.Sequential(
            # First FC layer
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(dropout),

            # Second FC layer
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(dropout),

            # Classification layer
            nn.Linear(4096, num_classes)
        )
        self.initialize_weights()

    # NOTE: `forward` must be a method of the class. It was previously defined
    # as a local function inside __init__, which left the module without a
    # forward pass (calling the model raised NotImplementedError).
    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for a batch of images."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # keep the batch dimension, flatten the rest
        x = self.classifier(x)
        return x

    def initialize_weights(self):
        """Re-initialise every Conv2d/Linear layer of the model in place.

        Weights are drawn from N(0, 1) and then min-max rescaled per layer so
        that they span exactly [0, 1]; biases (when present) are set to 0.1.

        NOTE(review): after the rescale all weights are non-negative, so the
        result is no longer zero-mean. This is kept because it is the scheme
        the notebook demonstrates.
        """
        for m in self.modules():
            if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
                with torch.no_grad():
                    m.weight.normal_(0, 1)
                    min_val = m.weight.min()
                    max_val = m.weight.max()
                    # Per-layer min-max normalisation to the [0, 1] range.
                    m.weight.data = (m.weight - min_val) / (max_val - min_val)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.1)

def create_vgg16(num_classes=1000, pretrained=False):
    """Factory for a freshly initialised VGG16.

    Args:
        num_classes: Number of output classes forwarded to ``VGG16``.
        pretrained: Accepted for call-site compatibility only. This notebook
            has no pretrained weights to load, so a truthy value triggers a
            warning instead of being silently ignored.

    Returns:
        A ``VGG16`` instance whose weights come from ``VGG16.initialize_weights``.
    """
    if pretrained:
        # Function-scope import keeps this block self-contained.
        import warnings
        warnings.warn(
            "create_vgg16: pretrained weights are not available; "
            "returning a freshly initialised model.",
            stacklevel=2,
        )
    return VGG16(num_classes=num_classes)

# Smoke test: build the default 1000-class VGG16 and print its layer structure.
# Inside a notebook kernel __name__ is "__main__", so this block always runs.
# NOTE: this rebinds the global `model` (previously the CustomCNN instance).
if __name__ == "__main__":
    print("=" * 50)
    print("VGG16 Architecture Test")
    print("=" * 50)
    model = create_vgg16(num_classes=1000)
    print("\nVGG16 Model Architecture:")
    print(model)

VGG16 Architecture Test

VGG16 Model Architecture:
VGG16(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): Max

In [6]:
# `model` is the VGG16 built in the previous cell; features[0] is its first Conv2d.
first_convolution = model.features[0]
print("First Conv2d Layer Weights (flattened):") 
# Show the first 20 flattened weights. They lie in [0, 1] because
# VGG16.initialize_weights min-max rescales each layer's weights.
print(first_convolution.weight.view(-1)[:20]) 

First Conv2d Layer Weights (flattened):
tensor([0.4210, 0.5411, 0.5408, 0.5490, 0.4543, 0.8152, 0.3289, 0.4788, 0.4554,
        0.5310, 0.7891, 0.3671, 0.5218, 0.5192, 0.5168, 0.3257, 0.3718, 0.6717,
        0.9036, 0.5286], grad_fn=<SliceBackward0>)


### Implementation of DenseNet Architecture using custom normal weight initialization 

In [14]:
class DenseLayer(nn.Module):
    """One BN -> ReLU -> 3x3 Conv unit whose output is appended to its input.

    Args:
        in_channels: Channel count of the incoming feature map.
        growth_rate: Number of new channels the convolution produces.

    The forward pass returns a tensor with ``in_channels + growth_rate``
    channels: the untouched input followed by the newly computed features.
    """

    def __init__(self, in_channels, growth_rate):
        super().__init__()
        self.bn = nn.BatchNorm2d(in_channels)
        self.relu = nn.ReLU(inplace=True)
        # padding=1 with a 3x3 kernel keeps the spatial size, so the result
        # can be concatenated with the input along the channel axis.
        self.conv = nn.Conv2d(
            in_channels,
            growth_rate,
            kernel_size=3,
            padding=1,
            bias=False,
        )

    def forward(self, x):
        normalized = self.bn(x)
        activated = self.relu(normalized)
        new_features = self.conv(activated)
        # dim=1 is the channel dimension
        return torch.cat([x, new_features], 1)

class DenseBlock(nn.Module):
    """A sequential stack of ``num_layers`` DenseLayer modules.

    Args:
        num_layers: How many DenseLayer modules to chain.
        in_channels: Channel count entering the first layer.
        growth_rate: Channels added by every layer.

    Each DenseLayer concatenates its output to its input, so the i-th layer
    is built for ``in_channels + i * growth_rate`` input channels.
    """

    def __init__(self, num_layers, in_channels, growth_rate):
        super().__init__()
        stacked = [
            DenseLayer(in_channels + idx * growth_rate, growth_rate)
            for idx in range(num_layers)
        ]
        self.block = nn.Sequential(*stacked)

    def forward(self, x):
        return self.block(x)

class TransitionLayer(nn.Module):
    """BN -> 1x1 Conv -> 2x2 average pool, applied in that order.

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Channel count produced by the 1x1 convolution.

    The 1x1 convolution changes the channel count and the stride-2 average
    pool halves the spatial resolution.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.bn = nn.BatchNorm2d(in_channels)
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=1,
            bias=False,
        )
        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)

    def forward(self, x):
        projected = self.conv(self.bn(x))
        return self.pool(projected)

class DenseNet(nn.Module):
    """Minimal DenseNet-style classifier with a custom weight initialisation.

    Pipeline: 3x3 Conv (24 channels) -> DenseBlock (3 layers) -> TransitionLayer
    (to 64 channels) -> global average pool -> Linear(64, num_classes).

    Args:
        in_channels: Channel count of the input images.
        num_classes: Number of output logits.
        growth_rate: Channels added by each layer of the dense block.
    """

    def __init__(self, in_channels=3, num_classes=10, growth_rate=12):
        super(DenseNet, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, 24, kernel_size=3, padding=1)

        self.block1 = DenseBlock(num_layers=3, in_channels=24, growth_rate=growth_rate)
        # Three dense layers each add `growth_rate` channels to the initial 24.
        self.trans1 = TransitionLayer(in_channels=24 + 3 * growth_rate, out_channels=64)

        self.pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(64, num_classes)

        # Apply the custom initialisation at construction time, as VGG16 does.
        # Previously this method was defined but never called, so the model
        # kept PyTorch's default initialisation.
        self.initialize_weights()

    def forward(self, x):
        """Return class logits of shape (batch, num_classes)."""
        x = self.conv1(x)                          # Initial conv
        x = self.block1(x)                         # Dense block
        x = self.trans1(x)                         # Transition layer
        x = self.pool(x)                           # Global avg pool
        x = torch.flatten(x, 1)                    # (batch, 64, 1, 1) -> (batch, 64)
        x = self.fc(x)
        return x

    def initialize_weights(self):
        """Re-initialise every Conv2d/Linear layer of the model in place.

        Weights are drawn from N(0, 1) and then min-max rescaled per layer so
        that they span exactly [0, 1]; biases (when present) are set to 0.1.
        BatchNorm layers are not touched.
        """
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                with torch.no_grad():
                    m.weight.normal_(0, 1)
                    min_val = m.weight.min()
                    max_val = m.weight.max()
                    # Per-layer min-max normalisation to the [0, 1] range.
                    m.weight.data = (m.weight - min_val) / (max_val - min_val)
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0.1)
        
# Smoke test: build a default DenseNet, print its structure, then show a few
# weights of its first convolution.
# NOTE: this rebinds the global `model` (previously the VGG16 instance).
if __name__ == "__main__":
    print("="*50)
    model = DenseNet()
    print(model)
    print("="*50)
    print("\n First Convolution Layer (conv) weights (first 10):") 
    # DenseNet has no `features` container (unlike VGG16), so the first
    # convolution is reached through its `conv1` attribute.
    print(model.conv1.weight.view(-1)[:10]) 


DenseNet(
  (conv1): Conv2d(3, 24, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (block1): DenseBlock(
    (block): Sequential(
      (0): DenseLayer(
        (bn): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv): Conv2d(24, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (1): DenseLayer(
        (bn): BatchNorm2d(36, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv): Conv2d(36, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (2): DenseLayer(
        (bn): BatchNorm2d(48, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (conv): Conv2d(48, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
    )
  )
  (trans1): TransitionLayer(
    (bn): BatchNorm2d(60, eps=1e-05, momentum=0.1, affine=True, 