In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms


In [None]:
# Convert each image to a tensor with values in [0, 1], then normalise every channel with
# mean 0.5 and std 0.5: (x - 0.5) / 0.5 maps the values to [-1, 1], centred at 0.
data_transform = torchvision.transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# Single place to change the dataset location, the batch size and the split seed.
DATA_ROOT = '/content/drive/MyDrive/DL_review/CNNs'
BATCH_SIZE = 64
SPLIT_SEED = 42

# Download the CIFAR10 dataset. The official training set is split further into train and validation sets.
full_train_set = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=True, download=True, transform=data_transform)

# The validation size is the remainder, so the two lengths always sum to the dataset size
# (two independent int() truncations can fall short and make random_split raise).
n_train = int(len(full_train_set) * 0.8)
n_val = len(full_train_set) - n_train

# A seeded generator makes the train/validation membership reproducible across kernel restarts.
train_set, val_set = torch.utils.data.random_split(
    full_train_set,
    [n_train, n_val],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
test_set = torchvision.datasets.CIFAR10(root=DATA_ROOT, train=False, download=True, transform=data_transform)

# This is how you would load the data if it is from a local directory with structure (data/train/class1/img1.jpg)
# train_set = torchvision.datasets.ImageFolder(root='/content/drive/MyDrive/DL_review/CNNs/dataset', transform=data_transform)

# Wrap the datasets in batched loaders. Only the training loader is shuffled.
train_loader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_set, batch_size=BATCH_SIZE, shuffle=False)
test_loader = torch.utils.data.DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


In [None]:
import torch.nn.functional as F

In [None]:
# Let's build a custom CNN for training. This CNN is similar to VGG16. It uses only 3 X 3 filters and has max_pooling layer after every block.

class CustomCNN(nn.Module):
  """VGG-style convolutional classifier: three conv blocks followed by two linear layers.

  Every convolution uses a 3x3 kernel with stride 1 and padding 1, and every block ends
  with a 2x2 max-pool of stride 2. The first two blocks apply BatchNorm2d after each
  convolution; the third block does not.
  NOTE(review): confirm that leaving batch norm out of the third block is intentional.

  The first linear layer expects 4 * 4 * 256 flattened features, which matches 3-channel
  32x32 inputs after three halvings. `forward` returns the output of the final 10-unit
  linear layer; no softmax is applied inside the model.
  """

  def __init__(self):
    super().__init__()

    # Block 1: 3 -> 64 -> 64 channels, batch-normalised, then pooled.
    self.conv_1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
    self.bn1 = nn.BatchNorm2d(64)
    self.relu_1 = nn.ReLU()
    self.conv_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)
    self.bn2 = nn.BatchNorm2d(64)
    self.relu_2 = nn.ReLU()
    self.max_pool_1 = nn.MaxPool2d(kernel_size=2, stride=2)

    # Block 2: 64 -> 128 -> 128 channels, batch-normalised, then pooled.
    self.conv_3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
    self.bn3 = nn.BatchNorm2d(128)
    self.relu_3 = nn.ReLU()
    self.conv_4 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)
    self.bn4 = nn.BatchNorm2d(128)
    self.relu_4 = nn.ReLU()
    self.max_pool_2 = nn.MaxPool2d(kernel_size=2, stride=2)

    # Block 3: 128 -> 256 -> 256 channels, no batch norm, then pooled.
    self.conv_5 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
    self.relu_5 = nn.ReLU()
    self.conv_6 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
    self.relu_6 = nn.ReLU()
    self.max_pool_3 = nn.MaxPool2d(kernel_size=2, stride=2)

    # Classifier head: flattened features -> 512 hidden units -> 10 outputs.
    self.fc1 = nn.Linear(4 * 4 * 256, 512)
    self.relu_fc1 = nn.ReLU()
    self.fc2 = nn.Linear(512, 10)

  def forward(self, x):
    """Run a batch of images through the three conv blocks and the classifier head."""
    # Block 1
    x = self.relu_1(self.bn1(self.conv_1(x)))
    x = self.relu_2(self.bn2(self.conv_2(x)))
    x = self.max_pool_1(x)

    # Block 2
    x = self.relu_3(self.bn3(self.conv_3(x)))
    x = self.relu_4(self.bn4(self.conv_4(x)))
    x = self.max_pool_2(x)

    # Block 3
    x = self.relu_5(self.conv_5(x))
    x = self.relu_6(self.conv_6(x))
    x = self.max_pool_3(x)

    # Keep the batch dimension and flatten everything else for the linear layers.
    x = x.flatten(start_dim=1)
    x = self.relu_fc1(self.fc1(x))
    return self.fc2(x)



In [None]:
def init_weights(m):
  """Apply He (Kaiming normal) initialisation to a Conv2d or Linear module.

  Intended to be passed to `nn.Module.apply`, which calls it once per submodule.
  Weights of Conv2d / Linear layers are drawn with `kaiming_normal_` for ReLU
  non-linearities and their biases (when present) are set to zero. Any other
  module type is left untouched.

  Args:
    m: the module to initialise in place.
  """
  if isinstance(m, (nn.Conv2d, nn.Linear)):
    nn.init.kaiming_normal_(m.weight, nonlinearity='relu')
    if m.bias is not None:
      # Use the in-place `constant_`; the un-suffixed `constant` is a deprecated alias that warns.
      nn.init.constant_(m.bias, 0)

In [None]:
# Build the model and apply He initialisation (init_weights) to its Conv2d / Linear layers.
# `cnn.apply(...)` is the last expression of the cell, so the model summary is displayed as output.

cnn = CustomCNN()

cnn.apply(init_weights)

# Optional sanity check, currently disabled: run a single training image through the model and
# print the output shape. unsqueeze(0) adds the batch dimension the model expects.
# data = train_set.dataset[0][0].unsqueeze(0)

# print(cnn(data).shape)

  nn.init.constant(m.bias, 0)


CustomCNN(
  (conv_1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_1): ReLU()
  (conv_2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_2): ReLU()
  (max_pool_1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv_3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_3): ReLU()
  (conv_4): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn4): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu_4): ReLU()
  (max_pool_2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv_5): Conv2d(128, 256, kernel_size=(3, 3), stride=(1,

## Defining Loss Function and Optimizer

We will use cross-entropy loss because this is a classification task. You may have noticed that we didn't add a Softmax layer to the model. That is because `nn.CrossEntropyLoss` applies the softmax itself (as a log-softmax) to the raw outputs of the model.

In [None]:
import torch.optim as optim

# Adam over all model parameters with a learning rate of 1e-3, paired with a cross-entropy
# criterion that consumes the raw model outputs.
optimizer = optim.Adam(params=cnn.parameters(), lr=1e-3)

criterion = nn.CrossEntropyLoss()

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [None]:
# Latest percentage of exactly-zero outputs, keyed by the module that produced them
activation_stats = {}


def activation_hook(module, input, output):
    """Forward hook that measures how many of a module's outputs are exactly zero.

    The percentage is stored in `activation_stats` under the module object
    (overwriting any previous value for that module) and also printed.

    Args:
        module: the module whose forward pass just ran.
        input: the inputs the module received (unused).
        output: the tensor the module produced.
    """
    n_total = output.numel()
    n_zero = output.eq(0).float().sum().item()
    pct_zero = 100 * (n_zero / n_total)

    activation_stats[module] = pct_zero

    print(f"{module}: {pct_zero:.2f}% of activations are zero.")

# Register the hook on every ReLU layer and keep the returned handles. Without them the hooks
# can never be detached: they would print (and synchronise via .item()) on every later forward
# pass, including each training and validation batch, and re-running this cell would stack
# duplicate hooks on the same layers.
hook_handles = [
    module.register_forward_hook(activation_hook)
    for module in cnn.modules()
    if isinstance(module, nn.ReLU)
]

# Run a sample batch through the model
cnn.to(device)
images, _ = next(iter(train_loader))  # Get one batch
images = images.to(device)

# Forward pass to collect activation stats. Gradients are not needed for this probe, and the
# hooks are detached afterwards even if the forward pass fails.
try:
    with torch.no_grad():
        sample_logits = cnn(images)
finally:
    for handle in hook_handles:
        handle.remove()

# Show only the output shape rather than dumping the whole logits tensor.
sample_logits.shape

ReLU(): 49.75% of activations are zero.
ReLU(): 50.14% of activations are zero.
ReLU(): 49.60% of activations are zero.
ReLU(): 50.19% of activations are zero.
ReLU(): 49.25% of activations are zero.
ReLU(): 50.01% of activations are zero.
ReLU(): 53.45% of activations are zero.


tensor([[-7.8741e-01, -1.4695e+00, -2.2932e+00, -2.2524e+00, -7.0744e-01,
          2.2715e+00,  5.9934e-01, -8.8993e-01,  1.6695e+00, -6.8386e-02],
        [-2.4575e+00, -1.5489e+00, -3.3655e-01, -1.9399e+00, -1.8773e-01,
          1.1105e+00,  1.1105e+00, -9.5691e-01,  2.4761e-01,  1.1870e+00],
        [-4.9822e-01, -1.2594e+00, -6.4023e-01, -1.4078e+00, -6.3174e-01,
          1.6941e+00,  6.5590e-01, -1.1169e+00,  1.8179e+00, -4.5276e-01],
        [-1.3631e+00, -1.0368e+00, -1.0750e+00, -2.1930e+00, -1.2486e-01,
          2.0455e+00,  1.9853e+00, -4.7206e-01,  1.5076e+00,  2.9280e-03],
        [-1.3576e+00, -8.1563e-01, -2.6726e+00, -2.4300e+00,  7.1973e-01,
          1.9257e+00,  4.9036e-01, -1.3794e+00,  2.0626e+00,  3.8015e-02],
        [-1.3104e+00, -1.8936e+00, -3.9785e-01, -2.5656e+00, -7.7043e-01,
          2.7915e+00,  8.6541e-01, -1.7169e+00,  1.6483e+00,  6.8361e-01],
        [-1.1819e+00, -1.8333e+00, -1.7381e+00, -3.1166e+00,  6.5176e-01,
          3.2931e+00,  6.1629e-0

In [None]:
n_epochs = 20

# Moving the model to the device does not depend on the epoch, so do it once up front.
cnn.to(device)

for epoch in range(n_epochs):
  # ---- training phase ----
  cnn.train()
  running_loss = 0.0
  n_train_samples = 0

  for images, labels in train_loader:

    images, labels = images.to(device), labels.to(device)

    optimizer.zero_grad()

    preds = cnn(images)

    batch_loss = criterion(preds, labels)

    batch_loss.backward()

    optimizer.step()

    # criterion yields the mean loss of the batch; weight it by the batch size so that a
    # smaller final batch does not count as much as a full one in the epoch average.
    batch_size = labels.size(0)
    running_loss += batch_loss.item() * batch_size
    n_train_samples += batch_size

  # ---- validation phase (no gradient tracking, eval-mode layers) ----
  cnn.eval()
  running_val_loss = 0.0
  n_val_samples = 0

  with torch.no_grad():

    for images, labels in val_loader:

      images, labels = images.to(device), labels.to(device)

      preds = cnn(images)

      batch_val_loss = criterion(preds, labels)

      batch_size = labels.size(0)
      running_val_loss += batch_val_loss.item() * batch_size
      n_val_samples += batch_size

  # Per-sample averages; max(..., 1) avoids a division by zero on an empty loader.
  train_loss = running_loss / max(n_train_samples, 1)
  val_loss = running_val_loss / max(n_val_samples, 1)

  print(f'Epoch {epoch+1} completed. loss: {train_loss}, validation loss : {val_loss}')




