In [3]:
import torch
import torch.nn as nn

In [13]:
class Network(torch.nn.Module):
    def __init__(self):

        super(Network, self).__init__()
        
        self.initial_conv_layers = nn.Sequential(
                                nn.Conv2d(3, 16, 9),
                                nn.PReLU(),
                                nn.Conv2d(16, 32, 7),
                                nn.PReLU()
                              )
        
        self.gdf_conv1 = nn.Conv2d(32, 16, 9)
        self.gdf_max1 = nn.MaxPool2d(2)
        self.gdf_avg1 = nn.AvgPool2d(2)
        self.gdf_prelu1 = nn.PReLU()
        
        self.gdf_conv2 = nn.Conv2d(16, 32, 7)
        self.gdf_max2 = nn.MaxPool2d(2)
        self.gdf_avg2 = nn.AvgPool2d(2)
        self.gdf_prelu2 = nn.PReLU()
        
        self.gdf_conv3_block = nn.Sequential(
                            nn.Conv2d(32, 16, 7, 1, 3),
                            nn.PReLU(),
#                             nn.Conv2d(16, 8, 7, 1, 3),
                            nn.Conv2d(16, 8, 7, 1, (4,5)), #Trying to get all WxH same
                            nn.PReLU()
                          )
        
        self.gdf_adp = nn.AdaptiveMaxPool2d(64)
        self.gdf_adp_prelu = nn.PReLU()
        
        self.gdf_fc_block = nn.Sequential(
            nn.Linear(64*64*16, 512),
            nn.PReLU(),
            nn.Linear(512, 5)
        )
        
        self.cc1_conv1 = nn.Conv2d(32, 20, 7)
        self.cc1_max1 = nn.MaxPool2d(2)
        self.cc1_avg1 = nn.AvgPool2d(2)
        self.cc1_prelu1 = nn.PReLU()
        
        self.cc1_conv2 = nn.Conv2d(20, 40, 5)
        self.cc1_max2 = nn.MaxPool2d(2)
        self.cc1_avg2 = nn.AvgPool2d(2)
        self.cc1_prelu2 = nn.PReLU()
        
        self.cc1_conv3_block = nn.Sequential(
                            nn.Conv2d(40, 20, 5, 1, 2),
                            nn.PReLU(),
#                             nn.Conv2d(20, 10, 5, 1, 2), 
                            nn.Conv2d(20, 10, 5, 1, 3), # Trying to make all WxH same 
                            nn.PReLU()
                          )
        
        
        self.cc2_conv1 = nn.Conv2d(32, 24, 5)
        self.cc2_max1 = nn.MaxPool2d(2)
        self.cc2_avg1 = nn.AvgPool2d(2)
        self.cc2_prelu1 = nn.PReLU()
        
        self.cc2_conv2 = nn.Conv2d(24, 48, 3)
        self.cc2_max2 = nn.MaxPool2d(2)
        self.cc2_avg2 = nn.AvgPool2d(2)
        self.cc2_prelu2 = nn.PReLU()
        
        self.cc2_conv3_block = nn.Sequential(
                            nn.Conv2d(48, 24, 3, 1, 1),
                            nn.PReLU(),
                            nn.Conv2d(24, 12, 3, 1, 1),
                            nn.PReLU()
                          )
        

        self.out_block = nn.Sequential(
                        nn.Conv2d(30, 24, 3, 1, 1),
                        nn.PReLU(),
                        nn.Conv2d(24, 32, 3, 1, 1),
                        nn.PReLU(),
                        nn.ConvTranspose2d(32, 16, 3, 2, 1),
                        nn.PReLU(),
                        nn.ConvTranspose2d(16, 8, 3, 2, 1),
                        nn.PReLU(),
                        nn.Conv2d(8, 8, 1),
                        nn.PReLU(),
                        nn.Conv2d(8, 1, 1)
                    )

    def forward(self, x):
        pre_x = self.initial_conv_layers(x)
        
        # sub task 1
        
        gd_x = self.gdf_conv1(pre_x)
        gd_x = self.gdf_max1(gd_x) + self.gdf_avg1(gd_x)
        gd_x = self.gdf_prelu1(gd_x)
        
        gd_x = self.gdf_conv2(gd_x)
        gd_x = self.gdf_max2(gd_x) + self.gdf_avg2(gd_x)
        gd_x = self.gdf_prelu2(gd_x)
        
        gd_out = self.gdf_conv3_block(gd_x)
        
        gd_b = self.gdf_adp(gd_out)
        gd_b = self.gdf_adp_prelu(gd_b)
        gd_b = self.gdf_fc_block(gd_b)
        
        # sub task 2
        
        # First branch
        cc_1 = self.cc1_conv1(pre_x)
        cc_1 = self.cc1_max1(cc_1) + self.cc1_avg1(cc_1)
        cc_1 = self.cc1_prelu1(cc_1)
        
        cc_1 = self.cc1_conv2(cc_1)
        cc_1 = self.cc1_max2(cc_1) + self.cc1_avg2(cc_1)
        cc_1 = self.cc1_prelu2(cc_1)
        
        cc_1 = self.cc1_conv3_block(cc_1)
        
        # Second branch
        cc_2 = self.cc2_conv1(pre_x)
        cc_2 = self.cc2_max1(cc_2) + self.cc2_avg1(cc_2)
        cc_2 = self.cc2_prelu1(cc_2)
        
        cc_2 = self.cc2_conv2(cc_2)
        cc_2 = self.cc2_max2(cc_2) + self.cc2_avg2(cc_2)
        cc_2 = self.cc2_prelu2(cc_2)
        
        cc_2 = self.cc2_conv3_block(cc_2)
        
        cc_out = torch.cat((gd_out, cc1, cc2), dim=-1)
        
        out = self.out_block(cc_out)        
        
        return gd_b, out

In [14]:
net = Network()

In [15]:
net

Network(
  (initial_conv_layers): Sequential(
    (0): Conv2d(3, 16, kernel_size=(9, 9), stride=(1, 1))
    (1): PReLU(num_parameters=1)
    (2): Conv2d(16, 32, kernel_size=(7, 7), stride=(1, 1))
    (3): PReLU(num_parameters=1)
  )
  (gdf_conv1): Conv2d(32, 16, kernel_size=(9, 9), stride=(1, 1))
  (gdf_max1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (gdf_avg1): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (gdf_prelu1): PReLU(num_parameters=1)
  (gdf_conv2): Conv2d(16, 32, kernel_size=(7, 7), stride=(1, 1))
  (gdf_max2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (gdf_avg2): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (gdf_prelu2): PReLU(num_parameters=1)
  (gdf_conv3_block): Sequential(
    (0): Conv2d(32, 16, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
    (1): PReLU(num_parameters=1)
    (2): Conv2d(16, 8, kernel_size=(7, 7), stride=(1, 1), padding=(4, 5))
    (3): PReLU(num_parameters=1)
  )
  (gdf_

In [16]:
def model_summary(model):
  print("model_summary")
  print()
  print("Layer_name"+"\t"*7+"Number of Parameters")
  print("="*100)
  model_parameters = [layer for layer in model.parameters() if layer.requires_grad]
  layer_name = [child for child in model.children()]
  j = 0
  total_params = 0
  print("\t"*10)
  for i in layer_name:
    print()
    param = 0
    try:
      bias = (i.bias is not None)
    except:
      bias = False  
    if not bias:
      param =model_parameters[j].numel()+model_parameters[j+1].numel()
      j = j+2
    else:
      param =model_parameters[j].numel()
      j = j+1
    print(str(i)+"\t"*3+str(param))
    total_params+=param
  print("="*100)
  print(f"Total Params:{total_params}")       

model_summary(net)

model_summary

Layer_name							Number of Parameters
										

Sequential(
  (0): Conv2d(3, 16, kernel_size=(9, 9), stride=(1, 1))
  (1): PReLU(num_parameters=1)
  (2): Conv2d(16, 32, kernel_size=(7, 7), stride=(1, 1))
  (3): PReLU(num_parameters=1)
)			3904

Conv2d(32, 16, kernel_size=(9, 9), stride=(1, 1))			1

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)			25120

AvgPool2d(kernel_size=2, stride=2, padding=0)			41473

PReLU(num_parameters=1)			17

Conv2d(16, 32, kernel_size=(7, 7), stride=(1, 1))			25088

MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)			33

AvgPool2d(kernel_size=2, stride=2, padding=0)			25104

PReLU(num_parameters=1)			6273

Sequential(
  (0): Conv2d(32, 16, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3))
  (1): PReLU(num_parameters=1)
  (2): Conv2d(16, 8, kernel_size=(7, 7), stride=(1, 1), padding=(4, 5))
  (3): PReLU(num_parameters=1)
)			9

AdaptiveMaxPool2d(output_size=64)			33554433

PReLU(num_param

In [17]:
from prettytable import PrettyTable

def count_parameters(model):
    table = PrettyTable(["Modules", "Parameters"])
    total_params = 0
    for name, parameter in model.named_parameters():
        if not parameter.requires_grad: continue
        param = parameter.numel()
        table.add_row([name, param])
        total_params+=param
    print(table)
    print(f"Total Trainable Params: {total_params}")
    return total_params
    
count_parameters(net)

+------------------------------+------------+
|           Modules            | Parameters |
+------------------------------+------------+
| initial_conv_layers.0.weight |    3888    |
|  initial_conv_layers.0.bias  |     16     |
| initial_conv_layers.1.weight |     1      |
| initial_conv_layers.2.weight |   25088    |
|  initial_conv_layers.2.bias  |     32     |
| initial_conv_layers.3.weight |     1      |
|       gdf_conv1.weight       |   41472    |
|        gdf_conv1.bias        |     16     |
|      gdf_prelu1.weight       |     1      |
|       gdf_conv2.weight       |   25088    |
|        gdf_conv2.bias        |     32     |
|      gdf_prelu2.weight       |     1      |
|   gdf_conv3_block.0.weight   |   25088    |
|    gdf_conv3_block.0.bias    |     16     |
|   gdf_conv3_block.1.weight   |     1      |
|   gdf_conv3_block.2.weight   |    6272    |
|    gdf_conv3_block.2.bias    |     8      |
|   gdf_conv3_block.3.weight   |     1      |
|     gdf_adp_prelu.weight     |  

33822945

In [18]:
# path = 'crowd_model.pt'

# torch.save(net.state_dict(), path)