In [None]:
import profile
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.utils.prune as prune
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import torch.optim.lr_scheduler
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR
from torch.autograd import Variable
from torch.utils.data import random_split
from torchvision.models import alexnet
from torch.utils.tensorboard import SummaryWriter
import time
import random
import math
# torch.cuda.set_per_process_memory_fraction(0.8)
import sys

In [None]:
class Flatten(nn.Module):
    """Module that collapses every dimension after the first into one."""

    def forward(self, x):
        """Return ``x`` viewed as a 2-D tensor of shape ``(x.size(0), -1)``."""
        leading = x.shape[0]
        return x.view(leading, -1)

In [None]:
# ---------------------------------------------------------------------------
# Run configuration: TensorBoard writer, RNG seeds, hyper-parameters, device.
# ---------------------------------------------------------------------------

# TensorBoard event files for this run are written under this directory.
writer=SummaryWriter("trial/v4-l2/third")

# Seed every RNG the notebook touches (torch, Python `random`, NumPy).
random_seed=42
torch.manual_seed(random_seed)
random.seed(random_seed)
np.random.seed(random_seed)

train_batch_size = 32
test_batch_size=32
# NOTE(review): the data cell loads CIFAR10 and the model is built with the
# DenseNet default num_classes=10; this constant is not referenced by the
# visible cells -- confirm whether 100 is intended.
num_classes = 100
learning_rate = 0.001
num_epochs = 100
# lambda_l2 / pruning_amount are not referenced by the visible cells.
lambda_l2 = 0.00001
pruning_amount = 0.2


# Module-level placeholders; train() keeps its own local best_accuracy.
best_model = None
best_accuracy = 0.0

# pruning_epochs is not referenced by the visible cells.
pruning_epochs = 10
# Fraction used by prune_bottleneck() to pick the per-layer L1-norm threshold.
pruning_rate = 0.05
# Weight of the parameter L1-norm term passed to train()/custom_loss().
lambda_l1 = 0.0001
# lambda_train=0.00001

# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Use the GPU when available (and seed the CUDA RNGs too), else fall back to CPU.
if torch.cuda.is_available():
   torch.cuda.manual_seed_all(random_seed)
   device=torch.device("cuda")
else:
   device=torch.device("cpu")
# Training pipeline: random crop (with 4-pixel padding) and horizontal flip for
# augmentation, then tensor conversion and per-channel normalisation.
# The normalisation constants are presumably CIFAR-10 channel statistics -- confirm.
transform_train = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Evaluation pipeline: no augmentation, same normalisation.
transform_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])


# Two views of the same CIFAR-10 training files: one augmented (for training)
# and one deterministic (for validation). Splitting only the augmented view
# would make the validation subset inherit RandomCrop/RandomHorizontalFlip,
# which makes validation metrics noisy and not comparable to test metrics.
train_source = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_train)
val_source = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform_test)
test_dataset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform_test)
train_size = int(0.9 * len(train_source))
val_size = len(train_source) - train_size
test_size = len(test_dataset)

# random_split draws from the global torch RNG seeded earlier, so the 90/10
# partition is the same as before; only the transform seen by the validation
# indices changes.
train_dataset, augmented_val_split = random_split(train_source, [train_size, val_size])
val_dataset = torch.utils.data.Subset(val_source, augmented_val_split.indices)

train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=train_batch_size, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=test_batch_size, shuffle=False, num_workers=2)
valid_loader = torch.utils.data.DataLoader(val_dataset, batch_size=test_batch_size, shuffle=False, num_workers=2)

In [None]:

class Bottleneck(nn.Module):
    """Dense-block unit: BN-ReLU-1x1 conv followed by BN-ReLU-3x3 conv.

    The ``growth_rate`` new feature maps are concatenated in front of the
    input, so the output has ``growth_rate + in_planes`` channels.
    """

    def __init__(self, in_planes, growth_rate):
        super().__init__()
        inner_planes = 4 * growth_rate
        # Registration order (bn1, conv1, bn2, conv2) is significant: code that
        # walks named_children() sees the layers in exactly this order.
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = nn.Conv2d(in_planes, inner_planes, kernel_size=1, bias=False)
        self.bn2 = nn.BatchNorm2d(inner_planes)
        self.conv2 = nn.Conv2d(inner_planes, growth_rate, kernel_size=3, padding=1, bias=False)

    def forward(self, x):
        """Return ``cat([new_features, x], dim=1)``."""
        squeezed = self.conv1(F.relu(self.bn1(x)))
        new_features = self.conv2(F.relu(self.bn2(squeezed)))
        return torch.cat([new_features, x], 1)


class Transition(nn.Module):
    """BN-ReLU-1x1 conv that changes the channel count, then 2x2 average pooling."""

    def __init__(self, in_planes, out_planes):
        super().__init__()
        self.bn = nn.BatchNorm2d(in_planes)
        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=1, bias=False)

    def forward(self, x):
        """Map ``in_planes`` channels to ``out_planes`` and halve height/width."""
        activated = F.relu(self.bn(x))
        projected = self.conv(activated)
        return F.avg_pool2d(projected, 2)


class DenseNet(nn.Module):
    """Densely connected CNN: four dense stages separated by three transitions.

    Args:
        block: Module class used inside the dense stages; it is called as
            ``block(in_planes, growth_rate)`` and is expected to add
            ``growth_rate`` channels to its input.
        nblocks: Sequence of four ints, the number of blocks per dense stage.
        growth_rate: Channels added by each block.
        reduction: Channel multiplier applied by every ``Transition``
            (floored to an int).
        num_classes: Size of the final linear layer's output.

    The attribute registration order (conv1, dense1, trans1, ..., bn, linear)
    is kept stable because code that iterates ``named_children()`` depends on it.
    """

    def __init__(self, block, nblocks, growth_rate=12, reduction=0.5, num_classes=10):
        super().__init__()
        self.growth_rate = growth_rate

        # Stem: 3 input channels -> 2 * growth_rate feature maps, same spatial size.
        num_planes = 2 * growth_rate
        self.conv1 = nn.Conv2d(3, num_planes, kernel_size=3, padding=1, bias=False)

        # Stage 1 + transition.
        self.dense1 = self._make_dense_layers(block, num_planes, nblocks[0])
        num_planes += nblocks[0] * growth_rate
        out_planes = int(math.floor(num_planes * reduction))
        self.trans1 = Transition(num_planes, out_planes)
        num_planes = out_planes

        # Stage 2 + transition.
        self.dense2 = self._make_dense_layers(block, num_planes, nblocks[1])
        num_planes += nblocks[1] * growth_rate
        out_planes = int(math.floor(num_planes * reduction))
        self.trans2 = Transition(num_planes, out_planes)
        num_planes = out_planes

        # Stage 3 + transition.
        self.dense3 = self._make_dense_layers(block, num_planes, nblocks[2])
        num_planes += nblocks[2] * growth_rate
        out_planes = int(math.floor(num_planes * reduction))
        self.trans3 = Transition(num_planes, out_planes)
        num_planes = out_planes

        # Stage 4 has no transition; it feeds the classifier head directly.
        self.dense4 = self._make_dense_layers(block, num_planes, nblocks[3])
        num_planes += nblocks[3] * growth_rate

        self.bn = nn.BatchNorm2d(num_planes)
        self.linear = nn.Linear(num_planes, num_classes)

    def _make_dense_layers(self, block, in_planes, nblock):
        """Stack ``nblock`` blocks, each seeing ``growth_rate`` more channels."""
        layers = []
        for _ in range(nblock):
            layers.append(block(in_planes, self.growth_rate))
            in_planes += self.growth_rate
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)``."""
        out = self.conv1(x)
        out = self.trans1(self.dense1(out))
        out = self.trans2(self.dense2(out))
        out = self.trans3(self.dense3(out))
        out = self.dense4(out)
        out = F.relu(self.bn(out))
        # Global average pooling. For 32x32 inputs the map reaching this point
        # is 4x4 (three 2x2 poolings), so this matches the former fixed
        # avg_pool2d(..., 4); unlike the fixed kernel it also yields exactly
        # one value per channel for other input resolutions, so the linear
        # layer's in_features always match.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        return self.linear(out)
# Build the network with block counts [6, 12, 24, 16] and growth rate 32;
# num_classes is left at the constructor default.
model=DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)


In [None]:
# Display the module tree of the network built above.
model
# last_layer=model.linear

In [None]:
def evaluate(model, valid_loader, criterion):
    """Compute mean loss and accuracy of ``model`` over ``valid_loader``.

    The model is switched to eval mode (and left there) and gradients are
    disabled. Batches are moved to the device of the model's first parameter,
    so the function does not rely on a module-level ``device``.

    Args:
        model: Network mapping an image batch to per-class scores.
        valid_loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable; assumed to return the *mean* loss of a batch
            (the default of ``nn.CrossEntropyLoss``).

    Returns:
        ``(loss, accuracy)`` where ``loss`` is the per-sample mean loss and
        ``accuracy`` is a percentage in ``[0, 100]``. An empty loader yields
        ``(0.0, 0.0)`` instead of dividing by zero.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    loss_sum = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in valid_loader:
            if target_device is not None:
                images = images.to(target_device)
                labels = labels.to(target_device)
            outputs = model(images)
            batch_size = labels.size(0)
            # Weight each batch mean by its size so a short final batch does
            # not skew the average (same convention as the training loop).
            loss_sum += criterion(outputs, labels).item() * batch_size
            correct += (outputs.argmax(dim=1) == labels).sum().item()
            total += batch_size

    if total == 0:
        return 0.0, 0.0
    return loss_sum / total, 100 * correct / total

def calculate_l1_norm_of_filters(model):
    """Collect the L1 norm of every filter of each direct Conv2d child.

    Only the immediate children of ``model`` are inspected. A progress line is
    printed per convolution and the full result is printed before returning.

    Returns:
        dict mapping child name -> list of per-filter L1 norms (Python floats),
        in filter order.
    """
    norms_by_layer = {}
    for child_name, child in model.named_children():
        if not isinstance(child, nn.Conv2d):
            continue
        print(f"{child} is being l1 norm found")
        # Iterating the weight walks its first dimension, i.e. one output filter at a time.
        norms_by_layer[child_name] = [
            torch.sum(torch.abs(kernel)).item() for kernel in child.weight
        ]
    print(f"\n\nThe original l1 norms for this is {norms_by_layer}\n\n\n")
    return norms_by_layer

def calculate_threshold_l1_norm_of_filters(l1_normalisation_values,percentage_to_prune):
    """Pick, per layer, the L1-norm value below which filters are pruned.

    For each layer the norms are sorted ascending and the value at position
    ``int(len(norms) * percentage_to_prune)`` becomes the threshold.

    Args:
        l1_normalisation_values: dict mapping layer name -> list of norms.
        percentage_to_prune: Fraction of filters to fall below the threshold.

    Returns:
        dict mapping layer name -> threshold value.

    The position is clamped to the valid range, so a fraction >= 1.0 selects
    the largest norm (instead of raising IndexError) and a negative fraction
    selects the smallest (instead of silently wrapping to the end of the
    list). A layer with no norms gets 0.0; norms are non-negative, so nothing
    compares below it.
    """
    threshold_values = {}
    for layer_name, norms in l1_normalisation_values.items():
        if not norms:
            threshold_values[layer_name] = 0.0
            continue
        ordered = sorted(norms)
        position = int(len(ordered) * percentage_to_prune)
        position = min(max(position, 0), len(ordered) - 1)
        threshold_values[layer_name] = ordered[position]
    return threshold_values

def index_remove(tensor, dim, index, removed=False):
    """Return ``tensor`` without the slices listed in ``index`` along ``dim``.

    Args:
        tensor: Source tensor. CUDA tensors are copied to the CPU first, so
            for CUDA input the result is a CPU tensor.
        dim: Dimension along which slices are dropped.
        index: Iterable of integer positions to drop.
        removed: When True, also return the dropped slices (in the order
            given by ``index``).

    Returns:
        The reduced tensor, or ``(reduced, dropped)`` when ``removed`` is True.
        Surviving slices keep their original relative order.
    """
    if tensor.is_cuda:
        tensor = tensor.cpu()

    drop_positions = [int(i) for i in index]
    dropped = set(drop_positions)
    # Build the keep-list by scanning positions in order rather than relying
    # on set iteration order, which the language does not guarantee.
    keep_positions = [i for i in range(tensor.size(dim)) if i not in dropped]

    # dtype is explicit so that an empty list still yields an integer index
    # tensor (torch.tensor([]) would be float and index_select rejects it).
    keep_index = torch.tensor(keep_positions, dtype=torch.long)
    new_tensor = torch.index_select(tensor, dim, keep_index)

    if removed:
        drop_index = torch.tensor(drop_positions, dtype=torch.long)
        return new_tensor, torch.index_select(tensor, dim, drop_index)

    return new_tensor

def get_new_conv(in_channels, conv, dim, channel_index, independent_prune_flag=False):
    """Build a Conv2d equal to ``conv`` minus the output filters in ``channel_index``.

    Args:
        in_channels: ``in_channels`` declared on the new layer.
        conv: Source ``nn.Conv2d``; kernel size, stride, padding and dilation
            are copied from it.
        dim: Accepted for interface compatibility; filters are always removed
            along dimension 0 (output channels).
        channel_index: Positions of the output filters to drop.
        independent_prune_flag: Accepted for interface compatibility; unused.

    Returns:
        A new ``nn.Conv2d`` holding the surviving filters.

    The new layer has a bias only if ``conv`` has one, and that bias is pruned
    with the same indices. Previously a freshly initialised random bias was
    always attached, even for bias-free convolutions.

    Only the output dimension of the weight is reduced here: its input
    dimension is still that of ``conv`` and may differ from ``in_channels``
    until the caller slices it.
    """
    has_bias = conv.bias is not None
    new_conv = torch.nn.Conv2d(in_channels=in_channels,
                               out_channels=int(conv.out_channels - len(channel_index)),
                               kernel_size=conv.kernel_size,
                               stride=conv.stride, padding=conv.padding,
                               dilation=conv.dilation, bias=has_bias)

    new_conv.weight.data = index_remove(conv.weight.data, 0, channel_index)
    if has_bias:
        new_conv.bias.data = index_remove(conv.bias.data, 0, channel_index)

    return new_conv

def calculate_l1_norm_of_outputs(model):
    """L1 norm of each weight row of every direct Linear child of ``model``.

    Returns:
        dict mapping child name -> list with one value per output neuron
        (sum of absolute weights over the input dimension).
    """
    return {
        child_name: child.weight.abs().sum(dim=1).tolist()
        for child_name, child in model.named_children()
        if isinstance(child, nn.Linear)
    }

# Calculate L1 norm of inputs
def calculate_l1_norm_of_inputs(model):
    """L1 norm of each weight column of every direct Linear child of ``model``.

    Returns:
        dict mapping child name -> list with one value per input feature
        (sum of absolute weights over the output dimension).
    """
    return {
        child_name: child.weight.abs().sum(dim=0).tolist()
        for child_name, child in model.named_children()
        if isinstance(child, nn.Linear)
    }

# Calculate threshold L1 norm
def calculate_threshold_l1_norm(values, percentage_to_prune):
    """Pick, per layer, the L1-norm value below which entries are pruned.

    For each layer the values are sorted ascending and the one at position
    ``int(len(vals) * percentage_to_prune)`` becomes the threshold.

    Args:
        values: dict mapping layer name -> list of L1 norms.
        percentage_to_prune: Fraction of entries to fall below the threshold.

    Returns:
        dict mapping layer name -> threshold value.

    The position is clamped to the valid range, so a fraction >= 1.0 selects
    the largest value (instead of raising IndexError) and a negative fraction
    selects the smallest (instead of silently wrapping to the end of the
    list). A layer with no values gets 0.0; norms are non-negative, so nothing
    compares below it.
    """
    threshold_values = {}
    for layer_name, vals in values.items():
        if not vals:
            threshold_values[layer_name] = 0.0
            continue
        sorted_vals = sorted(vals)
        position = int(len(sorted_vals) * percentage_to_prune)
        position = min(max(position, 0), len(sorted_vals) - 1)
        threshold_values[layer_name] = sorted_vals[position]
    return threshold_values

# Prune layer
def prune_layer(layer, outputs_to_prune, inputs_to_prune):
    """Return a new Linear layer without the given output rows / input columns.

    Args:
        layer: Source ``nn.Linear``.
        outputs_to_prune: Output-neuron positions to drop.
        inputs_to_prune: Input-feature positions to drop.

    Returns:
        A new ``nn.Linear`` whose weight (and bias, if the source has one)
        holds the surviving entries in their original order.

    Sizes are derived from the surviving positions, so duplicate indices
    cannot make the declared features disagree with the weight shape, and a
    bias-free source layer yields a bias-free result instead of failing.
    """
    dropped_outputs = set(outputs_to_prune)
    dropped_inputs = set(inputs_to_prune)
    # Scan positions in order rather than relying on set iteration order.
    keep_outputs = [i for i in range(layer.out_features) if i not in dropped_outputs]
    keep_inputs = [i for i in range(layer.in_features) if i not in dropped_inputs]

    has_bias = layer.bias is not None
    new_linear_layer = nn.Linear(len(keep_inputs), len(keep_outputs), bias=has_bias)

    weight_device = layer.weight.device
    row_index = torch.tensor(keep_outputs, dtype=torch.long, device=weight_device)
    col_index = torch.tensor(keep_inputs, dtype=torch.long, device=weight_device)

    new_linear_layer.weight.data = (
        layer.weight.data.index_select(0, row_index).index_select(1, col_index)
    )
    if has_bias:
        new_linear_layer.bias.data = layer.bias.data.index_select(0, row_index)

    return new_linear_layer


In [None]:

# l1_norm_outputs = calculate_l1_norm_of_outputs(model)
# l1_norm_inputs = calculate_l1_norm_of_inputs(model)
# threshold_outputs = calculate_threshold_l1_norm(l1_norm_outputs, pruning_rate)
# threshold_inputs = calculate_threshold_l1_norm(l1_norm_inputs, pruning_rate)


In [None]:

# filters_to_remove=[]
# def prune_filters(model,threshold_values,l1_norm_inputs,l1_norm_outputs,threshold_inputs,threshold_outputs):
#   next_channel=3
#   for name,layer in model.named_children():
#     filters_to_remove=[]
#     if isinstance(layer,nn.Conv2d):
#       filters=layer.weight
#       num_filters_to_prune=0

#       for idx, filter in enumerate(filters):
#         l1_norm = torch.sum(torch.abs(filter)).item()
#         if l1_norm < threshold_values[name]:
#           num_filters_to_prune+=1
#           layer.weight.data[idx].zero_()
#           filters_to_remove.append(idx)

#       if num_filters_to_prune > 0:
#         in_channels = next_channel
#         out_channels = layer.out_channels - num_filters_to_prune
#         new_conv_layer=get_new_conv(in_channels,layer,0,filters_to_remove).to(device)
#         setattr(model, name, new_conv_layer)
#         next_channel=out_channels

#     elif isinstance(layer, nn.BatchNorm2d):
#       new_batch_norm_2d_layer=nn.BatchNorm2d(num_features=next_channel).to(device)
#       setattr(model,name,new_batch_norm_2d_layer)
#       del new_batch_norm_2d_layer

#     elif isinstance(layer, nn.Linear):
#           if layer==last_layer:
#             outputs_to_prune=[]
#           else:
#             outputs_to_prune = [idx for idx, l1 in enumerate(l1_norm_outputs[name]) if l1 < threshold_outputs[name]]
#           inputs_to_prune = [idx for idx, l1 in enumerate(l1_norm_inputs[name]) if l1 < threshold_inputs[name]]
#           new_layer= prune_layer(layer, outputs_to_prune, inputs_to_prune)
#           setattr(model, name, new_layer)
#   return model


# def check_pruning(model):
#   print("\nLayer and filter sizes \n ------------------------------------")
#   for name,module in model.named_modules():
#     if isinstance(module,nn.Conv2d):
#       print(f"Layer: {name}, Filter Size: {module.out_channels}")


# print(f"Model is on device: {next(model.parameters()).device}")
# rand_input=torch.randn(1,3,244,244).to(device)
# # writer.add_graph(model,rand_input)

# total_step = len(train_loader)


# writer.add_text("Lambda valye",f"The lambda is {lambda_l1}",0)
# def update_inputs_channels(model):
#   prev_channels=3
#   for name,module in model.named_children():
#     if isinstance(module,nn.Conv2d):
#       in_channels=prev_channels
#       module.weight.data = module.weight.data[:, :in_channels, :, :]
#       module.in_channels=in_channels
#       prev_channels=module.out_channels
#   return model

# def prune_model(model,pruning_rate,l1_norm_inputs,l1_norm_outputs,threshold_inputs,threshold_outputs):
#    l1_norm_values=calculate_l1_norm_of_filters(model)
#    threshold_values=calculate_threshold_l1_norm_of_filters(l1_norm_values,pruning_rate)
#    model=prune_filters(model,threshold_values,l1_norm_inputs,l1_norm_outputs,threshold_inputs,threshold_outputs)
#    model=update_inputs_channels(model)
#    return model

def print_remaining_filters(model):
    """Print the output-channel count of every Conv2d found anywhere in ``model``."""
    print("\nThe filters are \n -----------------------------------")
    conv_modules = (
        (path, sub) for path, sub in model.named_modules() if isinstance(sub, nn.Conv2d)
    )
    for path, sub in conv_modules:
        print(f"{path} has {sub.out_channels} remaining filters")

def print_conv_layer_shapes(model):
    """Print weight and bias shapes of every Conv2d found anywhere in ``model``."""
    print("\nLayer and shape of the filters \n -----------------------------")
    for name, module in model.named_modules():
        if not isinstance(module, nn.Conv2d):
            continue
        print(f"Conv layer: {name}, Weight shape: {module.weight.shape}  Bias shape: {module.bias.shape if module.bias is not None else 'No bias'}")

def calculate_regularization_loss(model):
    """Sum the L2 norms of all filters of the direct Conv2d children of ``model``.

    Returns:
        A scalar tensor, or the int ``0`` when ``model`` has no direct
        Conv2d child.
    """
    conv_children = [
        child for _, child in model.named_children() if isinstance(child, nn.Conv2d)
    ]
    # sum() starts from 0 and adds filter by filter, layer by layer.
    return sum(
        torch.norm(kernel, p=2)
        for conv in conv_children
        for kernel in conv.weight
    )

def custom_loss(outputs, labels, model, criterion, lambda_l1):
    """Return ``criterion(outputs, labels) - lambda_l1 * ||params||_1``.

    The L1 norm is taken over every parameter of ``model``.

    NOTE(review): the L1 term is *subtracted*, so minimising this loss pushes
    the parameter L1 norm up rather than down. This looks deliberate in this
    notebook -- confirm before treating it as a conventional L1 penalty.
    """
    weight_l1 = sum(torch.sum(torch.abs(p)) for p in model.parameters())
    data_term = criterion(outputs, labels)
    return data_term - lambda_l1 * weight_l1


def print_loss_and_custom_loss(outputs, labels, model, criterion, lambda_l1,epoch):
    """Compute the same loss as ``custom_loss`` while printing and logging its parts.

    Prints the parameter L1 norm, the criterion value, the scaled L1 term and
    their difference, writes the same four scalars to the module-level
    TensorBoard ``writer`` at step ``epoch``, and returns the difference.
    """
    weight_l1 = sum(torch.sum(torch.abs(p)) for p in model.parameters())
    ce_loss = criterion(outputs, labels)
    scaled_l1 = lambda_l1 * weight_l1
    # The scaled L1 term is subtracted from the criterion value.
    total_loss = ce_loss - scaled_l1

    print(f"\n\nThe l1 norm as loss : {weight_l1}")
    print(f"Cross entropy loss : {ce_loss}")
    print(f"Regularisation loss : (lambda_l1*l1_norm) {scaled_l1}")
    print(f"Total loss : (ce_loss-lambda_l1*l1_norm) {total_loss}")

    for tag, value in (
        ('Loss/L1_norm', weight_l1),
        ('Loss/Cross_entropy', ce_loss),
        ('Loss/Regularisation', scaled_l1),
        ('Loss/Total', total_loss),
    ):
        writer.add_scalar(tag, value, epoch)
    return total_loss


# regularization_factor=0.1
# criterion = nn.CrossEntropyLoss()
# optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)
# scheduler = StepLR(optimizer,step_size=1,gamma=0.7)

def l1_norm(model):
    """Return the sum of absolute values over all parameters of ``model``.

    The result is a scalar tensor, or the int ``0`` if ``model`` has no parameters.
    """
    return sum(torch.sum(torch.abs(p)) for p in model.parameters())



In [None]:

def train(model, criterion, optimizer, scheduler , train_loader, valid_loader, num_epochs, lambda_l1, restore_best=False):
    """Train ``model`` for ``num_epochs`` epochs and return it.

    Each batch is moved to the module-level ``device``, the loss comes from
    ``custom_loss`` and the scheduler is stepped once per epoch. After every
    epoch the model is scored with ``evaluate`` on ``valid_loader`` and the
    train/validation loss and accuracy are printed and written to the
    module-level TensorBoard ``writer``.

    Args:
        model: Network to optimise (modified in place).
        criterion: Loss passed to ``custom_loss`` and ``evaluate``.
        optimizer: Optimizer bound to ``model``'s parameters.
        scheduler: LR scheduler, stepped after each epoch.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        valid_loader: Iterable of validation batches.
        num_epochs: Number of passes over ``train_loader``.
        lambda_l1: Weight of the L1 term inside ``custom_loss``.
        restore_best: When True, snapshot the weights at the best validation
            accuracy and load them back before returning. The default (False)
            returns the weights of the final epoch, as before.

    Returns:
        The same ``model`` object.
    """
    print("\n\nStarted training  \n")
    best_state = None
    best_accuracy = 0.0

    for epoch in range(num_epochs):
        start_time = time.time()
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            optimizer.zero_grad()
            inputs = inputs.to(device)
            labels = labels.to(device)
            outputs = model(inputs)
            loss = custom_loss(outputs, labels, model, criterion, lambda_l1)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            predicted = outputs.detach().argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        scheduler.step()

        # Normalise by the number of samples actually seen: identical to the
        # dataset size for a full pass, still correct if the loader drops the
        # last batch, and safe for an empty loader.
        samples_seen = max(total, 1)
        epoch_loss = running_loss / samples_seen
        epoch_acc = 100 * correct / samples_seen

        valid_loss, valid_acc = evaluate(model, valid_loader, criterion)
        total_time = time.time() - start_time

        print('\nEpoch [{}/{}], Train Loss: {:.4f}, Train Accuracy: {:.2f}%, Valid Loss: {:.4f}, Valid Accuracy: {:.2f}%, Time: {:.2f}s'.format(
            epoch + 1, num_epochs, epoch_loss, epoch_acc, valid_loss, valid_acc,total_time))
        writer.add_scalar('Loss/Train', epoch_loss, epoch)
        writer.add_scalar('Accuracy/Train', epoch_acc, epoch)
        writer.add_scalar('Loss/Valid', valid_loss, epoch)
        writer.add_scalar('Accuracy/Valid', valid_acc, epoch)

        if valid_acc > best_accuracy:
            best_accuracy = valid_acc
            if restore_best:
                # state_dict() returns references to the live tensors, so the
                # snapshot must be cloned or later updates would overwrite it.
                best_state = {
                    key: value.detach().clone()
                    for key, value in model.state_dict().items()
                }
    print("best accuracy is ",best_accuracy)
    if restore_best and best_state is not None:
        model.load_state_dict(best_state)
    return model

# def complete_train(model):

#   l1_norm_outputs = calculate_l1_norm_of_outputs(model)
#   l1_norm_inputs = calculate_l1_norm_of_inputs(model)
#   threshold_outputs = calculate_threshold_l1_norm(l1_norm_outputs, pruning_rate)
#   threshold_inputs = calculate_threshold_l1_norm(l1_norm_inputs, pruning_rate)

#   print("\nBefore pruning:\n")
#   print_conv_layer_shapes(model)

#   model=prune_model(model,pruning_rate,l1_norm_inputs,l1_norm_outputs,threshold_inputs,threshold_outputs)

#   print("\nAfter pruning:\n")
#   print_conv_layer_shapes(model)

#   print("\n Pruned Filter Sizes \n")
#   check_pruning(model)

  # criterion = nn.CrossEntropyLoss()
  # optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)
  # scheduler = StepLR(optimizer,step_size=1,gamma=0.7)



#   print("The model that we are using is \n",model)
#   l1_pre_maximising=l1_norm(model)
#   print(f"\n\n Pre training L1 Norm: {l1_pre_maximising}\n\n")

# model=train(model, criterion, optimizer, scheduler, train_loader, valid_loader, num_epochs, lambda_l1 )

#   l1_post_maximising=l1_norm(model)
#   print(f"\n\nPost training L1 Norm: {l1_post_maximising}\n\n")

#   return model

# def prune(model):
#    for _ in range(20):
#       complete_train(model)

# prune(model)
# writer.close()

In [None]:
# Display the current module tree.
model

In [None]:
# Rebuild the network from scratch with the same arguments as the earlier
# instantiation; this replaces the previous `model` object.
model=DenseNet(Bottleneck, [6,12,24,16], growth_rate=32)


In [None]:
# model

In [None]:
def update_inputs_channels(bottleneck_layer,prev_channels):
    """Chain the input channels of the direct Conv2d children of a block.

    Walking the children in registration order, each convolution's weight is
    cut to its first ``prev_channels`` input channels and its ``in_channels``
    attribute is set to match; ``prev_channels`` then becomes that
    convolution's ``out_channels`` for the next one.

    Args:
        bottleneck_layer: Module whose direct Conv2d children are edited in place.
        prev_channels: Channel count feeding the first convolution.

    Returns:
        ``bottleneck_layer`` itself. (Previously the unrelated global ``model``
        was returned, which also raised NameError when no such global existed.)

    Note: the slice keeps the *leading* input channels and cannot add
    channels; if the weight has fewer than ``prev_channels`` input channels
    the declared ``in_channels`` will not match the weight.
    """
    for _, module in bottleneck_layer.named_children():
        if isinstance(module, nn.Conv2d):
            module.weight.data = module.weight.data[:, :prev_channels, :, :]
            module.in_channels = prev_channels
            prev_channels = module.out_channels
    return bottleneck_layer

In [None]:
def prune_filters_of_bottleneck(bottleneck_layer,threshold_values,next_input):
    """Prune low-L1-norm filters from the direct Conv2d children of a block.

    Children are visited in registration order while tracking the channel
    count produced so far:

    * ``nn.Conv2d``: filters whose L1 norm is below ``threshold_values[name]``
      are zeroed in the old layer; if any were found, the layer is replaced by
      ``get_new_conv(...)`` without them, moved to the module-level ``device``.
    * ``nn.BatchNorm2d``: replaced by a fresh ``BatchNorm2d`` sized to the
      current channel count (its previous statistics and affine parameters
      are not carried over).
    * anything else (including ``nn.Linear``) is left untouched.

    Args:
        bottleneck_layer: Module edited in place.
        threshold_values: dict mapping child name -> pruning threshold.
        next_input: Channel count entering the first child.

    Returns:
        The output-channel count of the last convolution visited, or
        ``next_input`` if there was none.
    """
    next_channel = next_input
    for name, layer in bottleneck_layer.named_children():
        if isinstance(layer, nn.Conv2d):
            filters_to_remove = []
            for idx, kernel in enumerate(layer.weight):
                if torch.sum(torch.abs(kernel)).item() < threshold_values[name]:
                    layer.weight.data[idx].zero_()
                    filters_to_remove.append(idx)

            if filters_to_remove:
                new_conv_layer = get_new_conv(next_channel, layer, 0, filters_to_remove).to(device)
                setattr(bottleneck_layer, name, new_conv_layer)
                next_channel = layer.out_channels - len(filters_to_remove)
            else:
                # Nothing pruned: the running channel count must still advance
                # to this layer's output, otherwise the next BatchNorm and the
                # returned value would be sized from a stale number.
                next_channel = layer.out_channels

        elif isinstance(layer, nn.BatchNorm2d):
            new_batch_norm_2d_layer = nn.BatchNorm2d(num_features=next_channel).to(device)
            setattr(bottleneck_layer, name, new_batch_norm_2d_layer)

    return next_channel

In [None]:
def prune_bottleneck(bottleneck_layer,prev):
    """Run the full pruning sequence on one block and report its output channels.

    Steps: compute per-filter L1 norms, derive per-layer thresholds from the
    module-level ``pruning_rate``, prune the filters, then re-chain the
    convolutions' input channels starting from ``prev``. The norms, the
    thresholds and the resulting channel count are printed along the way.

    Returns:
        The channel count returned by ``prune_filters_of_bottleneck``.
    """
    filter_norms = calculate_l1_norm_of_filters(bottleneck_layer)
    print(filter_norms)

    cutoffs = calculate_threshold_l1_norm_of_filters(filter_norms, pruning_rate)
    print(cutoffs)

    channels_out = prune_filters_of_bottleneck(bottleneck_layer, cutoffs, prev)
    update_inputs_channels(bottleneck_layer, prev)
    print(channels_out)
    return channels_out

In [None]:
# Display the module tree before the pruning pass below.
model

In [None]:
# Size of the weight's first dimension, i.e. the number of output filters of
# the first bottleneck's 1x1 convolution.
len(model.dense1[0].conv1.weight.data)

In [None]:
# Dump the raw weight tensor of the same convolution (large output).
model.dense1[0].conv1.weight.data

In [None]:
# Walk the top-level children of `model` in registration order, pruning every
# Bottleneck and re-creating the layers that follow so their channel counts
# match. `prev_t` carries the channel count produced by the layers seen so far.
# NOTE: `times` is only referenced by the commented-out lines after this loop.
times=0
prev_t=0
for name,layer in model.named_children():
  if isinstance(layer,nn.Conv2d):
    # Top-level convolution: left as is, only its output width is recorded.
    # print(layer)
    prev_t=layer.out_channels
    print()
    # print(next_input)
    print()
  elif isinstance(layer,nn.BatchNorm2d):
    # Top-level BatchNorm: replaced by a fresh one sized to the current width.
    # print(layer)
    new_batch_norm_2d_layer=nn.BatchNorm2d(num_features=prev_t).to(device)
    setattr(model,name,new_batch_norm_2d_layer)
    print()
  elif isinstance(layer,nn.Linear):
    # Classifier: re-created with the new input width; the old weights are not copied.
    new_layer=nn.Linear(in_features=prev_t,out_features=layer.out_features,bias=True).to(device)
    setattr(model,name,new_layer)
    # print(layer)
    print()
  elif isinstance(layer,Transition):
    # Transition: its BatchNorm and 1x1 conv are re-created for the new input
    # width (fresh parameters); the conv keeps its original out_channels.
    for n,module in layer.named_children():
      if isinstance(module, nn.BatchNorm2d):
        print("The input channel is ",prev_t)
        new_batch_norm_2d_layer=nn.BatchNorm2d(num_features=prev_t).to(device)
        setattr(layer,n,new_batch_norm_2d_layer)

        print()
      elif isinstance(module,nn.Conv2d):
        print("The input channel is ",prev_t)
        new_conv = nn.Conv2d(
                    in_channels=prev_t,  # Set to prev_t which is the updated number of input channels
                    out_channels=module.out_channels,
                    kernel_size=module.kernel_size,
                    stride=module.stride,
                    padding=module.padding,
                    dilation=module.dilation,
                    groups=module.groups,
                    bias=module.bias is not None
                ).to(device)



        setattr(layer, n, new_conv)
        prev_t=module.out_channels
  elif isinstance(layer,nn.Sequential):
    # Dense stage: prune each Bottleneck in turn. A Bottleneck concatenates its
    # new feature maps with its input, so the running width grows by the
    # number of filters that survive in the block.
    for n,module in layer.named_children():
      if isinstance(module, Bottleneck):
        print("Pruning only for ",module)
        prev_out=prune_bottleneck(module,prev_t)
        prev_t+=prev_out

    #     # break
        # print("new")
  # if times==1:
  #    break



In [None]:
# for name, module in model.named_modules():
#   print()
#     if name.startswith('dense1') or name.startswith('dense2') or name.startswith('dense3'):
#         print(f" {name}:")
#         for sub_name, sub_module in module.named_children():
#             print(f" 1{sub_name}: {sub_module}")


In [None]:
# Display the module tree after the pruning pass.
model

In [None]:
# train() moves every batch to `device`, and the pruning pass creates its
# replacement layers on `device`, but the model itself was never moved there.
# Put the whole network on `device` before building the optimizer so all
# parameters and inputs live on the same device.
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate, momentum=0.9, weight_decay=5e-4)
# NOTE(review): the learning rate is multiplied by 0.7 after every epoch; over
# num_epochs this shrinks it by many orders of magnitude -- confirm intended.
scheduler = StepLR(optimizer,step_size=1,gamma=0.7)

model=train(model, criterion, optimizer, scheduler, train_loader, valid_loader, num_epochs, lambda_l1 )



Started training  



  self.pid = os.fork()


In [None]:
# Number of output filters of the first bottleneck's 1x1 convolution at this point.
len(model.dense1[0].conv1.weight.data)

In [None]:
# Dump the raw weight tensor of that convolution (large output).
model.dense1[0].conv1.weight.data

In [None]:
# Weight shape of dense1, block 0, 1x1 convolution.
model.dense1[0].conv1.weight.data.shape

In [None]:
# Weight shape of dense1, block 0, 3x3 convolution.
model.dense1[0].conv2.weight.data.shape

In [None]:
# Weight shape of dense1, block 1, 1x1 convolution.
model.dense1[1].conv1.weight.data.shape

In [None]:
# Weight shape of dense1, block 1, 3x3 convolution.
model.dense1[1].conv2.weight.data.shape

In [None]:
# Trace the output size of every top-level child with temporary forward hooks.
# The probe uses the same 32x32 resolution as the data pipeline (a 572x572
# input does not reduce to the feature size the classifier expects) and is
# created on the model's own device.
input_tensor = torch.randn(1, 3, 32, 32, device=next(model.parameters()).device)


def print_size(module, input, output):
    """Forward hook: print the class name of ``module`` and its output size."""
    print(f"{module.__class__.__name__} output size: {output.size()}")


# Keep the handles so the hooks can be removed again; otherwise they would
# fire on every later forward pass and pile up each time this cell is re-run.
hook_handles = [layer.register_forward_hook(print_size) for layer in model.children()]

# Run in eval mode so the random probe does not update BatchNorm running
# statistics, then restore whatever mode the model was in.
was_training = model.training
model.eval()
try:
    with torch.no_grad():
        output = model(input_tensor)
finally:
    for handle in hook_handles:
        handle.remove()
    model.train(was_training)

print("Final output size:", output.size())

In [None]:
# Smoke test: one forward pass, one backward pass and one optimizer step on
# random data, to confirm the model's layer shapes are consistent.
# torch, nn and optim are already imported in the notebook's first cell.
# NOTE: this performs a real Adam update on `model` and rebinds the
# module-level `criterion` and `optimizer` names.

# Create the probe tensors on the model's own device so the test also works
# when the model has been moved to the GPU.
smoke_device = next(model.parameters()).device

# Define a sample input tensor
sample_input = torch.randn(1, 3, 32, 32, device=smoke_device)  # Assuming input size is 32x32 with 3 channels

# Define a simple loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Dummy target
target = torch.tensor([0], device=smoke_device)  # Assuming a single-class output for simplicity

# Perform a forward pass
output = model(sample_input)
print("Output shape: ", output.shape)

# Compute loss
loss = criterion(output, target)
print("Loss: ", loss.item())

# Perform a backward pass
optimizer.zero_grad()
loss.backward()

# Check gradients
for name, param in model.named_parameters():
    if param.grad is not None:
        print(f"{name} gradient: {param.grad.sum().item()}")

# Perform a training step
optimizer.step()

# Ensure no errors occurred during the forward and backward passes
print("Model forward and backward pass successful.")