In [None]:
%run Data_Constructor.ipynb

## 1.&nbsp;Model


**define model**

In [6]:
class DoubleConv(nn.Module):
    """
    Two stacked Conv2d -> BatchNorm2d -> ReLU stages used to extract features.

    Both convolutions use stride 1 and ``padding="same"``, so only the
    channel count changes between input and output.

    Args:
        in_channels (int): number of channels of the input. For example, an
            input of shape (batch_size, 3, img_size, img_size) has
            in_channels = 3.
        out_channels (int): number of channels of the output. For example,
            for a desired output of shape (batch_size, 4, img_size, img_size)
            out_channels is 4.
        kernel_size (int): side length of the square kernel applied by both
            Conv2d layers.
        bias (bool): whether or not the Conv2d layers use a bias term.
    """

    def __init__(self, in_channels, out_channels, kernel_size=3, bias=True):
        super().__init__()

        stages = []
        stage_in = in_channels
        for _ in range(2):
            stages += [
                nn.Conv2d(
                    stage_in,
                    out_channels,
                    kernel_size,
                    stride=1,
                    padding="same",
                    bias=bias,
                ),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(inplace=True),
            ]
            # The second stage consumes what the first one produced.
            stage_in = out_channels

        # Positions 0-5 of the Sequential are conv, bn, relu, conv, bn, relu.
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        features = self.conv(x)
        return features


class ObjectCounter(nn.Module):
    """Predict how many instances of each object class (shape) an image holds.

    The image goes through a stack of DoubleConv blocks, each followed by a
    2x2 max pool with stride 2. The resulting feature map is flattened and
    passed through two fully connected layers, producing one value per class.

    Args:
        img_size (int): model will take images of shape
            (in_channels, img_size, img_size). Must be divisible by
            2 ** len(features), because every DoubleConv block is followed
            by a stride-2 max pool.
        in_channels (int): number of input channels. For example for an
            input of shape (batch_size, 3, img_size, img_size) in_channels
            is 3.
        num_classes (int): number of output classes desired. The output
            shape of the model will be (batch_size, num_classes).
        features (List[int]): number of output channels of each DoubleConv
            block, in order. Must not be empty.
        kernel_size (int): A kernel of shape (kernel_size, kernel_size)
            is used by the Conv2d layers of every DoubleConv block.
        fc_intermediate_size (int): Size of the output of the first
            fully connected layer (fc1) and size of the input of the second
            fully connected layer (fc2).
        bias (bool): whether or not to add a bias to the Conv2d layers.
        track_x_shape (bool): whether or not to record the shape of x after
            every stage of forward(). Records are appended to
            ``x_shape_tracker`` and are never cleared automatically, so the
            list grows with every forward pass while tracking is on.

    Raises:
        ValueError: if ``features`` is empty or ``img_size`` is not
            divisible by 2 ** len(features).
    """

    def __init__(
        self,
        img_size=256,
        in_channels=3,
        num_classes=3,
        features=[16, 32],
        kernel_size=3,
        fc_intermediate_size=10,
        bias=True,
        track_x_shape=False,
    ):

        super().__init__()

        # Copy so neither the shared default list nor a caller-owned list is
        # aliased by this instance.
        features = list(features)
        if not features:
            raise ValueError("features must contain at least one entry.")

        self.img_size = img_size
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.features = features
        self.kernel_size = kernel_size
        self.fc_intermediate_size = fc_intermediate_size
        # Honour the caller's choice (this used to be hard-coded to True).
        self.bias = bias
        self.track_x_shape = track_x_shape

        # Each block halves the spatial size once, via the max pool.
        downscale = 2 ** len(self.features)
        final_size, remainder = divmod(self.img_size, downscale)
        if remainder != 0:
            raise ValueError(f"image_size must be divisable by {downscale}.")

        self.final_size = int(final_size)
        self.final_feature_size = self.features[-1]
        # Length of the flattened feature map that feeds fc1.
        self.fc_in_size = self.final_feature_size * self.final_size**2

        # Not used by forward(); kept so the module layout stays unchanged.
        self.sigmoid = nn.Sigmoid()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(self.fc_in_size, self.fc_intermediate_size)
        self.fc2 = nn.Linear(self.fc_intermediate_size, self.num_classes)
        self.feature_extractor = nn.ModuleList()

        # Always present, so tracking can also be switched on after
        # construction without hitting a missing attribute.
        self.x_shape_tracker = []

        # Feature extractor: chain the blocks, feeding each one the channel
        # count produced by the previous one.
        block_in = in_channels
        for block_out in self.features:
            self.feature_extractor.append(
                DoubleConv(
                    block_in, block_out, kernel_size=self.kernel_size, bias=self.bias
                )
            )
            block_in = block_out

    def forward(self, x):
        """Return a (batch_size, num_classes) tensor of raw predictions."""
        self.track_shape(x, "input shape")
        for i, module in enumerate(self.feature_extractor):

            x = module(x)
            self.track_shape(x, f"feature_extractor layer {i}")
            x = self.pool(x)
            self.track_shape(x, f"max pool layer {i}")

        # Flatten everything except the batch dimension.
        x = x.reshape(x.shape[0], -1)
        self.track_shape(x, "reshape")

        x = self.fc1(x)
        self.track_shape(x, "fc1")

        x = self.relu(x)
        self.track_shape(x, "relu")

        x = self.fc2(x)
        self.track_shape(x, "fc2 (output shape)")

        return x

    def track_shape(self, x, description):
        """Append "<description>: <x.shape>" to x_shape_tracker if tracking is on."""
        if self.track_x_shape:
            self.x_shape_tracker.append((f"{description}:\n\t {x.shape}"))

        return None

**sanity check model**

In [7]:
# Dimensions of the dummy batch used to shape-check the models.
img_size = 256
batch_size = 13

# Random stand-in for a batch of 3-channel images.
x = torch.rand(batch_size, 3, img_size, img_size)

# DoubleConv on its own: 3 input channels -> 4 output channels.
model = DoubleConv(in_channels=3, out_channels=4, kernel_size=11)
logits = model(x)

print(f"Input shape: {x.shape} ")
print(f"Output shape: {logits.shape}")

Input shape: torch.Size([13, 3, 256, 256]) 
Output shape: torch.Size([13, 4, 256, 256])


In [8]:
# Full counter with shape tracking switched on, so the forward pass records
# the shape of x after every stage.
model = ObjectCounter(
    img_size=img_size,
    in_channels=3,
    num_classes=2,
    features=[7, 11, 15],
    kernel_size=7,
    track_x_shape=True,
)
logits = model(x)

print("model.x_shape_tracker: \n")
for shape in model.x_shape_tracker:
    print(shape, "\n")

print(f"Example output: {logits[0]}\n")

# Sizes derived in ObjectCounter.__init__.
print(f"model.final_size: {model.final_size}")
print(f"model.final_feature_size: {model.final_feature_size}")
print(f"model.fc_in_size: {model.fc_in_size}\n")

# First Conv2d of the first DoubleConv block, with its parameter shapes.
first_conv = model.feature_extractor[0].conv[0]
print(first_conv)
print(first_conv.weight.shape)
print(first_conv.bias.shape)

model.x_shape_tracker: 

input shape:
	 torch.Size([13, 3, 256, 256]) 

feature_extractor layer 0:
	 torch.Size([13, 7, 256, 256]) 

max pool layer 0:
	 torch.Size([13, 7, 128, 128]) 

feature_extractor layer 1:
	 torch.Size([13, 11, 128, 128]) 

max pool layer 1:
	 torch.Size([13, 11, 64, 64]) 

feature_extractor layer 2:
	 torch.Size([13, 15, 64, 64]) 

max pool layer 2:
	 torch.Size([13, 15, 32, 32]) 

reshape:
	 torch.Size([13, 15360]) 

fc1:
	 torch.Size([13, 10]) 

relu:
	 torch.Size([13, 10]) 

fc2 (output shape):
	 torch.Size([13, 2]) 

Example output: tensor([-0.2813, -0.0374], grad_fn=<SelectBackward0>)

model.final_size: 32
model.final_feature_size: 15
model.fc_in_size: 15360

Conv2d(3, 7, kernel_size=(7, 7), stride=(1, 1), padding=same)
torch.Size([7, 3, 7, 7])
torch.Size([7])


## 2.&nbsp;Overfit a Small Batch


**overfit function**

In [9]:
def overfit(imgs, labels, model, optimizer, device, epochs=100):
    """Repeatedly fit ``model`` to one fixed batch using an MSE loss.

    Args:
        imgs: image tensor for the batch; cast to float and divided by 255
            before being passed to the model.
        labels: target tensor for the batch; cast to float for the loss.
        model: the nn.Module to train. It is moved to ``device`` and put in
            training mode.
        optimizer: optimizer that updates the model's parameters.
        device: device on which the model, inputs and targets are placed.
        epochs (int): number of optimisation steps taken on the batch.

    Returns:
        None. The epoch index and loss are printed every 25 epochs.
    """
    criterion = nn.MSELoss().to(device)

    model = model.to(device)
    model.train()

    # Formatting for input to model.
    inputs = (imgs.float() / 255.0).to(device)
    targets = labels.float().to(device)

    for epoch in range(epochs):
        optimizer.zero_grad()

        outputs = model(inputs).to(device)
        loss = criterion(outputs, targets)

        loss.backward()
        optimizer.step()

        is_report_epoch = epoch % 25 == 0
        if is_report_epoch:
            print(f"epoch: {epoch}")
            print(f"loss: {loss:.6f}\n")

    return None

**small batch to overfit**

In [10]:
# Tiny datamodule: 10 images split 80/20, batches of 4, no shuffling.
obj_counting_dm = ObjectCounting_DM(
    train_val_size=10,
    img_size=256,
    batch_size=4,
    train_val_split=(0.8, 0.2),
    shapes_per_image=(0, 4),
    class_probs=(1, 1, 1),
    rand_seed=123456,
    object_count=True,
    class_map={
        0: {"name": "background", "gs_range": (200, 255), "target_color": (255, 255, 255)},
        1: {"name": "rectangle", "gs_range": (0, 100), "target_color": (255, 0, 0)},
        2: {"name": "line", "gs_range": (0, 100), "target_color": (0, 255, 0)},
        3: {"name": "donut", "gs_range": (0, 100), "target_color": (0, 0, 255)},
    },
    dataloader_shuffle={"train": False, "val": False, "test": False},
)

# Prepare the fit stage and take the first batch of the training dataloader;
# this single batch is the one the model is overfitted to below.
obj_counting_dm.setup(stage="fit")
dataiter = iter(obj_counting_dm.train_dataloader())

imgs, labels = next(dataiter)

Setting up fit stage.


**run overfit function**

In [11]:
# torch device string: use the GPU when one is available.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

# Small counter sized to the batch's images (last dimension of imgs).
model = ObjectCounter(
    img_size=imgs.shape[-1],
    in_channels=3,
    num_classes=3,
    features=[4, 8],
    fc_intermediate_size=10,
    bias=True,
)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-6)

overfit(imgs, labels, model, optimizer, device, epochs=1000)

epoch: 0
loss: 0.847755

epoch: 25
loss: 0.559450

epoch: 50
loss: 0.390993

epoch: 75
loss: 0.285023

epoch: 100
loss: 0.219508

epoch: 125
loss: 0.176054

epoch: 150
loss: 0.144657

epoch: 175
loss: 0.120050

epoch: 200
loss: 0.100053

epoch: 225
loss: 0.083733

epoch: 250
loss: 0.070526

epoch: 275
loss: 0.059936

epoch: 300
loss: 0.051575

epoch: 325
loss: 0.045040

epoch: 350
loss: 0.039987

epoch: 375
loss: 0.036107

epoch: 400
loss: 0.033141

epoch: 425
loss: 0.030858

epoch: 450
loss: 0.029378

epoch: 475
loss: 0.028485

epoch: 500
loss: 0.027824

epoch: 525
loss: 0.027244

epoch: 550
loss: 0.026691

epoch: 575
loss: 0.026151

epoch: 600
loss: 0.025618

epoch: 625
loss: 0.025090

epoch: 650
loss: 0.024567

epoch: 675
loss: 0.024049

epoch: 700
loss: 0.023535

epoch: 725
loss: 0.023026

epoch: 750
loss: 0.022521

epoch: 775
loss: 0.022023

epoch: 800
loss: 0.021529

epoch: 825
loss: 0.021042

epoch: 850
loss: 0.020560

epoch: 875
loss: 0.020085

epoch: 900
loss: 0.019615

epoch:

**get predictions**

In [13]:
# Inference mode: BatchNorm uses its running statistics instead of batch ones.
model.eval()

# Formatting for input to model (same 1/255 scaling that overfit() applies).
imgs_normed = (imgs.float() / 255.0).to(device)
with torch.no_grad():
    logits = model(imgs_normed)

# Counts cannot be negative, so clamp at zero before rounding. This matches
# how LightningObjCounter turns logits into predictions. The logits already
# live on `device`, so no further transfer is needed.
preds = logits.clamp(min=0).round().long()
print(f"\nlogits:\n {logits}")
print(f"\npreds:\n {preds}")
print(f"\nlabels:\n {labels}")


logits:
 tensor([[-0.0582,  0.1710,  0.2199],
        [ 0.8337,  1.0292,  1.1120],
        [ 1.2014,  0.9485,  1.8048],
        [-0.0730,  1.0167,  0.0680]], device='cuda:0')

preds:
 tensor([[0, 0, 0],
        [1, 1, 1],
        [1, 1, 2],
        [0, 1, 0]], device='cuda:0')

labels:
 tensor([[0, 0, 0],
        [1, 1, 1],
        [1, 1, 2],
        [0, 1, 0]])


**visualize overfit predictions**

In [14]:
%matplotlib inline
@interact
def vizualize_targets_predictions(
    show_labels=widgets.Checkbox(value=False, description='display_labels'),
    show_preds=widgets.Checkbox(value=False, description='display_preds'),
    display_size=widgets.IntSlider(value=30, min=2, max=50, step=1),
):
    """Show the overfitted batch as a grid, optionally annotated.

    Reads the notebook-level ``imgs``, ``labels`` and ``preds``.

    Args:
        show_labels: when checked, annotate each image with its label.
        show_preds: when checked, annotate each image with its prediction
            (applied on top of the label annotation if both are checked).
        display_size: figure width and height passed to ``show``.
    """
    # One entry per image in the batch.
    result_images = list(imgs)

    if show_labels:
        labelled = []
        for img, label in zip(result_images, labels):
            labelled.append(
                add_labels(img, label, obj_counting_dm.class_map, object_count=True)
            )
        result_images = labelled

    if show_preds:
        predicted = []
        for img, pred in zip(result_images, preds):
            predicted.append(
                add_labels(img, pred, obj_counting_dm.class_map, object_count=True, pred=True)
            )
        result_images = predicted

    grid = make_grid(result_images)
    show(grid, figsize=(display_size, display_size))

interactive(children=(Checkbox(value=False, description='display_labels'), Checkbox(value=False, description='…

## 3.&nbsp;Build Lightning Module



In [36]:
class LightningObjCounter(pl.LightningModule):
    """LightningModule that trains an ObjectCounter with an MSE loss.

    Args:
        in_channels (int): number of channels of the input images.
        num_classes (int): number of values predicted per image.
        img_size (int): side length of the square input images.
        features (List[int]): channels of each DoubleConv block of the
            underlying ObjectCounter.
        kernel_size (int): kernel size used by the ObjectCounter.
        fc_intermediate_size (int): width of the hidden fully connected layer.
        bias (bool): whether the ObjectCounter's Conv2d layers use a bias.
        lr (float): learning rate of the Adam optimizer.
    """

    def __init__(self, in_channels=3, num_classes=3, img_size = 256, features = [4,8], kernel_size = 3, fc_intermediate_size = 10, bias = True, lr = 5e-6):
        super().__init__()

        # LM attributes.
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.img_size = img_size
        # Copy so the shared default list is never aliased by an instance.
        self.features = list(features)
        self.kernel_size = kernel_size
        self.fc_intermediate_size = fc_intermediate_size
        # Honour the caller's choice (this used to be hard-coded to True,
        # which could disagree with the `bias` value saved in the hparams).
        self.bias = bias
        self.lr = lr

        # Log hyperparameters.
        self.save_hyperparameters()

        # Metrics.
        # NOTE(review): num_classes=4 presumably covers per-image counts 0-3;
        # confirm against the datamodule's shapes_per_image setting.
        self.train_acc = torchmetrics.Accuracy(num_classes=4,task="multiclass", multidim_average = 'samplewise')
        self.train_f1 = torchmetrics.F1Score(num_classes=4,task="multiclass", multidim_average = 'samplewise')
        self.val_acc = torchmetrics.Accuracy(num_classes=4,task="multiclass", multidim_average = 'samplewise')
        self.val_f1 = torchmetrics.F1Score(num_classes=4,task="multiclass", multidim_average = 'samplewise')

        # Loss function.
        self.loss = nn.MSELoss()

        # Model.
        self.model = ObjectCounter(img_size = self.img_size,
                                    in_channels = self.in_channels,
                                    num_classes = self.num_classes,
                                    features = self.features,
                                    kernel_size = self.kernel_size,
                                    fc_intermediate_size = self.fc_intermediate_size,
                                    bias = self.bias )

        # Sample input. Used for logging the model graph.
        self.example_input_array = torch.rand(16,3,self.img_size, self.img_size)

    def forward(self, imgs):
        """Cast images to float, divide by 255 and run the ObjectCounter."""
        imgs_normed = imgs.float() / 255.0
        return self.model(imgs_normed)

    # Utility function.
    def custom_histogram_adder(self):
        """Log a histogram of every parameter for the current epoch.

        Does nothing when no logger is attached or when the logger's
        experiment object has no ``add_histogram`` method.
        """
        experiment = getattr(self.logger, "experiment", None)
        if not hasattr(experiment, "add_histogram"):
            return

        # iterating through all parameters
        for name, params in self.named_parameters():
            experiment.add_histogram(name, params, self.current_epoch)

    def mse_loss(self, logits, labels):
        """Mean squared error between the logits and the float-cast labels."""
        labels = labels.float()
        return self.loss(logits, labels)

    def _shared_step(self, batch):
        """Forward one (imgs, labels) batch; return (loss, preds, labels)."""
        # Grab images and labels from batch.
        imgs, labels = batch

        logits = self.forward(imgs)

        # Calculate loss.
        loss = self.mse_loss(logits, labels)

        # Counts cannot be negative: clamp at zero, then round to integers.
        preds = logits.clamp(min = 0).round().long()
        return loss, preds, labels

    def training_step(self, train_batch, batch_idx):
        """Compute and log the training loss for one batch; return the loss."""
        loss, preds, labels = self._shared_step(train_batch)

        # Update step metrics. They are updated but not logged.
        # NOTE(review): since they are never logged, nothing visible here
        # resets their accumulated state - confirm whether that is intended.
        self.train_acc(preds, labels)
        self.train_f1(preds, labels)

        self.log('Loss/train_loss', loss)

        return loss

    def validation_step(self, val_batch, batch_idx):
        """Compute and log the validation loss for one batch."""
        # Log histograms once per validation run, on the first batch.
        if batch_idx == 0:
            self.custom_histogram_adder()

        loss, preds, labels = self._shared_step(val_batch)

        # Update step metrics (updated but not logged, as in training_step).
        self.val_acc(preds, labels)
        self.val_f1(preds, labels)

        self.log('Loss/val_loss', loss)

    def configure_optimizers(self):
        """Return an Adam optimizer over the ObjectCounter's parameters."""
        # Remember to make the optim input self.model.parameters()!
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.lr)
        return optimizer



## 4.&nbsp; Train



**define trainer**

In [56]:
# Create a datamodule.
obj_counting_dm = ObjectCounting_DM(
    train_val_size=500,
    img_size=256,
    train_val_split=(.9, .1),
    test_size=100,
    batch_size=16,
    dataloader_shuffle={"train": True, "val": True, "test": False},
    shapes_per_image=(0, 3),
    class_probs=(1, 1, 0),
    rand_seed=23456,
    class_map={
        0: {"name": "background", "gs_range": (240, 255), "target_color": (0, 0, 0)},
        1: {"name": "rectangle", "gs_range": (0, 50), "target_color": (255, 0, 0)},
        2: {"name": "line", "gs_range": (50, 100), "target_color": (0, 255, 0)},
        3: {"name": "donut", "gs_range": (100, 125), "target_color": (0, 0, 255)},
    },
    object_count=True,
)

# Create callback for ModelCheckpoints. The filename pattern produces
# "epoch=NN.ckpt", which is the name the visualization cells load.
checkpoint_callback = ModelCheckpoint(
    filename='{epoch:02d}',
    save_top_k=50,
    monitor="Loss/val_loss",
    every_n_epochs=1,
)

# Create Instance of Lightning Module.
obj_counting_lm = LightningObjCounter(
    in_channels=3,
    num_classes=2,
    img_size=obj_counting_dm.img_size,
    features=[8, 12],
    kernel_size=3,
    fc_intermediate_size=10,
    lr=1e-6,
)

# Define Logger.
logger = TensorBoardLogger("tb_logs", name="object_counting", log_graph=True)

# -----------Set accelerator.------------------
# Lightning accelerator name. Deliberately NOT stored in `device`: that
# global holds the torch device string ("cuda"/"cpu") that the overfit and
# prediction cells pass to `.to(device)`, and "gpu" is not a torch device.
accelerator = "gpu" if torch.cuda.is_available() else "cpu"

# Create an instance of a Trainer.
trainer = pl.Trainer(
    logger=logger,
    callbacks=[checkpoint_callback],
    accelerator=accelerator,
    max_epochs=15,
    log_every_n_steps=5,
)

# Fit.
trainer.fit(obj_counting_lm, obj_counting_dm)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Setting up fit stage.


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type               | Params | In sizes          | Out sizes
---------------------------------------------------------------------------------
0 | train_acc | MulticlassAccuracy | 0      | ?                 | ?        
1 | train_f1  | MulticlassF1Score  | 0      | ?                 | ?        
2 | val_acc   | MulticlassAccuracy | 0      | ?                 | ?        
3 | val_f1    | MulticlassF1Score  | 0      | ?                 | ?        
4 | loss      | MSELoss            | 0      | ?                 | ?        
5 | model     | ObjectCounter      | 494 K  | [16, 3, 256, 256] | [16, 2]  
---------------------------------------------------------------------------------
494 K     Trainable params
0         Non-trainable params
494 K     Total params
1.978     Total estimated model params size (MB)


Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=15` reached.


**tensorboard**

In [57]:
# Note: If using firefox turn off enhanced tracking protection for the following to work.
%load_ext tensorboard
%tensorboard --logdir tb_logs/

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 8044), started 0:18:48 ago. (Use '!kill 8044' to kill it.)

## 5.&nbsp; Visualize Predictions


**get test set**

In [40]:
# Prepare the test stage, then open an iterator over the test dataloader so
# the next cell can pull one batch at a time.
obj_counting_dm.setup(stage="test")
test_dataiter = iter(obj_counting_dm.test_dataloader())

Setting up test stage.


**execute the following cell again to see more test data**

In [42]:
# Pull the next test batch. When the iterator is exhausted, start over from
# the first batch instead of failing with StopIteration on a re-run.
try:
    imgs, labels = next(test_dataiter)
except StopIteration:
    test_dataiter = iter(obj_counting_dm.test_dataloader())
    imgs, labels = next(test_dataiter)

**visualize predictions on test set**

In [60]:
%matplotlib inline
# TensorBoard run ("version_N" directory) whose checkpoints are loaded.
version = 0

@interact
def vizualize_labels_preds( epoch = widgets.IntSlider(value=14,min=0,max=49,step=1),
                                 show_labels = widgets.Checkbox(value=False,description='display_labels'),
                                 show_preds = widgets.Checkbox(value=True,description='display_preds'),
                                 display_size = widgets.IntSlider(value=30,min=2,max=50,step=1),
                        ):
    """Show the current test batch with predictions from a saved checkpoint.

    Reads the notebook-level ``imgs``, ``labels`` and ``version``.

    Args:
        epoch: epoch whose checkpoint is loaded.
        show_labels: when checked, annotate each image with its label.
        show_preds: when checked, annotate each image with its prediction.
        display_size: figure width and height passed to ``show``.
    """
    import os

    PATH = 'tb_logs/object_counting/version_{}/checkpoints/epoch={:02d}.ckpt'.format(version, epoch)

    # The slider allows epochs for which no checkpoint was written; report
    # that instead of raising from load_from_checkpoint.
    if not os.path.isfile(PATH):
        print(f"No checkpoint found at {PATH}.")
        return

    # Determine the device and move the model to it
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    loaded_lm = LightningObjCounter.load_from_checkpoint(PATH)
    loaded_lm.to(device)
    # Inference mode, so BatchNorm uses its running statistics rather than
    # the statistics of this one batch.
    loaded_lm.eval()

    with torch.no_grad():
        # Also move the inputs to the same device as the model
        imgs_device = torch.stack([img.to(device) for img in imgs])
        logits = loaded_lm(imgs_device)
    # Counts cannot be negative: clamp at zero before rounding, matching
    # the prediction rule used inside LightningObjCounter.
    preds = logits.clamp(min=0).round().long()

    result_images = [imgs[i] for i in range(len(imgs))]

    if  show_labels:
        result_images = [add_labels(img, label, obj_counting_dm.class_map, object_count = True) for img, label in zip(result_images, labels)]
    if  show_preds:
        result_images = [add_labels(img, pred, obj_counting_dm.class_map, pred = True, object_count = True) for img, pred in zip(result_images, preds)]

    grid = make_grid(result_images)
    show(grid, figsize = (display_size,display_size))

interactive(children=(IntSlider(value=14, description='epoch', max=49), Checkbox(value=False, description='dis…

**visualize first conv layer**

In [63]:
%matplotlib inline
# TensorBoard run ("version_N" directory) whose checkpoints are loaded.
version = 0

# NOTE(review): this function shares its name with the prediction viewer
# defined in an earlier cell and shadows it; the name is kept unchanged here.
@interact
def vizualize_labels_preds( epoch = widgets.IntSlider(value=14,min=0,max=49,step=1),
                            display_size = widgets.IntSlider(value=20,min=2,max=50,step=1),
                        ):
    """Display the kernels of the first Conv2d layer of a saved checkpoint.

    Args:
        epoch: epoch whose checkpoint is loaded.
        display_size: figure width and height passed to ``show``.
    """
    import os

    PATH = 'tb_logs/object_counting/version_{}/checkpoints/epoch={:02d}.ckpt'.format(version, epoch)

    # The slider allows epochs for which no checkpoint was written; report
    # that instead of raising from load_from_checkpoint.
    if not os.path.isfile(PATH):
        print(f"No checkpoint found at {PATH}.")
        return

    loaded_lm = LightningObjCounter.load_from_checkpoint(PATH)

    kernels = loaded_lm.model.feature_extractor[0].conv[0].weight.data.clone()

    # Min-max scale every kernel to [0, 1] for display. Dividing by the mean
    # (the previous approach) is unstable: the mean of a kernel can be close
    # to zero or negative, which blows up or flips the values.
    # NOTE(review): assumes `show` expects float images in [0, 1] - confirm.
    kernel_list = []
    for kernel in kernels:
        low = kernel.min()
        span = (kernel.max() - low).clamp(min=1e-12)  # guard constant kernels
        kernel_list.append((kernel - low) / span)

    show(kernel_list, figsize = (display_size, display_size))

    print(f"Shape of first conv layer weights: {kernels.shape}")

interactive(children=(IntSlider(value=14, description='epoch', max=49), IntSlider(value=20, description='displ…