In [1]:
from __future__ import absolute_import, print_function
import time
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torchvision.transforms as T
from torchvision.datasets import ImageFolder
from torchvision.models import resnet18, ResNet18_Weights
from torch.utils.data import DataLoader
from tqdm import tqdm
from PIL import Image

# Prefer the first CUDA GPU when one is visible; otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')
print(device)

  from .autonotebook import tqdm as notebook_tqdm


cuda:0


In [2]:
# Build an untrained ResNet-18; the trained parameters come from the
# checkpoint loaded below, not from torchvision's pretrained weights.
weights = None  # ResNet18_Weights.DEFAULT
model = resnet18(weights=weights)

# Replace the classification head so it emits 10 logits, matching the
# shape of the `fc` parameters stored in the checkpoint.
in_features = model.fc.in_features
model.fc = nn.Linear(in_features, 10)

# map_location remaps the stored tensors onto the device selected above, so a
# checkpoint written from a CUDA session still loads on a CPU-only machine.
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
state_dict = torch.load(
    r"C:\Users\Noel\Documents\THESIS\PYTHON-THINGIES\Saved Model Parameters\resnet18_torchvision\test40_epoch198.pth",
    map_location=device,
)
model.load_state_dict(state_dict)
# model.load_state_dict(torch.load(r"C:\Users\Noel\Documents\THESIS STUFF\PYTHON-THINGIES\Saved Model Parameters\resnet18_torchvision\test39_epoch348.pth"))
# Set model to evaluation mode and send to device
model.to(device).eval()

# Names of every module whose qualified name contains "conv" or "fc";
# these are the layers whose activations get captured later on.
layers_of_interest = [name for name, _ in model.named_modules() if "conv" in name or "fc" in name]

print(layers_of_interest)


['conv1', 'layer1.0.conv1', 'layer1.0.conv2', 'layer1.1.conv1', 'layer1.1.conv2', 'layer2.0.conv1', 'layer2.0.conv2', 'layer2.1.conv1', 'layer2.1.conv2', 'layer3.0.conv1', 'layer3.0.conv2', 'layer3.1.conv1', 'layer3.1.conv2', 'layer4.0.conv1', 'layer4.0.conv2', 'layer4.1.conv1', 'layer4.1.conv2', 'fc']


This cell works, but it is technically wrong and would not 'fly' if the notebook were converted to an actual Python script.

In [3]:
# layer_activations = {}

# def hook_wrapper(name: str):
#     def hook_fn(module: nn.Module, input: torch.Tensor, output: torch.Tensor) -> None:
#         layer_activations[name] = output
#     return hook_fn


# for name, layer in model.named_modules():
#     if name in layers_of_interest:
#         layer.register_forward_hook(hook_wrapper(name))

To address this, each hook is now an instance of a hook class that holds both the captured output and the hook function.
The dictionary therefore maps each layer name to its `Hook_Layer` object.

In [4]:
class Hook_Layer():
    """Forward hook that remembers the latest output of a single module.

    Creating an instance registers ``hook_fn`` as a forward hook on ``layer``.
    After every forward pass through that module the produced output is kept
    in ``self.output``; calling the instance returns it.

    Attributes:
        output: Output of the most recent forward pass through the hooked
            module, or ``None`` if the module has not run yet.
        hook: Handle returned by ``register_forward_hook``.
    """
    def __init__(self, layer) -> None:
        # Create the slot first so `output` exists before the hook can fire.
        self.output = None
        self.hook = layer.register_forward_hook(self.hook_fn)

    def hook_fn(self, layer, input, output):
        """Forward-hook callback: store ``output`` (``layer``/``input`` unused)."""
        self.output = output

    def __call__(self):
        """Return the most recently captured output (``None`` before any pass)."""
        return self.output

    def remove(self) -> None:
        """Unregister the hook from its module.

        The last captured output stays available. Calling this more than once
        is harmless.
        """
        self.hook.remove()

# Re-running this cell must not stack a second set of forward hooks on the
# same modules, so detach any hooks left over from a previous run first.
for stale in globals().get('layer_activations', {}).values():
    stale_handle = getattr(stale, 'hook', None)
    if stale_handle is not None:
        stale_handle.remove()

# Map each layer name of interest to the Hook_Layer capturing its output.
layer_activations = {}
for name, layer in model.named_modules():
    if name in layers_of_interest:
        layer_activations[name] = Hook_Layer(layer)

print(layer_activations.values())

dict_values([<__main__.Hook_Layer object at 0x000002092A3704F0>, <__main__.Hook_Layer object at 0x000002092A373DF0>, <__main__.Hook_Layer object at 0x000002092A319930>, <__main__.Hook_Layer object at 0x000002092A31BD60>, <__main__.Hook_Layer object at 0x000002092A31B640>, <__main__.Hook_Layer object at 0x000002092A31BCA0>, <__main__.Hook_Layer object at 0x000002092A31BDF0>, <__main__.Hook_Layer object at 0x000002092A31BF70>, <__main__.Hook_Layer object at 0x000002092A31B880>, <__main__.Hook_Layer object at 0x000002092A31B490>, <__main__.Hook_Layer object at 0x000002092A31B9A0>, <__main__.Hook_Layer object at 0x000002092A31A8C0>, <__main__.Hook_Layer object at 0x000002091F09F6A0>, <__main__.Hook_Layer object at 0x000002091F09CA30>, <__main__.Hook_Layer object at 0x000002091F09FBE0>, <__main__.Hook_Layer object at 0x000002091F09FA00>, <__main__.Hook_Layer object at 0x000002091F09F250>, <__main__.Hook_Layer object at 0x0000020928C1D330>])


In [5]:
# Create a dataset class that extends ImageFolder while
# simultaneously returning a 3 way Tuple, instead of the
# original that contains 2 elements.
# For that reason we must define a new __getitem__ method.
class ImageFolderWithPaths(ImageFolder):
    """ImageFolder variant whose items also carry the image's file path.

    Returns:
        A 3-tuple ``(img, label, path)``. ``img`` and ``label`` are exactly
        what ``ImageFolder.__getitem__`` yields for the index; ``path`` is
        the first field of ``self.imgs[index]``.
    """

    def __getitem__(self, index: int):
        # Let the parent class load (and transform) the sample as usual.
        base_item = super().__getitem__(index)
        img, label = base_item
        # Look up the matching entry and take its path component.
        entry = self.imgs[index]
        return img, label, entry[0]

    

In [6]:
batch_size = 32


# Preprocessing: resize the shorter side to 224, centre-crop to 224x224,
# convert to a tensor and normalise with per-channel mean / std.
transforms = T.Compose([T.Resize(224),
                        T.CenterCrop(224),
                        T.ToTensor(),
                        T.Normalize([0.5162, 0.4644, 0.3975],
                                    [0.2724, 0.2640, 0.2574])
                        ])

dataset = ImageFolderWithPaths(root=r"C:\Users\Noel\Documents\THESIS\Data\artbench-10-imagefolder-split\train",
                               transform=transforms)
# shuffle=False keeps the records in dataset order.
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=False, num_workers=0)
print("Dataloader Initialized. Note that workers tend to take some time to \
initialize but speed up performance when loading.")
# ================================================================
# One dict per image: path, true label, predicted label and, for every hooked
# layer, a 1-D feature vector (channel-wise spatial mean for the conv layers,
# raw logits for 'fc').
data = []

with torch.no_grad():
    for images, labels, paths in tqdm(dataloader, total=len(dataloader)):
        # Send the images to the GPU if available.
        images = images.to(device)
        # Forward pass; this also refreshes every Hook_Layer's stored output.
        outputs = model(images)
        _, preds = torch.max(outputs, dim=1)

        # Convert the whole batch to Python ints once instead of per image.
        class_labels = labels.tolist()
        predictions = preds.tolist()

        # Reduce every hooked activation once per batch and move it to host
        # memory in a single transfer; the per-image loop below only indexes.
        batch_features = {}
        for key, hook_object in layer_activations.items():
            tensor_out = hook_object()
            if key != 'fc':
                # (batch, channels, H, W) -> (batch, channels): spatial mean.
                b, c, _, _ = tensor_out.shape
                tensor_out = tensor_out.view(b, c, -1).mean(2)
            batch_features[key] = tensor_out.cpu().numpy()

        for i, path in enumerate(paths):
            # Three entries identifying the image, then one per layer.
            private_dict = {
                'path': path,
                'class_label': class_labels[i],
                'prediction': predictions[i],
            }
            for key, features in batch_features.items():
                private_dict[key] = features[i]
            data.append(private_dict)

Dataloader Initialized. Note that workers tend to take some time to initialize but speed up performance when loading.


100%|██████████| 1563/1563 [09:41<00:00,  2.69it/s]


In [7]:
# Build one row per collected record; each dict key in `data` becomes a column.
df = pd.DataFrame(data, copy=False)
# df.head()

In [8]:
# Persist the frame as a Parquet file (relative path, so it is written to the
# current working directory).
df.to_parquet('mister_bombastic.parquet')

In [9]:
# Read the saved file back (rebinding `df`) and preview the first rows.
df = pd.read_parquet('mister_bombastic.parquet')
df.head()

Unnamed: 0,path,class_label,prediction,conv1,layer1.0.conv1,layer1.0.conv2,layer1.1.conv1,layer1.1.conv2,layer2.0.conv1,layer2.0.conv2,...,layer2.1.conv2,layer3.0.conv1,layer3.0.conv2,layer3.1.conv1,layer3.1.conv2,layer4.0.conv1,layer4.0.conv2,layer4.1.conv1,layer4.1.conv2,fc
0,C:\Users\Noel\Documents\THESIS\Data\artbench-1...,0,0,"[0.0016563197, 0.0018733189, 0.010826878, -0.0...","[-0.026101764, 0.0067682825, -0.001344579, -0....","[-0.00785604, -0.0019926813, -0.0014436285, 0....","[0.0028614, -0.014331536, -0.046783783, -0.016...","[-0.0044514844, -0.001805535, -0.0006865202, 7...","[0.04262494, 0.0021756005, -0.013024934, -0.00...","[-0.003084354, -0.0065309214, 0.00090644066, -...",...,"[0.00067446235, 0.0005606876, -0.0015958078, -...","[-0.0044795997, -0.011539165, -0.0010799773, 0...","[0.0043403497, -0.00073351635, -0.004013875, -...","[-0.006286729, -0.006061548, -0.0017987228, -0...","[-0.0010823872, -0.00049231807, 0.00045699335,...","[0.004319747, -0.0038736386, -0.0012159029, -0...","[0.0030934964, -0.0026555464, -0.0016695211, 0...","[-0.019888747, -0.028423533, -0.033426933, -0....","[0.005542255, -0.00591258, -0.002114803, 0.003...","[2.687022, -1.8395364, 0.3305611, 0.23578836, ..."
1,C:\Users\Noel\Documents\THESIS\Data\artbench-1...,0,0,"[0.007811984, 0.0026374848, 0.029927984, 0.001...","[-0.02929811, 0.00971622, -0.0027664285, -0.04...","[-0.007225382, -0.002738186, -0.0018223127, 0....","[0.0032268965, -0.016775412, -0.03242467, -0.0...","[-0.0043967604, -0.0014821774, -0.00013861818,...","[0.022461481, -0.005948322, -0.01442291, 0.002...","[-0.0023115382, -0.0071422546, 0.0013413049, -...",...,"[0.0021233468, -0.00028703758, -0.0024945668, ...","[-0.0061367066, -0.014896164, -0.0024919945, 0...","[0.0043521025, 0.002539336, -0.0012006802, -0....","[-0.007020349, -0.0039902707, -0.0028367362, -...","[0.0014683898, -0.0028340772, 0.001536036, -0....","[0.0016917218, -0.003143768, -0.0025534672, -0...","[-0.0010998292, -0.0042141187, 0.002519661, 0....","[-0.011230027, -0.008059322, -0.043528866, -0....","[-0.0018441506, -0.008070072, 0.0032382244, 0....","[3.0294607, -2.0632322, 1.2511841, -0.77761006..."
2,C:\Users\Noel\Documents\THESIS\Data\artbench-1...,0,4,"[-0.008487643, 0.005118001, -0.014777913, -0.0...","[-0.029362127, 0.0062077283, 0.00017906123, -0...","[-0.004812627, -0.006541779, -0.0041759363, 0....","[0.0058906977, -0.01621344, -0.03043364, -0.00...","[-0.0049165604, -0.003268905, -0.00011420803, ...","[0.015007748, -0.0058819056, -0.01497363, 0.00...","[-0.001936691, -0.0068201153, 0.0016645775, -0...",...,"[0.0021193507, 0.0012197032, -0.0007168956, -0...","[0.0013595052, -0.0043439325, -0.0024235428, 0...","[0.0021163472, 0.0014380276, -0.0043710493, -0...","[-0.007848415, -0.005266671, -0.00334007, -0.0...","[-0.0010947401, -0.0029571636, -0.001889891, -...","[0.008512906, -0.0050275438, -0.005112506, -0....","[0.00855786, -0.0016940689, -0.0013939008, -0....","[0.0036366167, -0.03181726, -0.033085782, -0.0...","[0.015249659, -0.004128873, -0.00070088403, -0...","[0.8403911, -1.779323, 0.27013713, 2.655955, 2..."
3,C:\Users\Noel\Documents\THESIS\Data\artbench-1...,0,0,"[0.01826533, -0.00792668, 0.025790656, 0.02472...","[-0.024748346, 0.0059171636, -0.00017024176, -...","[-0.0051999507, -0.010966855, -0.004348401, 0....","[0.0001571894, -0.016753618, -0.034592282, -0....","[-0.005560358, -0.0030032212, -0.00011486512, ...","[0.0263832, -0.0003248272, -0.013039168, -0.00...","[-0.0040669437, -0.0071080253, -0.0005199605, ...",...,"[8.405957e-05, 0.0009021588, -0.0027369321, -0...","[-0.00351822, -0.008379908, 0.0026134152, -0.0...","[-0.0018894593, -0.0015658035, -0.00272331, -0...","[-0.002691798, -0.0050760666, 0.0008792347, -0...","[-0.0010645017, -0.0010001322, 0.003852807, -0...","[0.0021367166, -0.0026231012, -0.0015724587, -...","[0.0017275345, -0.004858299, -0.00093183515, -...","[-0.03138517, -0.0044886386, -0.033422235, -0....","[0.005188577, -0.007521276, -0.0007447905, -0....","[3.9865477, -2.3431652, 1.6100606, -0.6076985,..."
4,C:\Users\Noel\Documents\THESIS\Data\artbench-1...,0,0,"[0.027078122, -0.008443646, 0.051015243, 0.039...","[-0.026177162, 0.0065190736, 0.00024276198, -0...","[-0.0059613874, -0.008935566, -0.003007448, 0....","[0.0002229528, -0.018794015, -0.034964673, -0....","[-0.006275654, -0.0021889596, -0.00072812533, ...","[0.021942142, -0.0045654294, -0.01335301, -0.0...","[-0.0036854828, -0.0071870056, 0.00013485037, ...",...,"[0.00057739177, 0.0007380469, -0.0024360092, -...","[-0.0058170995, -0.009044081, 0.0014859766, 0....","[-0.0010266366, -0.00069762947, -0.0010764772,...","[-0.003297243, -0.0039386414, 1.9378922e-05, -...","[0.0013137296, -0.0020847747, 0.0023553122, -0...","[0.0012763804, -0.0046626395, -0.001982532, -0...","[-0.0002964988, -0.0035020828, -0.00015094524,...","[-0.03726418, -0.016626092, -0.045196183, -0.0...","[0.0011557271, -0.007666288, 0.000874853, 0.00...","[3.8069081, -2.0400584, 0.9332707, -1.0741653,..."
