# Penultimate layer saving
This notebook runs a pretrained ResNet-50 on ImageNet and saves the penultimate-layer outputs across several files, so that they can be used as a new dataset.

## Import dependencies

In [1]:
import torch
from torchvision import datasets, transforms
from torchvision.models import resnet50, ResNet50_Weights
from torch.utils.data import DataLoader
from tqdm import tqdm
import json
import os

## Set up hyperparameters

#### For the ImageNet dataset

In [2]:
# Root directory of the ImageNet dataset. Written with forward slashes so the literal
# contains no backslash escape sequences and resolves on both Windows and POSIX systems.
root = '../data/ImageNet_2012'
split = 'val' # train or val for ImageNet
# Preprocessing applied, in order, to every image loaded from the dataset
transform = transforms.Compose(
    [
        # Resize so that the smallest side of the image is 256
        transforms.Resize(256),
        # Keep the 224x224 square at the center of the image
        transforms.CenterCrop(224),
        # PIL image -> tensor with values within [0, 1]
        transforms.ToTensor(),
        # Normalize each channel with the given mean and standard deviation
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)

#### For model inference and saving outputs

In [3]:
# --- Data loading ---
batch_size = 256    # number of images per batch
num_workers = 4     # number of loader subprocesses
pin_memory = True   # passed to the data loader

# --- Output chunking ---
number_batch_size_segmentation = 200  # number of batches stored in each complete file
min_size_segmentation = batch_size * number_batch_size_segmentation  # number of output tensors in each complete file
segmentation_index = 0  # counter of the output file currently being filled

# Destination folder for everything saved for the current split
saving_folder = f'../data/saved_outputs/{split}'
# exist_ok=True creates any missing parent folders and does nothing when the folder
# already exists, so no separate existence check (and no check-then-create race) is needed.
os.makedirs(f"{saving_folder}/penultimate_layer_outputs", exist_ok=True)

## Initialization 

In [4]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [5]:
# Buffer of penultimate-layer outputs, created empty directly on the inference device
avgpool_list = torch.tensor([], device=DEVICE)
# Latest output captured by each hook, keyed by the name passed to getActivation
activation = {}


def getActivation(name):
    """Build a forward hook that records a module's output under ``name``.

    The returned callable takes ``(module, inputs, output)`` and stores
    ``output.detach()`` in the global ``activation`` dict, replacing any
    previous entry stored under ``name``.
    """
    def hook(module, inputs, output):
        activation[name] = output.detach()

    return hook

In [6]:
# ImageNet split located under `root`, with `transform` applied to every image
dataset = datasets.ImageNet(root=root, split=split, transform=transform)

# ResNet-50 with its default pretrained weights, moved to the inference device
# NOTE(review): ResNet50_Weights.DEFAULT also exposes its own preprocessing through
# `.transforms()`; confirm the manually built `transform` matches what these weights expect.
model = resnet50(weights=ResNet50_Weights.DEFAULT).to(DEVICE)

In [8]:
# Data loader splitting the dataset into batches; modify parameters according to device specifications
dataloader = DataLoader(
    dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    pin_memory=pin_memory,
)

# Record the output of model.avgpool on every forward pass; `h` is the handle returned by the registration
h = model.avgpool.register_forward_hook(getActivation('avgpool'))

## Evaluation

In [10]:
model.eval() # deactivate training-specific behaviour of some layers for evaluation

# Reset every accumulator here so that re-running this cell on its own starts from a
# clean state instead of continuing from the previous run's counters and leftovers.
segmentation_index = 0
correct_predictions = 0
pending_outputs = []  # per-batch penultimate outputs not yet written to disk
pending_count = 0     # number of output rows currently held in pending_outputs

with torch.no_grad(): # turning off gradients computation
    for images, labels in tqdm(dataloader, desc="Evaluation"):

        if pending_count >= min_size_segmentation:
            # A complete segment is buffered: concatenate it once and save it in PyTorch format
            torch.save(torch.cat(pending_outputs, dim=0), f'{saving_folder}/penultimate_layer_outputs/penultimate_layer_outputs_{segmentation_index}.pt')
            pending_outputs = []
            pending_count = 0
            segmentation_index+=1

        # Copy images/labels tensors to the inference device
        images = images.to(DEVICE)
        labels = labels.to(DEVICE)

        # Forward pass; the hook registered on avgpool fills activation['avgpool']
        outputs = model(images)

        # Flatten everything after the batch dimension. Unlike squeeze(), this keeps the
        # batch dimension even when the batch holds a single sample, so the rows of every
        # batch can always be concatenated.
        batch_outputs = activation['avgpool'].flatten(start_dim=1)
        pending_outputs.append(batch_outputs)
        pending_count += batch_outputs.shape[0]

        # Compute correct predictions for the batch and increment correct_predictions
        _, predicted = torch.max(outputs, 1)
        correct_predictions += (predicted == labels).sum().item()

# Save the last (possibly incomplete) file; an empty tensor is written if no batch was processed
avgpool_list = torch.cat(pending_outputs, dim=0) if pending_outputs else torch.tensor([]).to(DEVICE)
torch.save(avgpool_list, f'{saving_folder}/penultimate_layer_outputs/penultimate_layer_outputs_{segmentation_index}.pt')

Evaluation: 100%|█████████████████████████████████████████████████████████████████████████████████████████████| 196/196 [03:05<00:00,  1.05it/s]


In [16]:
# Percentage of dataset images whose prediction matched the label
accuracy = (100 * correct_predictions) / len(dataset)
print(
    f'Accuracy: {accuracy}%'
)

Accuracy: 80.342%


## Save results
The penultimate-layer outputs were already saved during evaluation; only the labels and the metadata file remain to be saved.

In [14]:
# Gather the dataset targets into one tensor and write them to a single file
labels_list = torch.tensor(dataset.targets)
torch.save(
    labels_list,
    f'{saving_folder}/penultimate_layer_labels.pt',
)

In [15]:
# Store the index of the last saved file and the size of a complete file as JSON,
# so that a dataset can easily be constructed from the saved outputs
data = {
    "last_file_indice": segmentation_index,
    "min_size_segmentation": min_size_segmentation,
}
with open(f'{saving_folder}/penultimate_layer_outputs/data.json', 'w') as json_file:
    json.dump(data, json_file)
