In [8]:
import torch
from tqdm import tqdm_notebook
import pickle
import nbimporter
import os

from p1_GetVectorFromImage import GetVector

In [3]:
def MakeVectorDictionary(nn, out_layer, dataset, index_values, file_name=None):
    """
    Run a set of dataset images through a network and collect the output of one layer per image.

    The network is moved to the GPU when one is available (otherwise the CPU) and put in
    evaluation mode. For every index in 'index_values' the corresponding sample is fetched from
    'dataset', its image is cast to float, moved to the same device as the network and handed to
    'GetVector' together with the network and the requested layer. Whatever 'GetVector' returns is
    stored in a dictionary. Optionally the dictionary is pickled to the data folder.

    Indices for which anything goes wrong (loading the sample, the forward pass, ...) are skipped:
    the index and the error are printed and the loop continues, so the returned dictionary may
    contain fewer entries than 'index_values'.

    Args:
        nn (torchvision.model): the CNN the images are passed through. Note that it is moved to the
                            selected device and set to evaluation mode as a side effect
        out_layer: a layer of the nn, from which the output will be extracted (passed on to 'GetVector')
        dataset (torch.utils.data.Dataset): a dataset made with the function 'BrodenDataset'. Indexing it
                            must give a mapping with an 'image' entry holding a tensor
        index_values (list): a list of integers referring to the row indices of the images labelled with a
                            specific concept in the training dataframe
        file_name (.pickle): default is None, the dictionary will not be stored.
                            If specified, the name with which the dictionary will be stored on the local harddrive.
                            It must end with '.pickle'. The file will be stored in the folder '../data/'


    Returns:
        dictionary: maps str(index) to the value returned by 'GetVector' for that image
    """

    # check if a GPU is available, otherwise run it on CPU
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    nn.to(device)
    # set the neural network to evaluation mode
    nn.eval()

    # create a dictionary in which the tensors will be stored
    vector_dict = {}

    # loop through every index, push the image belonging to the index through the network and extract the tensor from
    # the desired output layer
    for idx in tqdm_notebook(index_values):
        try:
            sample = dataset[idx]
            # use the device chosen above so the image always lives where the network lives
            img = sample['image'].float().to(device)
            # the key is the string form of the index, not the integer itself
            vector_dict[str(idx)] = GetVector(img, nn, out_layer)
        except Exception as error:
            # best effort: skip the failing image but report why it failed.
            # 'Exception' (instead of a bare except) keeps KeyboardInterrupt/SystemExit working,
            # so a long extraction run can still be interrupted from the notebook.
            print('index:', idx, 'error:', repr(error))

    # write the dictionary to the local harddrive
    if file_name is not None:
        with open(os.path.join('../data/', file_name), 'wb') as handle:
            pickle.dump(vector_dict, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return vector_dict