In [1]:
import os
import numpy as np
import torch
import random
import nbimporter
import pickle

from tqdm import tqdm_notebook
from sklearn import linear_model

from p1_MakeVectorDictionary import MakeVectorDictionary

Importing Jupyter notebook from p1_MakeVectorDictionary.ipynb
Importing Jupyter notebook from p1_GetVectorFromImage.ipynb


In [2]:
def _load_cached_vectors(path):
    """Read a pickled vector dictionary and return it with its keys cast to int."""
    # NOTE(review): pickle.load can execute arbitrary code stored in the file;
    # only point this at cache files written by this project.
    with open(path, 'rb') as handle:
        vector_dict = pickle.load(handle)
    return vector_dict, [int(key) for key in vector_dict.keys()]


def _stack_vectors(vector_dict, description):
    """Stack every tensor in vector_dict along a new leading axis (one row per entry)."""
    if len(vector_dict) == 0:
        raise ValueError('No %s vectors are available to train the classifier on' % description)
    # a single stack avoids re-copying the growing tensor on every iteration
    return torch.stack(list(vector_dict.values()), 0)


def GetCav(concept, num_concept_imgs, num_counter_imgs, training_dataframe,
          basenet, out_layer, broden_dataset):
    """Fit a linear classifier separating concept images from counter images.

    The vectors of the concept images and of the counter images are each read
    from a pickle cache in '../data/' when the cache file exists; otherwise
    random images are sampled and passed to MakeVectorDictionary.
    (presumably MakeVectorDictionary also writes the cache file under the
    given filename -- TODO confirm against p1_MakeVectorDictionary.)

    Parameters
    ----------
    concept : str
        Name of the column in training_dataframe that equals 1 for images
        labelled with the concept; also used to build the cache filenames.
    num_concept_imgs : int
        Number of concept images to sample when no concept cache exists.
    num_counter_imgs : int
        Number of counter images to sample when no counter cache exists.
    training_dataframe : pandas.DataFrame
        Frame with an 'image' column and a column named after the concept.
    basenet, out_layer, broden_dataset
        Passed through unchanged to MakeVectorDictionary.

    Returns
    -------
    random_concept_imgs : list
        Indices of the concept images used (the cache keys as ints when the
        cache was read).
    random_counter_imgs : list
        Indices of the counter images used (same convention).
    cav : numpy.ndarray
        The coef_ attribute of the fitted classifier.
    lm : sklearn.linear_model.SGDClassifier
        The fitted classifier.

    Raises
    ------
    ValueError
        If more concept or counter images are requested than are available,
        or if one of the vector dictionaries is empty.
    """

    # create filenames to store the dictionaries of tensors
    concept_filename = concept + '_' + str(num_concept_imgs) + 'imgs.pickle'
    counter_filename = 'counter_' + concept + '_' + str(num_counter_imgs) + 'imgs.pickle'
    concept_path = os.path.join('../data/', concept_filename)
    counter_path = os.path.join('../data/', counter_filename)


    #### GET RANDOM CONCEPT IMAGES

    # get the row indices of images which are labelled with the concept
    concept_idxs = training_dataframe.loc[training_dataframe[concept] == 1, 'image'].index.tolist()

    # if the amount of random concept images is larger than the total amount of images labelled with the concept,
    # raise a ValueError
    if num_concept_imgs > len(concept_idxs):
        raise ValueError ('The number of specified concept images (%d) is larger than the total amount of images labelled with the concept (%d)' 
                          % (num_concept_imgs, len(concept_idxs)))

    # create a dictionary storing the tensors if it does not exist yet, otherwise read the dictionary
    if not os.path.exists(concept_path):
        random_concept_imgs = random.sample(concept_idxs, num_concept_imgs)
        concept_vector_dict = MakeVectorDictionary(basenet, out_layer, broden_dataset,
                                                   random_concept_imgs, concept_filename)
    else:
        concept_vector_dict, random_concept_imgs = _load_cached_vectors(concept_path)


    #### GET RANDOM COUNTER IMAGES

    # create a dictionary storing the tensors of the random counter examples
    if not os.path.exists(counter_path):
        # get row indices of images not labelled with the concept; a set keeps
        # the membership test constant-time instead of scanning a list per row
        # NOTE(review): this compares positions 0..len-1 with index labels, so
        # it assumes training_dataframe has a default integer index -- confirm
        concept_idx_set = set(concept_idxs)
        counter_idxs = [i for i in range(len(training_dataframe)) if i not in concept_idx_set]

        # mirror the concept check so the failure names the actual problem
        if num_counter_imgs > len(counter_idxs):
            raise ValueError('The number of specified counter images (%d) is larger than the total amount of images not labelled with the concept (%d)'
                             % (num_counter_imgs, len(counter_idxs)))

        # sample random images for the counter images
        random_counter_imgs = random.sample(counter_idxs, num_counter_imgs)
        counter_vector_dict = MakeVectorDictionary(basenet, out_layer, broden_dataset,
                                                   random_counter_imgs, counter_filename)
    else:
        counter_vector_dict, random_counter_imgs = _load_cached_vectors(counter_path)


    ##### TRAIN A LINEAR CLASSIFIER

    # The linear classifier requires a matrix of np.arrays to fit a model on,
    # so every dictionary is stacked into one tensor with one row per image.
    concept_tensors = _stack_vectors(concept_vector_dict, 'concept')
    counter_tensors = _stack_vectors(counter_vector_dict, 'counter')

    # concatenate all tensors to the same array; detach/cpu make the conversion
    # work for tensors that track gradients or live on a GPU as well
    X = torch.cat((concept_tensors, counter_tensors), 0)
    X = X.detach().cpu().numpy()

    # create labels for the tensors
    # 1 = concepts, 0 = not concept
    # the label counts follow the stacked rows so X and y always line up
    y = np.append(np.ones(concept_tensors.shape[0]), np.zeros(counter_tensors.shape[0]))

    # create a linear model and fit it to the data
    lm = linear_model.SGDClassifier()
    lm.fit(X, y)

    cav = lm.coef_

    return random_concept_imgs, random_counter_imgs, cav, lm