In [2]:
import collections
import nbimporter

from p1_GetCosineSimilarityDistance import GetCosineSimilarityDistance

Importing Jupyter notebook from p1_GetCosineSimilarityDistance.ipynb


In [3]:
def SubsetConceptImages(reference_image_dict, concept, vector_dictionary, similarity_threshold, overlap):
    """
    Select the images of a concept that are similar to enough of that concept's reference images.

    Every reference image registered for `concept` is passed to 'GetCosineSimilarityDistance'. The number of
    reference images for which an image index is returned is counted, and only the indices whose count
    appears in `overlap` are kept.

    Args:
        reference_image_dict (dictionary): a dictionary containing concepts as keys and a list of reference
            images as values

        concept (string): concept in the Broden dataset; must be a key of `reference_image_dict`, otherwise
            a KeyError is raised

        vector_dictionary (dictionary): a dictionary as created by the function 'MakeVectorDictionary';
            forwarded unchanged to 'GetCosineSimilarityDistance'

        similarity_threshold (float): value ranging from 1 to -1, not including '1'. This defines how similar
            the tensors must be compared to the reference tensors. A cosine similarity closer to 1 is more
            similar to the reference vector. All tensors between the similarity threshold and 1 are selected.
            Forwarded unchanged to 'GetCosineSimilarityDistance'

        overlap (list): the accepted numbers of reference images for which a tensor falls within the
            similarity threshold. E.g. when 5 reference images are used and the overlap = [4,5], only the
            tensors which are within the similarity threshold of 4 or 5 reference images are selected

    Returns:
        list of the image indices (strings, as returned by 'GetCosineSimilarityDistance') matching the
        similarity threshold and the overlap criteria, ordered by first appearance
    """

    # Number of reference images for which each image index was returned.
    # NOTE(review): presumably the helper returns each index at most once per reference image; if it can
    # repeat an index, the repeats are counted as well - confirm in p1_GetCosineSimilarityDistance.
    hits_per_image = collections.Counter()

    for reference_img in reference_image_dict[concept]:
        similar_imgs = GetCosineSimilarityDistance(reference_img, vector_dictionary, similarity_threshold)
        for img_idx in similar_imgs:
            hits_per_image[img_idx] += 1

    # Keep only the indices whose hit count is one of the accepted overlap values.
    return [img_idx for img_idx, n_hits in hits_per_image.items() if n_hits in overlap]
    
    
