# Custom Concept Sampler

In [None]:
import os
import random

import numpy as np
import pandas as pd
from torch.utils.data import Sampler

In [None]:
# Load the training table from disk; the path is resolved relative to the
# current working directory of the notebook/process.
training_df = pd.read_csv(filepath_or_buffer='../data/training_data.csv')

In [None]:
class ConceptSampler(Sampler):
    """Sampler yielding, in random order, the rows in which a concept is present.

    A row counts as containing the concept when its value in the ``concept``
    column equals 1. The values yielded are the dataframe's *index labels* for
    those rows; they coincide with positional row numbers only when the frame
    carries a default 0..n-1 index.

    Args:
        training_df: dataframe containing the training data, with one column
            per concept.
        concept: name of the concept column to sample from.

    Raises:
        TypeError: if ``concept`` is not a column of ``training_df``.
    """

    def __init__(self, training_df, concept):
        self.temp_df = training_df
        self.concept = concept

        # Fail early when the requested concept is not a column of the data.
        # The 1-tuple keeps the %-format valid even if the concept is itself
        # a tuple (e.g. a MultiIndex column key).
        if self.concept not in self.temp_df.columns:
            raise TypeError('Concept %s is not in dataset' % (self.concept,))

        # Index labels of every row in which the concept is present. Only the
        # concept column is consulted, so no other column is required.
        concept_present = self.temp_df[self.concept] == 1
        self.idxs = self.temp_df.loc[concept_present].index.tolist()

    def __iter__(self):
        # Draw a fresh permutation on every call, so each pass over the
        # sampler sees a new order. The permutation is kept on the instance
        # as ``random_list``.
        self.random_list = random.sample(self.idxs, len(self.idxs))
        return iter(self.random_list)

    def __len__(self):
        # Number of rows in which the concept is present.
        return len(self.idxs)
