# Training Mean

This notebook calculates the mean (by color channel) of the training images. This value is put in the keras-frcnn config.py file for network training and testing.

In [1]:
import os
import CONSTANTS as c
import IMAGEUTILS as im
import XMLUTILS as xu
import numpy as np

In [2]:
class MeanFinder():
    """
    Calculates the mean (by color channel) of the specified directories.

    Walks c.BASE_DIR, selects the XML annotation files whose path contains
    one of the allowed directory names, and averages the per-image channel
    means of every image whose annotation contains at least one object.
    This value is put in the keras-frcnn config.py file for network
    training and testing.
    """

    def __init__(self, allowed_dirs):
        """
        :param allowed_dirs: directory names; a file is only included if at
                             least one of them appears in its filepath
        :type  allowed_dirs: sequence of strings
        """
        self.allowed_dirs = allowed_dirs  # only include if from these dirs

    def clear_DS(self, file, filepath):
        """
        Clears the .DS_Store files that OSX Finder automatically creates.

        The file at filepath is deleted only when its name is exactly
        '.DS_Store'; any other file is left untouched. This helper is not
        called by process_files.

        :param file: the name of the file
        :type  file: string
        :param filepath: the full filepath of the file
        :type  filepath: string
        """
        if file == '.DS_Store':
            os.unlink(filepath)

    def process_annotation(self, annotation, img_filepath):
        """
        Computes the mean of the image at img_filepath if the image
        contains objects.

        :param annotation: a pascal VOC XML annotation
        :type  annotation: Element

        :param img_filepath: the filepath of the image on training machine
        :type  img_filepath: string

        :returns: the image averaged over its first two axes, or None when
                  the annotation has no 'object' elements
        :rtype  : ndarray or None
        """
        # Images without any annotated object are skipped; the image file
        # is not even read in that case.
        if not annotation.findall('object'):
            return None
        # Averaging over axes 0 and 1 leaves one value per remaining axis
        # (presumably the color channels -- depends on what im.imread
        # returns; confirm).
        return np.mean(im.imread(img_filepath), axis=(0, 1))

    def file_standards(self, file, filepath):
        """
        Checks to make sure that annotation should be included.

        The file must be an xml file
        The filepath must not be from xml_preds
        One of the allowed directory names must be in the filepath

        :param file: the name of the file
        :type  file: string
        :param filepath: the full filepath of the file
        :type  filepath: string

        :returns: whether or not the file/filepath should be included
        :rtype  : bool
        """
        # endswith avoids hard-coding the extension length (the previous
        # file[-4:] slice only worked for a four-character extension).
        is_xml = file.endswith(c.XML_EXT)
        not_pred = 'xml_preds' not in filepath
        allowed_dir = any(dir_ in filepath for dir_ in self.allowed_dirs)
        return is_xml and not_pred and allowed_dir

    def format_filepath(self, filepath):
        """
        Change from xml file to img file.

        Every occurrence of 'xmls' in the path is replaced with 'imgs' and
        every occurrence of c.XML_EXT with c.IMG_EXT.

        :param filepath: the full filepath of the xml annotation file
        :type  filepath: string

        :returns: the formatted filepath
        :rtype  : string
        """
        return filepath.replace('xmls', 'imgs').replace(c.XML_EXT, c.IMG_EXT)

    def process_files(self):
        """
        Walk through all files under c.BASE_DIR and average the image means.

        Each qualifying image contributes its own per-channel mean, and the
        result is the mean of those per-image means, so every image carries
        the same weight regardless of its size.

        :returns: the mean of the images
        :rtype  : ndarray

        :raises ValueError: if no annotated image with objects was found
        """
        means = []
        # iterate through all files
        for root, _dirs, files in os.walk(c.BASE_DIR):
            for file in sorted(files):
                filepath = os.path.join(root, file)

                # only allow images in the standardized format that have
                # been annotated
                if not self.file_standards(file, filepath):
                    continue

                annotation = xu.open_xml_file(filepath)
                img_filepath = self.format_filepath(filepath)
                mean = self.process_annotation(annotation, img_filepath)
                # None marks an annotation without objects
                if mean is not None:
                    means.append(mean)

        # np.mean of an empty list yields nan (with a RuntimeWarning), which
        # could silently end up in the training config; fail loudly instead.
        if not means:
            raise ValueError(
                "no annotated images with objects found under {} "
                "for directories {}".format(c.BASE_DIR, self.allowed_dirs)
            )
        return np.mean(means, axis=0)

In [3]:
# Build a finder restricted to the training video directories, compute the
# per-channel mean once, then report it.
train_mean = MeanFinder(allowed_dirs=c.TRAIN_VID_KEYS)
channel_means = train_mean.process_files()
print("Train mean: " + str(channel_means))

Train mean: [ 102.82855653  126.34588601  104.67462882]
