## Calculate Image Stats

We want to normalize our inputs according to the pixel means and standard deviations of our images. We'll go ahead and calculate these values here so we can pass them to `.normalize()` like we do `imagenet_stats`.

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import os
import numpy
import cv2
import numpy as np
from tqdm import tqdm

In [10]:
train_images = os.listdir('data/train')
test_images = os.listdir('data/test')

train_images = ['data/train/' + s for s in train_images]
test_images = ['data/test/' + s for s in test_images]
all_images = train_images + test_images

In [11]:
def getMeans(paths):
    redSum = 0
    greenSum = 0
    blueSum = 0

    for path in paths:
        image = cv2.imread(path)
        
        red = np.reshape(image[:,:,0], -1)
        green = np.reshape(image[:,:,1], -1)
        blue = np.reshape(image[:,:,2], -1)

        redMean = red.mean()
        greenMean = green.mean()
        blueMean = blue.mean()

        redSum = redSum + redMean
        greenSum = greenSum + greenMean
        blueSum = blueSum + blueMean
        
    redMean = redSum/len(paths)
    greenMean = greenSum/len(paths)
    blueMean = blueSum/len(paths)
    
    return (redMean, greenMean, blueMean)

In [12]:
print(getMeans(train_images))
print(getMeans(test_images))

(177.59684112797635, 139.29448542581807, 179.12519968286247)
(177.20040400962523, 136.2245005942287, 176.3861362725302)


In [13]:
redMean, greenMean, blueMean = getMeans(all_images)

In [14]:
def getStds(paths, redMean, greenMean, blueMean):
    redSum = 0
    greenSum = 0
    blueSum = 0

    for path in paths:
        image = cv2.imread(path)
        red = np.reshape(image[:,:,0], -1)
        green = np.reshape(image[:,:,1], -1)
        blue = np.reshape(image[:,:,2], -1)

        redDiffs = red - redMean
        redSumOfSquares = np.sum(redDiffs ** 2)
        
        blueDiffs = blue - blueMean
        blueSumOfSquares = np.sum(blueDiffs ** 2)
        
        greenDiffs = green - greenMean
        greenSumOfSquares = np.sum(greenDiffs ** 2)
        
        redSum = redSum + (1/(len(paths) * 96 * 96)) * redSumOfSquares
        greenSum = greenSum + (1/(len(paths) * 96 * 96)) * greenSumOfSquares
        blueSum = blueSum + (1/(len(paths) * 96 * 96)) * blueSumOfSquares
        
    redStd = np.sqrt(redSum)
    greenStd = np.sqrt(greenSum)
    blueStd = np.sqrt(blueSum)
    
    return (redStd, greenStd, blueStd)

In [15]:
redStd, greenStd, blueStd = getStds(all_images, redMean, greenMean, blueMean)

In [16]:
print("Red: ", redMean, redStd)
print("Green: ", greenMean, greenStd)
print("Blue: ", blueMean, blueStd)

Red:  177.51475147222848 54.9887577728556
Green:  138.6587881454333 71.97104127673265
Blue:  178.55802581840624 60.98731138460821


In [17]:
cancer_stats = [[redMean, greenMean, blueMean], [redStd, blueStd, greenStd]]
print(cancer_stats)

[[177.51475147222848, 138.6587881454333, 178.55802581840624], [54.9887577728556, 60.98731138460821, 71.97104127673265]]
