## Library imports and iPython settings

In [2]:
# Full library imports
import random
import numpy as np
import scipy.ndimage as scimg
import matplotlib.pyplot as plt
import skimage.color as skc
# Selective library imports
from pylab import rcParams
from numpy import histogram, interp, cumsum, diff
from matplotlib import cm
from scipy.ndimage import imread
from mpl_toolkits.mplot3d import Axes3D
from sklearn.linear_model import LogisticRegressionCV
# Notebook-wide plotting setting: default matplotlib figure size as (width, height)

rcParams['figure.figsize'] = (10, 6)

# Histogram equalization

1. To get a mapping $f: D \rightarrow \left[ 0, M\right]$, simply scale the cumulative distribution function $H_f \left( v \right)$ by the factor $M$. This gives: $\psi \left( v \right) = M \cdot H_f \left( v \right)$

2. The following program takes a simple image and runs a histogram equalization algorithm on it. Afterwards the original picture and the equalized picture are shown together with their histograms. The image is read with scipy, which ensures it is loaded into memory as a uint8 array.

In [None]:
# Default number of histogram bins
BINS = 100
# Upper end of the intensity range that is histogrammed
IM_SIZE = 255

def histogramEqualization(f, bins=BINS):
	"""Equalize the intensity histogram of an image.

	A `bins`-bin histogram of `f` is taken over [0, IM_SIZE], normalised and
	accumulated into a cumulative distribution. Every value of `f` is then
	looked up in that distribution by linear interpolation between the
	right-hand bin edges.

	Returns a tuple (equalized, counts, edges):
	  equalized -- array shaped like `f` holding cumulative-distribution values
	  counts    -- raw histogram counts of `f`
	  edges     -- the bins + 1 histogram bin edges
	"""
	counts, edges = histogram(f, range=(0, IM_SIZE), bins=bins)
	# Relative frequencies accumulated into the cumulative distribution
	cdf = cumsum(counts.astype(float) / counts.sum())
	equalized = interp(f, edges[1:], cdf)
	return equalized, counts, edges


# Load the image and run the equalization on it
img = scimg.imread("cameraman1.png")
img_eq, his, be = histogramEqualization(img)

f, axarr = plt.subplots(2, 2)
# Put a tick on every (BINS/10)-th bin edge
tick_step = int(BINS/10)

# Left column: original image on top, its histogram underneath
axarr[0, 0].imshow(img, cmap='gray')
centers = (be[:-1] + be[1:]) / 2
axarr[1, 0].bar(centers, his, align='center', width=diff(be))
axarr[1, 0].set_xticks(list(be[::tick_step]))

# Right column: equalized image on top, its histogram underneath
axarr[0, 1].imshow(img_eq, cmap='gray')
his, be = histogram(img_eq, range=(0,1), bins=BINS)
centers = (be[:-1] + be[1:]) / 2
axarr[1, 1].bar(centers, his, align='center', width=diff(be))
axarr[1, 1].set_xticks(list(be[::tick_step]))

plt.show()

# Skin color detection

1. Plot a 3D scatter plot of the RGB values of skin and non-skin colors. The points corresponding to skin color are plotted in green and the points corresponding to non-skin color in red.

In [None]:
# Amount of samples taken in plotting 3D scatter
SAMPLES = 500

# Randomly takes n points from a numpy array
def sample(ar, n):
	"""Draw up to `n` elements from `ar` uniformly at random, without replacement.

	The elements are picked along the first axis and returned in their
	original order as a float array.

	Parameters:
	  ar -- array-like to draw from; a column vector of shape (N, 1), as
	        produced by np.split, yields a flat 1-D result
	  n  -- requested number of elements

	Returns an array holding min(n, len(ar)) elements. When `ar` has fewer
	than `n` elements only the available ones are returned, so the result
	never contains padding values that are not part of the data.
	"""
	values = np.asarray(ar, dtype=float)
	count = min(max(int(n), 0), len(values))

	# random.sample draws distinct positions from the whole array, so every
	# element has the same chance of being selected
	picked = np.array(sorted(random.sample(range(len(values)), count)), dtype=int)
	chosen = values[picked]

	# Flatten a single-column input so the result is 1-D
	if chosen.ndim == 2 and chosen.shape[1] == 1:
		chosen = chosen[:, 0]

	return chosen

#Read images
# f = imread('SkinColor/FacePhoto/0520962400.jpg')
# m = imread('SkinColor/GroundT_FacePhoto/0520962400.png')
f = imread('SkinColor/FamilyPhoto/buck_family.jpg')
m = imread('SkinColor/GroundT_FamilyPhoto/buck_family.png')

# Pixels where the first mask channel is 255 are taken as skin, 0 as non-skin
skincolors = f[m[:,:,0]==255]
nonskincolors = f[m[:,:,0]==0]

# Take a selection of SAMPLES points to plot in 3D plot for
# both skin- and non-skin colors. Row indices are sampled (not the
# individual channels) so the R, G and B of a plotted point always
# belong to the same pixel.
skin_idx = sample(np.arange(len(skincolors)), SAMPLES).astype(int)
x, y, z = skincolors[skin_idx].T

# Non-skin points are drawn from the non-skin pixels themselves
nonskin_idx = sample(np.arange(len(nonskincolors)), SAMPLES).astype(int)
t, u, v = nonskincolors[nonskin_idx].T

# 3D scatter plot of randomly selected points
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(x, y, z, s=30, c='green')
ax.scatter(t, u, v, s=30, c='red')
ax.set_xlabel('R')
ax.set_ylabel('G')
ax.set_zlabel('B')
plt.show()

Train classifier for use with skin color detection

In [None]:
# Keep only the a and b channels of the Lab representation of the image
f_lab = skc.rgb2lab(f)[:, :, 1:]
# Split the pixels into skin / non-skin using the first mask channel
mask_channel = m[:, :, 0]
sc_lab = f_lab[mask_channel == 255]
nsc_lab = f_lab[mask_channel == 0]

# Scatter every 25th skin (green) and non-skin (red) pixel in the a/b plane
for points, shade in ((sc_lab, 'green'), (nsc_lab, 'red')):
    plt.scatter(points[::25, 0], points[::25, 1], s=1, marker=',', color=shade)
plt.xlabel("a")
plt.ylabel("b")
plt.show()

# Stack the feature vectors (skin first, then non-skin) and build the
# matching class labels: 1 for skin, 0 for non-skin
color = np.vstack((sc_lab, nsc_lab))
target = np.concatenate((np.ones(len(sc_lab)), np.zeros(len(nsc_lab))))

# Odd-indexed rows train the classifier, even-indexed rows test it
learn_color, test_color = color[1::2], color[0::2]
learn_target, test_target = target[1::2], target[0::2]
logregr = LogisticRegressionCV()
logregr.fit(learn_color, learn_target)

print("Score of logistic regression:")
print(logregr.score(test_color, test_target))

# Classify every pixel of the image and fold the labels back into image shape
image_colors = f_lab.reshape((-1,2))
predict_skin = logregr.predict(image_colors).reshape(f_lab.shape[:2])

# Original image on the left, predicted skin map on the right
plt.subplot(121)
plt.imshow(f)
plt.subplot(122)
plt.imshow(predict_skin)
plt.show()

To get a generally applicable classifier we need to train it with more than just one picture. Also, testing on the same picture we trained with is questionable, to say the least. Though the principle has been proven, more training data is needed.

In the following program a loop is called for all pictures in the directory and then they are used for training the classifier. A subset of pixels is used to reduce the large amount of data. Then the classifier is tested on another folder of images which will check the validity of the classifier.

In [14]:
from os import listdir, getcwd
from os.path import isfile, join

REL_PATH_ORG = 'SkinColor/FacePhoto/'
REL_PATH_MASK = 'SkinColor/GroundT_FacePhoto/'

def read_image(file, sample_interval):
    """Load one photo with its ground-truth mask and return labelled a/b colors.

    Parameters:
      file            -- file name of the photo inside REL_PATH_ORG; the mask
                         is read from REL_PATH_MASK under the same base name
                         with a '.png' extension
      sample_interval -- only every sample_interval-th image column is kept

    Returns a tuple (color, target):
      color  -- the Lab a/b values of the kept pixels, skin pixels first,
                followed by the non-skin pixels
      target -- matching labels, 1 for skin and 0 for non-skin
    """
    from os.path import splitext

    f = imread(REL_PATH_ORG + file)
    # Derive the mask name from the base name so any photo extension works
    # (previously anything other than .jpg/.jpeg left the mask undefined)
    m = imread(REL_PATH_MASK + splitext(file)[0] + '.png')

    # Convert image to lab color convention taking only the ab dimensions
    # and every sample_interval-th column to reduce sheer amount of data
    f_lab = skc.rgb2lab(f)[::, ::sample_interval, 1::]
    # Extract skin- and non-skin colors from image data; the mask is
    # subsampled the same way so it lines up with f_lab
    sc_lab = f_lab[m[:, ::sample_interval, 0]==255]
    nsc_lab = f_lab[m[:, ::sample_interval, 0]==0]

    color = np.vstack((sc_lab, nsc_lab))
    target = np.concatenate((np.ones(len(sc_lab)),np.zeros(len(nsc_lab))))

    return color, target
    
    

# Reads image data from current directory (image data should be saved here)
def read_image_folder(sample_interval=100):
    """Collect labelled colors from every photo in the REL_PATH_ORG folder.

    Each .jpg/.jpeg file found under <current directory>/REL_PATH_ORG is
    passed to read_image exactly once, in sorted file-name order.

    Parameters:
      sample_interval -- column subsampling interval handed to read_image

    Returns a tuple (data_vector, target_vector) with the colors and labels
    of all photos concatenated. Both are empty arrays when the folder holds
    no photos.
    """
    original = join(getcwd(), REL_PATH_ORG)
    # Only JPEG photos are treated as inputs; this skips stray directory
    # entries (e.g. hidden system files) instead of relying on their position
    image_files = sorted(
        name for name in listdir(original)
        if name.endswith(('.jpg', '.jpeg'))
    )

    # Gather the per-image results and concatenate once at the end rather
    # than re-copying the growing arrays on every iteration
    colors, targets = [], []
    for file in image_files:
        c, t = read_image(file, sample_interval)
        colors.append(c)
        targets.append(t)

    if not colors:
        # Two feature columns (a, b), zero samples
        return np.empty((0, 2)), np.empty(0)

    return np.concatenate(colors), np.concatenate(targets)

class skin_color_classifier:
    """Holds training data together with the classifier to be used on it.

    Attributes:
      classifier -- the `classifier` argument exactly as passed in (by
                    default the LogisticRegressionCV class itself)
      data       -- the feature vectors passed in
      target     -- the labels passed in
    """

    def __init__(self, data, target, classifier=LogisticRegressionCV):
        # Nothing is fitted here; the arguments are only stored
        self.data, self.target = data, target
        self.classifier = classifier
        
    
        
## TODO: get sampling of images working

## TODO: train classifier with samples

## TODO: put family photos into classifier and check if working


# Gather the colors and their skin / non-skin labels from the photo folder
clrs, trgts = read_image_folder()

# Wrap the data in the classifier container (its constructor only stores the arguments)
logregr = skin_color_classifier(clrs, trgts)