In [2]:
import os

import cv2
from dask import delayed
import numpy as np
from matplotlib import pyplot as plt
import sklearn
from sklearn.linear_model import Perceptron


##  Integral Image

Computes rectangle integrals of an image for fast computation of Harr Filters.

In [3]:
def compute_intg_image(img):
    """
    Computes matrix for quick computation of integrals.
    """
    # Create a matrix of zeros with the same dimensions as the input image
    intg_img = np.zeros(img.shape, dtype=np.int64)
    
    # Iterate over each pixel in the input image
    for i in range(img.shape[0]):
        for j in range(img.shape[1]):
            # Calculate the sum of all pixels above and to the left of the current pixel
            if i > 0:
                intg_img[i, j] += intg_img[i-1, j]
            if j > 0:
                intg_img[i, j] += intg_img[i, j-1]
            if i > 0 and j > 0:
                intg_img[i, j] -= intg_img[i-1, j-1]
            
            # Add the value of the current pixel to the integral image
            intg_img[i, j] += img[i, j]
    
    # Return the computed integral image
    return intg_img

def get_rect(intg_img, si, sj, ei, ej):
    # Calculate the sum of all pixels within the specified rectangular region
    result = intg_img[ei, ej]
    if si > 0:
        result -= intg_img[si-1, ej]
    if sj > 0:
        result -= intg_img[ei, sj-1]
    if si > 0 and sj > 0:
        result += intg_img[si-1, sj-1]
    
    # Return the computed sum of pixels
    return result

In [4]:
def load_folder(folder):
    imgs = []
    for file in os.listdir(folder):
        if ".pgm" in file:
            imgs.append(cv2.imread(folder+"/"+file,-1))
    return imgs

In [5]:
def compute_haar_features(intg_img, haar_filters, feature_size):
    haar_features = []
    
    for filter in haar_filters:
        filter_feature = []
        for i in range(intg_img.shape[0]-filter[0]+1):
            for j in range(intg_img.shape[1]-filter[1]+1):
             
                if filter[2]:
                    result = ( get_rect(intg_img, i, j, i+filter[0]-1, j+filter[1]//2-1) 
                    - get_rect(intg_img, i, j+filter[1]//2, i+filter[0]-1, j+filter[1]-1) )/((filter[0])*filter[1])
                else:
                    result = ( get_rect(intg_img, i, j, i+filter[0]//2-1, j +filter[1]-1) 
                    - get_rect(intg_img, i+filter[0]//2, j, i+filter[0]-1, j+filter[1]-1) )/((filter[0])*filter[1])
                filter_feature.append(result)
        haar_features.append(filter_feature)

    return haar_features
def generate_haar_filters(detector_size=19):
    haar_feature_size = 0
    # (r, c, is_vert)
    haar_filters= []
    for i in range(1, detector_size+1):
        for j in range(1, detector_size+1):
            if i % 2 == 0:
                haar_filters.append((i,j, 0))
                haar_feature_size += (19-i+1) * (19-j+1)
            if j % 2 == 0:
                haar_filters.append((i,j, 1))
                haar_feature_size += (19-i+1) * (19-j+1)
    return haar_feature_size, haar_filters

def get_haar_feature_extractor(detector_size):
    haar_feature_size, haar_filters = generate_haar_filters(detector_size)
    @delayed
    def get_haar(intg_img):
        return compute_haar_features(intg_img, haar_filters, haar_feature_size)
    return get_haar

haar_feature_extractor = get_haar_feature_extractor(19)


In [7]:
train_x_true = load_folder("train/face")[:200]
train_x_false = load_folder("train/non-face")[:200]
train_y_true = [1] * len(train_x_true)
train_y_false = [0] * len(train_x_false)

train_x = train_x_true + train_x_false
train_y = train_y_true + train_y_false

train_x = delayed(haar_feature_extractor(compute_intg_image(img)) for img in train_x)

train_x = train_x.compute(scheduler='single-threaded')
train_y = np.array(train_y)

In [64]:
def Adaboost(train_x, train_y, d=1, T=100):
    face_count = 0
    non_face_count = 0
    for i, label in enumerate(train_y):
        if label:
            face_count += 1
        else:
            non_face_count += 1
    w = np.array([1/face_count if train_y[i] else 1/non_face_count for i in range(len(train_y))])

    classifiers = []
    feature_sel = []
    beta = np.zeros(T)

    def select_features(x_data, x_sel):
        return [np.hstack([x[f] for f in x_sel]) for x in x_data]

    for t in range(T):
        w = w / np.sum(w)
        while True: 
            model = Perceptron()
            m_sel = np.random.permutation(len(train_x[0]))[:d]
            m_train_x = select_features(train_x, m_sel)
            model.fit(m_train_x, train_y, sample_weight=w)
            e = np.abs(train_y - model.predict(m_train_x))
            
            beta[t] = np.sum(w * e)
            if beta[t] < 0.5:
                
                feature_sel.append(m_sel)
                w = w * np.power(beta[t], 1-e)
                classifiers.append(model)
                break
            
        
    alpha = np.log(1/beta)
    predictions = np.sum( np.array([c.predict(select_features(train_x, f_sel)) for c, f_sel in zip(classifiers, feature_sel) ]) * alpha.reshape(-1,1), axis=0)[train_y==1]
    threshold = np.min(predictions)
    def model(x):
        predictions = np.sum( np.array([c.predict(select_features(x, f_sel)) for c, f_sel in zip(classifiers, feature_sel)]) * alpha.reshape(-1,1), axis=0)
        return predictions >= threshold
    return model

    

In [160]:
def predict(data):
    predictions = np.sum( np.array([c.predict(data) for c in classifiers]) * alpha.reshape(-1,1), axis=0)
    return predictions >= threshold


model = Adaboost(x, y)
y_pred = model(x)
def train_classifier(n=20,x,y):
    for j in range(n):
        







In [155]:
test_x_true = load_folder("train/face")[200:220]
test_x_false = load_folder("train/non-face")[200:220]
test_y_true = [1] * len(test_x_true)
test_y_false = [0] * len(test_x_false)

test_x = test_x_true + test_x_false
test_y = test_y_true + test_y_false

test_x = delayed(haar_feature_extractor(compute_intg_image(img)) for img in test_x)
test_x = np.array(test_x.compute(scheduler='single-threaded'))
test_y = np.array(test_y)



In [172]:
predict(test_x)

array([ True,  True,  True,  True, False,  True,  True,  True, False,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True,  True, False, False,  True, False, False, False,  True,
       False, False,  True, False,  True, False,  True, False,  True,
        True, False, False, False])

TypeError: len() of unsized object