# Important Modules

In [18]:
import cv2
import os
import sys
import glob
import numpy as np
import random as rand
import matplotlib.pyplot as plt
import pickle
import time

# <font color=blue> Load datasets (train and test) and save them in a pickle file </font>

### Training_dataset

In [2]:
faceDir_train     = './dataset/train/face/*.pgm'
nonfaceDir_train  = './dataset/train/non-face/*.pgm'


def _load_labelled_images(pattern, label):
    """
    Read every file matching `pattern` as a grayscale image.

    pattern: glob pattern of the image files
    label:   class label attached to every image (1 = face, 0 = non-face)

    Returns a list of (image, label) tuples.
    Raises IOError if a matched file cannot be decoded.
    """
    samples = []
    # glob order depends on the file system; sort it so the dataset order
    # (and everything pickled from it) is reproducible between runs.
    for path in sorted(glob.glob(pattern)):
        image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # cv2.imread signals failure by returning None instead of raising.
        if image is None:
            raise IOError("Could not read image: " + path)
        samples.append((image, label))
    return samples


face_images       = _load_labelled_images(faceDir_train, 1)
nonface_images    = _load_labelled_images(nonfaceDir_train, 0)

face_count    = len(face_images)
nonface_count = len(nonface_images)

# Faces first, then non-faces.
train_ds = face_images + nonface_images

In [3]:
# Report how many samples of each class were loaded.
print("Total face image:", face_count)
print("Total non-face image:", nonface_count)
print("Total image:", len(train_ds))

Total face image: 2429
Total non-face image: 4548
Total image: 6977


### Saving in a pickle file

In [4]:
# Write the combined list of (image, label) tuples to train_ds.pkl.
with open("train_ds.pkl", 'wb') as f:
    pickle.dump(train_ds,f)

### Loading a pickle file

In [5]:
 with open("train_ds.pkl", 'rb') as f:
        t = pickle.load(f)
        print(len(t))

6977


# <font color=#9933FF> Integral Image Calculation </font>

In [6]:
def calcIntegral(img):
    """
    This method returns the integral image of a given image.

    Entry [r][c] of the result is the sum of img[0..r][0..c], both bounds
    inclusive.
           ------ 
          | Args:|
           ------
    img: A 2d-numpy array (or nested sequence) of the original image

    Returns a float64 2d-numpy array with the same shape as img.
    Raises ValueError if img is not two-dimensional.
    """
    # Convert first so narrow pixel dtypes (e.g. uint8) cannot overflow.
    pixels = np.asarray(img, dtype=np.float64)
    if pixels.ndim != 2:
        raise ValueError("calcIntegral expects a 2d image, got %d dimension(s)" % pixels.ndim)

    # Running sum down the rows, then across the columns. Unlike the explicit
    # double loop this does not depend on the image being square.
    return pixels.cumsum(axis=0).cumsum(axis=1)

#  <font color=red>Rectangle Class </font> - A helper class for `Haar Feature` calculation. 
### <font color=blue> A Haar Feature is a collection of Rectangle Regions. </font>


In [7]:
class Rectangle:
    """
    A rectangular region described by an anchor (x, y) and a size.

    x, y:          column and row of the anchor corner
    width, height: horizontal and vertical extent
    """

    # constructor
    def __init__(self, x, y, width, height):
        self.x, self.y = x, y
        self.width, self.height = width, height

    def compute_sum(self, integralImg):
        """
        Combine the four corner entries of this rectangle in `integralImg`.

        The result is (top-left + bottom-right) - (top-right + bottom-left).
        For an inclusive integral image this is the sum of the pixels in rows
        y+1 .. y+height and columns x+1 .. x+width.
        """
        left, top = self.x, self.y
        right = left + self.width
        bottom = top + self.height

        upper_row = integralImg[top]
        lower_row = integralImg[bottom]

        return (upper_row[left] + lower_row[right]) - (upper_row[right] + lower_row[left])

# <font color=orange> Creating all possible features in `19 by 19` window size </font> 
#  <font color=red>Feature Class </font> - A helper class for creating `Haar Feature`

In [8]:
class Feature:
    """
    Builds the Haar features of a fixed-size window and evaluates them.

    A feature is a pair (black_rectangles, white_rectangles); its value on an
    image is sum(black) - sum(white), computed from the integral image.
    """

    # constructor
    def __init__(self, image_shape):
        """
        image_shape: (height, width) of the detection window
        """
        self.height, self.width = image_shape
        self.f = None  # Feature list
        self.f_values = None # Features' values for all images

    def creating_all_features(self):
        '''
          Create 5 types of Haar Features for all sizes, shapes and positions in a fixed window.

          Returns an object array of shape (number_of_features, 2); column 0 is
          the list of black rectangles and column 1 the list of white
          rectangles. The same array is stored in self.f.
        '''
        height = self.height
        width  = self.width

        # List of tuple where 1st element means List of black rectangles and 2nd element means List of white rectangles
        features = []

        for w in range(1, width + 1):        # All possible width
            for h in range(1, height + 1):   # All possible height
                for i in range(width - w):          # i + w < width
                    for j in range(height - h):     # j + h < height

                        fixed   = Rectangle(i, j, w, h)
                        right_1 = Rectangle(i + 1*w, j, w, h)
                        right_2 = Rectangle(i + 2*w, j, w, h)

                        bottom_1_right_1 = Rectangle(i + 1*w, j + 1*h, w, h)

                        bottom_1 = Rectangle(i, j + 1*h, w, h)
                        bottom_2 = Rectangle(i, j + 2*h, w, h)

                        # --- 2 Rectangle Haar Features ---
                        # Horizontal  -->  fixed (white) | right_1 (black)
                        if i + 2 * w < width:
                            features.append(([right_1], [fixed]))

                        # Vertical --> fixed (black) on top of bottom_1 (white)
                        if j + 2 * h < height:
                            features.append(([fixed], [bottom_1]))

                        # --- 3 Rectangle Haar Features ---
                        # Horizontal -->  fixed (white) | right_1 (black) | right_2 (white)
                        if i + 3 * w < width:
                            features.append(([right_1], [right_2, fixed]))

                        # Vertical --> fixed (white) / bottom_1 (black) / bottom_2 (white)
                        if j + 3 * h < height:
                            features.append(([bottom_1], [bottom_2, fixed]))

                        # --- 4 Rectangle Haar Feature (checkerboard) ---
                        if i + 2 * w < width and j + 2 * h < height:
                            features.append(([right_1, bottom_1], [fixed, bottom_1_right_1]))

        # The rectangle lists have different lengths (1 or 2), so the nested
        # list is ragged: np.array(features) raises ValueError on recent NumPy.
        # Fill an object array cell by cell to get a stable (N, 2) layout.
        feature_table = np.empty((len(features), 2), dtype=object)
        for idx, (black_regions, white_regions) in enumerate(features):
            feature_table[idx, 0] = black_regions
            feature_table[idx, 1] = white_regions

        self.f = feature_table
        return feature_table

    def features_value(self, train_ds_integral):
        '''
          Save features' value across all training images.

          train_ds_integral: sequence of (integral_image, label) tuples

          Returns (X, y): X has shape (number_of_features, number_of_images)
          and holds black - white for every feature/image pair; y is the array
          of labels. The pair is also stored in self.f_values.
          creating_all_features() must have been called first.
        '''
        X = np.zeros((len(self.f), len(train_ds_integral)))
        y = np.array([sample[1] for sample in train_ds_integral])

        for feature_idx, (black_regions, white_regions) in enumerate(self.f):
            for k, sample in enumerate(train_ds_integral):

                integral_img = sample[0]
                black_value = 0
                white_value = 0

                for br in black_regions:
                    black_value += br.compute_sum(integral_img)
                for wr in white_regions:
                    white_value += wr.compute_sum(integral_img)

                X[feature_idx][k] = (black_value - white_value)

        self.f_values = (X, y)
        return X, y

    

### Saving integral images of the training dataset

In [9]:
# Pair the integral image of every training sample with its original label.
train_ds_integral = []

for image, label in train_ds:
    integral_img = calcIntegral(image)
    train_ds_integral.append((integral_img, label))

In [10]:
# Sanity check: one integral image per training sample.
len(train_ds_integral)

6977

In [47]:
# Persist the integral images; VJ.train reads train_integral_ds.pkl back.
with open("train_integral_ds.pkl", 'wb') as f:
    pickle.dump(train_ds_integral,f)

## Testing the helper class & time statistics

In [12]:
# creating all features for a certain window shape
s = time.time()

f = Feature(train_ds[0][0].shape)
features = f.creating_all_features()

e = time.time()
print(e-s)

0.3631439208984375


In [13]:
# Number of Haar features generated for the window.
len(features)

51705

In [14]:
# The Feature instance keeps the generated features on its `f` attribute.
type(f.f)

numpy.ndarray

In [15]:
# The stored attribute should have as many entries as the returned array.
len(f.f)

51705

In [16]:
# Element-wise comparison of the returned array with the stored attribute.
(features == f.f).all()


True

In [17]:
# Evaluate every feature on every integral image and time the call.
# This is the slow step of the notebook: the recorded run printed ~1566 seconds.
start = time.time()

# X1: feature values (one row per feature, one column per image); y1: labels.
X1, y1 = f.features_value(train_ds_integral)

end = time.time()
print(end - start)


1566.442535161972


### Saving the features value of training data in pickle file

In [19]:
# Split the feature-value matrix row-wise so each half is pickled to its own file.
total_features = len(X1)
half = total_features // 2
X_first_half, X_second_half = X1[:half], X1[half:]


In [20]:
# Name the handle `file` (as the all_features.pkl cells do) so the Feature
# instance bound to `f` is not replaced by a closed file object.
with open("features_value_1.pkl", 'wb') as file:
    pickle.dump(X_first_half, file)

In [21]:
# Name the handle `file` (as the all_features.pkl cells do) so the Feature
# instance bound to `f` is not replaced by a closed file object.
with open("features_value_2.pkl", 'wb') as file:
    pickle.dump(X_second_half, file)

In [22]:
# Reload the first half for a round-trip check. The handle is named `file`
# (as in the all_features.pkl cells) so the Feature instance `f` survives.
with open("features_value_1.pkl", 'rb') as file:
    a = pickle.load(file)

In [23]:
# Reload the second half for a round-trip check. The handle is named `file`
# (as in the all_features.pkl cells) so the Feature instance `f` survives.
with open("features_value_2.pkl", 'rb') as file:
    b = pickle.load(file)

In [24]:
# Stack the two reloaded halves back together along axis 0 (the feature rows).
X3 = np.concatenate((a,b), axis=0)

In [25]:
# Round-trip check: the reassembled matrix must equal the in-memory one.
(X1==X3).all()

True

In [26]:
# Persist the label vector. The handle is named `file` (as in the
# all_features.pkl cells) so the Feature instance `f` is not overwritten.
with open("y.pkl", 'wb') as file:
    pickle.dump(y1, file)

In [27]:
# Reload the label vector for a round-trip check. The handle is named `file`
# (as in the all_features.pkl cells) so the Feature instance `f` survives.
with open("y.pkl", 'rb') as file:
    yy = pickle.load(file)

In [28]:
# Round-trip check: the reloaded labels must equal the in-memory ones.
(y1==yy).all()

True

In [35]:
# Persist the generated feature array (the rectangle lists) to all_features.pkl.
with open("all_features.pkl", 'wb') as file:
    pickle.dump(features,file)

In [36]:
# Reload the feature array into `ffff`.
# NOTE(review): no cell in view compares `ffff` with `features`.
with open("all_features.pkl", 'rb') as file:
    ffff = pickle.load(file)

## Weak Classifier Modeling

In [44]:
class wc:
    """
    Weak classifier: a single Haar feature compared against a threshold.

    black, white: lists of rectangle objects exposing compute_sum(integral_img)
    threshold:    feature value at which the decision flips
    polarity:     +1 or -1; selects which side of the threshold means "face"
    """

    # constructor
    def __init__(self, black, white, threshold, polarity):
        self.black = black
        self.white = white
        self.threshold = threshold
        self.polarity = polarity

    def classify(self, integral_img):
        """
        Classify one integral image.

        Returns 1 when polarity * (black - white) < polarity * threshold,
        otherwise 0.
        """
        # The regions live on the instance; the bare names `black` and
        # `white_regions` are not defined in this scope.
        black_value = sum(br.compute_sum(integral_img) for br in self.black)
        white_value = sum(wr.compute_sum(integral_img) for wr in self.white)

        value = black_value - white_value

        if self.polarity * value < self.polarity * self.threshold:
            return 1
        return 0

# <center><font color=Blue> Viola-Jones Class --> Model </font> </center>

#### In a 24 by 24 window there are more than 160k Haar Features if all types of Haar Features are considered. The model is trained on face and non-face images, and during training it learns a set of weak classifiers. 

#### Each weak classifier is based on a Haar Feature and has a weight associated with it. 

#### <font color=red>So, the model mainly has 2 properties: `a list of weak classifiers` and `a list of their weights`.</font>

In [2]:
from feature import Feature
from rectangleArea import Rectangle
from weakClassifier import wc
from integral import calcIntegral

class VJ:
    """
    Viola-Jones style boosted classifier.

    classifiers:        weak classifiers selected during training
    classifiersWeights: weight (alpha) of each selected weak classifier
    """

    # constructor
    def __init__(self, total_wc=100):
        """
        total_wc: number of boosting rounds, i.e. weak classifiers to select
        """
        self.total_wc           = total_wc
        self.classifiers        = []
        self.classifiersWeights = []

    def train(self, train_ds):
        """
        Select `total_wc` weak classifiers with AdaBoost.

        train_ds: sequence of (image, label) tuples, label 1 = face, else non-face

        Besides `train_ds` this reads four pickle files from the working
        directory: train_integral_ds.pkl, features_value_1.pkl,
        features_value_2.pkl and y.pkl. They must describe the same samples,
        in the same order, as `train_ds`.

        Raises ValueError if `train_ds` is empty or its length differs from the
        pickled integral images.
        """
        image_count = len(train_ds)
        if image_count == 0:
            raise ValueError("train_ds must contain at least one sample")

        # face and nonface image count
        labels = np.array([sample[1] for sample in train_ds])
        is_face = labels == 1
        face_count = int(np.count_nonzero(is_face))
        nonface_count = image_count - face_count

        # loading integral images of train_ds
        # NOTE: pickle.load executes arbitrary code; only load files you produced.
        with open("train_integral_ds.pkl", 'rb') as fh:
            train_ds_integral = pickle.load(fh)
            print("Integral images are loaded from pickle file for training the model")

        if len(train_ds_integral) != image_count:
            raise ValueError("train_integral_ds.pkl holds %d samples but train_ds holds %d"
                             % (len(train_ds_integral), image_count))

        # Initial weight of every image (sample weight) depends on that
        # image's own label: each class receives half of the total weight.
        w = np.zeros(image_count)
        if face_count:
            w[is_face] = 1.0 / (2*face_count)
        if nonface_count:
            w[~is_face] = 1.0 / (2*nonface_count)

        features = Feature(train_ds[0][0].shape).creating_all_features()

        # load features value (X) and classification (y) from pickle file
        # saves a lot of time while tuning in training phase
        with open("features_value_1.pkl", 'rb') as fh:
            a = pickle.load(fh)
        with open("features_value_2.pkl", 'rb') as fh:
            b = pickle.load(fh)
        X = np.concatenate((a,b), axis=0)
        with open("y.pkl", 'rb') as fh:
            y = pickle.load(fh)

        # Keeps the error strictly inside (0, 1) so beta and alpha stay finite.
        tiny = np.finfo(float).eps

        for wc_i in range(self.total_wc):

            # Normalise the weights to a probability distribution (sum == 1).
            w = w / np.sum(w)

            trainedWeakClassifier = self.weakClassifier_training(X, y, features, w, face_count, nonface_count)

            bc = None
            bacc = None
            be = float('inf')

            for twc in trainedWeakClassifier:
                # acc[i] is 0 when sample i is classified correctly, 1 otherwise.
                acc = [abs(twc.classify(sample[0]) - sample[1]) for sample in train_ds_integral]

                # Weighted error; the weights already sum to 1, so no division.
                e = float(np.dot(w, acc))

                if e < be:
                    bc = twc
                    be = e
                    bacc = acc

            if bc is None:
                # No candidate classifiers (no features): nothing left to select.
                break

            be = min(max(be, tiny), 1.0 - tiny)
            beta = be / (1.0 - be)

            # Shrink the weight of correctly classified samples (acc == 0).
            w = w * np.power(beta, 1 - np.asarray(bacc))

            alpha = float(np.log(1.0/beta))
            self.classifiersWeights.append(alpha)
            self.classifiers.append(bc)

    def weakClassifier_training(self, X, y, features, weights, face_count,nonface_count):
        """
        Fit one weak classifier per feature.

        X:        feature values, one row per feature and one column per sample
        y:        sample labels (1 = face)
        features: indexable collection whose entry k is (black_regions,
                  white_regions) for row k of X
        weights:  current sample weights
        face_count, nonface_count: accepted for interface compatibility, unused

        For every feature the samples are sorted by feature value and the
        threshold with the lowest weighted error is kept, with its polarity.

        Returns a list of `wc` instances, one per row of X.
        """
        total_pos_weights, total_neg_weights = 0, 0

        for w, label in zip(weights, y):

            if label == 1:
                total_pos_weights = total_pos_weights + w
            else:
                total_neg_weights = total_neg_weights + w

        c = []

        for index, feature in enumerate(X):

            # sort according to feature value
            sortedList = sorted(zip(weights, feature, y), key=lambda x: x[1])

            pos_img_seen = 0
            neg_img_seen = 0
            pos_img_weights = 0
            neg_img_weights = 0

            min_e = float('inf')
            bf = features[index]
            bt = None
            bp = None

            for w, f, label in sortedList:

                # Error of splitting just below this sample, for either
                # labelling of the two sides.
                error = min(neg_img_weights + total_pos_weights - pos_img_weights, pos_img_weights + total_neg_weights - neg_img_weights)

                if error < min_e:
                    min_e = error
                    bt = f
                    # The polarity is stored separately from the threshold.
                    bp = 1 if pos_img_seen > neg_img_seen else -1

                if label == 1:
                    pos_img_seen += 1
                    pos_img_weights += w
                else:
                    neg_img_seen += 1
                    neg_img_weights += w

            black_region = bf[0]
            white_region = bf[1]

            c.append(wc(black_region, white_region, bt, bp))

        return c

    def classify(self, image):
        """
        Classify a raw image: 1 if the weighted vote of the weak classifiers
        reaches half of the total weight, otherwise 0.
        """
        total = 0
        integral_image = calcIntegral(image)

        for c_w, cl in zip(self.classifiersWeights, self.classifiers):
            total += c_w * cl.classify(integral_image)

        if total >= 0.5 * sum(self.classifiersWeights):
            return 1
        else:
            return 0

    def saveModel(self, filename):
        """Pickle this model to `filename` + ".pkl"."""
        with open(filename+".pkl", 'wb') as f:
            pickle.dump(self, f)

    @staticmethod
    def loadModel(filename):
        """
        Load a model from `filename` + ".pkl".

        NOTE: pickle.load executes arbitrary code; only load trusted files.
        """
        with open(filename+".pkl", 'rb') as f:
            return pickle.load(f)
           