<a href="https://colab.research.google.com/github/Motadeh/Human-detection/blob/main/Machine_learning_and_Computer_vison_Human_detection.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Human detection Project

This project has two sections. The first section shows human detection using my own HOG feature extraction function, and the second section shows human detection using high-level off-the-shelf functions.

Based on the experiments carried out, a block size of 3*3 cells produces better results in both implementations.

## Human Detection with built HOG feature extraction function

In this section, HOG feature extraction function was created from scratch

In [None]:
# Mount Google Drive (Colab only) so the dataset paths under
# /content/drive/MyDrive used by the cells below resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import cv2
import glob
import numpy as np
import os

In [None]:
import numpy as np
import scipy
import scipy.signal as sig
import matplotlib.pyplot as plt
from skimage import io, color

### Dataset preparation

This section handles creating both negative and positive patches.

Positive patches are created from Image set containing just humans and converted to 2-D 128*64 image patches.

In [None]:
# Collect the pedestrian image file paths (the next cell repeats this same glob).
path = glob.glob('/content/drive/MyDrive/Colab Notebooks/pedestrians128x64/*.ppm')

In [None]:
# Gather the pedestrian images. The glob result is sorted so the patch order
# (and therefore any seeded train/test split) is the same on every run.
path = sorted(glob.glob('/content/drive/MyDrive/Colab Notebooks/pedestrians128x64/*.ppm'))
if not path:
    # Fail here with a clear message instead of a later IndexError on
    # positive_patches[0].
    raise FileNotFoundError("No .ppm pedestrian images found in the dataset folder")

positive = []
for item in path:
    positive_image = cv2.imread(item)
    # cv2.imread reports failure by returning None rather than raising, so
    # check explicitly instead of letting cvtColor fail with an opaque error.
    if positive_image is None:
        raise IOError("Could not read image: {}".format(item))
    # imread returns BGR; the HOG pipeline works on single-channel patches.
    positive.append(cv2.cvtColor(positive_image, cv2.COLOR_BGR2GRAY))

# Stack into one array, keep a copy on disk, and expose it under the name the
# rest of the notebook uses.
numpy_positive = np.array(positive)
np.save("positive.npy", numpy_positive)
positive_patches = numpy_positive

Negative patches were created from existing skimage sample images and are extracted and resized into 2-D 128*64 image patches.

In [None]:
from skimage import data, color

# Names of the skimage sample images used as sources for the negative patches.
imgs_to_use = ['immunohistochemistry', 'hubble_deep_field']
# Load each sample by attribute name from skimage.data and convert it to grayscale.
images = [color.rgb2gray(getattr(data, name)()) for name in imgs_to_use]

In [None]:
from sklearn.feature_extraction.image import PatchExtractor
from skimage import transform
import numpy as np

def extract_patches(img, N, scale=1, patch_size=positive_patches[0].shape):
    """Sample patches from ``img`` and return them at ``patch_size``.

    Patches are cut at ``scale * patch_size`` (truncated to whole pixels) with
    a ``PatchExtractor`` that is given ``max_patches=N`` and a fixed
    ``random_state`` of 0. When ``scale`` is not 1 every patch is resized back
    to ``patch_size`` so all returned patches share one shape.

    Args:
        img: 2-D image array to sample from.
        N: value passed to ``PatchExtractor`` as ``max_patches``.
        scale: factor applied to ``patch_size`` before sampling.
        patch_size: target patch shape; defaults to the shape of the first
            positive patch (evaluated once, when the function is defined).

    Returns:
        Array of patches with a leading patch axis.
    """
    scaled_size = tuple((scale * np.array(patch_size)).astype(int))
    sampler = PatchExtractor(patch_size=scaled_size, max_patches=N,
                             random_state=0)
    # PatchExtractor expects a batch of images, hence the added leading axis.
    sampled = sampler.transform(img[np.newaxis])
    if scale == 1:
        return sampled
    return np.array([transform.resize(piece, patch_size) for piece in sampled])

# Sample 1000 patches per (image, scale) pair and stack them into one array;
# with 2 images and 3 scales this gives the (6000, 128, 64) shape shown below.
negative_patches = np.vstack([extract_patches(im, 1000, scale)
                              for im in images for scale in [0.5, 1.0, 2.0]])
negative_patches.shape

(6000, 128, 64)

### HOG Feature Extraction

✅ Calculate Gradient

✅ Calculate Magnitude and Orientation

✅ Calculate HOG in 8*8 cells in 9 bins

✅ Normalize histograms over blocks of 2x2 cells (each cell is 8*8 pixels)

Reference code for a single pixel HOG extraction was sourced and adjusted to fit an entire image and list of images.

Compared with the high-level off-the-shelf functions used in the second section below, a block size of 3*3 cells is seen to produce a better result than a block size of 2*2 cells.

**Calculate Gradient**

Sobel operator is used to compute the gradient along x and y axis.



In [None]:
from skimage import io, color

def extractGradient(img):
    """Return the Sobel responses of ``img`` as a ``(G_x, G_y)`` tuple.

    Each response comes from a true 2-D convolution (``convolve2d`` flips the
    kernel) with ``mode='same'``, so both outputs have the shape of ``img``.
    """
    kernel_x = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]])
    # The vertical kernel is the negated transpose of the horizontal one:
    # [[1, 2, 1], [0, 0, 0], [-1, -2, -1]].
    kernel_y = -kernel_x.T

    return tuple(
        sig.convolve2d(img, kernel, mode='same')
        for kernel in (kernel_x, kernel_y)
    )

**Extract features**

**calculate_magnitude_hist_cell** calculates the gradient magnitude and orientation of every pixel in an 8*8 pixel cell. Each magnitude is distributed between the two orientation bins closest to that pixel's orientation.

**distribute_bucket_vals** is used to determine which two bins the magnitude should occupy based on the orientation and how the magnitude will be distributed between the two bins.

**calculate_magnitude_hist_block** concatenates the histograms of the cells that make up one block; the resulting block vector is normalized in extract_all.

In [None]:
from functools import reduce

N_BUCKETS = 9
CELL_SIZE = 8  # 8x8 pixels
BLOCK_SIZE = 2  # 2x2 cells


def distribute_bucket_vals(m, d, bucket_vals):
    """Split magnitude ``m`` between the two orientation bins nearest ``d``.

    The unsigned orientation range [0, 180) is divided into ``N_BUCKETS``
    equal bins. The magnitude is shared linearly between the bin that starts
    at or below ``d`` and the next bin, in proportion to how close ``d`` is to
    each bin start. The last bin wraps around to bin 0, so directions in the
    final bin (and exactly 180) are handled.

    Args:
        m: gradient magnitude to distribute.
        d: gradient direction in degrees; reduced modulo 180.
        bucket_vals: mutable sequence of length ``N_BUCKETS``, updated in place.
    """
    bin_width = 180.0 / N_BUCKETS
    d = d % 180.0
    left_bin = int(d // bin_width) % N_BUCKETS
    # Wrap so the last bin interpolates towards bin 0.
    right_bin = (left_bin + 1) % N_BUCKETS

    # Interpolate against the unwrapped right edge (left_bin + 1); using the
    # wrapped index here would make the weight negative for the last bin.
    left_val = m * ((left_bin + 1) * bin_width - d) / bin_width
    right_val = m * (d - left_bin * bin_width) / bin_width
    bucket_vals[left_bin] += left_val
    bucket_vals[right_bin] += right_val


def calculate_magnitude_hist_cell(loc__x, loc__y, gradient, eps=1e-5):
    """Return the orientation histogram of one cell.

    Args:
        loc__x: start index of the cell along the first array axis.
        loc__y: start index of the cell along the second array axis.
        gradient: ``(G_x, G_y)`` pair of 2-D gradient arrays.
        eps: small constant added to ``G_x`` to avoid division by zero.

    Returns:
        Array of length ``N_BUCKETS`` with the magnitude-weighted histogram.
        A cell that reaches past the array edge is truncated by slicing, and
        an empty slice yields an all-zero histogram.
    """
    rows = slice(loc__x, loc__x + CELL_SIZE)
    cols = slice(loc__y, loc__y + CELL_SIZE)
    cell__x = gradient[0][rows, cols]
    cell__y = gradient[1][rows, cols]

    magnitudes = np.sqrt(cell__x * cell__x + cell__y * cell__y)
    # arctan yields (-90, 90) degrees. Taking the value modulo 180 maps it to
    # the unsigned range [0, 180); an absolute value would instead fold e.g.
    # 135 degrees onto 45 and leave the upper bins almost empty.
    directions = np.degrees(np.arctan(cell__y / (cell__x + eps))) % 180.0

    bucket_vals = np.zeros(N_BUCKETS)
    for magnitude, direction in zip(magnitudes.ravel(), directions.ravel()):
        distribute_bucket_vals(magnitude, direction, bucket_vals)
    return bucket_vals

# Block layout follows BLOCK_SIZE: 2 gives the 2*2 block, 3 gives a 3*3 block.
def calculate_magnitude_hist_block(loc__x, loc__y, gradient):
    """Return the concatenated cell histograms of one block.

    ``(loc__x, loc__y)`` is the top-left corner of the block. The block spans
    ``BLOCK_SIZE`` x ``BLOCK_SIZE`` cells of ``CELL_SIZE`` pixels each. Cells
    are visited with the ``loc__x`` offset varying fastest, which for
    ``BLOCK_SIZE = 2`` is the order (0, 0), (C, 0), (0, C), (C, C).

    Returns:
        1-D array of length ``BLOCK_SIZE * BLOCK_SIZE * N_BUCKETS``.
    """
    cell_hists = [
        calculate_magnitude_hist_cell(
            loc__x + step_x * CELL_SIZE, loc__y + step_y * CELL_SIZE, gradient)
        for step_y in range(BLOCK_SIZE)
        for step_x in range(BLOCK_SIZE)
    ]
    return np.concatenate(cell_hists)
# This is used for 3*3 block size
# def calculate_magnitude_hist_block(loc__x, loc__y, gradient):
#     # (loc__x, loc__y) defines the top left corner of the target block.
#     # this implements the 3 by 3 block size
#     return reduce(
#         lambda arr1, arr2: np.concatenate((arr1, arr2)),
#         [calculate_magnitude_hist_cell(x, y, gradient) for x, y in zip(
#             [loc__x, loc__x + CELL_SIZE, loc__x + (2*CELL_SIZE), loc__x, loc__x + CELL_SIZE, loc__x + (2*CELL_SIZE), loc__x, loc__x + CELL_SIZE, loc__x + (2*CELL_SIZE)],
#             [loc__y, loc__y, loc__y, loc__y + CELL_SIZE, loc__y + CELL_SIZE, loc__y + CELL_SIZE, loc__y + (2*CELL_SIZE), loc__y + (2*CELL_SIZE), loc__y + (2*CELL_SIZE)],
#         )]
#     )

This section defines a function that extracts the HOG features of a single patch; it is later applied to all patches (positive and negative). The function calls both extractGradient and calculate_magnitude_hist_block and returns an array of the image's features.

In [None]:
def extract_all(image, stride=1):
    """Return the HOG feature vector of ``image``.

    A block histogram is computed at every ``stride``-th position along both
    axes, L2-normalised, and all block vectors are concatenated in scan order.
    Blocks near the far edges reach past the image; their cells are truncated
    by slicing, or contribute zeros when they fall entirely outside.

    Args:
        image: 2-D grayscale image array.
        stride: step in pixels between block origins. The default of 1 visits
            every pixel; ``stride=CELL_SIZE`` gives cell-aligned blocks and
            64x fewer of them. The same value must be used for training and
            prediction because it changes the feature length.

    Returns:
        1-D array of concatenated, normalised block histograms (empty when the
        image has no pixels).
    """
    gradient = extractGradient(image)

    block_features = []
    for x in range(0, image.shape[0], stride):
        for y in range(0, image.shape[1], stride):
            block = calculate_magnitude_hist_block(x, y, gradient)
            norm = np.linalg.norm(block)
            # A gradient-free block has zero norm; keep it as zeros instead of
            # dividing by zero and filling the feature vector with NaNs.
            if norm > 0:
                block = block / norm
            block_features.append(block)

    if not block_features:
        return np.zeros(0)
    return np.concatenate(block_features)

This section calls the above function and saves the result in X. Not all positive and negative patches were used here, in order to reduce the run-time, as the function has a high run-time.

In [None]:
from itertools import chain

# One feature vector per patch; positives come first so labels can be assigned by position.
X = np.array([extract_all(image) for image in chain(positive_patches, negative_patches)])

In [None]:
# Labels: 1 for the leading positive patches, 0 for the negative patches after them.
y = np.zeros(X.shape[0])
y[0:positive_patches.shape[0]] = 1

### Training
This section handles splitting, training and testing of the dataset

The X dataset and y labels are split into training data and test data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a test split using train_test_split's default proportions; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=10)

SVC is then used to create a classifier using the training dataset.

In [None]:
from sklearn.svm import SVC
# SVC with its default hyper-parameters, fitted on the training split only.
human_detection = SVC()
human_detection.fit(X_train, y_train)

In [None]:
# Evaluate the classifier on the held-out test split.
human_detection.score(X_test, y_test)

1.0

### Human Detection
Here an image containing human and other objects is used to evaluate the system.

In [None]:
import skimage
from skimage import io, color
from skimage.io import imread_collection


# NOTE(review): the folder name in this pattern ends with a space
# ('...assessed_test /') — confirm this matches the folder on Drive.
load_pattern = '/content/drive/MyDrive/Colab Notebooks/street_video assessed_test /*.png'
frames=imread_collection(load_pattern)

skimage.io.imshow(frames[0])

# The first frame is the image used for detection.
test_image = frames[0]

# Single-channel copy, matching the grayscale training patches.
test_image_gray_0 = cv2.cvtColor(test_image, cv2.COLOR_RGB2GRAY)


In [None]:
from google.colab.patches import cv2_imshow
# Display the grayscale test frame with Colab's cv2 image helper.
cv2_imshow(test_image_gray_0)

In [None]:
def sliding_window(image, stepSize, windowSize):
    """Yield ``((x, y), patch)`` for each window position over ``image``.

    Args:
        image: array indexed as ``image[row, column]``.
        stepSize: step in pixels between consecutive windows on both axes.
        windowSize: ``(height, width)`` of the window.

    Yields:
        ``(x, y)`` — column and row of the window's top-left corner — and the
        corresponding ``height`` x ``width`` slice of ``image``. Rows are
        scanned in the outer loop. Nothing is yielded when the image is
        smaller than the window.
    """
    win_h, win_w = windowSize[0], windowSize[1]
    # +1 so the last position at which the window still fits is included.
    for y in range(0, image.shape[0] - win_h + 1, stepSize):
        for x in range(0, image.shape[1] - win_w + 1, stepSize):
            yield (x, y), image[y:y + win_h, x:x + win_w]

In [None]:
# Materialise every window: `indices` holds the (x, y) corners, `patches` the matching image crops.
indices,patches = zip(*sliding_window(test_image_gray_0,10,positive_patches[0].shape))

Features of the new image are extracted

In [None]:
# Feature vector for each window, built with the same extract_all used for the training data.
hog_2 = np.array([extract_all(patch) for patch in patches])


The trained classifier is now used to label which parts of the image contain humans.

In [None]:
import matplotlib.pyplot as plt

# Classify every sliding-window patch (1 = human).
labels = human_detection.predict(hog_2)

fig, ax = plt.subplots(1, 1, figsize=(12, 20))
# Show the whole frame: the window corners are in full-image coordinates, so
# displaying only a cropped band of rows would shift every rectangle away
# from the region it was detected in.
ax.imshow(test_image_gray_0, cmap='gray')
ax.axis('off')

# Patch height (Ni) and width (Nj), used as the rectangle size.
Ni, Nj = positive_patches[0].shape
indices = np.array(indices)

# Top-left (x, y) corners of the windows classified as human, kept for tracking.
track = []

for i, j in indices[labels == 1]:
    ax.add_patch(plt.Rectangle((i, j), Nj, Ni, edgecolor='red',
                               alpha=0.3, lw=2, facecolor='none'))
    track.append([i, j])


### Tracking

Here, human frames from the human detection image are tracked.

In the last code for Human detection, the index positions of the detected humans are saved to be used for tracking.

Indexes of detected humans used for bounding box

In [None]:
# Display the (x, y) corners of the windows classified as human.
track

CSRT tracking algorithm is used here.

>the box location will be updated later with new frame

In [None]:
tracker = cv2.TrackerCSRT_create()
# Initial bounding box as [x, y, width, height]; hard-coded here rather than read from `track`.
init_box = [210,170,64,128]

tracker.init(frames[0], init_box)

In [None]:
import numpy as np
# The first frame was used to initialise the tracker, so one fewer frame is updated.
n_frames = len(frames) - 1
# Row i holds the box found in frames[i+1]; a row stays all-zero when the tracker reports failure.
boxes = np.zeros((n_frames,4), dtype='int')

for i in range(n_frames):
  ok, box = tracker.update(frames[i+1])
  if ok:
    boxes[i] = box

In [None]:
# Draw the initial box on the first frame and show it.
# NOTE: this rebinds the notebook-level names x and y (y previously held the label array).
x,y,w,h = init_box
vis_image = cv2.rectangle(frames[0],(x,y),(x+w,y+h),(255,0,0),2)
skimage.io.imshow(vis_image)

In [None]:
video_output_file = '/content/drive/MyDrive/Colab Notebooks/test2.avi'
cc = cv2.VideoWriter_fourcc(*'XVID')
# Frame size is given as (width, height); the last argument marks colour output.
writer = cv2.VideoWriter(video_output_file, cc, 30,
                         (vis_image.shape[1], vis_image.shape[0]), True)

try:
    # The frames were loaded by skimage in RGB order while VideoWriter expects
    # BGR, hence the reversed channel axis.
    writer.write(vis_image[:, :, ::-1])  # RGB -> BGR

    for i, (x, y, w, h) in enumerate(boxes):
        # Rows left at zero mark frames where the tracker failed. Test the box
        # size rather than x so a valid box touching the left edge is kept.
        if w > 0 and h > 0:
            # boxes[i] came from tracker.update(frames[i+1]), so it belongs
            # on frame i+1, not frame i.
            vis_image = cv2.rectangle(frames[i + 1], (x, y), (x + w, y + h),
                                      (255, 0, 0), 2)
            writer.write(vis_image[:, :, ::-1])
finally:
    # Always finalise the file, even if drawing or writing raises.
    writer.release()

### Confusion Matrix

The confusion matrix for the dataset is calculated

In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix on the held-out test split: true labels first, predictions second.
cm = confusion_matrix(y_test, human_detection.predict(X_test))

In [None]:
import seaborn as sb
plt.figure(figsize=(10, 7))
# fmt='d' prints the integer counts in full; the default annotation format
# switches to scientific notation for larger counts.
sb.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('Truth')

## Human detection using high-level off-shelf libraries
This code-base uses high-level off-shelf libraries to detect humans in a video recording.

In [None]:
# Mount Google Drive (Colab only) so the dataset paths under
# /content/drive/MyDrive used by the cells below resolve.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import cv2
import glob
import numpy as np
import os

### Dataset preparation
dataset preparation

Negative and Positive patches creation

Here the negative and positive patches are imported and created respectively and preprocessing is done on both dataset. The positive patches are first created and the negative patches are created using the shape of positive patches because the shape and format have to be the same.

Image set containing just humans are imported and converted to 2-D 128*64 image patches.

In [None]:
# Gather the pedestrian images. The glob result is sorted so the patch order
# (and therefore any seeded train/test split) is the same on every run.
path = sorted(glob.glob('/content/drive/MyDrive/Colab Notebooks/pedestrians128x64/*.ppm'))
if not path:
    # Fail here with a clear message instead of a later IndexError on
    # positive_patches[0].
    raise FileNotFoundError("No .ppm pedestrian images found in the dataset folder")

positive_image_set = []
for item in path:
    positive_image = cv2.imread(item)
    # cv2.imread reports failure by returning None rather than raising, so
    # check explicitly instead of letting cvtColor fail with an opaque error.
    if positive_image is None:
        raise IOError("Could not read image: {}".format(item))
    # imread returns BGR; the HOG features are computed on single-channel patches.
    positive_image_set.append(cv2.cvtColor(positive_image, cv2.COLOR_BGR2GRAY))

positive_patches = np.array(positive_image_set)

Negative patches were created from existing skimage sample images and are extracted and resized into 2-D 128*64 image patches.

In [None]:
from skimage import data, color

# Names of the skimage sample images used as sources for the negative patches.
imgs_to_use = ['immunohistochemistry', 'hubble_deep_field']
# Load each sample by attribute name from skimage.data and convert it to grayscale.
images = [color.rgb2gray(getattr(data, name)()) for name in imgs_to_use]

In [None]:
from sklearn.feature_extraction.image import PatchExtractor
from skimage import transform
import numpy as np

def extract_patches(img, N, scale=1, patch_size=positive_patches[0].shape):
    """Sample patches from ``img`` and return them at ``patch_size``.

    Patches are cut at ``scale * patch_size`` (truncated to whole pixels) with
    a ``PatchExtractor`` that is given ``max_patches=N`` and a fixed
    ``random_state`` of 0. When ``scale`` is not 1 every patch is resized back
    to ``patch_size`` so all returned patches share one shape.

    Args:
        img: 2-D image array to sample from.
        N: value passed to ``PatchExtractor`` as ``max_patches``.
        scale: factor applied to ``patch_size`` before sampling.
        patch_size: target patch shape; defaults to the shape of the first
            positive patch (evaluated once, when the function is defined).

    Returns:
        Array of patches with a leading patch axis.
    """
    scaled_size = tuple((scale * np.array(patch_size)).astype(int))
    sampler = PatchExtractor(patch_size=scaled_size, max_patches=N,
                             random_state=0)
    # PatchExtractor expects a batch of images, hence the added leading axis.
    sampled = sampler.transform(img[np.newaxis])
    if scale == 1:
        return sampled
    return np.array([transform.resize(piece, patch_size) for piece in sampled])

# Sample 1000 patches per (image, scale) pair — 2 images x 3 scales — and stack them into one array.
negative_patches = np.vstack([extract_patches(im, 1000, scale)
                              for im in images for scale in [0.5, 1.0, 2.0]])

### Feature Extraction

This section includes the extraction of features using sklearn feature

X, an array of the features extracted from the images, is computed, and y is created using the first dimension of X's shape, which is basically the number of samples in the dataset. X and y must have the same number of entries.

After a series of tests, a block size (cells_per_block) of (3,3) was used, as it produced better results.

In [None]:
from skimage import feature
from itertools import chain

# NOTE(review): test_images and fds are not referenced anywhere else in the visible notebook.
test_images = []
fds = []

# HOG descriptor per patch (9 orientations, 8x8-pixel cells, 3x3-cell blocks); positives come first.
X = np.array([feature.hog(image, orientations=9, pixels_per_cell=(8, 8),
                    cells_per_block=(3, 3) ) for image in chain(positive_patches, negative_patches)])
# Labels: 1 for the leading positive patches, 0 for the negative patches after them.
y = np.zeros(X.shape[0])
y[0:positive_patches.shape[0]] = 1


### Training
This section handles splitting, training and testing of the dataset

The X dataset and y labels are split into training data and test data

In [None]:
from sklearn.model_selection import train_test_split

# Hold out a test split using train_test_split's default proportions; the seed is fixed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X,y,random_state=4)

SVC is then used to create a classifier using the training dataset.

In [None]:
from sklearn.svm import SVC
# SVC with its default hyper-parameters, fitted on the training split only.
human_detection = SVC()
human_detection.fit(X_train, y_train)

SVC()

In [None]:
# Evaluate the classifier on the held-out test split.
human_detection.score(X_test, y_test)

0.9994222992489891

### Human Detection
Here an image containing human and other objects is used to evaluate the system.

In [None]:
from skimage import io, color
import skimage
from skimage.io import imread_collection

# NOTE(review): the folder name in this pattern ends with a space
# ('...assessed_test /') — confirm this matches the folder on Drive.
load_pattern = '/content/drive/MyDrive/Colab Notebooks/street_video assessed_test /*.png'
frames=imread_collection(load_pattern)

skimage.io.imshow(frames[0])

# The first frame is the image used for detection.
test_image = frames[0]

# Single-channel copy, matching the grayscale training patches.
test_image_gray = cv2.cvtColor(test_image, cv2.COLOR_RGB2GRAY)


A sliding window is used to scan the newly imported image while creating patches of the same size as those in the training dataset.

In [None]:
def sliding_window(image, stepSize, windowSize):
    """Yield ``((x, y), patch)`` for each window position over ``image``.

    Args:
        image: array indexed as ``image[row, column]``.
        stepSize: step in pixels between consecutive windows on both axes.
        windowSize: ``(height, width)`` of the window.

    Yields:
        ``(x, y)`` — column and row of the window's top-left corner — and the
        corresponding ``height`` x ``width`` slice of ``image``. Rows are
        scanned in the outer loop. Nothing is yielded when the image is
        smaller than the window.
    """
    win_h, win_w = windowSize[0], windowSize[1]
    # +1 so the last position at which the window still fits is included.
    for y in range(0, image.shape[0] - win_h + 1, stepSize):
        for x in range(0, image.shape[1] - win_w + 1, stepSize):
            yield (x, y), image[y:y + win_h, x:x + win_w]

In [None]:
# Materialise every window: `indices` holds the (x, y) corners, `patches` the matching image crops.
indices,patches = zip(*sliding_window(test_image_gray,10,positive_patches[0].shape))

Features of the new image are extracted

In [None]:
# HOG descriptor for each window, using the same parameters as the training features.
picture_hog_2 = np.array([feature.hog(patch, orientations=9, pixels_per_cell=(8, 8),
                    cells_per_block=(3, 3)) for patch in patches])

The trained classifier is now used to label which parts of the image contain humans.

In [None]:
import matplotlib.pyplot as plt

# Classify every sliding-window patch (1 = human).
labels = human_detection.predict(picture_hog_2)

fig, ax = plt.subplots(1,1, figsize=(12,20))
# The full frame is shown, so the window corners can be used directly as plot coordinates.
ax.imshow(test_image)
ax.axis('off')

# Patch height (Ni) and width (Nj), used as the rectangle size.
Ni, Nj = positive_patches[0].shape
indices =np.array(indices)

# Top-left (x, y) corners of the windows classified as human, kept for tracking.
track = []


for i, j in indices[labels == 1]:
    #ax.add_patch(plt.Rectangle((i,j), 30, 45,  edgecolor='red',alpha=0.3, lw=2, facecolor='none'))  
    ax.add_patch(plt.Rectangle((i, j), Nj, Ni, edgecolor='red',alpha=0.3, lw=2, facecolor='none'))
    track.append([i,j])  


### Tracking

Here, human frames from the human detection image are tracked.

In the last code for Human detection, the index positions of the detected humans are saved to be used for tracking.

Indexes of detected humans used for bounding box

In [None]:
# Display the (x, y) corners of the windows classified as human.
track

CSRT tracking algorithm is used here.

>the box location will be updated later with new frame

In [None]:
tracker = cv2.TrackerCSRT_create()
# Initial bounding box as [x, y, width, height]; hard-coded here rather than read from `track`.
init_box = [210,170,64,128]

tracker.init(frames[0], init_box)

In [None]:
import numpy as np
# The first frame was used to initialise the tracker, so one fewer frame is updated.
n_frames = len(frames) - 1
# Row i holds the box found in frames[i+1]; a row stays all-zero when the tracker reports failure.
boxes = np.zeros((n_frames,4), dtype='int')

for i in range(n_frames):
  ok, box = tracker.update(frames[i+1])
  if ok:
    boxes[i] = box

In [None]:
# Draw the initial box on the first frame and show it.
# NOTE: this rebinds the notebook-level names x and y (y previously held the label array).
x,y,w,h = init_box
vis_image = cv2.rectangle(frames[0],(x,y),(x+w,y+h),(255,0,0),2)
skimage.io.imshow(vis_image)

In [None]:
video_output_file = '/content/drive/MyDrive/Colab Notebooks/test2.avi'
cc = cv2.VideoWriter_fourcc(*'XVID')
# Frame size is given as (width, height); the last argument marks colour output.
writer = cv2.VideoWriter(video_output_file, cc, 30,
                         (vis_image.shape[1], vis_image.shape[0]), True)

try:
    # The frames were loaded by skimage in RGB order while VideoWriter expects
    # BGR, hence the reversed channel axis.
    writer.write(vis_image[:, :, ::-1])  # RGB -> BGR

    for i, (x, y, w, h) in enumerate(boxes):
        # Rows left at zero mark frames where the tracker failed. Test the box
        # size rather than x so a valid box touching the left edge is kept.
        if w > 0 and h > 0:
            # boxes[i] came from tracker.update(frames[i+1]), so it belongs
            # on frame i+1, not frame i.
            vis_image = cv2.rectangle(frames[i + 1], (x, y), (x + w, y + h),
                                      (255, 0, 0), 2)
            writer.write(vis_image[:, :, ::-1])
finally:
    # Always finalise the file, even if drawing or writing raises.
    writer.release()

### Confusion Matrix

The confusion matrix for the dataset is calculated

In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix on the held-out test split: true labels first, predictions second.
cm = confusion_matrix(y_test, human_detection.predict(X_test))

In [None]:
import seaborn as sb
plt.figure(figsize=(10, 7))
# fmt='d' prints the integer counts in full; the default annotation format
# switches to scientific notation for larger counts.
sb.heatmap(cm, annot=True, fmt='d')
plt.xlabel('Predicted')
plt.ylabel('Truth')