# Find people on pictures

Now that we have trained models, we will try to find the location of people in the pictures.

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

from utility_functions import log_progress

# Metadata CSV; its "File", "Keywords" and "TIFF" columns are read further down.
CSV_PATH = "../data/raw/RAISE_6k.csv"
# Saved Keras model, loaded with keras.models.load_model.
MODEL_PATH = "../models/trained_model_pooling_2000"
# Target size given to image.load_img; unpacked as (height, width) further down.
IMAGE_SIZE = (600, 700)
# Number of "people" pictures kept and downloaded; also used to size the plot grids.
NB_IMAGES_TO_DL = 50
# False: the model's layers are inspected directly; True: they are looked up inside model.layers[1].
IS_LAYER_MODEL = False
# Value (0 or 1) standing for "people" in the model output.
# Presumably the class index used at training time; TODO confirm.
PEOPLE_PRED_VALUE = 0

## Pictures download and preprocessing

Here we will select the pictures that contain people and download some of them.

In [None]:
import requests

# Number of pictures to keep; same value as NB_IMAGES_TO_DL.
DATASET_SIZE = NB_IMAGES_TO_DL

First we extract from the dataframe:
- The file name
- The download link
- The people label

In [None]:
def _keywords_to_class(keywords):
    """Return 'people' when the keyword string contains 'people', else 'not_people'."""
    if 'people' in keywords:
        return 'people'
    return 'not_people'


def _file_to_image_name(file_name):
    """Build the local picture file name from a value of the "File" column."""
    return "image_" + file_name + ".tif"


df = pd.read_csv(CSV_PATH)
df["Class"] = df["Keywords"].map(_keywords_to_class)
df["FileName"] = df["File"].map(_file_to_image_name)
# Keep only the columns needed for downloading and labelling.
df = df[["FileName", "TIFF", "Class"]]
df.info()

Then we select the correct number of images.

In [None]:
# Mask of the rows labelled "people", then the first DATASET_SIZE of them.
people_rows = df["Class"] == "people"
df_final = df[people_rows][:DATASET_SIZE]
df_final

Now we can download the images

In [None]:
def get_file_path(file_name):
    """Return the location of a picture inside the raw pictures folder.

    The folder prefix is prepended to file_name as-is (plain string
    concatenation, no path normalisation).
    """
    pictures_dir = "../data/raw/pictures/"
    return pictures_dir + file_name

def download_images():
    """Download every picture listed in df_final that is not already on disk.

    For each row of df_final, the content found at the "TIFF" URL is saved
    under get_file_path(row["FileName"]). Files that already exist are skipped.

    Raises:
        requests.HTTPError: if the server answers with an error status, so an
            error page is never saved as a picture (it would then be skipped
            as "already downloaded" on every later run).
    """
    for _, row in log_progress(df_final.iterrows(), size=df_final.shape[0]):
        file_path = get_file_path(row["FileName"])
        if os.path.isfile(file_path):
            continue
        response = requests.get(row["TIFF"])
        response.raise_for_status()
        # Make sure the destination folder exists before writing.
        folder = os.path.dirname(file_path)
        if folder:
            os.makedirs(folder, exist_ok=True)
        # The context manager closes the file even if the write fails.
        with open(file_path, 'wb') as out_file:
            out_file.write(response.content)

download_images()

## Preprocessing

The goal of this part is to preprocess data that we will use in Keras.

In [None]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.inception_v3 import preprocess_input

We must do the same preprocessing that we did for our model (essentially resize the pictures and rescale the color values; the Inception V3 `preprocess_input` function scales them between -1 and 1).

In [None]:
# Load every picture resized to IMAGE_SIZE and stack them into one array.
# The progress bar is sized from df_final itself (as in download_images),
# because df_final may hold fewer than DATASET_SIZE rows.
images = []
for _, row in log_progress(df_final.iterrows(), size=df_final.shape[0]):
    img = image.load_img(get_file_path(row["FileName"]), target_size=IMAGE_SIZE)
    images.append(image.img_to_array(img))
preprocessed_images = preprocess_input(np.array(images))
print(preprocessed_images.shape)

## Load and test model

Now we will load the model and keep only images that are detected with people on them.

**Remark:** depending on how the model was built, the inception layers can be accessible or hidden in a layer

In [None]:
from tensorflow import keras

First we load the model

In [None]:
model = keras.models.load_model(MODEL_PATH)
# model.summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would print "None" after the table).
print("Model structure: ")
model.summary()

In [None]:
# Print a few facts about the loaded model to check how it is organised.
# Nb of layers
print("Nb of layers: ", len(model.layers), "\n")

if not IS_LAYER_MODEL:
    # The model's own layer list is inspected directly.
    # Entry layer
    print("Entry layer: ", model.layers[0], " with ", model.layers[0].input_shape, " input shape.\n")

    # Last layers
    print("Before before last layer: ", model.layers[-3], " with ", model.layers[-3].output_shape, " output shape.")
    print("Before last layer: ", model.layers[-2], " with ", model.layers[-2].output_shape, " output shape.")
    print("Last layer: ", model.layers[-1], " with ", model.layers[-1].output_shape, " output shape.")

else:
    # model.layers[1] is itself a model whose .layers are inspected below.
    # Model layers
    print("First layer: ", model.layers[0], " with ", model.layers[0].input_shape, " input shape.\n")
    # NOTE(review): the shape printed here is layers[0].output_shape, labelled as
    # the input shape of layers[1] — confirm this is intended.
    print("Second layer (Inception V3): ", model.layers[1], " with ", model.layers[0].output_shape, " input shape.\n")
    print("Third layer: ", model.layers[2], " with ", model.layers[2].input_shape, " input shape.\n\n")

    # Inside Inception v3
    print("First Inception layer: ", model.layers[1].layers[0], " with ", model.layers[1].layers[0].input_shape, " input shape.\n")
    # The two lines below print the output tensors, not the output shapes.
    print("Before last Inception layer: ", model.layers[1].layers[-2], " with ", model.layers[1].layers[-2].output, " output.\n")
    print("Last Inception layer: ", model.layers[1].layers[-1], " with ", model.layers[1].layers[-1].output, " output.\n")

Then we predict labels from the preprocessed images (all with people).

In [None]:
# Run the classifier on all preprocessed pictures, 10 at a time.
predictions = model.predict(preprocessed_images, batch_size=10, verbose=1)
predictions

In [None]:
# 1 where the rounded model output equals PEOPLE_PRED_VALUE (picture classified
# as "people"), 0 otherwise. Every picture here contains people, so the mean of
# these flags is the accuracy.
# The flags go in their own variable: overwriting `predictions` made the cell
# non-idempotent (running it twice flipped the flags and the reported value).
people_detected = np.where(predictions > 0.5, PEOPLE_PRED_VALUE, 1-PEOPLE_PRED_VALUE)
print("Accuracy: {}".format(people_detected[:, 0].mean()))

## Build model to find the position of the people

Now we will try to find the position of the people by extracting the final dense layer weights, deleting the average pooling and the final dense layer, and adding a 1*1 convolution layer with these weights.

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Conv2D

First we extract the weights from the final layers.

In [None]:
# The last layer of the classifier is taken as the dense layer to convert.
# get_weights() returns its weight arrays, used below as [0] and [1]
# (presumably kernel and bias — TODO confirm).
dense_layer = model.layers[-1]
dense_layer_weights = dense_layer.get_weights()
dense_layer_weights[0].shape

Then we construct the new model:
- We remove the last two layers (pooling and dense)
- We add a 2D 1*1 convolution layer with the extracted weights

In [None]:
if IS_LAYER_MODEL:
    # Fail explicitly: only printing a message here left prediction_outputs
    # undefined and the cell crashed with a NameError on the Model(...) line.
    raise NotImplementedError("Not implemented!")

# Turn the dense weights into a 1*1 convolution kernel by adding two leading
# axes of size 1; the second weight array is reused unchanged.
formatted_weights = (np.array([[dense_layer_weights[0]]]), dense_layer_weights[1])
# The convolution is plugged on the output of the third layer from the end,
# which skips the last two layers of the original model.
prediction_outputs = Conv2D(1, (1,1), activation='sigmoid', weights=formatted_weights, name='conv_predictor')(model.layers[-3].output)
prediction_model = Model(inputs=model.inputs, outputs=prediction_outputs)
prediction_model.compile(optimizer='adam', loss='binary_crossentropy')

# summary() prints the table itself and returns None, so it is not wrapped in print().
prediction_model.summary()

In [None]:
# Same checks as for the classifier, now on the convolutional prediction model.
# Nb of layers
print("Nb of layers: ", len(prediction_model.layers), "\n")

# Entry layer
print("Entry layer: ", prediction_model.layers[0], " with ", prediction_model.layers[0].input_shape, " input shape.\n")

# Last layers
# The last layer is the 'conv_predictor' convolution added above the kept layers.
print("Before last layer: ", prediction_model.layers[-2], " with ", prediction_model.layers[-2].output_shape, " output shape.")
print("Last layer: ", prediction_model.layers[-1], " with ", prediction_model.layers[-1].output_shape, " output shape.")

## Testing step

Now let's try this model.

In [None]:
import matplotlib.pyplot as plt
import skimage.transform as st
from math import ceil

# Position (in preprocessed_images / df_final) of the picture used for the single-image test.
IMG_IND = 20

In [None]:
# Wrap the chosen picture in a batch of one and run the convolutional model.
first_picture = np.array([preprocessed_images[IMG_IND]])
score_map = prediction_model.predict(first_picture)[0]
# Keep only the first two axes of the single result to get a 2D score map.
map_height, map_width = score_map.shape[0], score_map.shape[1]
prediction_new_model = score_map.reshape(map_height, map_width)
print(prediction_new_model.shape)
#prediction_new_model = np.where(prediction_new_model > 0.2, 1, 0)

In [None]:
# Print the raw score map, then display the matching picture resized to IMAGE_SIZE.
print(prediction_new_model)
img = image.load_img(get_file_path(df_final.iloc[IMG_IND]["FileName"]), target_size=IMAGE_SIZE)
plt.imshow(np.asarray(img))

In [None]:
def get_img_colored(img, predictions):
    """Blend a picture with a red/green rendering of its prediction map.

    Each prediction value x becomes the colour x * high + (1 - x) * low, where
    (high, low) is (green, red) when PEOPLE_PRED_VALUE == 0 and (red, green)
    otherwise. The colour map is resized to IMAGE_SIZE and averaged 50/50 with
    the picture.

    Args:
        img: picture convertible with np.asarray; it has to be compatible with
            an array of shape IMAGE_SIZE + (3,).
        predictions: 2D array of prediction values.

    Returns:
        The blended picture as a uint8 array.
    """
    green = np.array([0, 255, 0])
    red = np.array([255, 0, 0])
    if PEOPLE_PRED_VALUE == 0:
        high_colour, low_colour = green, red
    else:
        high_colour, low_colour = red, green

    # Broadcast every score against the two RGB colours in a single step.
    scores = np.asarray(predictions)[..., np.newaxis]
    colour_map = scores * high_colour + (1 - scores) * low_colour
    # The resized map is multiplied by 255 to bring it back to the 0-255 range.
    colour_map_resized = 255 * st.resize(colour_map.astype('uint8'), IMAGE_SIZE)

    blended = 0.5 * np.asarray(img) + 0.5 * colour_map_resized
    return blended.astype('uint8')

plt.imshow(get_img_colored(img, prediction_new_model))

Now we can see the results for all the images.

In [None]:
# Score maps of the convolutional model for all preprocessed pictures, 10 at a time.
predictions_new_model = prediction_model.predict(preprocessed_images, batch_size=10, verbose=1)

In [None]:
NB_IMAGES_PER_ROW = 4
nb_rows = ceil(NB_IMAGES_TO_DL/NB_IMAGES_PER_ROW)
# squeeze=False keeps `axes` two-dimensional even when the grid has a single
# row, so the [row, column] indexing below always works.
fig, axes = plt.subplots(nrows=nb_rows, ncols=NB_IMAGES_PER_ROW, figsize=(NB_IMAGES_PER_ROW*10, nb_rows*10), squeeze=False)
for i, pred in enumerate(predictions_new_model):
    #formatted_prediction = np.where(pred > 0.5, PEOPLE_PRED_VALUE, 1-PEOPLE_PRED_VALUE)
    # Keep only the first two axes to get a 2D score map.
    formatted_prediction = pred.reshape(pred.shape[0], pred.shape[1])
    img = image.load_img(get_file_path(df_final.iloc[i]["FileName"]), target_size=IMAGE_SIZE)
    colored_image = get_img_colored(img, formatted_prediction)
    # Pictures fill the grid row by row.
    axes[i//NB_IMAGES_PER_ROW, i%NB_IMAGES_PER_ROW].imshow(colored_image)

We can see here what parts of the picture are detected as people.

## Visualization

Now we will try to improve the visualizations of the results.

In [None]:
import matplotlib.pyplot as plt
import skimage.transform as st
from math import ceil

# Position of the picture used for the visualization tests.
IMG_IND = 10
# Score above which threshold_pred sets a cell of the prediction map to 1.
THRESHOLD = 0.5
# Weight of the green overlay when get_img_visualization blends it with the picture.
PROP_COLOR = 0.5

In [None]:
# Score map of the test picture, reduced to its first two axes.
raw_test_pred = predictions_new_model[IMG_IND]
test_pred = raw_test_pred.reshape(raw_test_pred.shape[:2])
# Matching picture, resized to IMAGE_SIZE.
test_file_name = df_final.iloc[IMG_IND]["FileName"]
test_img = image.load_img(get_file_path(test_file_name), target_size=IMAGE_SIZE)
print(test_pred.shape)
test_pred

In [None]:
def threshold_pred(predictions):
    """Binarize a prediction map against THRESHOLD.

    When PEOPLE_PRED_VALUE == 1 the scores are flipped (1 - score) before the
    comparison; otherwise they are compared as they are.

    NOTE(review): as written, a cell is set to 1 when its raw score is above
    THRESHOLD for PEOPLE_PRED_VALUE == 0, and below 1 - THRESHOLD for
    PEOPLE_PRED_VALUE == 1 — confirm this is the intended polarity for "people".

    Returns:
        An integer array holding 1 where the (possibly flipped) score is
        strictly greater than THRESHOLD and 0 elsewhere.
    """
    scores = 1-predictions if PEOPLE_PRED_VALUE == 1 else predictions
    above_threshold = scores > THRESHOLD
    return np.where(above_threshold, 1, 0)

thresholded_test_predictions = threshold_pred(test_pred)
thresholded_test_predictions

In [None]:
def get_img_visualization(img, predictions):
    """Overlay green on the parts of a picture whose prediction cell equals 1.

    The picture is split into a grid with as many rows and columns as
    predictions; every grid cell whose prediction is 1 is blended with pure
    green, the green having a weight of PROP_COLOR.

    Args:
        img: picture exposing a .size attribute read as (width, height) and
            convertible with np.asarray (assumed to be a 3-channel image).
        predictions: 2D array of 0/1 values.

    Returns:
        A uint8 copy of the picture with the overlay applied.
    """
    img_array = np.asarray(img).copy()
    width_img, height_img = img.size
    height_pred, width_pred = predictions.shape
    i_step = height_img/height_pred
    j_step = width_img/width_pred
    # Loop-invariant weighted green pixel, broadcast over each cell below
    # instead of building a full green patch out of nested lists per cell.
    green_part = PROP_COLOR * np.array([0, 255, 0])
    for i in range(height_pred):
        for j in range(width_pred):
            if predictions[i, j] != 1:
                continue
            # Pixel rows/columns covered by prediction cell (i, j).
            rows = slice(int(i*i_step), int((i+1)*i_step))
            cols = slice(int(j*j_step), int((j+1)*j_step))
            img_array[rows, cols] = (1-PROP_COLOR) * img_array[rows, cols] + green_part
    return img_array.astype('uint8')

img_pred = get_img_visualization(test_img, thresholded_test_predictions)
plt.imshow(img_pred)

In [None]:
def get_thresholded_visualization(img, predictions):
    """Threshold a raw prediction map and overlay the result on the picture.

    The map is first reshaped to its first two axes, then binarized with
    threshold_pred and drawn with get_img_visualization.
    """
    height, width = predictions.shape[0], predictions.shape[1]
    binary_map = threshold_pred(predictions.reshape(height, width))
    return get_img_visualization(img, binary_map)

In [None]:
NB_IMAGES_PER_ROW = 4
nb_rows = ceil(NB_IMAGES_TO_DL/NB_IMAGES_PER_ROW)
# squeeze=False keeps `axes` two-dimensional even when the grid has a single
# row, so the [row, column] indexing below always works.
fig, axes = plt.subplots(nrows=nb_rows, ncols=NB_IMAGES_PER_ROW, figsize=(NB_IMAGES_PER_ROW*10, nb_rows*10), squeeze=False)
for i, pred in enumerate(predictions_new_model):
    img = image.load_img(get_file_path(df_final.iloc[i]["FileName"]), target_size=IMAGE_SIZE)
    visualization = get_thresholded_visualization(img, pred)
    # Pictures fill the grid row by row.
    axes[i//NB_IMAGES_PER_ROW, i%NB_IMAGES_PER_ROW].imshow(visualization)

For now, this model doesn't work everywhere. We still need to train a better model.

## People contour

Now we need to draw shapes where people are.

### Simple algorithm

The goal of this algorithm is simply to draw the contour of the positive predictions, without trying to draw boxes.

In [None]:
def get_img_contours_from_threshold(predictions, final_shape, thickness=3):
    """Draw the outline of the positive cells of a binary prediction map.

    The output picture is split into a grid with as many rows and columns as
    predictions. For every cell equal to 1, a band of `thickness` pixels is
    drawn along each side that touches the border of the map or a cell equal
    to 0. Bands are clipped to the picture, so a thickness larger than the
    space left to the picture edge no longer raises.

    Args:
        predictions: 2D array of 0/1 values.
        final_shape: (height, width) of the picture to produce.
        thickness: width in pixels of the drawn bands.

    Returns:
        A uint8 array of shape (height, width, 4): [255, 0, 0, 255] on the
        outline and [0, 0, 0, 0] everywhere else.
    """
    height_img, width_img = final_shape
    height_pred, width_pred = predictions.shape
    i_step = height_img/height_pred
    j_step = width_img/width_pred
    # Allocate the canvas directly instead of building it pixel by pixel from
    # nested Python lists.
    contour_array = np.zeros((height_img, width_img, 4), dtype='uint8')
    # Broadcast over each band, whatever size the slice ends up with.
    contour_color = np.array([255, 0, 0, 255], dtype='uint8')
    for i in range(height_pred):
        for j in range(width_pred):
            if predictions[i, j] != 1:
                continue
            i_0 = int(i*i_step)
            i_1 = int((i+1)*i_step)
            j_0 = int(j*j_step)
            j_1 = int((j+1)*j_step)
            # Top side.
            if i == 0 or predictions[i-1, j] == 0:
                contour_array[i_0:i_0+thickness, j_0:j_1] = contour_color
            # Bottom side; max() stops the start index from going negative,
            # which would otherwise be read as counting from the end.
            if i == height_pred - 1 or predictions[i+1, j] == 0:
                contour_array[max(i_1-thickness, 0):i_1, j_0:j_1] = contour_color
            # Left side.
            if j == 0 or predictions[i, j-1] == 0:
                contour_array[i_0:i_1, j_0:j_0+thickness] = contour_color
            # Right side.
            if j == width_pred - 1 or predictions[i, j+1] == 0:
                contour_array[i_0:i_1, max(j_1-thickness, 0):j_1] = contour_color
    return contour_array

In [None]:
# Small hand-made binary map used to check the contour drawing by eye.
test_rows = [
    [1, 1, 1, 0, 0, 0],
    [1, 1, 1, 0, 0, 1],
    [1, 1, 1, 0, 0, 0],
    [0, 0, 0, 0, 1, 1],
    [0, 0, 0, 1, 1, 1],
]
test_array = np.array(test_rows)
final_dim = (500, 600)
test_contours = get_img_contours_from_threshold(test_array, final_shape=final_dim)
plt.imshow(test_contours)
test_array

In [None]:
# Contours of the thresholded test prediction, at picture size.
test_contours = get_img_contours_from_threshold(thresholded_test_predictions, IMAGE_SIZE)
# Both images are drawn one after the other; the contour array is 0 in all
# four channels outside the outline (presumably shown as transparent — confirm).
plt.imshow(img_pred)
plt.imshow(test_contours)
thresholded_test_predictions

In [None]:
def get_img_contour(predictions, image_shape):
    """Return the contour picture of a raw prediction map.

    The map is reshaped to its first two axes, binarized with threshold_pred
    and passed to get_img_contours_from_threshold together with image_shape.
    """
    height, width = predictions.shape[0], predictions.shape[1]
    binary_map = threshold_pred(predictions.reshape(height, width))
    return get_img_contours_from_threshold(binary_map, image_shape)

In [None]:
NB_IMAGES_PER_ROW = 4
nb_rows = ceil(NB_IMAGES_TO_DL/NB_IMAGES_PER_ROW)
# squeeze=False keeps `axes` two-dimensional even when the grid has a single
# row, so the [row, column] indexing below always works.
fig, axes = plt.subplots(nrows=nb_rows, ncols=NB_IMAGES_PER_ROW, figsize=(NB_IMAGES_PER_ROW*10, nb_rows*10), squeeze=False)
for i, pred in enumerate(predictions_new_model):
    img = image.load_img(get_file_path(df_final.iloc[i]["FileName"]), target_size=IMAGE_SIZE)
    contours = get_img_contour(pred, IMAGE_SIZE)
    # Draw the picture first, then its contours on the same axes.
    current_axes = axes[i//NB_IMAGES_PER_ROW, i%NB_IMAGES_PER_ROW]
    current_axes.imshow(img)
    current_axes.imshow(contours)

For now this isn't very good. We will have to try with a well trained model.

### With OpenCV

In [None]:
import cv2

In [None]:
def get_prediction_image(predictions):
    """Upscale a 2D prediction map to an array of shape IMAGE_SIZE.

    The output is split into a grid with as many rows and columns as
    predictions, and every grid cell is filled with the matching value.

    Returns:
        A float array of shape IMAGE_SIZE.
    """
    height_img, width_img = IMAGE_SIZE
    height_pred, width_pred = predictions.shape
    i_step = height_img/height_pred
    j_step = width_img/width_pred
    # Pixel boundaries of the grid rows and columns.
    row_edges = [int(i*i_step) for i in range(height_pred + 1)]
    col_edges = [int(j*j_step) for j in range(width_pred + 1)]
    upscaled = np.zeros(IMAGE_SIZE)
    for i in range(height_pred):
        for j in range(width_pred):
            upscaled[row_edges[i]:row_edges[i+1], col_edges[j]:col_edges[j+1]] = predictions[i, j]
    return upscaled

# Reshape from the prediction's own shape rather than from a test_pred left
# over from an earlier cell, so this cell does not depend on that state.
test_pred = predictions_new_model[IMG_IND]
test_pred = test_pred.reshape(test_pred.shape[0], test_pred.shape[1])
pred_image = get_prediction_image(test_pred)
print(pred_image)
plt.imshow(pred_image, cmap='gray')

In [None]:
# Same rendering as above, on the binarized (0/1) prediction map.
th_test_pred = threshold_pred(test_pred)
th_pred_image = get_prediction_image(th_test_pred)
print(th_pred_image)
plt.imshow(th_pred_image, cmap='gray')

In [None]:
# findContours needs an 8-bit single-channel image, while get_prediction_image
# returns a float array, hence the cast. The former magic numbers 1 and 2 are
# the RETR_LIST mode and the CHAIN_APPROX_SIMPLE method.
cv2.findContours(th_pred_image.astype(np.uint8), cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)