# Histogram Model (1D) for Image Objects
----
Sergei Papulin (papulin.study@yandex.ru)

## Contents

- Loading Dataset
    - Image and Annotation
    - Image Objects
- Defining Positional Elements
    - Basic Elements
    - Position Mask
    - High-Level Elements
- Defining Object Elements
    - Basic Elements
    - Object Mask
    - High-Level Elements
- Creating Histogram
- Querying
    - Set Operations
    - Logical Operations
- Image Retrieval
- References

#### Load packages

Install the following packages if needed:
`pip install Cython pycocotools scikit-image`

In [None]:
import numpy as np
from pycocotools.coco import COCO

import matplotlib.pyplot as plt
from matplotlib.collections import PatchCollection
from matplotlib.patches import Polygon, Rectangle

import skimage.io as io
import skimage.draw as draw

%matplotlib inline

In [None]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.insert(0, "../../")

from lshist.histogram import operations, Histogram1D, HElement
from lshist.executor import Parser, Evaluator
from lshist.utils import E

## Loading Dataset

Download the dataset from the COCO [website](http://cocodataset.org):
- [images](http://images.cocodataset.org/zips/val2017.zip)
- [annotations](http://images.cocodataset.org/annotations/annotations_trainval2017.zip)

### Image and Annotation

In [None]:
ANNOT_FILE_PATH = "datasets/annotations/instances_val2017.json"
IMAGE_PATH = "datasets/val2017"
IMAGE_ID = 404484

In [None]:
coco = COCO(ANNOT_FILE_PATH)

In [None]:
img_meta = coco.loadImgs(ids=[IMAGE_ID])[0]
img_meta

In [None]:
I = io.imread("{}/{}".format(IMAGE_PATH, img_meta["file_name"]))
plt.imshow(I)
plt.show()

### Image Objects

In [None]:
cats = coco.loadCats(coco.getCatIds())
len_cats = len(cats)
print("Single category:\n{}\n".format(cats[0]))
print("All categories:\n{}\n".format(" ".join([cat["name"] for cat in cats])))
print("Total number of categories: {}".format(len_cats))

In [None]:
img_anns_id = coco.getAnnIds(imgIds=IMAGE_ID, iscrowd=None)
img_anns = coco.loadAnns(img_anns_id)
print(img_anns[0])

In [None]:
# Build one matplotlib patch per segmentation polygon of the image objects.
seg_polys = []
poly_colors = []

for ann in img_anns:
    segmentation = ann["segmentation"]
    # Crowd annotations store their mask as an RLE dict rather than a list of
    # polygons, so they cannot be turned into Polygon patches.
    if not isinstance(segmentation, list):
        continue
    # One random light colour per object, shared by all of its polygon parts.
    color = (np.random.random(3) * 0.5 + 0.5).tolist()
    for flat_xy in segmentation:
        # Each polygon is a flat [x1, y1, x2, y2, ...] list.
        seg_polys.append(Polygon(np.array(flat_xy).reshape((-1, 2)), fill=False))
        poly_colors.append(color)

In [None]:
fig, ax = plt.subplots(1) #, figsize=(15,15))

ax.imshow(I)
p_objs = PatchCollection(seg_polys, facecolor=poly_colors, edgecolor=poly_colors, alpha=0.6, linewidths=2)
ax.add_collection(p_objs)
plt.show()

## Defining Positional Elements

### Basic Elements

In [None]:
def generate_positional_grid_1d(num_x, num_y):
    """Build a row-major grid of positional elements in relative coordinates.

    Args:
        num_x: Number of grid columns.
        num_y: Number of grid rows.

    Returns:
        A list of dicts, one per cell, with an ``"id"`` of the form
        ``"e<k>"`` (numbered from 1, left to right and then top to bottom)
        and a ``"pos"`` tuple ``(x, y, width, height)`` expressed as
        fractions of the image size.
    """
    # The divisions stay inside the comprehension so that an empty grid
    # (either dimension equal to 0) yields [] instead of dividing by zero.
    return [
        {
            "id": "e{}".format(row * num_x + col + 1),
            "pos": (col / num_x, row / num_y, 1 / num_x, 1 / num_y),
        }
        for row in range(num_y)
        for col in range(num_x)
    ]


def get_positional_grid_1d(width, height, elements):
    """Scale relative grid cells to absolute coordinates of an image.

    Args:
        width: Image width used to scale the x values.
        height: Image height used to scale the y values.
        elements: Iterable of dicts with an ``"id"`` and a relative
            ``"pos"`` of the form ``(x, y, width, height)``.

    Returns:
        A list of dicts with the same ``"id"`` and an absolute ``"pos"`` of
        the form ``(x_start, y_start, x_end, y_end)``.
    """

    def to_absolute(rel):
        # Corner first, then corner + scaled extent.
        left = rel[0] * width
        top = rel[1] * height
        return (left, top, left + rel[2] * width, top + rel[3] * height)

    return [{"id": el["id"], "pos": to_absolute(el["pos"])} for el in elements]

In [None]:
GRID_X_SPLITS = 5
GRID_Y_SPLITS = 5

grid = generate_positional_grid_1d(GRID_X_SPLITS, GRID_Y_SPLITS)
grid[:2]

In [None]:
position_elements = get_positional_grid_1d(img_meta["width"], img_meta["height"], grid)
position_elements[:5]

In [None]:
position_converter = {el["id"]: el["pos"] for el in position_elements}

In [None]:
Up = {el["id"] for el in position_elements}

Show the positional elements on top of the initial image:

In [None]:
def show_positional_grid(I, elements, position_converter):
    """Draw the outline and id of each positional element over an image.

    Args:
        I: Image passed to ``Axes.imshow``.
        elements: Iterable of positional element ids to draw.
        position_converter: Mapping from an element id to its absolute
            ``(x_start, y_start, x_end, y_end)`` rectangle.
    """
    _, ax = plt.subplots(1)  # , figsize=(15,15))
    ax.imshow(I)
    for el in elements:
        pos = position_converter[el]
        x_start, y_start = pos[0], pos[1]
        width, height = pos[2] - x_start, pos[3] - y_start
        ax.add_patch(Rectangle(xy=(x_start, y_start), width=width, height=height, fill=False,
                               label=el, edgecolor="red", linewidth=2))
        # Put the element id at the centre of its rectangle.
        ax.text(x_start + 0.5 * width, y_start + 0.5 * height, el,
                horizontalalignment="center", verticalalignment="center", fontsize=15, color="red")
    plt.show()

In [None]:
show_positional_grid(I, Up, position_converter)

### Position Mask

In [None]:
def create_position_mask(width, height, position_elements):
    """Label every pixel with the id of the positional element covering it.

    Args:
        width: Mask width in pixels.
        height: Mask height in pixels.
        position_elements: Iterable of dicts with an ``"id"`` and an
            absolute ``"pos"`` of the form
            ``(x_start, y_start, x_end, y_end)``.

    Returns:
        An object array of shape ``(height, width)`` holding element ids.
        Pixels not covered by any element keep the initial value ``0``.
        Where rectangles overlap, the element listed later wins.
    """
    # Builtin `object` replaces the `np.object` alias removed in NumPy 1.24.
    pos_mask = np.zeros((height, width), dtype=object)
    for element in position_elements:
        bounds = element["pos"]
        x_lo, x_hi = sorted((int(bounds[0]), int(bounds[2])))
        y_lo, y_hi = sorted((int(bounds[1]), int(bounds[3])))
        # The end corner is inclusive, hence the +1. Slices clip at the far
        # edge of the mask by themselves; clamping at 0 stops negative
        # coordinates from wrapping around to the opposite side.
        rows = slice(max(y_lo, 0), max(y_hi + 1, 0))
        cols = slice(max(x_lo, 0), max(x_hi + 1, 0))
        pos_mask[rows, cols] = element["id"]
    return pos_mask

In [None]:
pos_mask = create_position_mask(img_meta["width"], img_meta["height"], position_elements)
pos_mask

In [None]:
def _position_id_to_int(element_id):
    # Positional ids look like "e<k>"; drop the "e" characters and keep the number.
    return int(element_id.strip("e"))


# Element-wise converter used to display the position mask as an integer image.
convpos2int = np.vectorize(_position_id_to_int)
plt.imshow(convpos2int(pos_mask))
plt.show()

### High-Level Elements

In [None]:
parser = Parser()

In [None]:
Ep_center = E("e7+e8+e9+e12+e13+e14+e17+e18+e19")
Ep_center_set = parser.parse_set(Ep_center.value)
Ep_center_set

In [None]:
# Definition of high-level positional elements

Ep_top = E("e1+e2+e3+e4+e5+e6+e7+e8+e9+e10")
Ep_bottom = E("e16+e17+e18+e19+e20+e21+e22+e23+e24+e25")
Ep_left = E("e1+e2+e6+e7+e11+e12+e16+e17+e21+e22")
Ep_right = E("e4+e5+e9+e10+e14+e15+e19+e20+e24+e25")
Ep_center = E("e7+e8+e9+e12+e13+e14+e17+e18+e19")

Eps = [("top", Ep_top), ("bottom", Ep_bottom), ("left", Ep_left), ("right", Ep_right), ("center", Ep_center)]


# Sets of high-level positional elements (they will be used for the Evaluator below)

Eps_set = { name: parser.parse_set(Ep.value) for name, Ep in Eps}
Eps_set["center"]

Show a grid of the high-level element along with the initial image:

In [None]:
show_positional_grid(I, Eps_set["center"], position_converter)

Show the high-level element based on the position mask:

In [None]:
def show_positional_elements(I, pos_mask, elements):
    """Highlight the image pixels that belong to the given positional elements.

    Args:
        I: Image array; its first two dimensions are (rows, columns).
        pos_mask: Array of positional element ids covering at least the
            image area.
        elements: Collection of positional element ids to highlight.
    """
    height, width = I.shape[0], I.shape[1]
    region = pos_mask[:height, :width]

    # Boolean map of the pixels whose positional id is one of `elements`.
    selected = np.zeros((height, width), dtype=bool)
    for el in elements:
        selected |= region == el

    # Builtin `int` replaces the `np.int` alias removed in NumPy 1.24.
    mask = np.zeros((height, width, 3), dtype=int)
    # atleast_3d lets a single-channel image broadcast over the 3 channels.
    mask[selected] = np.atleast_3d(I)[selected]

    _, ax = plt.subplots(1)  # , figsize=(15,15))
    ax.imshow(I)
    ax.imshow(mask, alpha=0.5)
    plt.show()

In [None]:
show_positional_elements(I, pos_mask, Eps_set["center"])

## Defining Object Elements

### Basic Elements

In [None]:
cats[:2]

In [None]:
Uo = {str(cat["id"]) for cat in cats}

### Object Mask for Image

In [None]:
def create_object_mask(width, height, img_anns):
    """Label every pixel with the category id of the object covering it.

    Args:
        width: Mask width in pixels.
        height: Mask height in pixels.
        img_anns: Iterable of annotation dicts with the keys ``"iscrowd"``,
            ``"segmentation"`` and ``"category_id"``.

    Returns:
        An object array of shape ``(height, width)`` holding the category id
        as a string, or ``"null"`` where no object was drawn. Where objects
        overlap, the annotation listed later wins.
    """
    # Builtin `object` replaces the `np.object` alias removed in NumPy 1.24.
    obj_mask = np.full((height, width), fill_value="null", dtype=object)
    for ann in img_anns:
        # Only non-crowd annotations are rasterised here.
        if ann["iscrowd"] != 0:
            continue
        label = str(ann["category_id"])
        # An object may consist of several polygons (e.g. when it is partly
        # occluded); rasterise all of them, not only the first.
        for flat_xy in ann["segmentation"]:
            # Each polygon is a flat [x1, y1, x2, y2, ...] list.
            polygon = np.array(flat_xy).reshape((-1, 2))
            # `shape` clips the coordinates to the mask, so polygons touching
            # the image border cannot index out of bounds.
            rows, cols = draw.polygon(polygon[:, 1], polygon[:, 0], shape=obj_mask.shape)
            obj_mask[rows, cols] = label
    return obj_mask

In [None]:
obj_mask = create_object_mask(img_meta["width"], img_meta["height"], img_anns)
obj_mask

In [None]:
# Map object-mask labels to integers for display: the background label "null"
# becomes 0 and a category-id string becomes its integer value.
# (str.strip("null") removes the characters n/u/l rather than the word, so the
# background label is compared explicitly instead.)
convobj2int = np.vectorize(lambda label: 0 if label == "null" else int(label))
plt.imshow(convobj2int(obj_mask))
plt.show()

### High-Level Elements

In [None]:
catid_by_name = {cat["name"]: cat["id"] for cat in cats}
catid_by_name["person"]

In [None]:
catname_by_id = {cat["id"]: cat["name"] for cat in cats}
catname_by_id[1]

In [None]:
Eo_person = E(str(catid_by_name["person"]))
Eo_person_set = parser.parse_set(Eo_person.value)
Eo_person_set

In [None]:
Eos_set = {cat["name"]: parser.parse_set(E(str(cat["id"])).value) for cat in cats}
Eos_set["person"]

In [None]:
Eo_pet = E("{}+{}".format(catid_by_name["dog"], catid_by_name["cat"])) 
Eo_pet_set = parser.parse_set(Eo_pet.value)
Eo_pet_set

In [None]:
Eos_set.update({"pet": Eo_pet_set})
Eos_set["pet"]

Show a high-level element along with the initial image:

In [None]:
def show_object_segment(I, elements, image_objects):
    """Draw the segmentation polygons of selected object categories on an image.

    Args:
        I: Image passed to ``Axes.imshow``.
        elements: Collection of category ids (as strings) to draw.
        image_objects: Iterable of annotation dicts with the keys
            ``"category_id"`` and ``"segmentation"``.
    """
    seg_polys = []
    poly_colors = []
    for ann in image_objects:
        if str(ann["category_id"]) not in elements:
            continue
        segmentation = ann["segmentation"]
        # Crowd annotations store their mask as an RLE dict rather than a list
        # of polygons, so they cannot be turned into Polygon patches.
        if not isinstance(segmentation, list):
            continue
        # One random light colour per object, shared by all of its parts.
        color = (np.random.random(3) * 0.5 + 0.5).tolist()
        for flat_xy in segmentation:
            # Each polygon is a flat [x1, y1, x2, y2, ...] list.
            seg_polys.append(Polygon(np.array(flat_xy).reshape((-1, 2)), fill=False))
            poly_colors.append(color)

    _, ax = plt.subplots(1)  # , figsize=(15,15))
    ax.imshow(I)
    p_objs = PatchCollection(seg_polys, facecolor=poly_colors, edgecolor=poly_colors, alpha=0.6, linewidths=2)
    ax.add_collection(p_objs)
    plt.show()

In [None]:
img_anns_id = coco.getAnnIds(imgIds=IMAGE_ID, iscrowd=None)
img_anns = coco.loadAnns(img_anns_id)

In [None]:
show_object_segment(I, Eos_set["pet"], img_anns)

Show the high-level element based on the object mask:

In [None]:
def show_object_elements(I, obj_mask, elements):
    """Overlay a random colour on the pixels of the given object elements.

    Args:
        I: Image array; its first two dimensions are (rows, columns).
        obj_mask: Array of object labels covering at least the image area.
        elements: Collection of object labels to highlight.
    """
    height, width = I.shape[0], I.shape[1]
    region = obj_mask[:height, :width]

    # Builtin `int` replaces the `np.int` alias removed in NumPy 1.24.
    mask = np.zeros((height, width, 3), dtype=int)
    colors = {el: np.random.randint(0, 255, 3) for el in elements}
    # Paint one label at a time with a boolean mask instead of visiting
    # every pixel in a Python loop.
    for el, color in colors.items():
        mask[region == el] = color

    _, ax = plt.subplots(1)  # , figsize=(15,15))
    ax.imshow(I)
    ax.imshow(mask, alpha=0.5)
    plt.show()

In [None]:
show_object_elements(I, obj_mask, Eos_set["pet"])

## Creating Histogram

In [None]:
def create_histogram(width, height, pos_mask, obj_mask):
    """Count (position, object) pixel pairs and return them as a normalized histogram.

    Args:
        width: Number of mask columns to scan.
        height: Number of mask rows to scan.
        pos_mask: Array of positional element ids, indexed ``[y, x]``.
        obj_mask: Array of object labels, indexed ``[y, x]``; the label
            ``"null"`` marks pixels without an object and is skipped.

    Returns:
        A ``Histogram1D`` keyed by ``(position id, object label)`` tuples,
        after ``normalize(width * height)`` has been applied.

    Raises:
        IndexError: If a mask does not cover ``height`` x ``width`` pixels.
    """
    pos_region = pos_mask[:height, :width]
    obj_region = obj_mask[:height, :width]
    if pos_region.shape != (height, width) or obj_region.shape != (height, width):
        raise IndexError("masks must cover at least height x width pixels")

    # Count the pairs in a plain dict first so the histogram container is
    # touched once per distinct element instead of once per pixel.
    # Column-major order keeps the x-outer / y-inner scan, so elements are
    # inserted in the same order as a nested loop over x and then y.
    counts = {}
    pairs = zip(pos_region.ravel(order="F"), obj_region.ravel(order="F"))
    for pos_label, obj_label in pairs:
        if obj_label != "null":  # if obj_mask[y, x] > 0:
            el_id = (pos_label, obj_label)
            counts[el_id] = counts.get(el_id, 0) + 1

    hist = Histogram1D(data=None)
    for el_id, count in counts.items():
        hist[el_id] = HElement(el_id, 0)
        hist[el_id].value += count
    hist.normalize(width * height)
    return hist

In [None]:
hist = create_histogram(img_meta["width"], img_meta["height"], pos_mask, obj_mask)
hist.to_dict()

## Querying

In [None]:
high_level_elements = {
    0: Eps_set, # positions
    1: Eos_set  # objects
}

In [None]:
evaluator = Evaluator(operations, hist, high_level_elements=high_level_elements)

In [None]:
POS1 = "center"
OBJ1 = "person"

POS2 = "left"
OBJ2 = "dog"

In [None]:
E1 = E(POS1, OBJ1)
E2 = E(POS2, OBJ2)

In [None]:
# Parse E1, evaluate it against the image histogram and report the result.
E1_expr = parser.parse_string(E1.value)
HE1 = evaluator.eval(E1_expr)
print("Expression for E1:\n{}".format(E1.value))
print("\nThe parsed expression for E1 in the postfix notation:\n{}".format(E1_expr))
print("\nHistogram of E1 given the image:\n{}".format(HE1.to_dict()))
print("\nValue of presence for E1:\n{}".format(HE1.sum()))

In [None]:
def show_elements(I, pos_mask, obj_mask, pos_elements, obj_elements, title=None):
    """Highlight the chosen positions and colour the chosen objects inside them.

    Pixels whose positional id is in ``pos_elements`` show the image itself;
    those of them whose object label is in ``obj_elements`` get a random
    colour per object label instead.

    Args:
        I: Image array; its first two dimensions are (rows, columns).
        pos_mask: Array of positional element ids covering the image area.
        obj_mask: Array of object labels covering the image area.
        pos_elements: Collection of positional element ids to highlight.
        obj_elements: Collection of object labels to colour.
        title: Optional figure title.
    """
    height, width = I.shape[0], I.shape[1]
    pos_region = pos_mask[:height, :width]
    obj_region = obj_mask[:height, :width]

    # Builtin `int` replaces the `np.int` alias removed in NumPy 1.24.
    mask = np.zeros((height, width, 3), dtype=int)
    colors = {el: np.random.randint(0, 255, 3) for el in obj_elements}

    # Boolean map of the pixels that lie in one of the requested positions.
    in_position = np.zeros((height, width), dtype=bool)
    for el in pos_elements:
        in_position |= pos_region == el

    # atleast_3d lets a single-channel image broadcast over the 3 channels.
    mask[in_position] = np.atleast_3d(I)[in_position]
    # Objects are coloured only where they fall inside a requested position.
    for el, color in colors.items():
        mask[in_position & (obj_region == el)] = color

    fig, ax = plt.subplots(1)
    if title:
        fig.suptitle(title)
    ax.imshow(I)
    ax.imshow(mask, alpha=0.5)
    plt.show()

    
def show_elements_by_HE(I, pos_mask, obj_mask, HE, title=None):
    """Visualise the elements of an evaluated histogram on top of an image.

    Pixels in any position that occurs in ``HE`` show the image itself;
    pixels whose ``(position id, object label)`` pair is a key of ``HE`` get
    a random colour per object label instead.

    Args:
        I: Image array; its first two dimensions are (rows, columns).
        pos_mask: Array of positional element ids covering the image area.
        obj_mask: Array of object labels covering the image area.
        HE: Object whose ``to_dict()`` keys are
            ``(position id, object label)`` tuples.
        title: Optional figure title.
    """
    height, width = I.shape[0], I.shape[1]
    pos_region = pos_mask[:height, :width]
    obj_region = obj_mask[:height, :width]

    # Builtin `int` replaces the `np.int` alias removed in NumPy 1.24.
    mask = np.zeros((height, width, 3), dtype=int)
    elements = HE.to_dict().keys()
    pos_elements = {el[0] for el in elements}
    obj_elements = {el[1] for el in elements}
    colors = {el: np.random.randint(0, 255, 3) for el in obj_elements}

    # Boolean map of the pixels that lie in one of the histogram's positions.
    in_position = np.zeros((height, width), dtype=bool)
    for el in pos_elements:
        in_position |= pos_region == el

    # atleast_3d lets a single-channel image broadcast over the 3 channels.
    mask[in_position] = np.atleast_3d(I)[in_position]
    # Colour exactly the (position, object) pairs present in the histogram.
    for el in elements:
        mask[(pos_region == el[0]) & (obj_region == el[1])] = colors[el[1]]

    fig, ax = plt.subplots(1)
    if title:
        fig.suptitle(title)
    ax.imshow(I)
    ax.imshow(mask, alpha=0.5)
    plt.show()

In [None]:
show_elements(I, pos_mask, obj_mask, Eps_set[POS1], Eos_set[OBJ1], title="E1")

In [None]:
show_elements_by_HE(I, pos_mask, obj_mask, HE1, title="E1")

In [None]:
# Parse E2, evaluate it against the image histogram and report the result.
E2_expr = parser.parse_string(E2.value)
HE2 = evaluator.eval(E2_expr)
print("Expression for E2:\n{}".format(E2.value))
print("\nThe parsed expression for E2 in the postfix notation:\n{}".format(E2_expr))
print("\nHistogram of E2 given the image:\n{}".format(HE2.to_dict()))
print("\nValue of presence for E2:\n{}".format(HE2.sum()))

In [None]:
show_elements(I, pos_mask, obj_mask, Eps_set[POS2], Eos_set[OBJ2], title="E2")

In [None]:
show_elements_by_HE(I, pos_mask, obj_mask, HE2, title="E2")

### Set Operations

#### UNION

In [None]:
E_union = E1 + E2
E_union_expr = parser.parse_string(E_union.value)
HE_union = evaluator.eval(E_union_expr)

print("Expression for E_union:\n{}".format(E_union))
print("\nThe parsed expression for E_union in the postfix notation:\n{}".format(E_union_expr))
print("\nHistogram of E_union given the image:\n{}".format(HE_union.to_dict()))
print("\nValue of presence for E_union:\n{}".format(HE_union.sum()))

In [None]:
show_elements_by_HE(I, pos_mask, obj_mask, HE_union, title="E_union")

#### INTERSECTION

In [None]:
E_intersect = E1 * E2  # or E1.Intersection(E2)
E_intersect_expr = parser.parse_string(E_intersect.value)
HE_intersect = evaluator.eval(E_intersect_expr)

print("Expression for E_intersect:\n{}".format(E_intersect))
print("\nThe parsed expression for E_intersect in the postfix notation:\n{}".format(E_intersect_expr))
print("\nHistogram of E_intersect given the image:\n{}".format(HE_intersect.to_dict()))
print("\nValue of presence for E_intersect:\n{}".format(HE_intersect.sum()))

In [None]:
show_elements_by_HE(I, pos_mask, obj_mask, HE_intersect, title="E_intersect")

#### SUBTRACTION or EXCEPTION

In [None]:
E_sub = E1 - E2  # or E1.Sub(E2)
E_sub_expr = parser.parse_string(E_sub.value)
HE_sub = evaluator.eval(E_sub_expr)

print("Expression for E_sub:\n{}".format(E_sub))
print("\nThe parsed expression for E_sub in the postfix notation:\n{}".format(E_sub_expr))
print("\nHistogram of E_sub given the image:\n{}".format(HE_sub.to_dict()))
print("\nValue of presence for E_sub:\n{}".format(HE_sub.sum()))

In [None]:
show_elements_by_HE(I, pos_mask, obj_mask, HE_sub, title="E_sub")

### Logical Operations

#### AND

In [None]:
E_and = E1 & E2  # or E1.And(E2)
E_and_expr = parser.parse_string(E_and.value)
HE_and = evaluator.eval(E_and_expr)

print("Expression for E_and:\n{}".format(E_and))
print("\nThe parsed expression for E_and in the postfix notation:\n{}".format(E_and_expr))
print("\nHistogram of E_and given the image:\n{}".format(HE_and.to_dict()))
print("\nValue of presence for E_and:\n{}".format(HE_and.sum()))

In [None]:
show_elements_by_HE(I, pos_mask, obj_mask, HE_and, title="E_and")

#### OR

In [None]:
E_or = E1 | E2  # or E1.Or(E2)
E_or_expr = parser.parse_string(E_or.value)
HE_or = evaluator.eval(E_or_expr)

print("Expression for E_or:\n{}".format(E_or))
print("\nThe parsed expression for E_or in the postfix notation:\n{}".format(E_or_expr))
print("\nHistogram of E_or given the image:\n{}".format(HE_or.to_dict()))
print("\nValue of presence for E_or:\n{}".format(HE_or.sum()))

In [None]:
show_elements_by_HE(I, pos_mask, obj_mask, HE_or, title="E_or")

#### XOR

In [None]:
E_xor = E1 ^ E2  # or E1.Xor(E2)
E_xor_expr = parser.parse_string(E_xor.value)
HE_xor = evaluator.eval(E_xor_expr)

print("Expression for E_xor:\n{}".format(E_xor))
print("\nThe parsed expression for E_xor in the postfix notation:\n{}".format(E_xor_expr))
print("\nHistogram of E_xor given the image:\n{}".format(HE_xor.to_dict()))
print("\nValue of presence for E_xor:\n{}".format(HE_xor.sum()))

In [None]:
show_elements_by_HE(I, pos_mask, obj_mask, HE_xor, title="E_xor")

#### XSUBTRACTION

In [None]:
# TODO

## Image Retrieval

Serialize the histogram objects:

In [None]:
# import time

# LIMIT = 5000

# start_tick = time.time()
# hists = list()

# for indx, (img_id, img_meta) in enumerate(coco.imgs.items()):
#     if indx == LIMIT:
#         break
#     img_anns = coco.imgToAnns[img_id]
#     pos_mask = create_position_mask(img_meta["width"], img_meta["height"], position_elements)
#     obj_mask = create_object_mask(img_meta["width"], img_meta["height"], img_anns)
#     hist = create_histogram(img_meta["width"], img_meta["height"], pos_mask, obj_mask)
#     hists.append((img_id, hist))
#     if indx % 100 == 0:
#         print("Current image index: {}".format(indx))

# delta_tick = time.time() - start_tick
# print("Total time: {}s".format(delta_tick))
# print("Time per image: {}s".format(delta_tick / LIMIT))

# with open("imagehist.pickle", "wb") as f:
#     import pickle
#     pickle.dump(hists, f, pickle.HIGHEST_PROTOCOL)

Deserialize the image histograms:

In [None]:
import pickle

# NOTE: unpickling can execute arbitrary code. Only load "imagehist.pickle"
# if it comes from a trusted source.
with open("imagehist.pickle", "rb") as f:
    hists = pickle.load(f)

Define your query:

In [None]:
query = E("left", "dog") & E("center", "person")

Retrieve images using the query:

In [None]:
def retrieve(query, hists, topN=10, lastN=None, threshold=0.001):
    """Rank images by how strongly their histograms match a query.

    Args:
        query: Query expression; its ``value`` attribute is parsed.
        hists: Iterable of ``(image id, histogram)`` pairs.
        topN: Number of best-scoring images to return.
        lastN: If an int, also return that many lowest-scoring images
            (among those above the threshold).
        threshold: Images whose score is not strictly greater than this
            value are dropped.

    Returns:
        A list of ``(image id, score)`` pairs sorted by descending score and
        cut to ``topN``. If ``lastN`` is an int, a tuple of that list and the
        list of the last ``lastN`` ranked pairs.
    """
    expr = parser.parse_string(query.value)
    # Evaluate and sum each histogram once; the score is reused both for
    # filtering and for sorting.
    scored = [(img_id, evaluator.eval(expr, hist).sum()) for img_id, hist in hists]
    img_rank = sorted(
        (item for item in scored if item[1] > threshold),
        key=lambda item: -item[1],
    )
    if isinstance(lastN, int):
        # img_rank[-0:] would be the whole list, so a non-positive lastN is
        # handled explicitly.
        tail = img_rank[-lastN:] if lastN > 0 else []
        return img_rank[:topN], tail
    return img_rank[:topN]


IMAGE_LIMIT = 11
IMAGE_CLMNS = 5


def show_retrieved_images(img_rank, img_paths, limit=None):
    """Show the top retrieved images in a grid with rank, id and score.

    Args:
        img_rank: Sequence of ``(image id, score)`` pairs, best first.
        img_paths: Sequence of image paths aligned with ``img_rank``.
        limit: Maximum number of images to show; ``IMAGE_LIMIT`` is used
            when it is not given. It is capped by the number of available
            results.
    """
    # Never ask for more images than there are results or paths, otherwise
    # indexing below would run past the end of the sequences.
    available = min(len(img_rank), len(img_paths))
    img_limit = min(limit if limit else IMAGE_LIMIT, available)
    if img_limit <= 0:
        # plt.subplots cannot create a grid with zero rows.
        print("No images to show.")
        return
    row_num = -(-img_limit // IMAGE_CLMNS)  # ceiling division

    fig, axs = plt.subplots(row_num, IMAGE_CLMNS, figsize=(15, 4*row_num), squeeze=False)

    # axs.flat walks the grid row by row, so indx equals row*IMAGE_CLMNS + column.
    for indx, ax in enumerate(axs.flat):
        if indx >= img_limit:
            # Remove the unused cells of the last row.
            fig.delaxes(ax)
            continue
        ax.imshow(io.imread(img_paths[indx]))
        ax.set_title("rank={}\nid={}\nscore={:0.4f}".format(indx+1,
                                                            img_rank[indx][0],
                                                            img_rank[indx][1]))
    plt.tight_layout()
    plt.show()

In [None]:
TOP_N = 20

In [None]:
img_rank = retrieve(query, hists, topN=TOP_N)
img_rank

Show the retrieved images:

In [None]:
img_paths = ["{}/{}".format(IMAGE_PATH, coco.imgs[img_meta_[0]]["file_name"]) for img_meta_ in img_rank]
img_paths[:1]

In [None]:
show_retrieved_images(img_rank, img_paths, limit=TOP_N)

## References

- [COCO (Dataset): Common Objects in Context](http://cocodataset.org)