In [None]:
import IPython

In [1]:
!pip install h5py

Collecting h5py
  Downloading h5py-3.9.0-cp39-cp39-win_amd64.whl (2.7 MB)
     ---------------------------------------- 2.7/2.7 MB 5.7 MB/s eta 0:00:00
Installing collected packages: h5py
Successfully installed h5py-3.9.0


In [None]:
# NOTE(review): there is no `-y` flag, so pip is expected to wait for a confirmation
# prompt here. Running this cell also removes h5py, which the import cell below
# needs -- skip it on a normal top-to-bottom run.
!pip uninstall h5py

In [8]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.font_manager
from mpl_toolkits.mplot3d import Axes3D
from sklearn import svm
from matplotlib import cm
from IPython.display import Image
from tqdm import tqdm
import pandas as pd

from tensorflow.keras.layers import ( MaxPool2D, Conv2D, Input,
                                     BatchNormalization, Dense, Activation,
                                     Flatten, Lambda, Add, Concatenate)
from tensorflow.keras import (Model, Sequential, utils, losses, metrics,
                              optimizers, backend as K, callbacks,
                              applications)

import h5py
import os
import seaborn as sns
import tensorflow as tf

In [6]:
def read_image(path):
    """Load an image file as a grayscale float64 array.

    The image is read with Keras' ``load_img`` and resized to 224x224 using
    nearest-neighbour interpolation with ``keep_aspect_ratio=True``, then
    converted to a channels-last array.

    Args:
        path: location of the image file, passed straight to ``load_img``.

    Returns:
        Array produced by ``img_to_array`` with dtype float64. For a
        grayscale image this is expected to be (224, 224, 1) -- confirm
        against the installed Keras version.
    """
    # `utils.read_image` does not exist in Keras; `load_img` is the loader.
    # The deprecated `grayscale=True` flag overrode `color_mode`, so the
    # effective request was a grayscale image -- say so explicitly.
    img = utils.load_img(path,
                         color_mode='grayscale',
                         target_size=(224, 224),
                         interpolation='nearest',
                         keep_aspect_ratio=True
                         )
    # A dtype string is understood by NumPy directly, unlike a TensorFlow dtype object.
    img = utils.img_to_array(img, data_format='channels_last', dtype='float64')
    return img

In [3]:
def func_square_DEP0(x, y, x0, y0, width, height):
    """
    Deprecated rectangle test anchored at the corner (x0, y0).

    Output:
        Boolean map, True where x0 <= x <= x0 + width and
        y0 - height <= y <= y0 (edges included), False elsewhere.
    """
    left_edge, right_edge = x0, x0 + width
    lower_edge, upper_edge = y0 - height, y0
    within_columns = np.logical_and(np.greater_equal(x, left_edge),
                                    np.less_equal(x, right_edge))
    within_rows = np.logical_and(np.greater_equal(y, lower_edge),
                                 np.less_equal(y, upper_edge))
    return np.logical_and(within_columns, within_rows)

def func_square_DEP1(x, y, x0, y0, width, height):
    """
    Deprecated unit-radius test based on the sum of absolute offsets.

    Output:
        Boolean map, True where |x - x0| + |y - y0| <= 1.
        `width` and `height` are accepted but not used.
    """
    horizontal_offset = np.abs(x - x0)
    vertical_offset = np.abs(y - y0)
    return (horizontal_offset + vertical_offset) <= 1

def func_square(point, x0, y0, width, height):
    """
    Test which points lie inside an axis-aligned box centred on (x0, y0).

    Input:
        point: array of shape (2, N) (first row x, second row y) or (N, 2).
            An array whose first dimension is 2 is always read as (2, N).
        x0, y0: centre of the box.
        width, height: half-extents of the box along x and y.
    Output:
        Boolean map, True where |x - x0| <= width and |y - y0| <= height.
    """
    if point.shape[0] == 2:
        x, y = point
    else:
        x, y = point.transpose(1, 0)

    # Each axis is checked against its own half-extent. For width == height this
    # equals the former max(|dx|, |dy|) <= width test; previously `height` was
    # ignored whenever it differed from `width`.
    inside_x = np.less_equal(np.abs(x - x0), width)
    inside_y = np.less_equal(np.abs(y - y0), height)
    return np.logical_and(inside_x, inside_y)

In [None]:
# 100 random 2-D points with both coordinates in [0, 5); row 0 is x, row 1 is y.
point = 5 * np.random.rand(2, 100)
x, y = point[0], point[1]

In [None]:
# Probe with a centre in the negative quadrant and display the resulting mask.
c = [-2.5, -2.5] # out
t = func_square(point, *c, 1, 1)
t

In [7]:
def make_circles(xy0=None, r=1, n_circles=10, x_bound=10, y_bound=10, plus=0.5, img_size=128):
    """Rasterise filled circles of radius ``r`` into a binary image.

    Args:
        xy0: optional iterable of (x0, y0) centres. When None, ``n_circles``
            centres are drawn uniformly at random, keeping a margin of
            ``plus + r / 2`` from each bound.
        r: radius shared by every circle.
        n_circles: number of random centres; ignored when ``xy0`` is given.
        x_bound, y_bound: the image covers [-x_bound, x_bound] along x and
            [-y_bound, y_bound] along y.
        plus: extra margin kept between random centres and the bounds.
        img_size: number of pixels per side.

    Returns:
        uint8 array of shape (img_size, img_size) holding 1 where a pixel is
        inside at least one circle and 0 elsewhere. Columns follow x and rows
        follow y, both increasing with the index.
    """
    # Pass the positive bound: make_mesh negates it itself, so passing -x_bound
    # produced a mesh whose x axis ran from +x_bound down to -x_bound.
    xxyy = make_mesh(x_bound, y_bound, img_size)

    if xy0 is None:
        margin = plus + r / 2
        x_lim = x_bound - margin
        y_lim = y_bound - margin
        # Each coordinate is sampled inside its own bounds.
        xy0 = np.random.uniform(low=[-x_lim, -y_lim], high=[x_lim, y_lim], size=(n_circles, 2))

    # Evaluate each circle on the whole mesh at once instead of calling Python
    # once per pixel; starting from an all-False mask also covers zero circles.
    # No per-circle progress bar: each circle is a single array operation.
    mask = np.zeros(xxyy.shape[:2], dtype=bool)
    for x0, y0 in xy0:
        mask |= ((xxyy[..., 0] - x0) ** 2 + (xxyy[..., 1] - y0) ** 2) <= r**2
    return mask.astype(np.uint8)

def make_mesh(x_b, y_b, size=128):
    """Build a (size, size, 2) grid of (x, y) coordinates.

    x runs from -x_b to x_b along the columns and y from -y_b to y_b along
    the rows; the last axis holds x at index 0 and y at index 1. A negative
    bound reverses the direction of that axis.
    """
    x_axis = np.linspace(-x_b, x_b, size)
    y_axis = np.linspace(-y_b, y_b, size)
    grid_x, grid_y = np.meshgrid(x_axis, y_axis)
    return np.stack((grid_x, grid_y), axis=-1)

In [None]:
# Build one sample image using every default argument.
circle_map = make_circles()

In [None]:
# Image shape and the sum of all its pixel values.
circle_map.shape, circle_map.sum()

In [None]:
# Display the generated image.
plt.imshow(circle_map)

In [None]:
def make_data(n_examples=20, min_n_circles=1, max_n_circles=10, size=128):
    """Generate images together with the number of circles requested for each.

    A circle count is drawn uniformly from [min_n_circles, max_n_circles] for
    every example and passed to ``make_circles``.

    Returns:
        (images, counts): images has shape (n_examples, size, size, 1) and
        counts has shape (n_examples, 1).
    """
    counts = np.random.randint(min_n_circles, max_n_circles + 1, (n_examples, 1))

    frames = []
    for n in tqdm(counts):
        frames.append(make_circles(n_circles=n.item(), img_size=size))

    # Examples go on the leading axis; a trailing channel axis is appended.
    batch = np.stack(frames, axis=0)[..., np.newaxis]
    return batch, counts.reshape(-1, 1)

In [None]:
# Generate 1000 examples and show the shapes of the image and label arrays.
images, target = make_data(n_examples=1000)
images.shape, target.shape

In [None]:
# NOTE(review): make_data already returns its labels through reshape(-1, 1),
# so this looks like a no-op -- confirm before relying on it.
target = target.reshape(-1, 1)

In [None]:
# NOTE(review): display_images, test_images_extra and test_extra_target are not
# defined in any cell visible above -- confirm where they come from, otherwise
# this cell fails on a fresh kernel.
display_images(test_images_extra, test_extra_target, n_examples=5)

We see that many circles overlap each other, forming clusters that lose their
circular shape. This may prevent the model from detecting individual circles.
Such a drop in performance is to be expected if the training set contains only well-separated circles.

However, we can improve the dataset generator so that circles do not overlap. A further improvement is to use grayscale images, that is, a mostly black background with gray circles of varying intensity, so that the model can learn more complex features and generalize better to real-world examples such as images taken with cameras.

In [None]:
# Scratch check of the overlap test: does any of 10 random centres fall within
# distance 1 of a fixed probe centre?
xy0 = np.random.uniform(low=-10, high=10, size=(10, 2))
# The probe is a fixed point; the earlier `xy0[7]` assignment was overwritten
# immediately and has been dropped.
xy0_test = np.array([0.1, 0.2])
func = lambda x: ((x[..., 0] - xy0_test[0]) ** 2 + (x[..., 1] - xy0_test[1]) ** 2) <= 1**2
any(func(x) for x in xy0)

In [None]:
def make_circles(xy0=None, r0=None, min_r=0.5, max_r=1.0, n_circles=10, x_bound=10, y_bound=10, extra_bound_separation=2, img_size=128, extra_circle_separation=0.1, max_gray=0.2):
    """Rasterise non-overlapping filled circles into a noisy grayscale image.

    Args:
        xy0: optional array-like of (x0, y0) centres. Supplied centres are
            drawn exactly as given (no relocation). When None, ``n_circles``
            centres are sampled uniformly at random.
        r0: fixed radius for every circle. When None, each radius is drawn
            uniformly from [min_r, max_r].
        min_r, max_r: radius range used when ``r0`` is None.
        n_circles: number of random centres; ignored when ``xy0`` is given.
        x_bound, y_bound: the image covers [-x_bound, x_bound] along x and
            [-y_bound, y_bound] along y.
        extra_bound_separation: extra margin kept between random centres and
            the bounds.
        img_size: number of pixels per side.
        extra_circle_separation: minimum gap required between two circles
            when placing random centres.
        max_gray: upper limit of the pixel values inside circles.

    Returns:
        Float array of shape (img_size, img_size): 0 outside every circle and
        uniform noise in [0, max_gray) inside them.
    """
    # Pass the positive bound: make_mesh negates it itself, so passing -x_bound
    # produced a mesh whose x axis was mirrored.
    xxyy = make_mesh(x_bound, y_bound, img_size)
    new_center_tries = 10
    on_random_r = r0 is None

    def draw_radius():
        # A fixed r0 is reused for every circle; otherwise sample in [min_r, max_r].
        if on_random_r:
            return change_interval(np.random.rand(1).item(), [0, 1], [min_r, max_r])
        return r0

    relocate = xy0 is None
    if relocate:
        # Largest radius that can occur; with the default arguments this is 1,
        # the value that used to be hard-coded.
        max_radius = max_r if on_random_r else r0
        margin = extra_bound_separation + max_radius / 2
        # Each coordinate is sampled inside its own bounds.
        lows = [-(x_bound - margin), -(y_bound - margin)]
        highs = [x_bound - margin, y_bound - margin]
        centers = np.random.uniform(low=lows, high=highs, size=(n_circles, 2))
    else:
        # Copy so the caller's data is never modified.
        centers = np.array(xy0, dtype=float).reshape(-1, 2)

    radii = np.empty(len(centers))
    for idx in range(len(centers)):
        radius = draw_radius()
        if relocate:
            # Re-draw a colliding circle up to new_center_tries times; after
            # that the last draw is kept even if it still overlaps.
            for _ in range(new_center_tries):
                if not _overlaps_placed(centers[idx], radius, centers[:idx], radii[:idx], extra_circle_separation):
                    break
                centers[idx] = np.random.uniform(low=lows, high=highs)
                radius = draw_radius()
        radii[idx] = radius

    # Evaluate each circle on the whole mesh at once; starting from an
    # all-False mask also covers the zero-circle case.
    mask = np.zeros(xxyy.shape[:2], dtype=bool)
    for (x0, y0), radius in zip(centers, radii):
        mask |= ((xxyy[..., 0] - x0) ** 2 + (xxyy[..., 1] - y0) ** 2) <= radius**2
    stacks = mask.astype(np.uint8)
    return max_gray * stacks * np.random.rand(*stacks.shape)


def _overlaps_placed(center, radius, placed_centers, placed_radii, separation):
    """Return True when the circle comes within ``separation`` of any already-placed circle."""
    if len(placed_centers) == 0:
        return False
    gaps = np.hypot(placed_centers[:, 0] - center[0], placed_centers[:, 1] - center[1])
    return bool(np.any(gaps <= placed_radii + radius + separation))


def make_mesh(x_b, y_b, size=128):
    """Build a (size, size, 2) grid of (x, y) coordinates.

    x runs from -x_b to x_b along the columns and y from -y_b to y_b along
    the rows; the last axis holds x at index 0 and y at index 1. A negative
    bound reverses the direction of that axis.
    """
    x_axis = np.linspace(-x_b, x_b, size)
    y_axis = np.linspace(-y_b, y_b, size)
    grid_x, grid_y = np.meshgrid(x_axis, y_axis)
    return np.stack((grid_x, grid_y), axis=-1)

def change_interval(x, ab=None, cd=(0, 1)):
    """Linearly map ``x`` from the interval ``ab`` onto the interval ``cd``.

    Args:
        x: value (or array) to rescale.
        ab: source interval (a, b). When None, ``x`` is returned unchanged.
        cd: target interval (c, d); an immutable default avoids sharing a
            mutable list between calls.

    Returns:
        c + (x - a) * (d - c) / (b - a), or ``x`` itself when ``ab`` is None.
        The source interval must have a != b, since the formula divides by b - a.
    """
    if ab is None:
        return x
    a, b = ab
    c, d = cd
    return c + (x - a) * (d - c) / (b - a)

def make_data(n_examples=20, min_n_circles=1, max_n_circles=10, size=128, on_random_n_circles=False):
    """Generate images together with the number of circles requested for each.

    Args:
        n_examples: number of images to generate.
        min_n_circles, max_n_circles: inclusive range of circle counts.
        size: number of pixels per image side.
        on_random_n_circles: when True, draw each circle count at random;
            otherwise spread the counts as evenly as possible over the range,
            in ascending order.

    Returns:
        (images, counts): images has shape (n_examples, size, size, 1) and
        counts has shape (n_examples, 1).
    """
    if on_random_n_circles:
        y = np.random.randint(min_n_circles, max_n_circles + 1, (n_examples, 1))
    else:
        inter = (max_n_circles - min_n_circles) + 1
        # Cycle through the counts so exactly n_examples labels are produced even
        # when n_examples is not a multiple of the range; sorting restores the
        # grouped ascending order used for exact multiples.
        y = np.sort(min_n_circles + np.arange(n_examples) % inter).reshape(-1, 1)

    if len(y) == 0:
        # np.stack cannot handle an empty list; return correctly shaped empties.
        return np.zeros((0, size, size, 1)), y.reshape(-1, 1)

    imgs = [make_circles(n_circles=n.item(), img_size=size) for n in tqdm(y)]
    # Examples go on the leading axis; a trailing channel axis is appended.
    imgs = np.stack(imgs, axis=0)[..., np.newaxis]
    return imgs, y.reshape(-1, 1)

In [None]:
# Generate one sample with 10 circles; r0=None requests a random radius per circle.
circle = make_circles(n_circles=10, r0=None, img_size=128)

In [None]:
# Display the sample with the reversed grey colormap.
plt.imshow(circle, cmap='Greys_r')

#### Make new dataset

In [9]:
# Dataset settings; the same variables build the file name when saving and loading.
max_n_circles = 20
size_w, size_h = 224, 224
# Pass the variables instead of repeating the literals, so the generated data
# cannot drift from the values encoded in the file name. make_data takes one
# side length, i.e. square images.
images_gray, target_gray = make_data(n_examples=1000, max_n_circles=max_n_circles, size=size_w)
images_gray.shape, target_gray.shape

In [None]:
# List the contents of the current working directory.
os.listdir()

Save the dataset in HDF5 (`.hdf5`) format

In [None]:
# Bind the file name before using it: the f-string used to be a bare expression,
# leaving `file_name` undefined on a fresh kernel.
file_name = f"circle_images_{max_n_circles}_{size_w}_{size_h}.hdf5"
with h5py.File(f"./{file_name}", "w") as f:
    f.create_dataset("images", data=images_gray)
    f.create_dataset("target", data=target_gray)

Check that the file was saved

In [None]:
# The file is written to the working directory ("./"), so list that directory
# rather than a Drive folder that the save cell never writes to.
os.listdir(".")

Read the h5 files previously saved

In [None]:
# Rebuild the file name from the dataset settings and load both arrays into memory.
file_name = f"circle_images_{max_n_circles}_{size_w}_{size_h}.hdf5"
with h5py.File(f"./{file_name}", "r") as f:
    # [:] copies each dataset into an in-memory array before the file closes.
    images_gray = f.get("images")[:]
    target_gray = f.get("target")[:]
print(images_gray.shape, target_gray.shape, sep="\n")

Distribution of number of circles in gray images

In [None]:
# u: the distinct label values; c: how many times each one occurs.
u, c = np.unique(target_gray, return_counts=True)

Let's check the distribution of circles with a plot

In [None]:
# Bar chart with one bar per distinct label value, its height being the occurrence count.
plt.bar(x=u, height=c)

The circle counts are equally distributed across the dataset.