In [1]:
%load_ext autoreload
%autoreload 2

# Step 1: Rectangular Box Extraction
Applying classical image processing methods such as Canny filter, dilation and erotion and Houghes Transform to extract the main rectangular shapes from the image

In [2]:
from image_processing.BoxDetector import BroadBoxDetector
from image_processing.BoxDetectorUtils import BoxDetectorUtils
import matplotlib.pyplot as plt
from tqdm.notebook import trange, tqdm
import numpy as np
import pandas as pd
import os
root = "../../assets/imgs"
individual_imgs_results_dir =  "../../results/individual_spots"

## Feature Extraction
We us the top layer, without the classsification head, of a vgg model to map the images to a feature space. 

In [None]:
import torch
model = torch.hub.load('pytorch/vision:v0.10.0', 'vgg11', pretrained=True)

In [47]:
from PIL import Image
from torchvision import transforms

def extract_features(image_path):
    """ Given the image path, apply preprocessing and compute the feature vector"""
    input_image = Image.open(image_path)
    preprocess = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    input_tensor = preprocess(input_image)
    input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

    # move the input and model to GPU for speed if available
    if torch.cuda.is_available():
        input_batch = input_batch.to('cuda')
        model.to('cuda')

    with torch.no_grad():
        output = model(input_batch)
    return output[0].cpu().detach().numpy()

In [50]:
images = os.listdir(root) # todas la imagenes padre, el que tiene a las personas
# Con esto vamos a fittear el NN
results_df = pd.DataFrame(columns=["src_image","dst_image", "box", "feature_vector"])

for src_image in tqdm(images):
    image_path = f"{root}/{src_image}"
    pixels = plt.imread(image_path)
    BBD = BroadBoxDetector()
    bounding_boxes = BBD.detect_boxes(pixels, find_zocalos=False)

    for counter, bounding_box in enumerate(bounding_boxes):
        individual_spot = BoxDetectorUtils.crop_frame(pixels, bounding_box) # cajita individual
        individual_name = f"{src_image.split('.')[0]}_{counter}.jpg"

        individual_box_path = f"{individual_imgs_results_dir}/{individual_name}"
        plt.imsave(individual_box_path, individual_spot)

        data = {
            "src_image":src_image,
            "dst_image":individual_name,
            "box": bounding_box,
            "feature_vector": extract_features(individual_box_path)
        }
        results_df = results_df.append(data, ignore_index=True)

  0%|          | 0/326 [00:00<?, ?it/s]

In [53]:
results_df.to_pickle("../../results/results.pck")

In [3]:
results_df = pd.read_pickle("../../results/results.pck")

In [4]:
results_df = results_df[results_df['src_image']!='betsson.jpeg']

In [5]:
src_image_column = results_df['src_image'].map(lambda name: int(name.split(".")[0]))
src_image_column
results_df['src_image_number'] = src_image_column

In [6]:
results_df

Unnamed: 0,src_image,dst_image,box,feature_vector,src_image_number
0,0.jpg,0_0.jpg,"(356, 327, 151, 104)","[-1.849893, -2.1841455, -3.7927911, -2.5572228...",0
1,0.jpg,0_1.jpg,"(202, 326, 151, 105)","[-2.4421442, -3.314545, -3.2191696, -2.8591805...",0
2,1.jpg,1_0.jpg,"(364, 331, 152, 103)","[-0.08064067, 0.17646468, -3.6226554, -1.66871...",1
3,1.jpg,1_1.jpg,"(193, 332, 152, 102)","[-1.0705454, -3.0018249, 0.36436686, 0.1047717...",1
4,100.jpg,100_0.jpg,"(347, 301, 160, 108)","[-1.7891351, -1.2626743, -0.75569665, 0.215172...",100
...,...,...,...,...,...
395,96.jpg,96_0.jpg,"(187, 302, 160, 107)","[-0.9566692, -1.2426225, -3.8722253, -4.516473...",96
396,96.jpg,96_1.jpg,"(363, 301, 160, 108)","[-2.3526785, -0.8273309, -3.5193415, -2.552870...",96
397,98.jpg,98_0.jpg,"(288, 284, 134, 91)","[-3.2030826, -2.696166, -3.9320142, -4.3927507...",98
398,99.jpg,99_0.jpg,"(363, 301, 160, 108)","[-2.3857367, -1.2225716, -2.7028058, -1.609530...",99


# Clustering y clasificación

Una vez obtenidos los feature vectors podemos correr un algoritmo de clasificación que nos permita agruparlas por similitud coseno

In [7]:
from sklearn.neighbors import NearestNeighbors
from sklearn.utils import check_array
from scipy.spatial.distance import cosine

# classifier = NearestNeighbors(n_neighbors=5, algorithm='auto', metric=lambda x, y: 1-cosine(x,y))
classifier = NearestNeighbors(n_neighbors=10)
X = results_df["feature_vector"].to_numpy()
X = np.array([item for item in X])

In [8]:
classifier.fit(X)

NearestNeighbors(n_neighbors=10)

## Generamos las predicciones y guardamos las publicidades similares en carpetas individuales

In [28]:
def filter_differents(distance, nbrs, threshold):
    new_nbrs = []
    for d, n in zip(distance, nbrs):
        if d<threshold:
            new_nbrs.append(n)
    return new_nbrs

In [30]:
classes = []
classes_img_names = []
images_already_classified = []
for counter, feature_vector in enumerate(X):
    if counter not in images_already_classified:
        distance, nbrs = classifier.kneighbors([feature_vector], return_distance=True)
        nbrs = filter_differents(distance[0], nbrs, 30)
        print(nbrs, distance)
        nbrs = np.asarray(nbrs[0])
        classes.append(nbrs)
        tmp = []
        for c in nbrs:
            images_already_classified.append(c)
            tmp.append(results_df.iloc[c,:].src_image)
        classes_img_names.append(tmp)

[array([  0,  60, 290, 342, 281,  27, 237, 228, 335, 193])] [[ 0.       33.047672 33.354595 33.8153   35.41275  35.543552 36.727005
  41.31494  43.03323  43.7792  ]]
[array([  1, 291, 282, 343, 229, 238, 336,  61,  28, 342])] [[1.9073486e-06 3.4562969e+01 3.5900887e+01 3.7670200e+01 3.9927525e+01
  3.9932362e+01 4.3019325e+01 4.5757637e+01 4.7089058e+01 5.2681690e+01]]
[array([  2, 222, 232, 344,  26, 283, 164,  67, 339, 337])] [[2.6973983e-06 3.4691990e+01 3.9532089e+01 4.2892982e+01 4.3269806e+01
  4.3342514e+01 4.3366436e+01 4.3448456e+01 4.3745754e+01 4.4297630e+01]]
[array([  3,  68, 223,  24, 263, 112, 213,  55, 133, 313])] [[ 0.       39.78517  40.148323 40.23347  41.369484 42.40074  42.570557
  43.24257  43.304333 43.503242]]
[array([  4, 323,  46, 375, 366, 398,  64, 321,  95, 174])] [[ 0.       37.248184 37.529675 38.093117 38.256485 38.691925 38.73386
  39.33299  39.384197 39.407024]]
[array([  5, 324, 166,  87,  65, 171,  92, 365, 319, 354])] [[2.3360155e-06 2.3526131e+01 2

In [13]:
classes_img_names[0]

['0.jpg',
 '143.jpg',
 '318.jpg',
 '62.jpg',
 '310.jpg',
 '117.jpg',
 '287.jpg',
 '278.jpg',
 '57.jpg',
 '259.jpg']

In [14]:
for counter, src_images in enumerate(classes_img_names):
    imgs = [plt.imread(f"{root}/{src_image}") for src_image in src_images]
    stacked = np.hstack(imgs)
    plt.imsave(f"../../results/clustered/{counter}.jpg", stacked)

In [281]:
classes[9]

array([251,  22,  75, 386,  76])

In [301]:
for c in classes[45]:
    print(results_df.iloc[c,:].src_image)

141.jpg
182.jpg
181.jpg
272.jpg
109.jpg


In [241]:
results_df.iloc[396,:]

src_image                                                      96.jpg
dst_image                                                    96_1.jpg
box                                              (363, 301, 160, 108)
feature_vector      [-2.3526785, -0.8273309, -3.5193415, -2.552870...
src_image_number                                                   96
Name: 396, dtype: object