## Ejecutar las siguientes líneas

In [34]:
import sys
from os import path, mkdir, listdir, chdir
import pickle
import cv2 as cv
import ipywidgets as widgets
from ipywidgets import interact, interact_manual
import matplotlib.pyplot as plt
from skimage import io
import pandas as pd
import numpy as np

modulos_path = path.abspath('../minIA')
if modulos_path not in sys.path:
    sys.path.append(modulos_path)
    
from utiles import lectura_img
import random
random.seed(42)
from collections import Counter


In [35]:
# Receives the keypoint list of one image and builds the OpenCV KeyPoint objects
def genKeyPoints( kp_img ):
    """Convert raw keypoint records into ``cv.KeyPoint`` instances.

    Each record is indexed positionally: items 0, 1 and 2 are forwarded, in
    that order, as the first three arguments of ``cv.KeyPoint`` (presumably
    x, y and size -- TODO confirm against the descriptor pickle layout).

    Returns a new list with one ``cv.KeyPoint`` per input record, in the
    same order; an empty input yields an empty list.
    """
    return [cv.KeyPoint(record[0], record[1], record[2]) for record in kp_img]

## Visualización tópicos

El siguiente código carga los tópicos y los elementos necesarios para mostrar las figuras. Seleccionar en la primera caja de opciones el archivo de minado y oprimir el botón "run interact"; por ejemplo: _index_size_inv_full_dataset_v2_SIFT.r4l1000000o09.models_

In [36]:
@interact_manual
def load_topicos(
    topics_path=[(file,path.join("/media/working/minia/minhash/",file)) for file in listdir("/media/working/minia/minhash/") if file.endswith(".models")],
    centXimg='/media/working/minia/descriptores/labels_full_dataset_v2_SIFT.pickle',
    descr_kp='/media/working/minia/descriptores/full_dataset_v2_SIFT.pickle', 
    image_dir='/media/working/minia/images/images_training_rev1/'):
    """Load the mined topics and the per-image descriptor data into globals.

    Parameters
    ----------
    topics_path : str
        Path of the ``.models`` file chosen in the dropdown. Each line is one
        topic; its first whitespace-separated token is skipped and every
        remaining token counts as one entry of the topic.
    centXimg : str
        Pickle file whose single object is stored in ``images_descr``.
    descr_kp : str
        Pickle file holding two consecutive objects; the second one is
        stored in ``desc_kp``.
    image_dir : str
        Directory prefix stored in ``dir_img`` and later prepended to image
        file names.

    Side effects
    ------------
    Sets the globals ``topics`` (list of raw lines), ``topics_lenght``
    (entries per topic), ``len_topics``, ``images_descr``, ``desc_kp`` and
    ``dir_img``, then prints the number of topics and the model path.
    """
    global topics
    global len_topics
    global images_descr
    global desc_kp
    global dir_img
    global topics_lenght

    # Context managers guarantee the handles are closed even when reading
    # or unpickling fails.
    with open(topics_path, 'r') as topics_file:
        topics = topics_file.readlines()

    # Number of entries per topic: every token after the leading one.
    topics_lenght = [len(line.split()[1:]) for line in topics]
    len_topics = len(topics)

    # NOTE(security): pickle.load can execute arbitrary code; only point
    # these parameters at files produced by a trusted pipeline.
    with open(centXimg, 'rb') as pickle_file:
        images_descr = pickle.load(pickle_file)

    with open(path.abspath(descr_kp), 'rb') as pickle_file:
        # The first pickled object is not used here, but it must be read
        # to advance the stream to the second object.
        pickle.load(pickle_file)
        desc_kp = pickle.load(pickle_file)

    dir_img = image_dir

    print('#Topics: '+ str(len_topics))
    print('Model name:',topics_path)

interactive(children=(Dropdown(description='topics_path', options=(('index_inv_full_dataset_v2_SIFT.r4l10000o0…

## Ejecutar esta celda

In [37]:
def indicesTopicos (centroides, images_descr,threshold=0.7,threshold2=0.7, binary=True):
    """Find the images in which a topic is present.

    Parameters
    ----------
    centroides : collections.Counter
        The topic, as a mapping of centroid id -> weight.
    images_descr
        Container whose item ``[0]`` iterates over one centroid-id sequence
        per image (presumably column 0 of a DataFrame -- TODO confirm).
    threshold : float
        Minimum overlap ratio required to report an image.
    threshold2 : float
        Minimum intersection mass required to report an image.
    binary : bool
        When true, every centroid present in the image weighs 1; otherwise
        its weight is the accumulated ``int(size) + 1`` of its keypoints.

    Returns
    -------
    list of tuple
        ``(image_position, [(keypoint_index, weight), ...], overlap)`` for
        every image that passes both thresholds. ``image_position`` is the
        position inside ``images_descr[0]`` and is also used to index the
        global ``desc_kp``.

    Notes
    -----
    ``overlap`` is the intersection mass divided by the smaller of the topic
    mass and the image mass. Images for which that denominator is zero
    (no descriptors, or an empty topic) are skipped instead of raising
    ``ZeroDivisionError``.
    """
    topic_mass = sum(centroides.values())
    images = list()
    for idesc, descrp in enumerate(images_descr[0]):
        # centroid id -> index of the last keypoint assigned to that centroid
        descp_ = {ic: i for i, ic in enumerate(descrp)}
        key_points = [desc_kp[idesc]['keypoints'][descp_[ic]] for ic in descrp]
        keypoints_ = genKeyPoints(key_points)

        # Accumulate a size-based weight per centroid id. The builtin int
        # replaces np.int, which was removed from NumPy.
        new_descrp = Counter()
        for ic, curKey in zip(descrp, keypoints_):
            new_descrp[ic] += int(curKey.size) + 1
        if binary:
            new_descrp = Counter(new_descrp.keys())

        denominator = min(topic_mass, sum(new_descrp.values()))
        if denominator == 0:
            # Nothing to compare against: the topic cannot be present.
            continue

        inter = centroides & new_descrp
        inter_mass = sum(inter.values())
        overlap = inter_mass / denominator
        if overlap >= threshold and inter_mass >= threshold2:
            images.append((idesc, [(descp_[ic], v) for ic, v in inter.items()], overlap))
    return images

## Ver contenido de tópico (opcional)

Seleccionar el tópico para ver el contenido, id de cluster y pesos

In [7]:
@interact_manual
def inspect_topicos(Topics=[(f"{i} [size {z}]",i) for i,z in zip(range(len_topics),topics_lenght)], binary=True):
    """Print the content of one topic: cluster ids with their weights.

    ``Topics`` is the index of the topic inside the global ``topics`` list;
    it is also stored in the global ``num_topic``. Each entry of the topic
    line (after the leading token) is parsed as ``id:weight``. With
    ``binary`` set, every cluster id is shown with weight 1. The number of
    entries and the total mass of the topic are printed last.
    """
    global num_topic

    num_topic = Topics

    # Parse the "id:weight" entries of the selected line.
    weights = {}
    for entry in topics[Topics].split()[1:]:
        cluster_id, weight = (int(part) for part in entry.split(':'))
        weights[cluster_id] = weight

    centroides = Counter(weights.keys()) if binary else Counter(weights)

    for c, v in centroides.items():
        print(f"{c}:{v}")
    print('#Len topic: '+str(len(centroides)))
    print('#Mass topic: '+str(sum(centroides.values())))

interactive(children=(Dropdown(description='Topics', options=(('0 [size 49]', 0), ('1 [size 376]', 1), ('2 [si…

## Elegir tópico

Este código permite elegir el tópico, el número de imágenes donde buscar el código y el _threshold_ a usar para considerar que un tópico está presente o no en la imagen (para tópicos con números pequeños, usar 1.0).

Seleccionar el tópico de la caja de opciones, poner el número de imágenes al máximo y ajustar el _threshold_. Si el archivo de minado seleccionado incluye _size_ en su nombre, desmarcar la casilla _binary_; no modificar la masa (_mass_). Oprimir el botón "run interact".

In [28]:
@interact_manual
def choose_topicos(Topics=[(f"{i} [size {z}]",i) for i,z in zip(range(len_topics),topics_lenght)],
                   num_imgs= (0, int(len(images_descr)/1), 5), 
                   threshold = (0.001, 1.0, 0.01),
                   mass = 0,
                   binary=True):
    """Select a topic and collect the images in which it is present.

    Parameters
    ----------
    Topics : int
        Index of the topic inside the global ``topics`` list.
    num_imgs : int
        Only the first ``num_imgs`` rows of ``images_descr`` are searched.
    threshold : float
        Minimum overlap ratio, forwarded to ``indicesTopicos``.
    mass : int
        Minimum intersection mass, forwarded as ``threshold2``.
    binary : bool
        When true, topic and image weights are reduced to presence (1).

    Side effects
    ------------
    Sets the globals ``num_topic``, ``lista_imgs`` (raw result of
    ``indicesTopicos``) and ``name_images``, a list of
    ``(file_name, position_in_result, image_index, matches, overlap)``
    tuples, then prints the number of matching images and the topic's
    length and mass.

    NOTE(review): a later cell defines another function that is also named
    ``choose_topicos``; once that cell runs, this name refers to the later
    definition.
    """
    global lista_imgs
    global num_topic
    global name_images

    num_topic = Topics
    entries = topics[Topics].split()[1:]
    centroides = Counter(dict([tuple(int(x) for x in cent.split(':')) for cent in entries]))
    if binary:
        centroides = Counter(centroides.keys())
    lista_imgs = indicesTopicos(centroides, images_descr[0:num_imgs], threshold=threshold, threshold2=mass, binary=binary)

    # One record per matching image; the position doubles as the key used
    # by the viewer to locate the record again.
    name_images = [
        (desc_kp[img_index]['name_img'], position, img_index, matches, overlap)
        for position, (img_index, matches, overlap) in enumerate(lista_imgs)
    ]

    print('#Images: '+str(len(lista_imgs)))
    print('#Len topic: '+str(len(centroides)))
    print('#Mass topic: '+str(sum(centroides.values())))

interactive(children=(Dropdown(description='Topics', options=(('0 [size 159]', 0), ('1 [size 36]', 1), ('2 [si…

## Visualizando ejemplos del tópico

Se muestran ejemplos de imágenes que contienen el tópico seleccionado en la parte [anterior](http://minhashing.ngrok.io/notebooks/minIA/notebooks/keyPoints.ipynb#Elegir-t%C3%B3pico).

In [33]:
plt.rcParams["figure.figsize"] = (10,5)
plt.rcParams['figure.dpi'] = 160

@interact
def show_images_per_topic(img=[(aux,(img,aux,i,v,o)) for img,aux,i,v,o in name_images],binary=True,gray=True):
    """Show a 2x5 grid of images that contain the selected topic.

    Parameters
    ----------
    img : tuple
        The selected ``name_images`` record; only its position (second
        item) is used, to centre a window of at most ten records.
    binary : bool
        When true, each circle radius is the keypoint size; otherwise it is
        the weight stored with the match.
    gray : bool
        When true, the picture is shown in grey levels (circles stay
        coloured).

    Every shown image gets the matching keypoints drawn as circles and an
    x-label with ``position, overlap, #image_index``. Images that cannot be
    read are reported with a printed message and their cell is left empty.
    """
    _, aux, _, _, _ = img

    # Clamp the centre so the window [aux-5, aux+5) stays inside the list
    # whenever the list is long enough.
    if aux-5 < 0:
        aux=5
    if aux+5 > len(name_images):
        aux=len(name_images)-5
    min_=max(0,aux-5)
    max_=min(len(name_images),aux+5)

    fig, axs = plt.subplots(2,5)
    for ii in range(min_,max_):
        name, _, i, vals, overlap = name_images[ii]
        row, col = divmod(ii - min_, 5)
        ax = axs[row, col]
        ax.set_yticklabels([])
        ax.set_xticklabels([])
        ax.set_xlabel(f'{ii}, {overlap:0.3}, #{i}')

        picture = cv.imread(path.abspath(dir_img + name))
        if picture is None:
            # cv.imread signals an unreadable file by returning None.
            print(f'Could not read image: {name}')
            continue

        # cv.imread yields BGR while matplotlib expects RGB: convert before
        # drawing so both the picture and the circles show true colours.
        if gray:
            gray_ = cv.cvtColor(picture, cv.COLOR_BGR2GRAY)
            picture = cv.cvtColor(gray_, cv.COLOR_GRAY2RGB)
        else:
            picture = cv.cvtColor(picture, cv.COLOR_BGR2RGB)

        key_points = [desc_kp[i]['keypoints'][kp] for kp, _ in vals]
        for j, curKey in enumerate(genKeyPoints(key_points)):
            # Builtin int replaces np.int, which was removed from NumPy.
            x = int(curKey.pt[0])
            y = int(curKey.pt[1])
            size = int(curKey.size) if binary else vals[j][1]
            picture = cv.circle(picture,(x,y),size,color=(255,0,0),thickness=1, lineType=0, shift=0)

        ax.imshow(picture)

interactive(children=(Dropdown(description='img', options=((0, ('910976.jpg', 0, 20, [(8, 1), (9, 1), (11, 1),…

## Ver una imagen

Este código permite seleccionar una imagen y ver los tópicos presentes en esa imagen.

Escribir el número de la imagen y determinar un _threshold_; no modificar la masa (_mass_). Si se quiere que aparezca el número del tópico, dejar seleccionado _text_; si el archivo de minado tiene _size_ en su nombre, quitar la selección de _binary_. Oprimir "run interact".

In [12]:
@interact_manual
def choose_topicos(image=str(random.choice(range(1,len(images_descr)))),
                   threshold = (0.01, 1.0, 0.01),
                   mass= 0,
                   text=True,binary=True,gray=True):
    """Show one image together with every topic that is present in it.

    Parameters
    ----------
    image : str
        Row position of the image inside ``images_descr`` (converted with
        ``int``); the same number indexes the global ``desc_kp``.
    threshold : float
        Minimum overlap ratio for a topic to be reported.
    mass : int
        Minimum intersection mass for a topic to be reported.
    text : bool
        When true, the topic number is written next to each circle.
    binary : bool
        When true, weights are reduced to presence (1) and the circle
        radius is the keypoint size; otherwise the radius is the
        intersection weight.
    gray : bool
        When true, the picture is shown in grey levels (marks stay
        coloured).

    The annotated image is displayed and one line per reported topic is
    printed, sorted by ascending overlap, as
    ``#topic: overlap = intersection/min(topic_mass, image_mass)``.
    Topics whose overlap denominator is zero are skipped.

    Raises
    ------
    FileNotFoundError
        If the image file cannot be read.

    NOTE(review): an earlier cell defines a different function under the
    same name ``choose_topicos``; running this cell replaces it.
    """
    cmap=plt.get_cmap("hsv")

    img_index = int(image)
    row = images_descr.iloc[img_index]
    raw_descrp = row[0]
    img_name = row[1]
    image_kps = desc_kp[img_index]['keypoints']

    # centroid id -> index of the last keypoint assigned to that centroid
    descp_ = {ic: i for i, ic in enumerate(raw_descrp)}
    keypoints_ = genKeyPoints([image_kps[descp_[ic]] for ic in raw_descrp])

    # Size-based weight per centroid id (builtin int replaces the removed np.int).
    descrp = Counter()
    for ic, curKey in zip(raw_descrp, keypoints_):
        descrp[ic] += int(curKey.size) + 1
    if binary:
        descrp = Counter(descrp.keys())
    image_mass = sum(descrp.values())

    imagen = path.abspath(dir_img + img_name)
    canvas = cv.imread(imagen)
    if canvas is None:
        # cv.imread signals an unreadable file by returning None.
        raise FileNotFoundError(f"Could not read image: {imagen}")

    # cv.imread yields BGR while matplotlib expects RGB: convert before
    # drawing so the picture and the colormap colours are both correct.
    if gray:
        canvas = cv.cvtColor(cv.cvtColor(canvas, cv.COLOR_BGR2GRAY), cv.COLOR_GRAY2RGB)
    else:
        canvas = cv.cvtColor(canvas, cv.COLOR_BGR2RGB)

    topics_in_image = []
    n_topics = len(topics)
    for topic, line in enumerate(topics):
        # Clamp to 255: a colormap component of exactly 1.0 would give 256.
        color = tuple(min(255, int(c*256)) for c in cmap(topic/n_topics)[:3])
        centroides = Counter(dict([ tuple(int(x) for x in cent.split(':')) for cent in line.split()[1:]]))
        if binary:
            centroides = Counter(centroides.keys())
        topic_mass = sum(centroides.values())

        denominator = min(topic_mass, image_mass)
        if denominator == 0:
            # Empty topic or image without descriptors: nothing to match.
            continue

        inter = centroides & descrp
        inter_mass = sum(inter.values())
        overlap = inter_mass / denominator

        if overlap >= threshold and inter_mass >= mass:
            matches = [(image_kps[descp_[ic]], v) for ic, v in inter.items()]
            topic_keypoints = genKeyPoints([kp for kp, _ in matches])
            for (_, weight), curKey in zip(matches, topic_keypoints):
                x = int(curKey.pt[0])
                y = int(curKey.pt[1])
                size = int(curKey.size) if binary else weight
                canvas = cv.circle(canvas,(x,y),size,color=color,thickness=1, lineType=0, shift=0)
                if text:
                    canvas = cv.putText(canvas,f"{topic}",(x,y),cv.FONT_HERSHEY_SIMPLEX ,0.3,color,1, cv.LINE_AA)
            topics_in_image.append((topic, overlap, inter_mass, topic_mass, image_mass))

    plt.imshow(canvas)
    topics_in_image = sorted(topics_in_image, key=lambda tup: tup[1])
    for i,o,inter,cen,im in topics_in_image:
        print(f"#{i}: {o:0.3} = {inter}/min({cen},{im})")
    

interactive(children=(Text(value='41906', description='image'), FloatSlider(value=0.5, description='threshold'…