In [46]:
import cv2
import os
from config import COCO_img_train_dir
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import KMeans
import pickle as pk
from joblib import dump, load
import pandas as pd

In [63]:
competitors_dir = '../COCO/competitors/'

def get_SIFT(img, sift):
    """
    Return SIFT descriptors for this image (resized 200x200)
    """
    img = cv2.resize(img, (200,200))
    img_gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    (kps, descs) = sift.detectAndCompute(img_gray, None)
    return kps, descs

def get_descriptors(img_names):
    """
    Return SIFT descriptors matrix (N. images x 100 x 128)
    Max 100 descriptors for each image
    """
    X = None
    sift = cv2.xfeatures2d.SIFT_create()
    for i, img_name in enumerate(img_names):   
        img = cv2.imread(os.path.join(COCO_img_train_dir, img_name))
        kps, descs = get_SIFT(img)
        if X is None:
            X = descs[:100]
        else:
            X = np.vstack([X, descs[:100]])
    return X

def get_BOW(img_names):
    X = None
    sift = cv2.xfeatures2d.SIFT_create()
    for i, img_name in enumerate(img_names):    # 34 secondi con 1000 immagini
        img = cv2.imread(os.path.join(COCO_img_train_dir, img_name))
        kps, descs = get_SIFT(img, sift)
        y = km.predict(descs)
        fvect = np.zeros(codebook.n_clusters)
        unique, counts = np.unique(y, return_counts=True)
        for u, c in zip(unique, counts):
            fvect[u]=c/len(descs)
        if X is None:
            X = fvect
        else:
            X = np.vstack([X, fvect])
        if i%1000 == 0:
            print(f"Done: {i}")
    return X

images = os.listdir(COCO_img_train_dir)
selected = np.random.choice(images, 10000)
# Computing descriptors
print("Computing descriptors...")
X = get_descriptors(selected)
X.dump(os.path.join(competitors_dir, "sift_descr_collection.np")) 
print("Saved.")

Computing descriptors...
Saved.


In [64]:
print("Computing codebook with KMeans...")
X = np.load(os.path.join(competitors_dir, "sift_descr_collection.np"),allow_pickle=True)
codebook = KMeans(500) # Number of codes
y = km.fit_transform(X)
dump(codebook, os.path.join(competitors_dir, "sift_codebook.pkl")) 
print("Saved.")

Computing codebook with KMeans...
Saved.


In [77]:
print("Computing feature vectors for all images...")
codebook = load(os.path.join(competitors_dir, "sift_codebook.pkl"))
X = get_BOW(selected)
df = pd.DataFrame(X)
df.to_csv(os.path.join(competitors_dir, "bow_images.np"), index=False)
X.dump(os.path.join(competitors_dir, "bow_images.np"))
print("Saved.")

Computing feature vectors for all images...
Done: 0
Saved.


In [76]:
pd.read_csv(os.path.join(competitors_dir, "bow_images.np"))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,490,491,492,493,494,495,496,497,498,499
0,0.004444,0.000000,0.004444,0.000000,0.000000,0.000000,0.008889,0.000000,0.004444,0.000000,...,0.000000,0.000000,0.013333,0.000000,0.000000,0.000000,0.000000,0.000000,0.004444,0.000000
1,0.000000,0.007407,0.003704,0.000000,0.000000,0.003704,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.003704,0.000000
2,0.000000,0.002674,0.002674,0.000000,0.000000,0.005348,0.005348,0.002674,0.002674,0.000000,...,0.000000,0.000000,0.002674,0.002674,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
3,0.000000,0.003247,0.000000,0.006494,0.000000,0.000000,0.000000,0.000000,0.003247,0.000000,...,0.003247,0.000000,0.000000,0.003247,0.000000,0.000000,0.003247,0.000000,0.003247,0.000000
4,0.000000,0.000000,0.000000,0.007273,0.000000,0.003636,0.000000,0.000000,0.003636,0.003636,...,0.000000,0.000000,0.000000,0.000000,0.007273,0.000000,0.000000,0.000000,0.003636,0.000000
5,0.000000,0.000000,0.000000,0.007092,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.007092,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
6,0.000000,0.000000,0.000000,0.000000,0.006757,0.000000,0.000000,0.000000,0.006757,0.000000,...,0.006757,0.000000,0.000000,0.006757,0.000000,0.000000,0.006757,0.000000,0.000000,0.000000
7,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.004854,0.000000,0.000000,0.004854,...,0.000000,0.000000,0.000000,0.004854,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8,0.000000,0.000000,0.000000,0.000000,0.000000,0.004878,0.004878,0.000000,0.000000,0.004878,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
9,0.000000,0.000000,0.003401,0.003401,0.000000,0.003401,0.000000,0.000000,0.006803,0.000000,...,0.000000,0.000000,0.010204,0.003401,0.006803,0.000000,0.003401,0.003401,0.003401,0.003401
