In [56]:
import os
import time
import random
import cv2
import joblib
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

In [57]:
def get_random_image_path(folder_path: str, seed: int = None) -> str:
  """Return the path of one randomly chosen entry of ``folder_path``.

  Args:
    folder_path: Directory whose entries are candidates. Every entry returned
      by ``os.listdir`` is eligible; no filtering by file type is done.
    seed: Seed for the private ``np.random.RandomState`` used for the draw.
      ``None`` lets NumPy seed itself, giving a different pick on each call.

  Returns:
    ``folder_path`` joined with the selected entry name.

  Raises:
    ValueError: If ``folder_path`` contains no entries.
  """
  # os.listdir returns entries in arbitrary order; sorting makes a given seed
  # select the same file on every run and on every filesystem.
  imgs = sorted(os.listdir(folder_path))
  if not imgs:
    raise ValueError(f"No files found in folder: {folder_path!r}")
  index = np.random.RandomState(seed).randint(0, len(imgs))
  return os.path.join(folder_path, imgs[index])

In [58]:
def get_d_from_image(image: np.ndarray) -> np.ndarray:
  """Compute SIFT descriptors for ``image``.

  A fresh OpenCV SIFT detector is run over the whole image (no mask). The
  detected keypoints are discarded and only the descriptor result of
  ``detectAndCompute`` is returned, exactly as OpenCV produced it.
  """
  detector = cv2.SIFT_create()
  # detectAndCompute yields (keypoints, descriptors); only the second is needed.
  return detector.detectAndCompute(image, None)[1]

In [59]:
def img_path_to_bovw(img_path: str, kmeans: KMeans) -> np.ndarray:
  """Encode the image at ``img_path`` as a normalized bag-of-visual-words histogram.

  The image is read in grayscale, SIFT descriptors are extracted, each
  descriptor is assigned to a cluster with ``kmeans.predict``, and the cluster
  counts are normalized so the histogram sums to 1.

  Args:
    img_path: Path of the image file to encode.
    kmeans: Clustering model providing ``predict`` and ``n_clusters``; its
      clusters act as the visual vocabulary.

  Returns:
    A float array of length ``kmeans.n_clusters``. It is all zeros when SIFT
    finds no descriptors in the image.

  Raises:
    ValueError: If the image cannot be read from ``img_path``.
  """
  image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
  if image is None:
    # cv2.imread reports a missing or undecodable file by returning None.
    raise ValueError(f"Could not read image: {img_path!r}")

  sift = cv2.SIFT_create()
  _, descriptors = sift.detectAndCompute(image, None)
  if descriptors is None or len(descriptors) == 0:
    # No keypoints (e.g. a blank image): return an empty histogram rather than
    # failing in predict() or dividing by zero.
    return np.zeros(kmeans.n_clusters, dtype=float)

  predictions = kmeans.predict(descriptors)
  counts = np.bincount(predictions, minlength=kmeans.n_clusters)
  return counts / counts.sum()

In [60]:
# Load the clustering model that is passed to img_path_to_bovw below
# (presumably a fitted sklearn KMeans, judging by the import and file name — confirm).
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code;
# only load artifacts from a trusted source.
kmeans = joblib.load("kmeans_1000_512.joblib")

In [61]:
# Directory whose files are indexed below; the path is relative to the
# notebook's working directory.
folder_path = "VOCdevkit/VOC2012/JPEGImages"

In [62]:
# Paths of the first 1000 entries of folder_path in sorted name order.
# os.listdir returns entries in arbitrary order, so sorting before slicing keeps
# the selected subset identical between runs and machines.
db_image = [os.path.join(folder_path, img_name) for img_name in sorted(os.listdir(folder_path))[:1000]]

In [63]:
# One bag-of-visual-words histogram per database image, in the same order as db_image.
db = [img_path_to_bovw(path, kmeans) for path in db_image]

In [64]:
# Pair every image path with its histogram: one row per image.
df = pd.DataFrame(dict(path=db_image, vector=db))

In [65]:
# Write the table to a timestamped CSV. Vectors are converted to plain Python
# lists for the export only (df itself is left untouched): an ndarray cell would
# be written through numpy's str(), which rounds to 8 significant digits and
# elides long arrays with "...", so the stored histograms could not be recovered.
# A list is written as a full-precision "[v0, v1, ...]" literal instead.
df.assign(
    vector=df["vector"].map(lambda vec: np.asarray(vec).tolist())
).to_csv(f"export_{time.time()}.csv")