In [1]:
import os
import numpy as np
from yael import ynumpy

In [2]:
# Directory scanned for descriptor files, and the suffix that identifies them.
siftgeo_dir = "../siftgeo"
siftgeo_ext = '.siftgeo'

# Image names are the file names with the trailing '.siftgeo' removed.
# Only that suffix is stripped: the previous split('.')[0] cut the name at
# the FIRST dot, so a file such as "img.v2.siftgeo" became "img" and the
# read below then pointed at a file that does not exist.
# NOTE(review): os.listdir returns entries in arbitrary order, so the order
# of image_names (and of everything derived from it) is filesystem-dependent.
image_names = [filename[:-len(siftgeo_ext)]
               for filename in os.listdir(siftgeo_dir)
               if filename.endswith(siftgeo_ext)]
print(image_names[0])

# load the SIFTs for these images
image_descs = []
for imname in image_names:
    desc, meta = ynumpy.siftgeo_read(
        os.path.join(siftgeo_dir, imname + siftgeo_ext))
    # an image without descriptors still gets a well-formed (0, 128) uint8
    # array, so it can be stacked with the others later on
    if desc.size == 0:
        desc = np.zeros((0, 128), dtype = 'uint8')
    # we drop the meta-information (point coordinates, orientation, etc.)
    image_descs.append(desc)

137102


In [5]:
# make a big matrix with all image descriptors
all_desc = np.vstack(image_descs)
print(all_desc.shape)

k = 64                 # number of GMM components passed to gmm_learn
n_sample = k * 1000    # number of descriptors drawn to train PCA + GMM
pca_dim = 64           # number of dimensions kept after the PCA

# choose n_sample descriptors at random (np.random-style choice draws WITH
# replacement by default). A locally seeded generator makes the subsample
# repeatable from one run to the next without touching the global RNG state.
rng = np.random.RandomState(0)
sample_indices = rng.choice(all_desc.shape[0], n_sample)
sample = all_desc[sample_indices]

# until now sample was in uint8. Convert to float32
sample = sample.astype('float32')
print(sample.shape)

# compute mean and (unnormalized) covariance matrix for the PCA; the missing
# 1/n factor only rescales the eigenvalues, not the eigenvectors.
# `mean` is kept: new descriptors must be centered the same way before
# being projected with `pca_transform`.
mean = sample.mean(axis = 0)
sample = sample - mean
cov = np.dot(sample.T, sample)

# compute PCA matrix and keep only pca_dim dimensions.
# cov is symmetric, so use eigh: it returns real eigenvalues/eigenvectors,
# whereas the general-purpose eig may hand back complex arrays and would
# break the float32 pipeline below.
eigvals, eigvecs = np.linalg.eigh(cov)
perm = eigvals.argsort()                   # sort by increasing eigenvalue
pca_transform = eigvecs[:, perm[-pca_dim:]]   # eigenvectors for the pca_dim largest eigenvalues

# transform sample with PCA (note that numpy imposes line-vectors,
# so we right-multiply the vectors)
sample = np.dot(sample, pca_transform)
print(sample.shape)

# train GMM
gmm = ynumpy.gmm_learn(sample, k)

(4455091, 128)
(64000, 128)
(64000, 64)


In [15]:
# Encode every image as a Fisher vector, then normalize the whole set.
image_fvs = []

# quick look at the input: number of images, number of descriptors of the
# first image, and that image's first raw descriptor
first_descs = image_descs[0]
print(len(image_descs))
print(len(first_descs))
print(first_descs[0])

for raw_desc in image_descs:
    # center the descriptors and project them with the PCA
    reduced = np.dot(raw_desc - mean, pca_transform)
    # Fisher vector restricted to the derivative w.r.t mu
    image_fvs.append(ynumpy.fisher(gmm, reduced, include = 'mu'))

# number of encoded images, then length and first component of the first FV
first_fv = image_fvs[0]
print(len(image_fvs))
print(len(first_fv))
print(first_fv[0])

# stack the per-image FVs into a single matrix, one row per image
image_fvs = np.vstack(image_fvs)
print(len(image_fvs))

# the normalizations below are applied to all rows at once

# power-normalization: keep the sign, take the square root of the magnitude
magnitudes = np.abs(image_fvs) ** 0.5
image_fvs = np.sign(image_fvs) * magnitudes
print(len(image_fvs))

# L2 normalize each row (in place)
norms = np.sqrt((image_fvs ** 2).sum(axis = 1))
image_fvs /= norms[:, np.newaxis]

# an all-zero row divides 0 by 0 above and ends up as NaN: this is how
# images with 0 local descriptor are handled
# (100 = far away from "normal" images)
nan_entries = np.isnan(image_fvs)
image_fvs[nan_entries] = 100

1491
659
[ 85  57  10   0   0   0   0  42  29  63  81 124   7   8  20  17  51 124
  33  68   1  24  19   4   8 124  10   0   0   6   1   0 124  53  95  51
   0   0   0  41 124  46  82 109   0   0   0  69  67  45  86 124   5   0
   0  10  24  21   3   9   1  14   5   6  46  25  64  41   0   0   0  20
 103  12  16   8   0   0   7 124  10   0   6  81  34  19  52  83   3   0
   1  19  44  38  21  11   3  14  42  34   0   0   0   1   6   6  11  55
   4   0  10  36   3   1   2   5   5   8  65  48   2   0   0  11  45  19
  22  11]
1491
4096
-0.38522238
1491
1491


