In [7]:
import numpy as np
import pdb

from sklearn.datasets import make_classification

import cv2

# Running count of images passed through extract_image_features; used only to
# report the position of images for which SIFT found nothing.
index = 0
def extract_image_features(image):
    """Compute SIFT descriptors for a single image.

    Parameters
    ----------
    image : ndarray
        Image array. It is transposed (``image.T``) and converted from RGB to
        BGR before SIFT runs. Presumably stored channel-first -- TODO confirm
        against the dataset layout.

    Returns
    -------
    ndarray, shape (n_keypoints, 128)
        The descriptors returned by ``detectAndCompute``. When SIFT returns
        no descriptors, an empty ``(0, 128)`` float32 array is returned so
        the result has the same dtype as real SIFT output and does not
        upcast later concatenations to float64.

    Side effects
    ------------
    Increments the module-level ``index`` once per call and prints its
    pre-increment value for every image without descriptors.
    """
    global index
    # OpenCV 4.4+ exposes SIFT in the main module; older contrib builds only
    # provide it under cv2.xfeatures2d.
    sift_factory = getattr(cv2, "SIFT_create", None) or cv2.xfeatures2d.SIFT_create
    sift = sift_factory()
    image_bgr = cv2.cvtColor(image.T, cv2.COLOR_RGB2BGR)
    _, descriptors = sift.detectAndCompute(image_bgr, None)

    if descriptors is None:
        # detectAndCompute yields None (not an empty array) without keypoints.
        print(index)
        descriptors = np.empty((0, 128), dtype=np.float32)
    index += 1
    return descriptors

def process_images(images):
    """Run ``extract_image_features`` on every image, preserving order.

    Returns a list with one descriptor array per input image.
    """
    all_descriptors = []
    for single_image in images:
        all_descriptors.append(extract_image_features(single_image))
    return all_descriptors

In [2]:
def fisher_vector(xx, gmm):
    """Computes the Fisher vector on a set of descriptors.

    Parameters
    ----------
    xx: array_like, shape (N, D) or (D, )
        The set of descriptors
    gmm: fitted mixture model
        Gaussian mixture model of the descriptors. It must provide
        ``compute_posteriors(xx)``, ``get_weights()``, ``get_means()`` and
        ``get_covars()``. The element-wise arithmetic below assumes means
        and covariances are both (K, D), i.e. diagonal covariances -- TODO
        confirm for the model in use.

    Returns
    -------
    fv: array_like, shape (K + 2 * D * K, )
        Fisher vector (derivatives with respect to the mixing weights, means
        and variances) of the given descriptors.

    Raises
    ------
    ValueError
        If ``xx`` contains no descriptors; the statistics are averages over
        N and would otherwise silently become NaN.

    Reference
    ---------
    J. Krapac, J. Verbeek, F. Jurie.  Modeling Spatial Layout with Fisher
    Vectors for Image Categorization.  In ICCV, 2011.
    http://hal.inria.fr/docs/00/61/94/03/PDF/final.r1.pdf
    """
    xx = np.atleast_2d(xx)
    N = xx.shape[0]
    if N == 0:
        raise ValueError("fisher_vector needs at least one descriptor")

    # Compute posterior probabilities.
    Q = gmm.compute_posteriors(xx)  # NxK

    # Some backends return a wrapper exposing .asarray() to obtain a numpy
    # array; accept a plain array-like as well.
    to_host = getattr(Q, "asarray", None)
    Q = to_host() if callable(to_host) else np.asarray(Q)

    # Fetch the model parameters once instead of on every use.
    weights = gmm.get_weights()
    means = gmm.get_means()
    covars = gmm.get_covars()

    # Compute the sufficient statistics of descriptors.
    Q_sum = np.sum(Q, 0)[:, np.newaxis] / N
    Q_xx = np.dot(Q.T, xx) / N
    Q_xx_2 = np.dot(Q.T, xx ** 2) / N

    # Compute derivatives with respect to mixing weights, means and variances.
    d_pi = Q_sum.squeeze() - weights
    d_mu = Q_xx - Q_sum * means
    d_sigma = (
        - Q_xx_2
        - Q_sum * means ** 2
        + Q_sum * covars
        + 2 * Q_xx * means)

    # Merge derivatives into a vector.
    return np.hstack((d_pi, d_mu.flatten(), d_sigma.flatten()))

In [9]:
from scipy import ndimage, misc
import numpy as np
import os
import pandas as pd
from pandas import HDFStore, DataFrame

import h5py

In [10]:
# Directory holding the HDF5 datasets, relative to the notebook.
dataset_dir = '../dataset_h5/'

# Image dataset, opened read-only and kept open for the cells below.
h5f = h5py.File(
    os.path.join(dataset_dir, 'images_224_delta_1.5.h5'),
    mode='r',
)

In [None]:
##EXAMPLE OF IMAGES WITH NO SIFT POINTS
%matplotlib inline
from matplotlib import pyplot as plt
# Dataset positions of the example images shown below.
no_sift_examples = (144, 406, 609, 732, 869, 923)

# The first image is drawn on the implicit current figure; every following
# image gets a fresh figure so they are rendered separately.
for position, image_idx in enumerate(no_sift_examples):
    if position:
        plt.figure()
    plt.imshow(h5f['data'][image_idx].T, interpolation='nearest')
plt.show()

In [None]:
# Each image yields its own number of descriptors, so the per-image arrays
# have different lengths. Build a 1-D object array explicitly: np.asarray on
# such a ragged list raises on current NumPy.
descriptor_list = process_images(h5f['data'][:10000])
image_descriptors = np.empty(len(descriptor_list), dtype=object)
for i, per_image in enumerate(descriptor_list):
    image_descriptors[i] = per_image

In [None]:
# Reuse descriptors cached in the descriptor HDF5 file when it exists,
# otherwise compute them and store them there.
filename = 'descriptors_images_224_delta_1.5.h5'
if(os.path.isfile(os.path.join(dataset_dir,filename))):
    descriptor_h5f = h5py.File(os.path.join(dataset_dir,filename),'r')
    # Fixed: this used to assign to the misspelled `images_descriptors`, so
    # `image_descriptors` was never populated from the cache.
    image_descriptors = descriptor_h5f['image_descriptors']
else:
    descriptor_h5f = h5py.File(os.path.join(dataset_dir,filename),'w')
    image_descriptors = np.asarray(process_images(h5f['data'][:10000]))
    # NOTE(review): per-image descriptor counts differ, so this is likely a
    # ragged/object array; confirm that h5py can store it in this form.
    descriptor_h5f['image_descriptors'] = image_descriptors
# image_descriptors.shape


In [None]:
np.save('image_descriptors.npy', image_descriptors)

# The per-image descriptor arrays have different lengths, so the saved file
# holds an object array; np.load refuses those unless allow_pickle=True.
# The file was written by the line above, so unpickling it is safe.
image_descriptors = np.load('image_descriptors.npy', allow_pickle=True)
from sklearn.decomposition import PCA
pca = PCA(n_components=64)# adjust yourself
# Fit the projection on the descriptors of the first 5000 images only.
pca.fit(np.concatenate(image_descriptors[:5000]))

# Project each image's descriptors, skipping images without any. The result
# is ragged, so fill an object array explicitly instead of np.asarray.
reduced_list = [pca.transform(image) for image in image_descriptors if image.shape[0] > 0]
image_descriptors_reduced = np.empty(len(reduced_list), dtype=object)
for i, reduced in enumerate(reduced_list):
    image_descriptors_reduced[i] = reduced


In [None]:
# Display the shape of the reduced-descriptor container as a sanity check.
image_descriptors_reduced.shape

In [None]:
# Store the PCA-reduced descriptors in the descriptor HDF5 file.
# NOTE(review): this needs descriptor_h5f to be open in a writable mode, and
# the cache cell opens an existing file with 'r' -- confirm before running.
descriptor_h5f['descriptor_reduced'] = image_descriptors_reduced

In [None]:
from scipy import ndimage, misc
import numpy as np
import os
import pandas as pd
from pandas import HDFStore, DataFrame

import h5py

# Re-open the descriptor file read-only and pull the reduced descriptors
# fully into memory (slicing with [:] reads the whole dataset).
dataset_dir = '../dataset_h5/'
filename = 'descriptors_images_224_delta_1.5.h5'
descriptor_h5f = h5py.File(
    os.path.join(dataset_dir, filename),
    mode='r',
)
image_descriptors_reduced = descriptor_h5f['descriptor_reduced'][:]

In [None]:
# full_dataset = h5f['data'][:]
# full_dataset = full_dataset.reshape(full_dataset.shape[0], -1)
# randomly_sampled = np.random.choice(full_dataset.shape[0], size=10000, replace=False)

# X_train = X_train.reshape(X_train.shape[0], -1)
# X_test = h5f['data'][-10000: ]
# X_test = X_test.reshape(X_test.shape[0], -1)

import ggmm.gpu as ggmm
import numpy as np

# The reduced descriptors are stored per image with varying lengths, i.e. as
# an object array, which np.load only reads with allow_pickle=True. Only
# load files produced by this notebook this way.
image_descriptors_reduced = np.load("image_descriptors_reduced.npy", allow_pickle=True)
# Pool the descriptors of all images into one (N, D) training matrix.
concatenated_descriptors = np.concatenate(image_descriptors_reduced)
N, D = concatenated_descriptors.shape
K=128

# NOTE(review): the argument to ggmm.init is hard-coded; presumably it has to
# be large enough for N -- confirm against the ggmm documentation.
ggmm.init(2731155)
gmm = ggmm.GMM(K,D)


thresh = 1e-3 # convergence threshold
n_iter = 20 # maximum number of EM iterations
init_params = 'wmc' # initialize weights, means, and covariances

# train GMM
gmm.fit(concatenated_descriptors, thresh, n_iter, init_params=init_params)

# # retrieve parameters from trained GMM
# weights = gmm.get_weights()
# means = gmm.get_means()
# covars = gmm.get_covars()

# # compute posteriors of data
# posteriors = gmm.compute_posteriors(image_descriptors_reduced)


In [None]:
#fv = fisher_vector(image_descriptors_reduced, gmm)

# One Fisher vector per image, computed from that image's reduced descriptors.
fv = list(map(lambda per_image: fisher_vector(per_image, gmm), image_descriptors_reduced))

In [None]:
from scipy import ndimage, misc
import numpy as np
import os
import pandas as pd
from pandas import HDFStore, DataFrame

import h5py

# Open the descriptor file read-only and look up the stored feature vectors.
dataset_dir = '../dataset_h5'
filename = 'descriptors_images_224_delta_1.5.h5'
descriptor_h5f = h5py.File(
    os.path.join(dataset_dir, filename),
    mode='r',
)

# Handle to the dataset; the data itself is only read when it is sliced.
fv = descriptor_h5f['feature_vector']

fv.shape

In [5]:
# Slice everything out of `fv` and display it as the cell output.
fv[:]

array([ -2.79233216e-05,  -8.29181364e-06,   5.64052493e-05, ...,
         1.52099688e-02,   2.96574679e-02,   1.00977729e-02])

In [None]:
from sklearn import svm
def train(gmm, features, labels):
    """Fit a linear SVM on ``features`` / ``labels`` and return it.

    ``gmm`` is accepted for call compatibility but is not used here.
    """
    model = svm.LinearSVC()
    model.fit(features, labels)
    return model

In [None]:
import pandas as pd
from pandas import HDFStore, DataFrame
import numpy as np
# image_with_no_features = []
# index=0
# for image in image_descriptors:
#     if image.shape[0] <= 0:
#         image_with_no_features.append(index)
#     index = index + 1
# Positions of the images for which no descriptors were found.
image_with_no_features =  np.load("image_with_no_features.npy")

# head(1000) has positions 0..999, so only positions strictly below 1000 are
# valid for iloc (the previous `<= 1000` could index one past the end).
image_no_feat_cropped = [i for i in image_with_no_features if i < 1000]

store = HDFStore('../dataset_h5/labels.h5')
ava_table = store['labels']
temp = ava_table.head(1000)
temp = temp.drop(temp.iloc[image_no_feat_cropped].index.values)

# Label rows for the first 10000 images, minus those without descriptors.
# temp2 used to be read here before any cell had defined it.
temp2 = ava_table.head(10000)
temp2 = temp2.drop(temp2.iloc[image_with_no_features].index.values)

labels = temp.good
labels_test = temp2.good

fv = np.load("fisher_vector.npy")

# Fisher vectors exist only for images with descriptors, so the first
# len(labels) rows line up with `labels` (this replaces the hard-coded 993).
n_train = len(labels)

# train() never reads its gmm argument, so the always-failing np.load("")
# was dropped and None is passed instead.
classifier = train(None, fv[:n_train, :], labels)
# `success_rate` is not defined in this notebook; LinearSVC.score returns the
# mean accuracy on the given samples.
rate = classifier.score(fv[:n_train, :], labels)
print("Success rate is", rate)

In [None]:
from sklearn.metrics import accuracy_score

# Label rows for the first 10000 images, minus those without descriptors.
temp2 = ava_table.head(10000)
temp2 = temp2.drop(temp2.iloc[image_with_no_features].index.values)
# Must be assigned before training; it used to be set one line too late.
labels_test = temp2.good

# train() never reads its gmm argument, so None is passed.
classifier = train(None, fv, labels_test)

# Score on the rows matching `labels` (replaces the hard-coded 993).
# NOTE(review): these rows are part of the training data, so this is a
# training accuracy, not a held-out estimate.
accuracy_score(labels, classifier.predict(fv[:len(labels), :]))

In [None]:
# Descriptors of the first image. `cv2.SIFT()` is the OpenCV 2.4 constructor
# and `full_dataset` only exists in commented-out code, so reuse the
# notebook's own extractor on the open image dataset instead.
# NOTE(review): this now holds the descriptor array only, not the
# (keypoints, descriptors) tuple the old call would have returned.
descriptors = extract_image_features(h5f['data'][0])