Commit: Cleaned up code
Mattymar committed Aug 1, 2017
1 parent c93ffec commit 808e4fe
Showing 3 changed files with 65 additions and 28 deletions.
61 changes: 37 additions & 24 deletions classify.py
@@ -14,11 +14,10 @@


def histogram_intersection(M, N):
"""See histogram intersection kernel for image classification"""

m = M.shape[0]
print(m)
n = N.shape[0]
print(n)
print(M.T.shape)

result = np.zeros((m,n))
for i in range(m):
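
Note: the double loop above does O(m*n) iterations in Python. A broadcast NumPy version (a sketch, not this repo's code; same result, but it materializes an (m, n, d) intermediate, so it trades memory for speed):

    import numpy as np

    def histogram_intersection_fast(M, N):
        # (m, n) kernel matrix: sum of elementwise minima for every row pair.
        return np.minimum(M[:, None, :], N[None, :, :]).sum(axis=2)
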
@@ -32,42 +31,50 @@ def histogram_intersection(M, N):


def classify_hi(train_df, test_df):
"""Classification using the histogram intersection kernel with SVC"""

# Load the dataframes with the feature vectors
print('loading pickles')
train_df = sift.pickle_load(train_df)
test_df = sift.pickle_load(test_df)

# Initiate the histogram intersection kernel
print('running kernel...')
-    matrix = histogram_intersection(train_df.T.iloc[:, : -1], train_df.T.iloc[:, : -1])
+    matrix = histogram_intersection(train_df.T.iloc[:10, : -8300], train_df.T.iloc[:10, : -8300])

# Fit SVC classifier using the kernel computed above
print('fitting svc...')
clf = SVC(kernel='precomputed')
-    clf.fit(matrix, train_df.T.ix[:, -1])
+    clf.fit(matrix, train_df.T.iloc[:10, -1])

# Run the intersection kernel to prepare the test images
print('predict matrix...')
-    predict_matrix = histogram_intersection(test_df.T.iloc[:, : -1], train_df.T.iloc[:, : -1])
+    predict_matrix = histogram_intersection(test_df.T.iloc[:5, : -8300], train_df.T.iloc[:10, : -8300])

# Predict the class for the test images using the predict matrix computed above
print('predicting results...')
SVMResults = clf.predict(predict_matrix)

# Calculate the accuracy
print('calculating')
-    correct = sum(1.0 * (SVMResults == test_df.T['y']))
-    accuracy = correct / len(test_df.T['y'])
+    correct = sum(1.0 * (SVMResults == test_df.T.iloc[:5, -1]))
+    accuracy = correct / len(test_df.T.iloc[:5, -1])
print("SVM (Histogram Intersection): " + str(accuracy) + " (" + str(int(correct)) + "/" + str(len(test_df.T['y'])) + ")")

-    #print('score: ' + str(clf.score(test_df.T.ix[:, test_df.T.columns != 'y'], test_df.T['y'])))
-    # results = clf.predict(predict_matrix)
-    # correct = sum(1.0 * (results == test_df.T['y']))
-    # accuracy = correct / len(test_df.T['y'])
-
-    # cnf_matrix = confusion_matrix(test_df.T['y'], clf.predict(test_df.T.ix[:, test_df.T.columns != 'y']))
-    # np.set_printoptions(precision=2)
-    #
-    # plt.figure()
-    # plot_confusion_matrix(cnf_matrix, classes=['ant', 'bee', 'butterfly', 'centipede', 'dragonfly', 'ladybug', 'tick', 'beetle',
-    #                       'termite', 'worm'],
-    #                       normalize=True,
-    #                       title='Normalized confusion matrix')
-    #
-    # plt.show()
+    # Plot a confusion matrix of the results
+    cnf_matrix = confusion_matrix(test_df.T['y'], clf.predict(test_df.T.ix[:, test_df.T.columns != 'y']))
+    np.set_printoptions(precision=2)
+    plt.figure()
+    plot_confusion_matrix(cnf_matrix, classes=['ant', 'bee', 'butterfly', 'centipede', 'dragonfly', 'ladybug', 'tick', 'beetle', 'termite', 'worm'],
+                          normalize=True,
+                          title='Normalized confusion matrix')
+    plt.show()
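
Note: this is scikit-learn's standard precomputed-kernel flow: fit on the square train-vs-train Gram matrix, predict on a test-vs-train matrix. A toy end-to-end run reusing histogram_intersection from above (data and names are illustrative only):

    import numpy as np
    from sklearn.svm import SVC

    rng = np.random.default_rng(0)
    X_train, y_train = rng.random((20, 8)), rng.integers(0, 2, 20)
    X_test = rng.random((5, 8))

    K_train = histogram_intersection(X_train, X_train)  # (20, 20) Gram matrix
    clf = SVC(kernel='precomputed').fit(K_train, y_train)
    K_test = histogram_intersection(X_test, X_train)    # rows: test, cols: train
    y_pred = clf.predict(K_test)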



def classify_images_svc(train_df, test_df):
"""Image classification using SVC with grid search"""

param_grid = {'C': [1.0, 5.0],
'degree': [2, 3],
'kernel': ['rbf']}
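
The rest of this function is collapsed in this view. Based on the docstring and the param_grid above, a plausible sketch of the wiring (an assumption, not the file's actual body); note that 'degree' only affects the 'poly' kernel, so with kernel=['rbf'] that grid axis is a no-op:

    from sklearn.model_selection import GridSearchCV
    from sklearn.svm import SVC

    # Hypothetical reconstruction: search the grid, then score held-out data.
    clf = GridSearchCV(SVC(), param_grid=param_grid)
    clf.fit(X_train, y_train)   # X_train/y_train assumed split out as elsewhere
    print(clf.best_params_, clf.score(X_test, y_test))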
@@ -96,16 +103,20 @@ def classify_images_svc(train_df, test_df):


def classify_images_rf(train_df, test_df):
"""Image classification using Random Forest"""

clf = RandomForestClassifier()
with open(train_df, 'rb') as train:
train_df = pickle.load(train)
with open(test_df, 'rb') as test:
test_df = pickle.load(test)
clf.fit(train_df.T.ix[:, train_df.T.columns != 'y'], train_df.T['y'])
print(clf.score(test_df.T.ix[:, test_df.T.columns != 'y'], test_df.T['y']))
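
Note: DataFrame.ix was deprecated in pandas 0.20 and removed in 1.0. The selections used throughout these functions translate directly to .loc:

    # Modern equivalent of the .ix feature/label split used above:
    X_train = train_df.T.loc[:, train_df.T.columns != 'y']
    y_train = train_df.T['y']
    clf.fit(X_train, y_train)
    print(clf.score(test_df.T.loc[:, test_df.T.columns != 'y'], test_df.T['y']))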


def classify_images_one_v_all(train_df, test_df):
"""One vs. All linear SVC with Grid Search for image classification"""

param_grid = {'C': [0.1, 0.5, 1.0, 5., 10.]}
clf = OneVsRestClassifier(GridSearchCV(LinearSVC(), param_grid=param_grid))
with open(train_df, 'rb') as train:
@@ -132,6 +143,8 @@ def classify_images_one_v_all(train_df, test_df):


def classify_images_sgd(train_df, test_df):
"""Image classification using Stochastic Gradient Descent classifier"""

clf = SGDClassifier()
with open(train_df, 'rb') as train:
train_df = pickle.load(train)
19 changes: 15 additions & 4 deletions codebook.py
@@ -7,15 +7,18 @@


def build_codebook(all_features_array):
"""Build the feature space for all images in the training set"""
all_features_array = sift.pickle_load(all_features_array)
nfeatures = all_features_array.shape[0]
-    nclusters = 200#int(np.sqrt(nfeatures) * 2)
+    nclusters = int(np.sqrt(nfeatures) * 2)
codebook = MiniBatchKMeans(n_clusters=nclusters, max_iter=500).fit(all_features_array.astype('float64'))

return codebook
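
Note: at roughly this project's scale, the restored heuristic reproduces the constant that was hard-coded before; for example:

    import numpy as np
    nfeatures = 10_000                 # illustrative descriptor count, an assumption
    k = int(np.sqrt(nfeatures) * 2)    # -> 200, the value removed above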


def build_codebook_spm(all_features_dict):
"""Build feature space for each spatial level in all images of training set"""

with open(all_features_dict, 'rb') as all_features_dict:
all_features_dict = pickle.load(all_features_dict)
levels = ['l0', 'l1', 'l2']
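
The per-level loop is collapsed here. Fitting one codebook per pyramid level would look roughly like this (an assumption from the docstring, not the file's actual body; it reuses the module's MiniBatchKMeans import):

    # Hypothetical: all_features_dict maps 'l0'/'l1'/'l2' to stacked descriptor arrays.
    codebooks = {}
    for level in levels:
        feats = all_features_dict[level].astype('float64')
        k = int(np.sqrt(feats.shape[0]) * 2)
        codebooks[level] = MiniBatchKMeans(n_clusters=k, max_iter=500).fit(feats)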
@@ -29,6 +32,8 @@ def build_codebook_spm(all_features_dict):


def build_image_histograms(codebook, train_img_dicts_list, test_img_dicts_list):
"""Build the feature vector for each image"""

print(codebook.n_clusters)
s = list(range(codebook.n_clusters))
s.append('y')
@@ -38,10 +43,12 @@ def build_image_histograms(codebook, train_img_dicts_list, test_img_dicts_list):
train_img_dicts_list = sift.pickle_load(train_img_dicts_list)
for i in range(len(train_img_dicts_list)):
try:
# Match each descriptor vector to a cluster in the codebook
codewords = [codebook.predict(desc.reshape(1, -1))[0] for desc in train_img_dicts_list[i]['descriptors']]
except:
print(i)
-        #print(min(codewords), max(codewords))

        # Build a histogram of the number of occurrences of each of the k features for each image
histogram, clusters = np.histogram(codewords, bins=range(0, codebook.n_clusters + 1), density=False)
data = list(histogram)
data.append(train_img_dicts_list[i]['label'])
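
Note: MiniBatchKMeans.predict accepts a 2-D array, so the per-descriptor comprehension above can be collapsed into one batched call (a sketch, assuming each image's descriptors stack into an (n_desc, 128) array):

    descs = np.asarray(train_img_dicts_list[i]['descriptors'], dtype='float64')
    codewords = codebook.predict(descs)   # (n_desc,) cluster ids in one call
    histogram, _ = np.histogram(codewords, bins=range(codebook.n_clusters + 1))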
@@ -67,6 +74,8 @@ def build_image_histograms(codebook, train_img_dicts_list, test_img_dicts_list):


def build_image_histograms_spm(levels_codebook, train_img_dicts_list, test_img_dicts_list):
"""Build a feature vector for each spatial level of an image"""

s = list(range(codebook.n_clusters))
s.append('y')
train_df = pd.DataFrame(index=s)
@@ -75,10 +84,12 @@ def build_image_histograms_spm(levels_codebook, train_img_dicts_list, test_img_dicts_list):
train_img_dicts_list = sift.pickle_load(train_img_dicts_list)
for i in range(len(train_img_dicts_list)):
try:
# Match each descriptor vector to a cluster in the codebook
codewords = [codebook.predict(desc.reshape(1, -1))[0] for desc in train_img_dicts_list[i]['descriptors']]
except:
print(i)
-        #print(min(codewords), max(codewords))

        # Build a histogram of the number of occurrences of each of the k features for each image
histogram, clusters = np.histogram(codewords, bins=range(0, codebook.n_clusters + 1), density=False)
data = list(histogram)
data.append(train_img_dicts_list[i]['label'])
@@ -103,4 +114,4 @@ def build_image_histograms_spm(levels_codebook, train_img_dicts_list, test_img_dicts_list):
with open('test_df_4000_8cx.pkl', 'wb') as tedf:
pickle.dump(test_df, tedf)

-    return train_df, test_df
\ No newline at end of file
+    return train_df, test_df
13 changes: 13 additions & 0 deletions sift.py
@@ -33,6 +33,8 @@ def compute(self, image, kps, eps=1e-7):


class MacOSFile(object):
"""Used to work around memory issues for large pickled files on MacOS"""

def __init__(self, f):
self.f = f
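
Note: the wrapper exists because of a known CPython limitation on macOS, where a single file read or write of more than about 2 GiB can fail (CPython issue bpo-24658); the fix is to chunk the I/O. A minimal sketch of the idea (the 1 GiB chunk size is an assumption):

    class ChunkedFile:
        """Sketch: cap each raw write at 1 GiB so no single call overflows."""
        CHUNK = 2 ** 30

        def __init__(self, f):
            self.f = f

        def write(self, buffer):
            for idx in range(0, len(buffer), self.CHUNK):
                self.f.write(buffer[idx:idx + self.CHUNK])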

@@ -76,6 +78,8 @@ def pickle_load(file_path):


def get_root_sift_features(labeled_paths):
"""Get feature descriptors for each image"""

rs = RootSIFT()
detector = cv2.xfeatures2d.SIFT_create()
train_descs_list = []
@@ -88,11 +92,14 @@ def get_root_sift_features(labeled_paths):
img = cv2.imread(path)
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Detect SIFT keypoints
kps = detector.detect(gray, None)

# Detect MSER keypoints
mser = cv2.MSER_create()
mser_kps = mser.detect(gray)

# Calculate descriptors for SIFT and MSER keypoints and stack them
kps, sift_descs = rs.compute(gray, kps)
kps, mser_descs = rs.compute(gray, mser_kps)
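
Note: RootSIFT (Arandjelovic and Zisserman, 2012) is plain SIFT followed by L1 normalization and an elementwise square root, which makes Euclidean distances between descriptors behave like the Hellinger kernel. The transform itself, as a sketch:

    import numpy as np

    def root_sift(descs, eps=1e-7):
        # L1-normalize each descriptor row, then take the elementwise square root.
        descs = descs / (descs.sum(axis=1, keepdims=True) + eps)
        return np.sqrt(descs)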

@@ -112,8 +119,11 @@ def get_root_sift_features(labeled_paths):
print(sift_descs.shape, mser_descs.shape)

except:
# Skip and note any failed images
print(path + ' Could not be converted')

# Repeat above process for test images
# TODO: Add function to eliminate repeated code
for path, label in test_paths:
try:
img = cv2.imread(path)
@@ -139,6 +149,7 @@ def get_root_sift_features(labeled_paths):
print(path + ' Could not be converted')
train_descs_array = np.vstack(train_descs_list)

# Store feature vectors as pickled files
pickle_dump(train_descs_array, 'all_descriptors_full_10c.pkl')
pickle_dump(train_img_dicts_list, 'train_img_dicts_list_full_10c.pkl')
pickle_dump(test_img_dicts_list, 'test_img_dicts_list_full_10c.pkl')
@@ -161,6 +172,8 @@ def get_image_labels_and_paths(images_path):


def spm_split(image):
"""Splits image into grids of various sizes for spatial tests"""

h = image.shape[0]
w = image.shape[1]
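
Note: in spatial pyramid matching (the l0/l1/l2 levels used in codebook.py), level l divides the image into a 2**l x 2**l grid. A sketch of the level-1 split, assuming that layout:

    import numpy as np

    # Level 1: a 2x2 grid (level 2 repeats the split to get 4x4 cells).
    rows = np.array_split(image, 2, axis=0)
    cells = [cell for r in rows for cell in np.array_split(r, 2, axis=1)]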

