3D Augmented Reality Final Project


Local feature compression using autoencoders
---
**University of Padua**<br>
*Master's Degree in ICT for Internet and Multimedia - Cybersystems*<br>
*Student: Amerigo Aloisi*
<br>



In [None]:
# needed imports
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Input, Dense
from keras.models import Model
import os
import cv2 as cv



In [None]:
# Set DOWNLOAD to True to fetch the dataset folder from Google Drive.
# With the default (False) this cell does nothing.
DOWNLOAD = False
if (DOWNLOAD):
    # IPython shell escape (not plain Python): installs the gdown package
    !pip install gdown
    import gdown
    # shared Google Drive folder that is downloaded as a whole
    url = 'https://drive.google.com/drive/u/0/folders/1vRlC1Ih5UszyLFX46r2uNC4nhwM_7nY3'
    gdown.download_folder(url, quiet=False)
    # switch the flag back off once the download call has returned
    DOWNLOAD = False

In [None]:
# function to load the images and extract SIFT features from them

def load_dataset(datapath):
    """Load every ``.jpg`` image in *datapath* and extract its SIFT features.

    Each image is read as grayscale, contrast-enhanced with CLAHE and passed
    to OpenCV's SIFT detector. Every descriptor row is then L2-normalised.

    Args:
        datapath: Directory containing the images.

    Returns:
        A ``(keypoints, descriptors)`` tuple of two lists with one entry per
        image, in the order returned by ``os.listdir``. ``keypoints[i]``
        holds the keypoints detected in image *i* and ``descriptors[i]`` the
        matching descriptor array with one row per keypoint. An image without
        any keypoint contributes an empty ``(0, 128)`` float32 array.

    Raises:
        OSError: If an image file cannot be decoded by ``cv.imread``.
    """
    image_filenames = [
        img for img in os.listdir(datapath)
        if os.path.splitext(img)[1].lower() == ".jpg"
    ]

    # Neither object depends on the current image, so both are built once
    # instead of once per loop iteration.
    sift = cv.SIFT_create()
    clahe = cv.createCLAHE()

    keypoints = []
    descriptors = []

    for image_filename in image_filenames:
        image_path = os.path.join(datapath, image_filename)
        image = cv.imread(image_path, cv.IMREAD_GRAYSCALE)
        if image is None:
            # cv.imread reports failure by returning None. Skipping the file
            # would shift the per-image lists relative to the directory
            # listing used by the other functions, so fail loudly instead.
            raise OSError("Could not read image: {}".format(image_path))

        # apply pre-processing to improve performance
        image = clahe.apply(image)

        kp, des = sift.detectAndCompute(image, None)

        if des is None:
            # No keypoints were detected: keep the image's slot with an
            # empty descriptor array so indices stay aligned.
            des = np.empty((0, 128), dtype=np.float32)
        else:
            # l2 normalization; a zero-norm row is left untouched rather
            # than being turned into NaNs by a division by zero.
            norms = np.linalg.norm(des, axis=1, keepdims=True)
            norms[norms == 0] = 1
            des /= norms

        keypoints.append(kp)
        descriptors.append(des)

    return keypoints, descriptors

# function to save keypoints to txt file respecting the colmap format

def save_keypoints(keypoints, datapath):
    """Write the keypoints of every image to a COLMAP-style text file.

    For the i-th ``.jpg`` file in *datapath* (in ``os.listdir`` order) a file
    named ``<image file name>.txt`` is created in the same directory. Its
    first line is the header ``"<number of keypoints> 128"``; each following
    line describes one keypoint as ``x y size angle`` followed by 128 zeros,
    meaning that no descriptor is provided.

    Args:
        keypoints: One sequence of keypoints per image. Every keypoint must
            expose ``pt`` (an ``(x, y)`` pair), ``size`` and ``angle``.
        datapath: Directory holding the images; the text files are written
            here as well.
    """
    image_filenames = [
        img for img in os.listdir(datapath)
        if os.path.splitext(img)[1].lower() == ".jpg"
    ]

    # placeholder descriptor appended to every keypoint line
    zero_descriptor = ' '.join(['0'] * 128)

    for i, keypoint in enumerate(keypoints):
        filename = os.path.join(datapath, image_filenames[i] + ".txt")

        # colmap header: number of keypoints and descriptor length
        lines = ['{} 128\n'.format(len(keypoint))]

        # Each cv keypoint is serialized as xposition, yposition, size and
        # angle. All lines are produced in one pass so that the number of
        # keypoint lines always equals the count announced in the header.
        for kp in keypoint:
            x, y = kp.pt
            lines.append('{} {} {} {} {}\n'.format(
                x, y, kp.size, kp.angle, zero_descriptor))

        with open(filename, 'w') as f:
            f.writelines(lines)
            
# function to find feature matches between each pair of images and then save them to txt files according to Colmap format

def save_matches(descriptors, keypoints, datapath, output_file):
    """Match descriptors between every pair of images and save the matches.

    For each pair ``(i, j)`` with ``i < j`` the two nearest neighbours in
    image *j* are searched for every descriptor of image *i* using a FLANN
    KD-tree matcher, and matches are filtered with Lowe's ratio test
    (threshold 0.8). The result is written to *output_file* as one section
    per pair: a header line ``"<file i> <file j>"``, one line
    ``"<queryIdx> <trainIdx>"`` per retained match, then an empty line.

    Args:
        descriptors: One 2-D descriptor array per image, in the same order as
            the ``.jpg`` files listed from *datapath*.
        keypoints: Per-image keypoints. Not used at the moment; only the
            disabled geometric verification step below would read them.
        datapath: Directory whose ``.jpg`` file names label the image pairs.
        output_file: Path of the text file to write.
    """
    # FLANN matcher for large dataset
    FLANN_INDEX_KDTREE = 1
    index_params = dict(algorithm=FLANN_INDEX_KDTREE, trees=5)
    search_params = dict(checks=50)   # or pass empty dictionary
    flann = cv.FlannBasedMatcher(index_params, search_params)

    filenames = [
        img for img in os.listdir(datapath)
        if os.path.splitext(img)[1].lower() == ".jpg"
    ]

    with open(output_file, 'w') as f:

        for i in range(len(descriptors)):

            for j in range(i + 1, len(descriptors)):

                header = filenames[i] + ' ' + filenames[j]

                sift1 = descriptors[i]
                sift2 = descriptors[j]

                if len(sift1) == 0 or len(sift2) < 2:
                    # The ratio test needs two neighbours per query, so a
                    # pair like this cannot yield any match; skip the
                    # matcher instead of letting it fail on such input.
                    matches = []
                else:
                    matches = flann.knnMatch(sift1, sift2, k=2)

                print ("Number of matches: ", len(matches))

                # Lowe ratio test
                good_matches = []
                for pair in matches:
                    if len(pair) < 2:
                        # fewer than two neighbours returned for this query
                        continue
                    best, second = pair
                    if best.distance < 0.8 * second.distance:
                        good_matches.append(best)

                print("Matches after filtering: ", len(good_matches))

                # Geometric verification (disabled): filters out some wrong
                # matches but takes much more time. To enable it, uncomment
                # the lines below and save new_matches instead of
                # good_matches.
                #
                # pts1 = []
                # pts2 = []
                # for match in good_matches:
                #     pts1.append(keypoints[i][match.queryIdx].pt)
                #     pts2.append(keypoints[j][match.trainIdx].pt)
                # pts1 = np.int32(pts1)
                # pts2 = np.int32(pts2)
                # F, mask = cv.findFundamentalMat(pts1, pts2, cv.USAC_MAGSAC)
                #
                # # Apply the epipolar constraint to filter out bad matches
                # new_matches = []
                # for k in range(len(good_matches)):
                #     if mask[k] == 1:
                #         new_matches.append(good_matches[k])
                # print("Number of inliers: ", len(new_matches))

                # saving
                f.write(header + '\n')
                f.writelines(
                    '{} {}\n'.format(match.queryIdx, match.trainIdx)
                    for match in good_matches
                )
                f.write('\n')
                


In [None]:
# Training data: SIFT descriptors of two image collections.
# Point the two directory names at your local copies of the datasets.
train_path = "castle"
train_path2 = "Herz-Jesus-P8"

train_kp, train_des = load_dataset(train_path)
train_kp2, train_des2 = load_dataset(train_path2)

# Merge the per-image descriptor arrays of each collection, then stack the
# two collections on top of each other (one descriptor per row).
train_set = np.vstack([np.concatenate(train_des), np.concatenate(train_des2)])
print(train_set.shape)

In [None]:
# Length of one descriptor, i.e. the number of columns of the training set.
input_shape = train_set.shape[1]
input_layer = Input(shape=(input_shape,))

# Encoder layers: the descriptor is narrowed step by step to 64, 32, 16
# and finally 8 values (the bottleneck).
encoder_l1 = Dense(64, activation='relu')(input_layer)
encoder_l2 = Dense(32, activation='relu')(encoder_l1)
encoder_l3 = Dense(16, activation='relu')(encoder_l2)
encoder_l4 = Dense(8, activation='relu')(encoder_l3)

# Encoder model. Its output has to be the bottleneck layer (encoder_l4):
# that is the code the decoder below reconstructs from, so it is the
# compressed descriptor. Stopping at encoder_l3 would return a 16-value
# intermediate activation instead.
encoder = Model(inputs=input_layer, outputs=encoder_l4, name='encoder')
encoder.summary()

# Decoder layers: mirror of the encoder, widening the code back to the
# original descriptor length.
decoder_l1 = Dense(16, activation='relu')(encoder_l4)
decoder_l2 = Dense(32, activation='relu')(decoder_l1)
decoder_l3 = Dense(64, activation='relu')(decoder_l2)
output_layer = Dense(input_shape, activation='relu')(decoder_l3)

# Autoencoder model: encoder and decoder chained, trained with the mean
# squared error between input and output.
autoencoder = Model(input_layer, output_layer, name='autoencoder')
autoencoder.summary()
autoencoder.compile(loss='mean_squared_error', optimizer='adam')

In [None]:
# Fit the autoencoder on the SIFT training descriptors; they serve as both
# the input and the reconstruction target.
history = autoencoder.fit(
    x=train_set,
    y=train_set,
    batch_size=256,
    epochs=10,
)


In [None]:
# Test collections; point the two directory names at your local copies.
test_path = "fountain"
test_path2 = "santo"

# First collection: extract the SIFT features, then export its keypoints.
test_kp, test_des = load_dataset(datapath=test_path)
save_keypoints(keypoints=test_kp, datapath=test_path)

# Second collection: same two steps.
test_kp2, test_des2 = load_dataset(datapath=test_path2)
save_keypoints(keypoints=test_kp2, datapath=test_path2)



In [None]:
# Compressed descriptors: run every image's descriptor array through the
# encoder, keeping one result per image.
compressed = [encoder.predict(image_des) for image_des in test_des]
compressed2 = [encoder.predict(image_des) for image_des in test_des2]

# Reconstructed descriptors: run the same arrays through the whole
# autoencoder, again one result per image.
reconstructed = [autoencoder.predict(image_des) for image_des in test_des]
reconstructed2 = [autoencoder.predict(image_des) for image_des in test_des2]
    


In [None]:
# Match the first test set three times: with the original, the reconstructed
# and the compressed descriptors.
save_matches(descriptors=test_des, keypoints=test_kp,
             datapath=test_path, output_file="original_fountain.txt")
save_matches(descriptors=reconstructed, keypoints=test_kp,
             datapath=test_path, output_file="decoded_fountain.txt")
save_matches(descriptors=compressed, keypoints=test_kp,
             datapath=test_path, output_file="comp_fountain.txt")

In [None]:
# Match the second test set three times: with the original, the
# reconstructed and the compressed descriptors.
save_matches(descriptors=test_des2, keypoints=test_kp2,
             datapath=test_path2, output_file="original_santo.txt")
save_matches(descriptors=reconstructed2, keypoints=test_kp2,
             datapath=test_path2, output_file="decoded_santo.txt")
save_matches(descriptors=compressed2, keypoints=test_kp2,
             datapath=test_path2, output_file="comp_santo.txt")

In [None]:
# Reconstruction error: mean squared difference between original and
# reconstructed descriptors for each image, then averaged over the images.
ssd = [
    np.mean(np.square(original - decoded))
    for original, decoded in zip(test_des, reconstructed)
]
mse = np.mean(ssd)
print ("Reconstruction error for first test set", mse)

# Same computation for the second test set.
ssd2 = [
    np.mean(np.square(original - decoded))
    for original, decoded in zip(test_des2, reconstructed2)
]
mse2 = np.mean(ssd2)
print ("Reconstruction error for second test set", mse2)

