In [1]:
import h5py

# Open the .mat file in read mode
mat_file = h5py.File('queryDataset.mat', 'r')
mat_file2 = h5py.File('wholeDataset.mat', 'r')

In [2]:
Q = mat_file['queryDataset']['pos']
D = mat_file2['wholeDataset']['pos']

In [3]:
import numpy as np
Qnp = np.array(Q)
Dnp = np.array(D)

In [4]:
Qnp.shape

(2463, 93)

In [5]:
Dnp.shape

(378694, 93)

In [6]:
query = Qnp
dataset = Dnp
print(query.shape)
print(dataset.shape)

(2463, 93)
(378694, 93)


In [7]:
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics import mean_squared_error
import time

## DTW Algorithm

In [8]:
pip install fastdtw

Collecting fastdtwNote: you may need to restart the kernel to use updated packages.





  Downloading fastdtw-0.3.4.tar.gz (133 kB)
Building wheels for collected packages: fastdtw
  Building wheel for fastdtw (setup.py): started
  Building wheel for fastdtw (setup.py): finished with status 'done'
  Created wheel for fastdtw: filename=fastdtw-0.3.4-py3-none-any.whl size=3566 sha256=6d41cd1469ce34ba0e7a1a4856915f590e14b4857c5c5c14d77bb6369741ceb2
  Stored in directory: c:\users\bilal\appdata\local\pip\cache\wheels\1f\a1\63\bfd0fddb5bf0b59f564872e29272cee8a2de0cd745d88fede5
Successfully built fastdtw
Installing collected packages: fastdtw
Successfully installed fastdtw-0.3.4


In [10]:
import numpy as np
from fastdtw import fastdtw
import time

def dtw_distance(sequence1, sequence2):
    """
    Calculate the DTW distance between two sequences.
    """
    distance, _ = fastdtw(sequence1, sequence2)
    return distance

def calculate_dtw_for_all_rows(query_matrix, data_matrix):
    """
    Calculate DTW distance for each row in the query matrix with the corresponding rows in the data matrix.
    """
    num_queries = query_matrix.shape[0]
    num_data_points = data_matrix.shape[0]
    
    dtw_distances = np.zeros((num_queries, num_data_points))
    execution_times = np.zeros(num_queries)
    
#     for i in range(num_queries):
    start_time = time.time()
        
    for j in range(num_data_points):
        dtw_distances[0, j] = dtw_distance(query_matrix[0], data_matrix[j])

        end_time = time.time()
        execution_times[0] = end_time - start_time

    return dtw_distances, execution_times


dtw_distances, execution_times = calculate_dtw_for_all_rows(query, dataset)

# Print DTW distances and execution times
print("DTW Distances:")
print(dtw_distances)
print("\nExecution Times:")
print(execution_times)


MemoryError: Unable to allocate 6.95 GiB for an array with shape (2463, 378694) and data type float64

## Locality-Sensitive Hashing (LSH)

# KD TREE

In [12]:
import numpy as np
from sklearn.neighbors import KDTree
from sklearn.metrics import mean_squared_error
import time

def knn_search_and_evaluate(query, dataset, K):
    kdtree = KDTree(dataset)
    
    # Perform K-nearest neighbor search for the query matrix
    start_time = time.time()
    distances, indices = kdtree.query(query, k=K)
    retrieval_time = time.time() - start_time
    print(indices.shape)
    
    # Initialize variables to store evaluation metrics
    mse_list = []
    mpjse_list = []
    pck_list = []
    
    # Calculate evaluation metrics for each query pose
    for i in range(len(query)):
        # Get the K-nearest neighbors from the dataset
        nearest_neighbors = dataset[indices[i]]
        
        # Calculate Mean Squared Error (MSE)
#         mse = mean_squared_error(query[i], nearest_neighbors)
#         mse_list.append(mse)
        
        # Calculate Mean Per Joint Squared Error (MPJSE)
        mpjse = np.mean(np.linalg.norm(query[i] - nearest_neighbors))
        mpjse_list.append(mpjse)
        
        # Compute PCK (Percentage of Correct Keypoints)
        threshold = 30  # Set a threshold for correctness
        correct_keypoints = np.linalg.norm(query[i] - nearest_neighbors, axis=1) < threshold
        pck = np.sum(correct_keypoints) / len(correct_keypoints)
        pck_list.append(pck)
        
    # Calculate the mean of each evaluation metric
#     mean_mse = np.mean(mse_list)
    mean_mpjse = np.mean(mpjse_list)
    mean_pck = np.mean(pck_list)
    
#     print(f"Mean Squared Error (MSE): {mean_mse}")
    print(f"Mean Per Joint Squared Error (MPJSE): {mean_mpjse}")
    print(f"Percentage of Correct Keypoints (PCK): {mean_pck}")
    print(f"Retrieval Time: {retrieval_time} seconds")
    
    return mean_mpjse, mean_pck

In [13]:
k = 2
while k != 1024:
    print("K value is: ",k)
    knn_search_and_evaluate(query, dataset, k)
    k = k*2

K value is:  2
(2463, 2)
Mean Per Joint Squared Error (MPJSE): 24.646671295166016
Percentage of Correct Keypoints (PCK): 0.9587900933820545
Retrieval Time: 27.14136505126953 seconds
K value is:  4
(2463, 4)
Mean Per Joint Squared Error (MPJSE): 36.32893371582031
Percentage of Correct Keypoints (PCK): 0.9470158343483557
Retrieval Time: 25.554195165634155 seconds
K value is:  8
(2463, 8)
Mean Per Joint Squared Error (MPJSE): 54.428688049316406
Percentage of Correct Keypoints (PCK): 0.9251928542427933
Retrieval Time: 25.1305148601532 seconds
K value is:  16
(2463, 16)
Mean Per Joint Squared Error (MPJSE): 82.76153564453125
Percentage of Correct Keypoints (PCK): 0.8925852618757613
Retrieval Time: 29.495342254638672 seconds
K value is:  32
(2463, 32)
Mean Per Joint Squared Error (MPJSE): 127.25959014892578
Percentage of Correct Keypoints (PCK): 0.8532785221274868
Retrieval Time: 33.92673444747925 seconds
K value is:  64
(2463, 64)
Mean Per Joint Squared Error (MPJSE): 196.8567352294922
Perc

## KNN With diff metrics 

In [96]:
from sklearn.neighbors import NearestNeighbors, BallTree
from sklearn.metrics import mean_squared_error
import numpy as np
import time

def knn_search_and_evaluate(query, dataset, K, method='knn', metric='euclidean'):
    if method == 'knn':
        knn = NearestNeighbors(n_neighbors=K, algorithm='auto', metric=metric)
        knn.fit(dataset)
        
    elif method == 'range':
        knn = BallTree(dataset, metric=metric)
    
    # Fit the model with the dataset
    
    # Perform K-nearest neighbor search or range search for the query matrix
    start_time = time.time()
    if method == 'knn':
        distances, indices = knn.kneighbors(query)
    elif method == 'range':
        indices = knn.query_radius(query, r=55.0)
    retrieval_time = time.time() - start_time
    
    # Initialize variables to store evaluation metrics
#     mse_list = []
    mpjse_list = []
    pck_list = []
    
    # Calculate evaluation metrics for each query pose
    for i in range(len(query)):
        if method == 'knn':
            # Get the K-nearest neighbors from the dataset
            nearest_neighbors = dataset[indices[i]]
        elif method == 'range':
            # Get the neighbors within the specified radius
            nearest_neighbors = dataset[indices[i]]
        
        # Calculate Mean Squared Error (MSE)
#         mse = mean_squared_error(query[i], nearest_neighbors)
#         mse_list.append(mse)
        
        # Calculate Mean Per Joint Squared Error (MPJSE)
        mpjse = np.mean(np.linalg.norm(query[i] - nearest_neighbors, axis=1))
        mpjse_list.append(mpjse)
        
        # Compute PCK (Percentage of Correct Keypoints)
        threshold = 30  # Set a threshold for correctness
        correct_keypoints = np.linalg.norm(query[i] - nearest_neighbors, axis=1) < threshold
        pck = np.sum(correct_keypoints) / len(correct_keypoints)
        pck_list.append(pck)
        
    # Calculate the mean of each evaluation metric
#     mean_mse = np.mean(mse_list)
    mean_mpjse = np.mean(mpjse_list)
    mean_pck = np.mean(pck_list)
    
    print(f"Method: {method}")
    print(f"Metric: {metric}")
#     print(f"Mean Squared Error (MSE): {mean_mse}")
    print(f"Mean Per Joint Squared Error (MPJSE): {mean_mpjse}")
    print(f"Percentage of Correct Keypoints (PCK): {mean_pck}")
    print(f"Retrieval Time: {retrieval_time} seconds")

# Example usage:
# knn_search_and_evaluate(query, dataset, 32, method='knn', metric='euclidean')
knn_search_and_evaluate(query, dataset, 32, method='range', metric='euclidean')
# You can call this function with different methods and metrics as needed.


Method: range
Metric: euclidean
Mean Per Joint Squared Error (MPJSE): 47.147491455078125
Percentage of Correct Keypoints (PCK): 0.031628115229951174
Retrieval Time: 127.56776309013367 seconds


In [98]:
knn_search_and_evaluate(query, dataset, 32, method='knn', metric='euclidean')
print("-------------------------------------------------------------------------\n")
knn_search_and_evaluate(query, dataset, 32, method='knn', metric='cosine')
# print("-------------------------------------------------------------------------\n")
# knn_search_and_evaluate(query, dataset, 32, method='knn', metric='jaccard')
print("-------------------------------------------------------------------------\n")
knn_search_and_evaluate(query, dataset, 32, method='knn', metric='hamming')


Method: knn
Metric: euclidean
Mean Per Joint Squared Error (MPJSE): 22.36042594909668
Percentage of Correct Keypoints (PCK): 0.8532785221274868
Retrieval Time: 28.141523122787476 seconds
-------------------------------------------------------------------------

Method: knn
Metric: cosine
Mean Per Joint Squared Error (MPJSE): 22.85877227783203
Percentage of Correct Keypoints (PCK): 0.8461987413723102
Retrieval Time: 16.68146252632141 seconds
-------------------------------------------------------------------------

Method: knn
Metric: hamming
Mean Per Joint Squared Error (MPJSE): 95.14612579345703
Percentage of Correct Keypoints (PCK): 0.0010530856678846936
Retrieval Time: 136.45128917694092 seconds


In [99]:
k = 2
while k != 1024:
    print("K value is: ",k)
    knn_search_and_evaluate(query, dataset, k, method='range', metric='euclidean')
    k = k*2
    print("---------------------------------------------------------------------\n")


K value is:  2
Method: range
Metric: euclidean
Mean Per Joint Squared Error (MPJSE): 47.147491455078125
Percentage of Correct Keypoints (PCK): 0.031628115229951174
Retrieval Time: 115.4296522140503 seconds
---------------------------------------------------------------------

K value is:  4
Method: range
Metric: euclidean
Mean Per Joint Squared Error (MPJSE): 47.147491455078125
Percentage of Correct Keypoints (PCK): 0.031628115229951174
Retrieval Time: 150.9068329334259 seconds
---------------------------------------------------------------------

K value is:  8
Method: range
Metric: euclidean
Mean Per Joint Squared Error (MPJSE): 47.147491455078125
Percentage of Correct Keypoints (PCK): 0.031628115229951174
Retrieval Time: 127.372403383255 seconds
---------------------------------------------------------------------

K value is:  16
Method: range
Metric: euclidean
Mean Per Joint Squared Error (MPJSE): 47.147491455078125
Percentage of Correct Keypoints (PCK): 0.031628115229951174
Retri

# ANNOY Algorithm


In [8]:
!pip install --use-pep517
!pip install annoy

ERROR: You must give at least one requirement to install (see "pip help install")


Collecting annoy
  Using cached annoy-1.17.3.tar.gz (647 kB)
Building wheels for collected packages: annoy
  Building wheel for annoy (setup.py): started
  Building wheel for annoy (setup.py): finished with status 'error'
  Running setup.py clean for annoy
Failed to build annoy
Installing collected packages: annoy
    Running setup.py install for annoy: started
    Running setup.py install for annoy: finished with status 'error'


  ERROR: Command errored out with exit status 1:
   command: 'D:\Anaconda\python.exe' -u -c 'import io, os, sys, setuptools, tokenize; sys.argv[0] = '"'"'C:\\Users\\Bilal\\AppData\\Local\\Temp\\pip-install-2_h7wgxj\\annoy_d33b71afb56c4f5d94941c2267c77b59\\setup.py'"'"'; __file__='"'"'C:\\Users\\Bilal\\AppData\\Local\\Temp\\pip-install-2_h7wgxj\\annoy_d33b71afb56c4f5d94941c2267c77b59\\setup.py'"'"';f = getattr(tokenize, '"'"'open'"'"', open)(__file__) if os.path.exists(__file__) else io.StringIO('"'"'from setuptools import setup; setup()'"'"');code = f.read().replace('"'"'\r\n'"'"', '"'"'\n'"'"');f.close();exec(compile(code, __file__, '"'"'exec'"'"'))' bdist_wheel -d 'C:\Users\Bilal\AppData\Local\Temp\pip-wheel-l0t4rp9x'
       cwd: C:\Users\Bilal\AppData\Local\Temp\pip-install-2_h7wgxj\annoy_d33b71afb56c4f5d94941c2267c77b59\
  Complete output (29 lines):
  D:\Anaconda\lib\site-packages\setuptools\__init__.py:80: _DeprecatedInstaller: setuptools.installer and fetch_build_eggs are deprec

In [10]:
import numpy as np
import time
import annoy
from annoy import AnnoyIndex

def annoy_search_and_evaluate(query, dataset, K, metric='euclidean'):
    # Build AnnoyIndex for both KNN and range search
    num_trees = 10  # You can adjust the number of trees for performance vs. accuracy trade-off
    annoy_index = AnnoyIndex(dataset.shape[1], metric=metric)
    for i, vec in enumerate(dataset):
        annoy_index.add_item(i, vec)
    annoy_index.build(num_trees)
    
    # Perform K-nearest neighbor search for the query matrix
    start_time = time.time()
    indices_knn = [annoy_index.get_nns_by_vector(vec, K, search_k=-1, include_distances=False) for vec in query]
    knn_retrieval_time = time.time() - start_time
    
    # Perform range search for the query matrix
    start_time = time.time()
    indices_range = [annoy_index.get_nns_by_vector(vec, K, search_k=-1, include_distances=False) for vec in query]
    range_retrieval_time = time.time() - start_time
    
    # Initialize variables to store evaluation metrics
    mpjse_list_knn = []
    mpjse_list_range = []
    pck_list_knn = []
    pck_list_range = []
    
    # Calculate evaluation metrics for each query pose for KNN search
    for i in range(len(query)):
        nearest_neighbors_knn = dataset[indices_knn[i]]
        
        # Calculate Mean Per Joint Squared Error (MPJSE)
        mpjse_knn = np.mean(np.linalg.norm(query[i] - nearest_neighbors_knn, axis=1))
        mpjse_list_knn.append(mpjse_knn)
        
        # Compute PCK (Percentage of Correct Keypoints)
        threshold = 30  # Set a threshold for correctness
        correct_keypoints_knn = np.linalg.norm(query[i] - nearest_neighbors_knn, axis=1) < threshold
        pck_knn = np.sum(correct_keypoints_knn) / len(correct_keypoints_knn)
        pck_list_knn.append(pck_knn)
    
    # Calculate evaluation metrics for each query pose for range search
    for i in range(len(query)):
        nearest_neighbors_range = dataset[indices_range[i]]
        
        # Calculate Mean Per Joint Squared Error (MPJSE)
        mpjse_range = np.mean(np.linalg.norm(query[i] - nearest_neighbors_range, axis=1))
        mpjse_list_range.append(mpjse_range)
        
        # Compute PCK (Percentage of Correct Keypoints)
        correct_keypoints_range = np.linalg.norm(query[i] - nearest_neighbors_range, axis=1) < threshold
        pck_range = np.sum(correct_keypoints_range) / len(correct_keypoints_range)
        pck_list_range.append(pck_range)
    
    # Calculate the mean of each evaluation metric for KNN search
    mean_mpjse_knn = np.mean(mpjse_list_knn)
    mean_pck_knn = np.mean(pck_list_knn)
    
    # Calculate the mean of each evaluation metric for range search
    mean_mpjse_range = np.mean(mpjse_list_range)
    mean_pck_range = np.mean(pck_list_range)
    
    print("KNN Search:")
    print(f"Metric: {metric}")
    print(f"Mean Per Joint Squared Error (MPJSE): {mean_mpjse_knn}")
    print(f"Percentage of Correct Keypoints (PCK): {mean_pck_knn}")
    print(f"Retrieval Time: {knn_retrieval_time} seconds\n")
    
    print("Range Search:")
    print(f"Metric: {metric}")
    print(f"Mean Per Joint Squared Error (MPJSE): {mean_mpjse_range}")
    print(f"Percentage of Correct Keypoints (PCK): {mean_pck_range}")
    print(f"Retrieval Time: {range_retrieval_time} seconds")

# Example usage:
annoy_search_and_evaluate(query, dataset, 32, metric='euclidean')
# You can call this function with different metrics as needed.


ModuleNotFoundError: No module named 'annoy.annoylib'