In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import pairwise_distances

# Query vector (kept 2-D because pairwise_distances expects 2-D inputs)
# and the five candidate points, one row per point.
query = np.array([[1.4, 1.6]])
points = np.array([
    [1.5, 1.7],  # x1
    [2.0, 1.9],  # x2
    [1.6, 1.8],  # x3
    [1.2, 1.5],  # x4
    [1.5, 1.0],  # x5
])
labels = np.array(["x1", "x2", "x3", "x4", "x5"])

# pairwise_distances with metric='cosine' returns cosine *distance* = 1 - cosine_similarity.
# The result has one row per point and one column for the single query, so flatten it
# and convert back to similarity. Both arrays stay in the original x1..x5 order.
cosine_dist = pairwise_distances(points, query, metric='cosine').ravel()
cosine_sim = 1 - cosine_dist

# Most similar first. A stable sort (mergesort) keeps points with exactly equal
# scores in their original x1..x5 order, so the table is deterministic under ties.
df = (
    pd.DataFrame({"Point": labels, "CosineSimilarity": cosine_sim})
    .sort_values("CosineSimilarity", ascending=False, kind="mergesort")
    .reset_index(drop=True)
)
df

Unnamed: 0,Point,CosineSimilarity
0,x1,0.999991
1,x3,0.999969
2,x4,0.999028
3,x2,0.995752
4,x5,0.965363


## Ranking
Higher cosine similarity means more similar.

In [2]:
# Rank by similarity value rather than by row position, so that exactly equal
# scores share the same rank (the notebook states ties get the same rank).
# method="min" is competition ranking: 1, 2, 2, 4, ...; highest similarity = rank 1.
# For all-distinct scores this yields 1..len(df), identical to a positional rank.
df["Rank"] = df["CosineSimilarity"].rank(method="min", ascending=False).astype(int)
df

Unnamed: 0,Point,CosineSimilarity,Rank
0,x1,0.999991,1
1,x3,0.999969,2
2,x4,0.999028,3
3,x2,0.995752,4
4,x5,0.965363,5


## Cross-check using NumPy formula
$\cos(\theta) = \frac{a\cdot b}{\|a\|\,\|b\|}$

In [3]:
def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors ``a`` and ``b``.

    Computes ``(a . b) / (||a|| * ||b||)``.

    Parameters
    ----------
    a, b : array-like
        Vectors of the same shape. NumPy arrays, lists and tuples are accepted.

    Returns
    -------
    float
        The cosine similarity as a built-in ``float``. If either vector has
        zero norm the similarity is mathematically undefined; ``0.0`` is
        returned in that case instead of the NaN (plus RuntimeWarning) that
        a 0/0 division would produce.
    """
    # Coerce so that plain Python sequences support element-wise multiplication.
    a = np.asarray(a)
    b = np.asarray(b)

    num = (a * b).sum()
    den = np.linalg.norm(a) * np.linalg.norm(b)

    # A zero-length vector has no direction: report "no similarity" explicitly.
    if den == 0:
        return 0.0
    return float(num / den)

# Flatten the 2-D query so it pairs element-wise with each 1-D row of `points`.
q = query.ravel()
numpy_sims = [cosine_similarity(q, p) for p in points]

# Make the cross-check explicit instead of relying on eyeballing two tables:
# the hand-computed values must agree with the scikit-learn based `cosine_sim`.
# Both sequences are in the original x1..x5 order, so they compare element-wise.
np.testing.assert_allclose(numpy_sims, cosine_sim)

pd.DataFrame({"Point": labels, "CosineSimilarity_NumPy": numpy_sims}).sort_values("CosineSimilarity_NumPy", ascending=False)

Unnamed: 0,Point,CosineSimilarity_NumPy
0,x1,0.999991
2,x3,0.999969
3,x4,0.999028
1,x2,0.995752
4,x5,0.965363


## Final ordered list (most to least similar)
Ties, if any, can be reported with the same rank.

In [4]:
# Export the ranked table as plain nested lists: one [point, similarity] pair
# per row, in the same most-to-least-similar order as `df`.
ranked_cols = ["Point", "CosineSimilarity"]
ordered = df.loc[:, ranked_cols].to_numpy().tolist()
ordered

[['x1', 0.999991391443956],
 ['x3', 0.9999694838187878],
 ['x4', 0.9990282349375619],
 ['x2', 0.9957522612528876],
 ['x5', 0.9653633930282662]]

**NOTE:** I used ChatGPT for the homework.