In [30]:
import pandas as pd
import numpy as np
from typing import Dict, Tuple, List
from itertools import combinations

In [9]:
# Toy fixture: item_id -> embedding vector (five items, ten components each).
embeddings = {
    1: np.array([-26.57, -76.61, 81.61, -9.11, 74.8, 54.23, 32.56, -22.62, -72.44, -82.78]),
    2: np.array([-55.98, 82.87, 86.07, 18.71, -18.66, -46.74, -68.18, 60.29, 98.92, -78.95]),
    3: np.array([-27.97, 25.39, -96.85, 3.51, 95.57, -27.48, -80.27, 8.39, 89.96, -36.68]),
    4: np.array([-37.0, -49.39, 43.3, 73.36, 29.98, -56.44, -15.91, -56.46, 24.54, 12.43]),
    5: np.array([-22.71, 4.47, -65.42, 10.11, 98.34, 17.96, -10.77, 2.5, -26.55, 69.16])
}


# Toy fixture: item_id -> price, keyed by the same ids (1-5) as `embeddings`.
prices = {
    1: 100.5,
    2: 12.2,
    3: 60.0,
    4: 11.1,
    5: 245.2
}


In [97]:
def similarity(embeddings: Dict[int, np.ndarray]) -> Dict[Tuple[int, int], float]:
    """Compute the cosine similarity of every unordered pair of items.

    Defined as a plain module-level function so it can be called directly
    from a notebook cell; wrap it with ``staticmethod`` only when moving it
    into a class body.

    Args:
        embeddings (Dict[int, np.ndarray]): Mapping item_id -> 1-D embedding
            vector. All vectors must have the same length.

    Returns:
        Dict[Tuple[int, int], float]: Pairwise similarities keyed by
        ``(i, j)`` with ``i < j``. Each value is rounded to 8 decimal
        places. A pair that involves an all-zero vector has no defined
        cosine and maps to ``nan``.
    """

    def get_similarity(first_product: np.ndarray, second_product: np.ndarray) -> float:
        """Return the cosine similarity of two vectors, rounded to 8 places."""
        first = np.asarray(first_product, dtype=float)
        second = np.asarray(second_product, dtype=float)
        norm_product = np.linalg.norm(first) * np.linalg.norm(second)
        if norm_product == 0:
            # Cosine is undefined for a zero vector; report it explicitly
            # instead of relying on a warning-emitting 0/0 division.
            return float("nan")
        return round(float(np.dot(first, second) / norm_product), 8)

    # Sort the ids so every key satisfies i < j regardless of the order in
    # which the items were inserted into `embeddings`.
    item_ids = sorted(embeddings)
    return {
        (left, right): get_similarity(embeddings[left], embeddings[right])
        for left, right in combinations(item_ids, 2)
    }


In [130]:
def knn(
    sim: Dict[Tuple[int, int], float], top: int
) -> Dict[int, List[Tuple[int, float]]]:
    """Return the closest neighbors of each item.

    Defined as a plain module-level function so it can be called directly
    from a notebook cell; wrap it with ``staticmethod`` only when moving it
    into a class body.

    Args:
        sim (Dict[Tuple[int, int], float]): Pairwise similarities keyed by
            ``(i, j)`` item-id pairs. Each pair is treated as symmetric.
        top (int): Maximum number of neighbors to keep per item. Values
            below zero are treated as zero.

    Returns:
        Dict[int, List[Tuple[int, float]]]: For every item id that appears
        in ``sim``, a list of ``(neighbor_id, similarity)`` tuples ordered
        from most to least similar and truncated to ``top`` entries.
        Neighbors with equal similarity keep the order in which their pairs
        appear in ``sim``.
    """
    neighbours: Dict[int, List[Tuple[int, float]]] = {}
    for (first, second), score in sim.items():
        # Each pair is stored once, but it describes both directions.
        neighbours.setdefault(first, []).append((second, score))
        neighbours.setdefault(second, []).append((first, score))

    # Guard against a negative `top`, which would otherwise slice from the
    # end of the list and silently drop the *least* similar neighbors only.
    limit = max(top, 0)
    return {
        item: sorted(candidates, key=lambda pair: pair[1], reverse=True)[:limit]
        for item, candidates in neighbours.items()
    }

In [142]:
# Pairwise similarities, ordered from the most to the least similar pair.
t = dict(
    sorted(
        similarity(embeddings=embeddings).items(),
        key=lambda pair_score: pair_score[1],
        reverse=True,
    )
)
t

{(3, 5): 0.43696494,
 (2, 3): 0.31346095,
 (1, 4): 0.18181101,
 (2, 4): 0.14582334,
 (3, 4): 0.10573868,
 (4, 5): 0.03519999,
 (1, 5): -0.03886083,
 (1, 2): -0.15456349,
 (1, 3): -0.27053417,
 (2, 5): -0.45207678}

In [146]:
# Number the similarity-sorted pairs 1..N; each value is [pair, score].
res_dict = dict(enumerate((list(entry) for entry in t.items()), start=1))
n = len(res_dict) + 1  # next unused rank, i.e. where a manual counter would stop

# Preview the two best-ranked entries.
dict(list(res_dict.items())[:2])

{1: [(3, 5), 0.43696494], 2: [(2, 3), 0.31346095]}

In [154]:
# Same preview as above, built by looking up the first two ranks by key.
{rank: res_dict[rank] for rank in list(res_dict)[:2]}

{1: [(3, 5), 0.43696494], 2: [(2, 3), 0.31346095]}