In [407]:
from typing import Dict, Tuple, List
from itertools import combinations
from collections import Counter, defaultdict
import copy

In [47]:
import pandas as pd
import numpy as np
from scipy.stats import linregress
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
embeddings = {
    1: np.array([-26.57, -76.61, 81.61, -9.11, 74.8, 54.23, 32.56, -22.62, -72.44, -82.78]),
    2: np.array([-55.98, 82.87, 86.07, 18.71, -18.66, -46.74, -68.18, 60.29, 98.92, -78.95]),
    3: np.array([-27.97, 25.39, -96.85, 3.51, 95.57, -27.48, -80.27, 8.39, 89.96, -36.68]),
    4: np.array([-37.0, -49.39, 43.3, 73.36, 29.98, -56.44, -15.91, -56.46, 24.54, 12.43]),
    5: np.array([-22.71, 4.47, -65.42, 10.11, 98.34, 17.96, -10.77, 2.5, -26.55, 69.16])
}

prices = {
    1: 100.5,
    2: 12.2,
    3: 60.0,
    4: 11.1,
    5: 245.2
}

In [5]:
embeddings

{1: array([-26.57, -76.61,  81.61,  -9.11,  74.8 ,  54.23,  32.56, -22.62,
        -72.44, -82.78]),
 2: array([-55.98,  82.87,  86.07,  18.71, -18.66, -46.74, -68.18,  60.29,
         98.92, -78.95]),
 3: array([-27.97,  25.39, -96.85,   3.51,  95.57, -27.48, -80.27,   8.39,
         89.96, -36.68]),
 4: array([-37.  , -49.39,  43.3 ,  73.36,  29.98, -56.44, -15.91, -56.46,
         24.54,  12.43]),
 5: array([-22.71,   4.47, -65.42,  10.11,  98.34,  17.96, -10.77,   2.5 ,
        -26.55,  69.16])}

In [79]:
@staticmethod
def similarity(embeddings: Dict[int, np.ndarray]) -> Dict[Tuple[int, int], float]:
    """Calculate pairwise similarities between each item
    in embedding.

    Args:
        embeddings (Dict[int, np.ndarray]): Items embeddings.

    Returns:
        Dict[Tuple[int, int], float]:
        Keys are in form of (i, j) - combinations pairs of item_ids
        with i < j.
        Round each value to 8 decimal places.
    """
    keys = embeddings.keys()
    combination = combinations(keys, 2)
    pair_sims = {}
    for left, right in combination:
        left_elements = np.array(embeddings.get(left)).reshape(1, -1)
        right_elements = np.array(embeddings.get(right)).reshape(1, -1)
        cos_sim = cosine_similarity(left_elements, right_elements)
        pair_sims[(left, right)] = round(cos_sim[0, 0], 8)

    return pair_sims

In [82]:
sim = similarity(embeddings)
sim

{(1, 2): -0.15456349,
 (1, 3): -0.27053417,
 (1, 4): 0.18181101,
 (1, 5): -0.03886083,
 (2, 3): 0.31346095,
 (2, 4): 0.14582334,
 (2, 5): -0.45207678,
 (3, 4): 0.10573868,
 (3, 5): 0.43696494,
 (4, 5): 0.03519999}

In [265]:
def knn(
        sim: Dict[Tuple[int, int], float], top: int
) -> Dict[int, List[Tuple[int, float]]]:
    """Return closest neighbors for each item.

    Args:
        sim (Dict[Tuple[int, int], float]): <similarity> method output.
        top (int): Number of top neighbors to consider.

    Returns:
        Dict[int, List[Tuple[int, float]]]: Dict with top closest neighbors
        for each item.
    """
    sorted_dict = dict(sorted(sim.items(), key=lambda item: item[1], reverse=True))
    result_dict = {}
    for key, value in sorted_dict.items():
        if result_dict.get(key[0]) is None:
            result_dict[key[0]] = []
        if result_dict.get(key[1]) is None:
            result_dict[key[1]] = []

        if len(result_dict.get(key[0])) < top:
            result_dict[key[0]].append((key[1], value))
        if len(result_dict.get(key[1])) < top:
            result_dict[key[1]].append((key[0], value))

    return result_dict


In [473]:
knn_values = knn(sim, 3)

In [474]:
knn_values

{3: [(5, 0.43696494), (2, 0.31346095), (4, 0.10573868)],
 5: [(3, 0.43696494), (4, 0.03519999), (1, -0.03886083)],
 2: [(3, 0.31346095), (4, 0.14582334), (1, -0.15456349)],
 1: [(4, 0.18181101), (5, -0.03886083), (2, -0.15456349)],
 4: [(1, 0.18181101), (2, 0.14582334), (3, 0.10573868)]}

In [586]:
prices = {
    1: 100.5,
    2: 12.2,
    3: 60.0,
    4: 11.1,
    5: 245.2
}


def knn_price(
        knn_dict: Dict[int, List[Tuple[int, float]]],
        prices: Dict[int, float],
) -> Dict[int, float]:
    """Calculate weighted average prices for each item.
    Weights should be positive numbers in [0, 2] interval.

    Args:
        knn_dict (Dict[int, List[Tuple[int, float]]]): <knn> method output.
        prices (Dict[int, float]): Price dict for each item.

    Returns:
        Dict[int, float]: New prices dict, rounded to 2 decimal places.
    """
    knn_price_dict = copy.deepcopy(knn_dict)

    # for key, value_list in knn_price_dict.items():
    #     for i, (inner_key, inner_value) in enumerate(value_list):
    #         new_value = prices.get(inner_key, 0.0)
    #         knn_price_dict[key][i] = (inner_key, inner_value, new_value)

    for key, value_list in knn_price_dict.items():
        norm_weight = sum(map(lambda t: t[-1] + 1, value_list))
        print(norm_weight)
        for i, (inner_key, inner_value) in enumerate(value_list):
            price = prices.get(inner_key, 0.0)
            knn_price_dict[key][i] = ((inner_value+ 1) / norm_weight ) * price

        knn_price_dict[key] = round(sum(knn_price_dict.get(key)),2)

    return knn_price_dict


In [587]:
knn_prises = knn_price(knn_values, prices)
knn_prises

3.85616457
3.4333041
3.3047208
2.98838669
3.4333730300000003


{3: 98.71, 5: 56.59, 2: 53.41, 1: 86.7, 4: 57.99}

In [483]:
data = {
    3: [(5, 0.43696494), (2, 0.31346095), (4, 0.10573868)],
    5: [(3, 0.43696494), (4, 0.03519999), (1, -0.03886083)],
    2: [(3, 0.31346095), (4, 0.14582334), (1, -0.15456349)],
    1: [(4, 0.18181101), (5, -0.03886083), (2, -0.15456349)],
    4: [(1, 0.18181101), (2, 0.14582334), (3, 0.10573868)]
}

prices = {
    1: 100.5,
    2: 12.2,
    3: 60.0,
    4: 11.1,
    5: 245.2
}

for key, value_list in data.items():
    print(value_list)
    for i, (inner_key, inner_value) in enumerate(value_list):
        new_value = prices.get(inner_key, 0.0)
        data[key][i] = (inner_key, inner_value, new_value)

print(data)


[(5, 0.43696494), (2, 0.31346095), (4, 0.10573868)]
[(3, 0.43696494), (4, 0.03519999), (1, -0.03886083)]
[(3, 0.31346095), (4, 0.14582334), (1, -0.15456349)]
[(4, 0.18181101), (5, -0.03886083), (2, -0.15456349)]
[(1, 0.18181101), (2, 0.14582334), (3, 0.10573868)]
{3: [(5, 0.43696494, 245.2), (2, 0.31346095, 12.2), (4, 0.10573868, 11.1)], 5: [(3, 0.43696494, 60.0), (4, 0.03519999, 11.1), (1, -0.03886083, 100.5)], 2: [(3, 0.31346095, 60.0), (4, 0.14582334, 11.1), (1, -0.15456349, 100.5)], 1: [(4, 0.18181101, 11.1), (5, -0.03886083, 245.2), (2, -0.15456349, 12.2)], 4: [(1, 0.18181101, 100.5), (2, 0.14582334, 12.2), (3, 0.10573868, 60.0)]}


In [558]:
def transform(
    embeddings: Dict[int, np.ndarray],
    prices: Dict[int, float],
    top: int,
) -> Dict[int, float]:
    """Transforming input embeddings into a dictionary
    with weighted average prices for each item.

    Args:
        embeddings (Dict[int, np.ndarray]): Items embeddings.
        prices (Dict[int, float]): Price dict for each item.
        top (int): Number of top neighbors to consider.

    Returns:
        Dict[int, float]: Dict with weighted average prices for each item.
    """
    knn_price_dict = knn_price(knn(similarity(embeddings), top), prices)
    return knn_price_dict

In [561]:
transform(embeddings,prices, 3)

{3: 364.511772813341,
 5: 181.43734755802598,
 2: 173.09089689318515,
 1: 268.6091215395021,
 4: 184.13904236458595}