## Уникальность товара
- Построение эмбеддингов см. [Middle/SKU_Embeddings]()
- Вычисление цены на похожие товары [JUNIOR/Similar_Items_Price]()

Необходимо отсеять однотипные товары в рекомендациях. Напишем функцию, которая оценивает уникальность каждого товара (uniqueness), то есть насколько эмбеддинг товара удалён (изолирован) от других эмбеддингов.

Мера близости — евклидово расстояние.

### Step-1: KNN

In [1]:
import numpy as np
from sklearn.neighbors import NearestNeighbors

In [2]:
def knn_uniqueness(embeddings: np.ndarray, num_neighbors: int) -> np.ndarray:
    """Score how isolated each embedding is via nearest-neighbour distances.

    The score of an item is the mean distance to its ``num_neighbors``
    nearest hits inside the group; a larger value means a more isolated
    (more unique) item.

    Parameters
    ----------
    embeddings: np.ndarray :
        group of item embeddings, one row per item
    num_neighbors: int :
        how many nearest hits are averaged for every item

    Returns
    -------
    np.ndarray
        one uniqueness score per row of ``embeddings``

    Notes
    -----
    The same array is used both to fit the index and to query it, so the
    nearest hit of a row is normally the row itself at distance 0, and that
    zero is part of the mean.

    """
    # Only the neighbour count is configured; everything else is left on the
    # NearestNeighbors defaults.
    index = NearestNeighbors(n_neighbors=num_neighbors).fit(embeddings)
    neighbor_distances = index.kneighbors(embeddings)[0]
    return neighbor_distances.mean(axis=1)

In [3]:
# Generate a random group of 6 two-dimensional embeddings
embeddings = np.random.normal(size=(6, 2))

In [4]:
# Score the sample group, averaging over the 4 nearest hits per item
knn_uniqueness(embeddings, 4)

array([0.79792952, 0.75986192, 0.92500148, 1.27355236, 0.75559718,
       0.66968766])

In [5]:
# Show the sampled embeddings
print(embeddings)

[[-0.75318295 -1.49677626]
 [-0.14712388 -0.31160288]
 [-1.73635873 -0.74181199]
 [ 1.30373384 -1.3900693 ]
 [ 0.1008485  -1.14930397]
 [-0.96289078 -0.48824138]]


### Step-2: Kernel Density Uniqueness

In [12]:
from sklearn.neighbors import KernelDensity
import numpy as np

def kde_uniqueness(embeddings: np.ndarray) -> np.ndarray:
    """Score how isolated each embedding is via a kernel density estimate.

    A Gaussian KDE is fitted on the whole group and evaluated at every point
    of that same group. The score is the reciprocal of the estimated density,
    so points lying in sparse regions receive larger values.

    Parameters
    ----------
    embeddings: np.ndarray :
        group of item embeddings, one row per item

    Returns
    -------
    np.ndarray
        one uniqueness score per row of ``embeddings``

    """
    estimator = KernelDensity(kernel='gaussian')
    estimator.fit(embeddings)
    # score_samples yields log-densities, hence the exponent.
    log_density = estimator.score_samples(embeddings)
    density = np.exp(log_density)
    return 1 / density

In [13]:
# Score the same sample group with the KDE-based estimate
kde_uniqueness(embeddings)

array([1.50792525, 1.50760707, 1.50758296, 1.50796446, 1.50781947,
       1.50733142])

### Step-3: Diversity

In [19]:
from typing import Tuple

def group_diversity(embeddings: np.ndarray, threshold: float) -> Tuple[bool, float]:
    """Measure the diversity of a group as its mean KDE uniqueness.

    Parameters
    ----------
    embeddings: np.ndarray :
        group of item embeddings
    threshold: float :
        diversity threshold; a group scoring strictly below it is rejected

    Returns
    -------
    Tuple[bool, float]
        reject flag (True when the diversity is below ``threshold``)
        group diversity

    """
    scores = kde_uniqueness(embeddings)
    diversity = np.mean(scores)
    # bool() keeps the flag a plain Python bool rather than a NumPy scalar.
    reject = bool(diversity < threshold)
    return (reject, diversity)


In [21]:
# Check the sample group against a diversity threshold of 1.6
group_diversity(embeddings, 1.6)

(True, 1.507705106376016)

### Step-4: Uniqueness-as-a-Service
Реализуем сервис на FastAPI.

In [25]:
#!pip install fastapi_utils

In [24]:
import os
import uvicorn
from fastapi import FastAPI
from fastapi_utils.tasks import repeat_every


# Threshold the /diversity/ endpoint passes to group_diversity; groups whose
# diversity is below it are reported as rejected.
DIVERSITY_THRESHOLD = 10

# FastAPI application that the startup task and the endpoints register on.
app = FastAPI()
# In-memory store of embeddings keyed by item id, filled by load_embeddings.
# NOTE(review): this rebinds the name `embeddings` that the demo cells above
# use for the random sample array.
embeddings = {}

In [27]:
# Reloads all embeddings every 10 seconds

@app.on_event("startup")
@repeat_every(seconds=10)
def load_embeddings() -> dict:
    """Refresh the in-memory embedding store from ``embeddings.npy``.

    Registered as a startup handler wrapped in ``repeat_every(seconds=10)``.
    Entries read from the file are added to, or overwrite, the module-level
    ``embeddings`` dict; ids that are absent from the file are left untouched.

    Returns
    -------
    dict
        always an empty dict
    """
    # NOTE(review): allow_pickle=True unpickles the file contents, so the
    # file must come from a trusted source.
    stored = np.load("embeddings.npy", allow_pickle=True)
    # presumably a {item_id: embedding} dict - TODO confirm against the writer
    fresh = stored.item()
    embeddings.update(fresh.items())
    return {}

In [28]:
# GET request that asks to score a selection of items

@app.get("/uniqueness/")
def uniqueness(item_ids: str) -> dict:
    """Calculate uniqueness of each product.

    Parameters
    ----------
    item_ids: str :
        comma-separated integer item ids, e.g. ``"1,2,3"``

    Returns
    -------
    dict
        mapping of each requested item id to its KDE uniqueness within the
        requested group

    Raises
    ------
    ValueError
        if a token of ``item_ids`` cannot be parsed as an integer
    KeyError
        if a requested id has no entry in the loaded ``embeddings``
    """
    # Parse item IDs
    parsed_ids = [int(item) for item in item_ids.split(",")]

    # Default answer
    item_uniqueness = {item_id: 0.0 for item_id in parsed_ids}

    # Calculate uniqueness
    item_embeddings = [embeddings[item_id] for item_id in parsed_ids]
    scores = kde_uniqueness(np.array(item_embeddings)).tolist()

    # Fill answer: scores come back in the same order as the requested ids,
    # so the two sequences are paired positionally.
    for item_id, score in zip(parsed_ids, scores):
        item_uniqueness[item_id] = score

    return item_uniqueness

In [29]:
# GET request that returns the diversity metric and the reject/accept verdict
# for the selection

@app.get("/diversity/")
def diversity(item_ids: str) -> dict:
    """Calculate diversity of group of products.

    Parameters
    ----------
    item_ids: str :
        comma-separated integer item ids, e.g. ``"1,2,3"``

    Returns
    -------
    dict
        ``"diversity"``: group diversity as a float
        ``"reject"``: True when the diversity is below ``DIVERSITY_THRESHOLD``

    Raises
    ------
    ValueError
        if a token of ``item_ids`` cannot be parsed as an integer
    KeyError
        if a requested id has no entry in the loaded ``embeddings``
    """
    # Parse item IDs
    parsed_ids = [int(item) for item in item_ids.split(",")]

    # Collect the embeddings of the requested items as a single array, the
    # input type group_diversity is annotated with.
    item_embeddings = np.array([embeddings[item_id] for item_id in parsed_ids])

    # Calculate diversity
    reject, group_score = group_diversity(item_embeddings, DIVERSITY_THRESHOLD)

    # Cast to built-in types so the response holds no NumPy scalars.
    return {"diversity": float(group_score), "reject": bool(reject)}