### Деплой модели на Yandex Datasphere



In [61]:
import os
import functools as ft

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD

import numpy as np
import pickle
import pandas as pd
import torch
import json

class RecommenderSVDKNN:
    """Nearest-neighbour recommender working on truncated-SVD embeddings.

    ``fit`` stores the SVD projection of the training matrix in
    ``X_train_svd``; ``predict_sample`` ranks the rows of that projection by
    cosine similarity to the summed embedding of the requested rows.
    ``num_neighb`` and ``metric`` are only recorded in ``params``.
    """

    def __init__(self, emb_dim=500, num_neighb=30, metric='cosine'):
        """Record the hyper-parameters and create the (unfitted) SVD model."""
        self.params = dict(embedding_dim=emb_dim,
                           num_neighbours=num_neighb,
                           metric=metric)
        self.svd = TruncatedSVD(self.params['embedding_dim'])

    def fit(self, x_train_sparse):
        """Fit the SVD on ``x_train_sparse`` and keep the projected rows."""
        embedded = self.svd.fit_transform(x_train_sparse)
        self.X_train_svd = embedded

    def argpartition(self, row, num):
        """Return the indices of the ``num`` largest similarities.

        :param row: pair ``(similarities, excluded)``; the positions listed
            in ``excluded`` are never selected.
        :param num: how many indices to return.
        :return: array with the selected indices, in no particular order.
        """
        similarities, excluded = row

        # Boolean mask of the positions that must not be recommended.
        blocked = np.zeros(len(similarities), bool)
        blocked[excluded] = 1

        # Work on a copy so the caller's similarities stay untouched.
        candidates = similarities.copy()
        candidates[blocked] = -float('inf')

        partitioned = np.argpartition(candidates, -num, axis=0)
        return partitioned[-num:]

    def predict_sample(self, x_test_row, num=3):
        """Recommend ``num`` row indices for the given row indices.

        :param x_test_row: indices into ``X_train_svd`` describing the query.
        :param num: how many recommendations to return.
        :return: result of :meth:`argpartition` on the similarity column.
        """
        basket = np.array(x_test_row)
        # The query is the sum of the embeddings of the requested rows.
        query = self.X_train_svd[basket].sum(0).reshape((1, -1))
        sim = cosine_similarity(self.X_train_svd, query)

        return self.argpartition((sim, basket), num)

class MainRecommender():
    """Top-level dispatcher that picks the recommender used for a request.

    A ``user_id`` found in the ``client_idx`` column of the processed dataset
    is served by ``XGBRecommender``; any other id is served by
    ``AnonymousXGBRecommender``.
    """

    def __init__(self):
        """Load the known client ids and instantiate both recommenders."""
        path_to_clients = os.path.join('..', 'data', 'processed', 'rfm_client_hist_context_target_product.csv')
        # Only one column is needed here, so skip parsing the rest of the file.
        self.clients = pd.read_csv(path_to_clients, usecols=['client_idx'])['client_idx'].unique().tolist()

        self.anon_model = AnonymousXGBRecommender()
        self.hist_model = XGBRecommender()

    def predict_sample(self, user_id, basket, store_idx, time, slot_num):
        """Forward the request to the model matching the client type.

        :param user_id: client id; decides which model is used.
        :param basket: list of product indices currently in the basket.
        :param store_idx: store identifier.
        :param time: request timestamp (anything the models can parse).
        :param slot_num: number of recommendations requested.
        :return: whatever the selected model's ``predict_sample`` returns.
        """
        model = self.hist_model if user_id in self.clients else self.anon_model
        return model.predict_sample(user_id, basket, store_idx, time, slot_num)

    def get_rec(self, data_dict):
        """Parse a request, call the model and build the response.

        :param data_dict: request payload. Keys read: ``user_id``,
            ``session_id``, ``created_at``, ``store_id``, ``slot_num``
            (defaults to 3 when absent) and ``items`` (list of dicts with a
            ``code`` entry).
        :return: ``{'user_id', 'session_id', 'items'}`` where ``items`` is a
            JSON-encoded list of ints, or ``{'error': ...}`` if anything
            failed. This method never raises ``Exception`` subclasses.
        """
        try:
            user_id = data_dict.get('user_id')
            session_id = data_dict.get('session_id')
            created_at = data_dict.get('created_at')
            store_id = int(data_dict.get('store_id'))
            slot_num = data_dict.get('slot_num')
            slot_num = int(slot_num) if slot_num is not None else 3

            # Drop items without a code *before* converting: int(None) would
            # otherwise abort the whole request.
            raw_codes = (item.get('code') for item in data_dict.get('items') or [])
            codes = [int(code) for code in raw_codes if code is not None]

            if not codes:
                raise ValueError('Empty items')

            pred = self.predict_sample(user_id=user_id, basket=codes, store_idx=store_id,
                                       time=created_at, slot_num=slot_num)

            # Plain ints so that numpy integer types serialise cleanly.
            items = [int(i) for i in pred]

            return {
                "user_id": user_id,
                "session_id": session_id,
                "items": json.dumps(items)
            }

        except Exception as exception:
            # Service boundary: report the failure instead of raising.
            # Exceptions created without arguments have empty ``args``.
            message = exception.args[0] if exception.args else repr(exception)
            return {'error': message}

            
            
class AnonymousXGBRecommender():
    """Wrapper around the gradient-boosting recommender for anonymous clients.

    Candidates are scored from the current basket, the store and the request
    time only; ``user_id`` is accepted for interface parity but never read.
    """

    def __init__(self):
        """Load the pickled models, the product table and the feature order."""

        self.svd_model = self.load_model('svd_model_anon')
        self.xgb_model = self.load_model('xgb_model_anon')

        self.products = pd.read_csv(ft.reduce(os.path.join, ['..', 'data', 'processed', 'products.csv']), index_col=0)
        self.prod_num = self.products.shape[0]
        self.products = self.products.sort_index()
        # Positional id of every product after sorting by index.
        self.products['target_product'] = range(self.prod_num)

        # Only the header is read (nrows=0): it fixes the feature column order
        # once the history, client-profile and target columns are removed.
        path_to_dataset = ft.reduce(os.path.join, ['..', 'data', 'processed', 'client_hist_context_target_product.csv'])
        dataset_header = pd.read_csv(path_to_dataset, index_col=0, nrows=0)
        cols = ['h_' + str(x) for x in range(self.prod_num)] + ['is_new_client', 'age', 'gender', 'target']
        dataset_header.drop(cols, inplace=True, axis=1)
        dataset_header = dataset_header.columns.tolist()
        self.dataset_header = dataset_header

    def load_model(self, name):
        """Unpickle ``../models/pkl/<name>.pkl`` and return the object.

        NOTE: ``pickle.load`` executes arbitrary code from the file; only
        trusted model files must be placed in that directory.
        """

        path = ft.reduce(os.path.join, ['..', 'models', 'pkl', name + '.pkl'])
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    @staticmethod
    def _top_k(scores, basket, slot_num):
        """Return up to ``slot_num`` best-scored indices, best first, skipping the basket."""
        scores = np.array(scores, dtype=float)
        in_basket = np.zeros(scores.shape[0], dtype=bool)
        in_basket[basket] = True
        scores[in_basket] = -np.inf

        # Never ask for more items than there are candidates outside the
        # basket; note that ``arr[-0:]`` would select the whole array.
        k = min(int(slot_num), int((~in_basket).sum()))
        if k <= 0:
            return []

        recs = np.argpartition(scores, -k, axis=0)[-k:]
        recs = list(recs[np.argsort(scores[recs])])
        recs.reverse()
        return recs

    def predict_sample(self, user_id, basket, store_idx, time, slot_num):
        """Return the indices of the recommended products, best first.

        :param user_id: unused; kept so both recommenders share a signature.
        :param basket: product indices currently in the basket.
        :param store_idx: store identifier, used as a feature.
        :param time: request timestamp, parsed with ``pd.to_datetime``.
        :param slot_num: maximum number of recommendations to return.
        :return: list of at most ``slot_num`` product indices that are not in
            the basket, ordered by decreasing model score.
        """

        basket_header = ['b_' + str(x) for x in range(self.prod_num)]

        # Turn the basket list into a per-product presence indicator.
        basket_ = np.full((self.prod_num), False, dtype=bool)
        basket_[basket] = True

        # Request context features.
        user = pd.Series(basket_, index=basket_header)
        time = pd.to_datetime(time)
        user['store_idx'] = store_idx
        user['product_count'] = np.sum(basket_)
        user['hour'] = time.hour
        user['dayofweek'] = time.dayofweek
        user['day'] = time.day
        user['month'] = time.month
        user['year'] = time.year

        # One row per candidate product: the same context joined with the
        # product attributes, reordered to the training column order.
        user_df = pd.concat([user] * self.prod_num, axis=1).transpose()
        user_df = pd.concat([user_df.reset_index(drop=True), self.products], axis=1)
        user_df = user_df[self.dataset_header]

        # SVD-KNN score: cosine similarity of every product embedding to the
        # summed embedding of the basket.
        user_df['knn_score'] = cosine_similarity(self.svd_model.X_train_svd, self.svd_model.X_train_svd[basket].sum(0).reshape(1, -1))

        y_pred = self.xgb_model.predict(user_df.values)

        # Rank and keep the top candidates.
        return self._top_k(y_pred, basket, slot_num)

    
class XGBRecommender():
    """Wrapper around the gradient-boosting recommender for known clients.

    The stored dataset row of the client provides purchase history and
    profile features; basket and request context are replaced with the
    values of the current request.
    """

    def __init__(self):
        """Load the pickled models, the client dataset and the product table."""

        self.svd_model = self.load_model('svd_model')
        self.xgb_model = self.load_model('xgb_model')

        self.dataset = pd.read_csv(ft.reduce(os.path.join, ['..', 'data', 'processed', 'rfm_client_hist_context_target_product.csv']), index_col=0)
        # Keep only the rows with target == 1.
        self.dataset = self.dataset[self.dataset.target == 1]
        self.dataset.drop(['is_new_client'], axis=1, inplace=True)

        self.products = pd.read_csv(ft.reduce(os.path.join, ['..', 'data', 'processed', 'products.csv']), index_col=0)
        self.prod_num = self.products.shape[0]
        self.products = self.products.sort_index()
        # Positional id of every product after sorting by index.
        self.products['target_product'] = range(self.prod_num)

    def load_model(self, name):
        """Unpickle ``../models/pkl/<name>.pkl`` and return the object.

        NOTE: ``pickle.load`` executes arbitrary code from the file; only
        trusted model files must be placed in that directory.
        """

        path = ft.reduce(os.path.join, ['..', 'models', 'pkl', name + '.pkl'])
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    @staticmethod
    def _top_k(scores, basket, slot_num):
        """Return up to ``slot_num`` best-scored indices, best first, skipping the basket."""
        scores = np.array(scores, dtype=float)
        in_basket = np.zeros(scores.shape[0], dtype=bool)
        in_basket[basket] = True
        scores[in_basket] = -np.inf

        # Never ask for more items than there are candidates outside the
        # basket; note that ``arr[-0:]`` would select the whole array.
        k = min(int(slot_num), int((~in_basket).sum()))
        if k <= 0:
            return []

        recs = np.argpartition(scores, -k, axis=0)[-k:]
        recs = list(recs[np.argsort(scores[recs])])
        recs.reverse()
        return recs

    def predict_sample(self, user_id, basket, store_idx, time, slot_num):
        """Return the indices of the recommended products, best first.

        :param user_id: label of the client's row in ``self.dataset``.
        :param basket: product indices currently in the basket.
        :param store_idx: store identifier, used as a feature.
        :param time: request timestamp, parsed with ``pd.to_datetime``.
        :param slot_num: maximum number of recommendations to return.
        :return: list of at most ``slot_num`` product indices that are not in
            the basket, ordered by decreasing model score.
        :raises KeyError: if ``user_id`` has no row in ``self.dataset``.
        """

        basket_header = ['b_' + str(x) for x in range(self.prod_num)]
        history_header = ['h_' + str(x) for x in range(self.prod_num)]

        # Build the client's purchase history: stored history, plus the
        # stored basket, plus the stored target product.
        # NOTE(review): assumes exactly one dataset row per client, so that
        # ``.loc`` yields a Series — confirm against the data.
        user = self.dataset.loc[user_id].copy()
        user.loc[history_header] = user.loc[basket_header].values | user.loc[history_header].values
        # int() guards against the id having been upcast to float when the
        # row was extracted, which would address a non-existent 'h_5.0'.
        user.loc['h_' + str(int(user.target_product))] = True

        # Turn the basket list into a per-product presence indicator.
        basket_ = np.full((self.prod_num), False, dtype=bool)
        basket_[basket] = True
        user.loc[basket_header] = basket_
        # Product attributes are re-attached per candidate further below.
        user.drop(self.products.columns, inplace=True)

        # Request context features.
        time = pd.to_datetime(time)
        user['store_idx'] = store_idx
        user['product_count'] = np.sum(basket_)
        user['hour'] = time.hour
        user['dayofweek'] = time.dayofweek
        user['day'] = time.day
        user['month'] = time.month
        user['year'] = time.year

        # One row per candidate product: the same client/context features
        # joined with the product attributes.
        user_df = pd.concat([user] * self.prod_num, axis=1).transpose().drop('target', axis=1)
        user_df = pd.concat([user_df.reset_index(drop=True), self.products], axis=1)

        # Restore the training column order. Dropping the label from the
        # column Index avoids copying the whole dataset on every request.
        user_df = user_df[self.dataset.columns.drop('target')]

        # SVD-KNN score: cosine similarity of every product embedding to the
        # summed embedding of the basket.
        user_df['knn_score'] = cosine_similarity(self.svd_model.X_train_svd, self.svd_model.X_train_svd[basket].sum(0).reshape(1, -1))

        y_pred = self.xgb_model.predict(user_df.values)

        # Rank and keep the top candidates.
        return self._top_k(y_pred, basket, slot_num)


### Обработка запросов

In [88]:
# Build the dispatcher once; its constructor reads the client list and
# instantiates both recommenders, so it is reused for every request below.
mainrec = MainRecommender()

In [100]:
# Sample request payload with the keys read by MainRecommender.get_rec.
input_data = {
    'user_id': 290000000,
    'created_at': "2021-04-23T18:25:43.511Z",
    'store_id': 742,
    'slot_num': 4,
    'session_id': 'qwe-123-asd',
    'items': [
        {'name': 'dish_1', 'code': 130},
        {'name': 'dish_2', 'code': 77},
        {'name': 'dish_3', 'code': 39},
    ],
}

In [101]:
#deploy
# Request handler: the result (or an error description) ends up in
# ``output_data``.
try:
    output_data = mainrec.get_rec(input_data)

except Exception as e:
    # Exceptions created without arguments have empty ``args``; fall back to
    # the repr so that the handler itself can never raise IndexError.
    output_data = {'error': e.args[0] if e.args else repr(e)}

In [102]:
# Anonymous client
output_data

{'user_id': 290000000, 'session_id': 'qwe-123-asd', 'items': '[4, 6, 87, 62]'}

In [99]:
# Client with purchase history
# NOTE(review): the output shown below has user_id 29 and this cell's
# execution count is 99, so it presumably comes from an earlier run with a
# different input_data — confirm before relying on it.
output_data

{'user_id': 29, 'session_id': 'qwe-123-asd', 'items': '[4, 5, 87, 6]'}