# Практическое задание № 7
## Выполнил - Назарова Алёна Алексеевна

**Инструкция:**

- Шаг 1.  Изучите материалы лекционных и практических занятий по теме 11.3. 
- Шаг 2. Постройте рекомендательную систему на примере данных о покупках. Исходные файлы: recommend_1.csv, trx_data.csv 
   * **recommend_1.csv** список из 1000 идентификаторов клиентов, рекомендуемых в качестве выходных данных.
   * **trx_data.csv** пользовательские транзакции

- Шаг 3. Реализуйте коллаборативную фильтрацию данных на основе пользователей. Используйте модель kNN. Проверить модель на покупателях с customer_id = 4 и customer_id = 21.
- Шаг 4. Опубликуйте файл расширения ipynb на платформе Odin.

У сети продуктовых магазинов разрабатывается новое мобильное приложение, позволяющее покупателям размещать заказы еще до того, как они зайдут в магазин.

В приложении должна быть возможность показывать рекомендации: когда покупатель впервые нажимает на страницу «заказ», мы можем порекомендовать добавить в его корзину 10 лучших товаров, например, одноразовую посуду, свежее мясо, чипсы и т. д.

**Цель работы: получить список рекомендаций для указанного пользователя**, например:

Входные данные: идентификатор клиента

Результат: ранжированный список товаров (идентификаторов продуктов), которые пользователь, скорее всего, захочет положить в свою (пустую) «корзину».

In [1]:
import pandas as pd
import numpy as np

import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

from sklearn.neighbors import NearestNeighbors  
from scipy.sparse import csr_matrix 

import warnings
warnings.filterwarnings('ignore')

import math, random
from collections import defaultdict, Counter

In [2]:
customers = pd.read_csv('recommend_1.csv')
transactions = pd.read_csv('trx_data.csv')

In [3]:
print(customers.shape)
customers.head()

(1000, 1)


Unnamed: 0,customerId
0,1553
1,20400
2,19750
3,6334
4,27773


In [4]:
print(transactions.shape)
transactions.head()

(62483, 2)


Unnamed: 0,customerId,products
0,0,20
1,1,2|2|23|68|68|111|29|86|107|152
2,2,111|107|29|11|11|11|33|23
3,3,164|227
4,5,2|2


# Преобразование

In [5]:
transactions['products'] = transactions['products'].apply(lambda x: [int(i) for i in x.split('|')])

data = pd.melt(transactions.set_index('customerId')['products'].apply(pd.Series).reset_index(), 
             id_vars=['customerId'],
             value_name='products') \
    .dropna().drop(['variable'], axis=1) \
    .groupby(['customerId', 'products']) \
    .agg({'products': 'count'}) \
    .rename(columns={'products': 'purchase_count'}) \
    .reset_index() \
    .rename(columns={'products': 'productId'})
data['productId'] = data['productId'].astype(np.int64)

In [6]:
print(data.shape)
data.head()

(133585, 3)


Unnamed: 0,customerId,productId,purchase_count
0,0,1,2
1,0,13,1
2,0,19,3
3,0,20,1
4,0,31,2


In [7]:
df_matrix = pd.pivot_table(data, values='purchase_count', index='customerId', columns='productId')
df_matrix.head()

productId,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
customerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,2.0,,,,,,,,,...,,,,,,,,,,
1,,,6.0,,,,,,,,...,,,,1.0,,,1.0,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,


In [8]:
df_matrix.fillna(0, inplace=True)
df_matrix.head()

productId,0,1,2,3,4,5,6,7,8,9,...,290,291,292,293,294,295,296,297,298,299
customerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,6.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [9]:
df_matrix.shape

(24429, 300)

# Вариант 1.  kNN sklearn.neighbors

In [10]:
from sklearn.neighbors import NearestNeighbors  

knn = NearestNeighbors(n_neighbors=10, algorithm= 'brute', metric= 'cosine')
model_knn = knn.fit(df_matrix)

In [11]:
def most_similar_users_to(customer_id):
    most_similar_users_to = []
    distance, indice = model_knn.kneighbors(df_matrix.iloc[customer_id,:].values.reshape(1,-1), n_neighbors=10)
    print('Рекомендации для ## {0} ##:'.format(df_matrix.index[customer_id]))
    for i in range(1, len(distance.flatten())):
        customer_id1 = df_matrix.index[indice.flatten()[i]]
        most_similar_users_to.append((customer_id1, distance.flatten()[i]))

    most_similar_users_to.sort(key=lambda x: x[1], reverse=True)

    return most_similar_users_to[:10]   

In [12]:
print("Подобие (схожесть) на основе пользователя")
print(most_similar_users_to(1))

Подобие (схожесть) на основе пользователя
Рекомендации для ## 1 ##:
[(7941, 0.35300336077936956), (26200, 0.35300336077936956), (7956, 0.35300336077936956), (7857, 0.35300336077936956), (6582, 0.3478886832486221), (9055, 0.27663576674438195), (6503, 0.27663576674438195), (7923, 0.27663576674438195), (1566, 0.27663576674438195)]


In [13]:
def user_based_suggestions(customer_id):
    # суммировать все коэффициенты подобия
    suggestions = defaultdict(float)
    non_interacted_movies = df_matrix.iloc[customer_id][df_matrix.iloc[customer_id]==0].index.tolist()
    for other_user_id, similarity in most_similar_users_to(customer_id):
        movies_user_id = df_matrix.loc[other_user_id][df_matrix.loc[other_user_id]>0]
        for interest in movies_user_id.index.tolist():
            if interest in non_interacted_movies:
                suggestions[interest] += similarity

    # преобразовать их в сортированный список
    suggestions = sorted(suggestions.items(),
                         key=lambda x: x[1],
                         reverse=True)
    return suggestions[:10]

In [14]:
print("Рекомендации для пользователя")
print(user_based_suggestions(4))

Рекомендации для пользователя
Рекомендации для ## 4 ##:
[(1, 0.7139999243979872), (5, 0.7139999243979872), (7, 0.35949714876589), (25, 0.35949714876589), (31, 0.35949714876589), (33, 0.35949714876589), (52, 0.35949714876589), (57, 0.35949714876589), (61, 0.35949714876589), (87, 0.35949714876589)]


In [15]:
print("Рекомендации для пользователя")
print(user_based_suggestions(21))

Рекомендации для пользователя
Рекомендации для ## 21 ##:
[(1, 0.364224468608779), (38, 0.364224468608779), (142, 0.364224468608779), (179, 0.364224468608779), (273, 0.364224468608779)]
