<a href="https://colab.research.google.com/github/HassanSherwani/Product_Purchase_Frequency/blob/master/Product_Freq.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
"""
Problem Statement:

How often a certain product has been sold in the past days.

"""

In [0]:
#1)- Importing key modules

In [0]:
# Let's be rebels and ignore warnings for now
import warnings
warnings.filterwarnings('ignore')
warnings.filterwarnings("ignore",category=DeprecationWarning)

In [0]:
import pandas as pd
import numpy as np
import time
from sklearn.model_selection import train_test_split
import sys

In [4]:
! pip install turicreate



In [0]:
import turicreate as tc

In [0]:
# 2)-Loading Data

In [0]:
url = 'https://raw.githubusercontent.com/HassanSherwani/Product_Purchase_Frequency/master/20190207_transactions%20.json'

In [0]:
transactions = pd.read_json(url, lines= True)

In [0]:
# 3)-Exploring dataset

In [10]:
transactions.head()

Unnamed: 0,id,products
0,0,"[185, 30, 77, 188, 78, 125, 45, 155, 241, 229,..."
1,1,"[119, 148, 108, 34, 157, 82, 113, 45, 165]"
2,2,"[173, 103, 229, 240]"
3,3,[91]
4,4,"[175, 192, 54, 172]"


In [11]:
transactions.shape

(2500, 2)

In [12]:
transactions.info() # checking missing values

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2500 entries, 0 to 2499
Data columns (total 2 columns):
id          2500 non-null int64
products    2500 non-null object
dtypes: int64(1), object(1)
memory usage: 58.6+ KB


In [13]:
transactions.describe()

Unnamed: 0,id
count,2500.0
mean,1249.5
std,721.83216
min,0.0
25%,624.75
50%,1249.5
75%,1874.25
max,2499.0


In [0]:
### Adding Features: Create a separate dataframe for recommending users
customers=transactions['id']

In [0]:
cust_2_rec=customers[:1000]

In [16]:
### break down each list of items in the products column into rows and count the number of products bought by a user

pd.melt(transactions.head(2).set_index('id')['products'].apply(pd.Series).reset_index(), 
             id_vars=['id'],
             value_name='products') \
    .dropna().drop(['variable'], axis=1) \
    .groupby(['id', 'products']) \
    .agg({'products': 'count'}) \
    .rename(columns={'products': 'purchase_count'}) \
    .reset_index() \
    .rename(columns={'products': 'productId'})

Unnamed: 0,id,productId,purchase_count
0,0,30.0,1
1,0,45.0,1
2,0,77.0,1
3,0,78.0,1
4,0,89.0,2
5,0,125.0,1
6,0,133.0,1
7,0,155.0,1
8,0,161.0,1
9,0,185.0,1


In [0]:
# 4)- Making data for user, item, and target field

In [18]:
### 4a)-One for purchase count
s=time.time()

data = pd.melt(transactions.set_index('id')['products'].apply(pd.Series).reset_index(), 
             id_vars=['id'],
             value_name='products') \
    .dropna().drop(['variable'], axis=1) \
    .groupby(['id', 'products']) \
    .agg({'products': 'count'}) \
    .rename(columns={'products': 'purchase_count'}) \
    .reset_index() \
    .rename(columns={'products': 'productId'})
data['productId'] = data['productId'].astype(np.int64)

print("Execution time:", round((time.time()-s)/60,2), "minutes")

Execution time: 0.01 minutes


In [19]:
data.shape

(24811, 3)

In [20]:
data.head()

Unnamed: 0,id,productId,purchase_count
0,0,30,1
1,0,45,1
2,0,77,1
3,0,78,1
4,0,89,2


In [0]:
#Let find top frequent items
from heapq import nlargest

class Product:
    def __init__ (self, name, sales):
        self.name= name
        self.sales= sales
    def getname(self):
        return self.name
    
    def getsales(self):
        return self.sales

def gettopselling(productlist):
    return nlargest(10, productlist, key=lambda product : product.getsales())

In [0]:
# Let just focus on our list. We are not interested in customers so, only productID and purchase_count
    
productlist= data[['productId','purchase_count']]
# converting to list values
productlist=productlist.values.tolist()

In [79]:
productlist[:5]

[[30, 1], [45, 1], [77, 1], [78, 1], [89, 2]]

In [0]:
top= gettopselling(productlist)
print('This is the most frequent sold product:')
for i in top:
    print(i.getname(), ':', i.getsales())

In [0]:
### 4-b)-Dummy as target 
def create_data_dummy(data):
    data_dummy = data.copy()
    data_dummy['purchase_dummy'] = 1
    return data_dummy

In [0]:
data_dummy = create_data_dummy(data)

In [23]:
data_dummy.head()

Unnamed: 0,id,productId,purchase_count,purchase_dummy
0,0,30,1,1
1,0,45,1,1
2,0,77,1,1
3,0,78,1,1
4,0,89,2,1


In [0]:
### 4-c)-Normalize item
"""
we normalize purchase frequency of each item across users by first creating a user-item matrix 

"""
df_matrix = pd.pivot_table(data, values='purchase_count', index='id', columns='productId')

In [25]:
df_matrix.head()

productId,1,2,3,4,5,6,7,8,9,10,...,241,242,243,244,245,246,247,248,249,250
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,,,,,,,,,...,1.0,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,


In [26]:
df_matrix.shape

(2378, 250)

In [27]:
df_matrix_norm = (df_matrix-df_matrix.min())/(df_matrix.max()-df_matrix.min())

df_matrix_norm.head()

productId,1,2,3,4,5,6,7,8,9,10,...,241,242,243,244,245,246,247,248,249,250
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,,,,,,,,,,,...,0.0,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,


In [28]:
print(df_matrix_norm.shape)

(2378, 250)


In [0]:
### 4-d)- create a table for input to the modeling

d = df_matrix_norm.reset_index()
d.index.names = ['scaled_purchase_freq']
data_norm = pd.melt(d, id_vars=['id'], value_name='scaled_purchase_freq').dropna()

In [30]:
data_norm.head()

Unnamed: 0,id,productId,scaled_purchase_freq
11,12,1,0.0
38,41,1,0.0
40,43,1,0.0
93,96,1,0.0
100,103,1,0.0


In [31]:
data_norm.shape

(22530, 3)

In [0]:
###4-e) a function for normalizing data
def normalize_data(data):
    df_matrix = pd.pivot_table(data, values='purchase_count', index='id', columns='productId')
    df_matrix_norm = (df_matrix-df_matrix.min())/(df_matrix.max()-df_matrix.min())
    d = df_matrix_norm.reset_index()
    d.index.names = ['scaled_purchase_freq']
    return pd.melt(d, id_vars=['id'], value_name='scaled_purchase_freq').dropna()

In [0]:
# 5)-Split train and test set

In [34]:
train, test = train_test_split(data, test_size = .2)
print(train.shape, test.shape)

(19848, 3) (4963, 3)


In [0]:
"""convert dataframe to SFrame """
train_data = tc.SFrame(train)
test_data = tc.SFrame(test)

In [36]:
train_data

id,productId,purchase_count
819,234,1
373,58,1
990,22,1
1096,185,1
2448,200,1
1963,213,1
2452,241,1
2403,63,1
2011,207,1
501,151,1


In [37]:
test_data

id,productId,purchase_count
2378,216,1
567,230,1
2439,190,1
1702,234,1
413,160,1
1347,116,1
165,121,1
2486,31,1
1237,137,1
23,9,1


In [0]:
### 5.2)- Define a split_data function for splitting data to training and test set
def split_data(data):
    '''
    Splits dataset into training and test set.
    
    Args:
        data (pandas.DataFrame)
        
    Returns
        train_data (tc.SFrame)
        test_data (tc.SFrame)
    '''
    train, test = train_test_split(data, test_size = .2)
    train_data = tc.SFrame(train)
    test_data = tc.SFrame(test)
    return train_data, test_data

In [0]:
### 5.3)-Apply for both dummy table and scaled/normalized purchase table

In [0]:
train_data_dummy, test_data_dummy = split_data(data_dummy)
train_data_norm, test_data_norm = split_data(data_norm)

In [0]:
#6)-Model

In [0]:
### 6.1)- Parameters to define field names for purchase count as target feature
user_id = 'id'
item_id = 'productId'
users_to_recommend=list(cust_2_rec)
target = 'purchase_count'
n_rec = 10 # number of items to recommend
n_display = 30 # 1st 30 rows to display

In [43]:
popularity_model = tc.popularity_recommender.create(train_data, 
                                                    user_id=user_id, 
                                                    item_id=item_id, 
                                                    target=target)

In [44]:
popularity_recomm = popularity_model.recommend(users=users_to_recommend, k=n_rec)
popularity_recomm.print_rows(n_display)

+----+-----------+--------------------+------+
| id | productId |       score        | rank |
+----+-----------+--------------------+------+
| 0  |    207    | 1.0795454545454546 |  1   |
| 0  |    153    | 1.0769230769230769 |  2   |
| 0  |     96    | 1.072289156626506  |  3   |
| 0  |    112    | 1.0705882352941176 |  4   |
| 0  |     35    | 1.0704225352112675 |  5   |
| 0  |    129    | 1.0675675675675675 |  6   |
| 0  |    201    | 1.0666666666666667 |  7   |
| 0  |    150    | 1.0666666666666667 |  8   |
| 0  |     63    | 1.064516129032258  |  9   |
| 0  |    120    | 1.0632911392405062 |  10  |
| 1  |    207    | 1.0795454545454546 |  1   |
| 1  |    153    | 1.0769230769230769 |  2   |
| 1  |     96    | 1.072289156626506  |  3   |
| 1  |    112    | 1.0705882352941176 |  4   |
| 1  |     35    | 1.0704225352112675 |  5   |
| 1  |    129    | 1.0675675675675675 |  6   |
| 1  |    201    | 1.0666666666666667 |  7   |
| 1  |    150    | 1.0666666666666667 |  8   |
| 1  |     63

In [45]:
"""
Through this model, we predicted the recommendation items using scores by popularity. As you can tell for each model results above, the rows show the first 30 records from 1000 users
with 10 recommendations. These 30 records include 3 users and their recommended items, along with score and descending ranks.

"""

'\nThrough this model, we predicted the recommendation items using scores by popularity. As you can tell for each model results above, the rows show the first 30 records from 1000 users\nwith 10 recommendations. These 30 records include 3 users and their recommended items, along with score and descending ranks.\n\n'

In [46]:
# Checking most frequent items  
train.groupby(by=item_id)['purchase_count'].mean().sort_values(ascending=False).head(20)

productId
207    1.079545
153    1.076923
96     1.072289
112    1.070588
35     1.070423
129    1.067568
150    1.066667
201    1.066667
63     1.064516
120    1.063291
131    1.062500
125    1.057471
53     1.055556
122    1.054348
170    1.053763
156    1.053333
102    1.053191
85     1.051546
247    1.051282
117    1.050633
Name: purchase_count, dtype: float64

In [47]:
"""
products 207,96, 201, 153, and 63 are the most popular (best-selling) across customers.


"""

'\nproducts 207,96, 201, 153, and 63 are the most popular (best-selling) across customers.\n\n\n'

In [48]:
"""



# Define a model
def model(train_data, name, user_id, item_id, target, users_to_recommend, n_rec, n_display):
    if name == 'popularity':
        model = tc.popularity_recommender.create(train_data, 
                                                    user_id=user_id, 
                                                    item_id=item_id, 
                                                    target=target)
    recom = model.recommend(users=users_to_recommend, k=n_rec)
    recom.print_rows(n_display)
    return model
    
 """

"\n\n\n\n# Define a model\ndef model(train_data, name, user_id, item_id, target, users_to_recommend, n_rec, n_display):\n    if name == 'popularity':\n        model = tc.popularity_recommender.create(train_data, \n                                                    user_id=user_id, \n                                                    item_id=item_id, \n                                                    target=target)\n    recom = model.recommend(users=users_to_recommend, k=n_rec)\n    recom.print_rows(n_display)\n    return model\n    \n "

In [0]:
### 6.2)- purchase dummy as target feature
user_id = 'id'
item_id = 'productId'
users_to_recommend=list(cust_2_rec)
target = 'purchase_dummy'
n_rec = 10 # number of items to recommend
n_display = 30 # 1st 30 rows to display

In [50]:
popularity_model_dummy = tc.popularity_recommender.create(train_data_dummy, 
                                                    user_id=user_id, 
                                                    item_id=item_id, 
                                                    target=target)

In [51]:
popularity_recomm_4_dummy = popularity_model_dummy.recommend(users=users_to_recommend, k=n_rec)
popularity_recomm_4_dummy.print_rows(n_display)

+----+-----------+-------+------+
| id | productId | score | rank |
+----+-----------+-------+------+
| 0  |     2     |  1.0  |  1   |
| 0  |     25    |  1.0  |  2   |
| 0  |     80    |  1.0  |  3   |
| 0  |     26    |  1.0  |  4   |
| 0  |    236    |  1.0  |  5   |
| 0  |    174    |  1.0  |  6   |
| 0  |    129    |  1.0  |  7   |
| 0  |    150    |  1.0  |  8   |
| 0  |     46    |  1.0  |  9   |
| 0  |     31    |  1.0  |  10  |
| 1  |     2     |  1.0  |  1   |
| 1  |     25    |  1.0  |  2   |
| 1  |     80    |  1.0  |  3   |
| 1  |     26    |  1.0  |  4   |
| 1  |    236    |  1.0  |  5   |
| 1  |    174    |  1.0  |  6   |
| 1  |    129    |  1.0  |  7   |
| 1  |    150    |  1.0  |  8   |
| 1  |     46    |  1.0  |  9   |
| 1  |     31    |  1.0  |  10  |
| 2  |     2     |  1.0  |  1   |
| 2  |     25    |  1.0  |  2   |
| 2  |     80    |  1.0  |  3   |
| 2  |     26    |  1.0  |  4   |
| 2  |    236    |  1.0  |  5   |
| 2  |    174    |  1.0  |  6   |
| 2  |    129 

In [0]:
### 6.3)- Applying 'scaled_purchase_freq' as target feature on model
user_id = 'id'
item_id = 'productId'
users_to_recommend=list(cust_2_rec)
target = 'scaled_purchase_freq'
n_rec = 10 # number of items to recommend
n_display = 30 # 1st 30 rows to display

In [53]:
popularity_model_scaled = tc.popularity_recommender.create(train_data_norm, 
                                                    user_id=user_id, 
                                                    item_id=item_id, 
                                                    target=target)

In [54]:
popularity_recomm_4_scaled = popularity_model_scaled.recommend(users=users_to_recommend, k=n_rec)
popularity_recomm_4_scaled.print_rows(n_display)

+----+-----------+----------------------+------+
| id | productId |        score         | rank |
+----+-----------+----------------------+------+
| 0  |    201    | 0.08695652173913043  |  1   |
| 0  |    207    | 0.08641975308641975  |  2   |
| 0  |    101    | 0.07042253521126761  |  3   |
| 0  |    152    | 0.06741573033707865  |  4   |
| 0  |     96    |  0.0641025641025641  |  5   |
| 0  |    129    | 0.06329113924050633  |  6   |
| 0  |     71    |        0.0625        |  7   |
| 0  |    117    | 0.06172839506172839  |  8   |
| 0  |     85    | 0.061224489795918366 |  9   |
| 0  |    156    | 0.06097560975609756  |  10  |
| 1  |    201    | 0.08695652173913043  |  1   |
| 1  |    207    | 0.08641975308641975  |  2   |
| 1  |    101    | 0.07042253521126761  |  3   |
| 1  |    152    | 0.06741573033707865  |  4   |
| 1  |     96    |  0.0641025641025641  |  5   |
| 1  |    129    | 0.06329113924050633  |  6   |
| 1  |     71    |        0.0625        |  7   |
| 1  |    117    | 0

In [0]:
# 7)- Evaluate

In [0]:
### 7.1)- For Popularity Model on Purchase Counts

In [0]:
models_counts = [popularity_model]

In [0]:
model_names=['Popularity Model on Purchase Counts']

In [59]:
eval_counts = tc.recommender.util.compare_models(test_data, models_counts, model_names)

PROGRESS: Evaluate model Popularity Model on Purchase Counts



Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.009302325581395371 | 0.003764611787867597 |
|   2    | 0.009819121447028432 | 0.007586030105409956 |
|   3    | 0.009474590869939696 | 0.011022722611869897 |
|   4    | 0.009819121447028392 | 0.01517923793117595  |
|   5    |  0.0097157622739018  | 0.019227472211968362 |
|   6    | 0.009560723514211882 | 0.022784750420409346 |
|   7    | 0.009007013658176466 |  0.0250937205200771  |
|   8    | 0.009754521963824267 | 0.03091198064066275  |
|   9    | 0.009704277921332199 | 0.034391739469258935 |
|   10   | 0.009870801033591722 | 0.03918071449079202  |
+--------+----------------------+----------------------+
[10 rows x 3 columns]


Overall RMSE: 0.13935375769222003

Per User RMSE (best)
+-----+------+-------+
|  id | rmse | count |
+-----+------+-------+
| 

In [60]:
### 7.2)- For Popularity Model on Purchase Counts (dummy)
models_counts = [popularity_model_dummy]
model_names=['Popularity Model on Dummy Purchase Counts']
eval_counts_dummy = tc.recommender.util.compare_models(test_data_dummy, models_counts, model_names)

PROGRESS: Evaluate model Popularity Model on Dummy Purchase Counts



Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.010892116182572641 | 0.003938203912270309 |
|   2    | 0.010114107883817442 | 0.007854179016004745 |
|   3    | 0.010892116182572626 | 0.014246813870776531 |
|   4    | 0.011799792531120333 | 0.01979434235658301  |
|   5    | 0.011099585062240663 | 0.023502848580649417 |
|   6    | 0.011237897648686015 | 0.028265988276361786 |
|   7    | 0.011484884410195613 | 0.03321066324178353  |
|   8    | 0.012059128630705409 |  0.0388057366791807  |
|   9    | 0.011698939603503917 | 0.04257907692814336  |
|   10   | 0.011618257261410804 | 0.04738173450569719  |
+--------+----------------------+----------------------+
[10 rows x 3 columns]


Overall RMSE: 0.0

Per User RMSE (best)
+-----+------+-------+
|  id | rmse | count |
+-----+------+-------+
| 918 | 0.0  |   3

In [61]:
### 7.3)- For Popularity Model on Scaled Purchase Counts
models_counts = [popularity_model_scaled]
model_names=['Popularity Model on Scaled Purchase Counts']
eval_counts_dummy = tc.recommender.util.compare_models(test_data_norm, models_counts, model_names)

PROGRESS: Evaluate model Popularity Model on Scaled Purchase Counts



Precision and recall summary statistics by cutoff
+--------+----------------------+----------------------+
| cutoff |    mean_precision    |     mean_recall      |
+--------+----------------------+----------------------+
|   1    | 0.010320478001086379 | 0.004314425389927838 |
|   2    | 0.011135252580119511 | 0.009796694343136484 |
|   3    |  0.0115879051240268  | 0.014903908331393382 |
|   4    | 0.012357414448669205 | 0.020734073096919343 |
|   5    | 0.012167300380228136 | 0.024600372468378994 |
|   6    | 0.011497374615245352 | 0.027466309717803422 |
|   7    | 0.011484441685419407 | 0.031223325832233972 |
|   8    | 0.010999456816947317 | 0.03452768940275722  |
|   9    | 0.010863661053775111 | 0.03872830501021699  |
|   10   | 0.010809342748506227 | 0.042458161972013235 |
+--------+----------------------+----------------------+
[10 rows x 3 columns]


Overall RMSE: 0.1630846073844427

Per User RMSE (best)
+------+------+-------+
|  id  | rmse | count |
+------+------+-------+


In [62]:
# 8) Submission
"""
Dummy purchase count model gives lowest rmse . So we will choose that as best model for our final step.

"""

'\nDummy purchase count model gives lowest rmse . So we will choose that as best model for our final step.\n\n'

In [63]:
users_to_recommend = list(cust_2_rec)

final_model = tc.item_similarity_recommender.create(tc.SFrame(data_dummy), 
                                            user_id=user_id, 
                                            item_id=item_id, 
                                            target='purchase_dummy')


In [64]:
recom = final_model.recommend(users=users_to_recommend, k=n_rec)
recom.print_rows(n_display)

+----+-----------+----------------------+------+
| id | productId |        score         | rank |
+----+-----------+----------------------+------+
| 0  |    168    | 0.025631868839263917 |  1   |
| 0  |    248    | 0.025486195087432863 |  2   |
| 0  |     23    | 0.024235220750172932 |  3   |
| 0  |     32    | 0.02150735855102539  |  4   |
| 0  |    214    | 0.021062183380126952 |  5   |
| 0  |    126    | 0.02020918925603231  |  6   |
| 0  |     63    | 0.020147351423899333 |  7   |
| 0  |    139    | 0.019966328144073488 |  8   |
| 0  |     74    | 0.01874724229176839  |  9   |
| 0  |    110    | 0.018568030993143716 |  10  |
| 1  |     23    | 0.029968857765197754 |  1   |
| 1  |     79    | 0.025663250022464328 |  2   |
| 1  |     80    | 0.02526219023598565  |  3   |
| 1  |    112    | 0.025098118517133925 |  4   |
| 1  |     2     | 0.02443495061662462  |  5   |
| 1  |     40    | 0.024198671181996662 |  6   |
| 1  |     17    | 0.023633764849768743 |  7   |
| 1  |    177    | 0

In [65]:
# 8.2)- Checking most frequent items in final model
data_dummy.groupby(by=item_id)['purchase_count'].mean().sort_values(ascending=False).head(20)

productId
96     1.080808
207    1.076923
153    1.071429
201    1.069767
26     1.063158
152    1.060345
125    1.058824
112    1.058824
85     1.058824
131    1.057143
35     1.056180
63     1.054545
44     1.054545
129    1.053763
101    1.053763
117    1.052632
150    1.051724
122    1.051724
156    1.051546
120    1.050000
Name: purchase_count, dtype: float64

In [0]:
#8.3)- CSV output file

In [67]:
df_rec = recom.to_dataframe()
df_rec.head()

Unnamed: 0,id,productId,score,rank
0,0,168,0.025632,1
1,0,248,0.025486,2
2,0,23,0.024235,3
3,0,32,0.021507,4
4,0,214,0.021062,5


In [68]:
print(df_rec.shape)


(10000, 4)


In [0]:
#8.4)- Bonus Part- Recommending products to customers

In [0]:
df_rec['recommendedProducts'] = df_rec.groupby([id])[item_id].transform(lambda x: '|'.join(x.astype(str)))
df_output = df_rec[['id', 'recommendedProducts']].drop_duplicates().sort_values('id').set_index('id')

In [71]:
recomendation = final_model.recommend(users=users_to_recommend, k=n_rec)

In [0]:
df_rec = recomendation.to_dataframe()


In [0]:
df_rec['recommendedProducts'] = df_rec.groupby([user_id])[item_id] \
        .transform(lambda x: '|'.join(x.astype(str)))

In [0]:

df_output = df_rec[['id', 'recommendedProducts']].drop_duplicates() \
        .sort_values('id').set_index('id')

In [75]:
df_output.head()

Unnamed: 0_level_0,recommendedProducts
id,Unnamed: 1_level_1
0,168|248|23|32|214|126|63|139|74|110
1,23|79|80|112|2|40|17|177|122|150
2,19|36|119|11|236|55|250|37|174|167
3,160|36|138|107|212|150|215|22|218|72
4,23|153|211|176|143|129|131|152|3|234
