In [1]:
#!pip install recommenders[gpu] -f https://download.pytorch.org/whl/cu111/torch_stable.html

In [None]:
# Enable IPython's autoreload extension in mode 2 so edits to imported modules
# are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [1]:
import sys
import pandas as pd
import tensorflow as tf
tf.get_logger().setLevel('ERROR') # only show error messages

from recommenders.utils.timer import Timer
from recommenders.models.ncf.ncf_singlenode import NCF
from recommenders.models.ncf.dataset import Dataset as NCFDataset
#from recommenders.datasets import movielens
from recommenders.utils.notebook_utils import is_jupyter
from recommenders.datasets.python_splitters import python_chrono_split,python_stratified_split
from recommenders.evaluation.python_evaluation import (rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, 
                                                     recall_at_k, get_top_k_items)

# Record the interpreter and library versions this run was produced with.
print(f"System version: {sys.version}")
print(f"Pandas version: {pd.__version__}")
print(f"Tensorflow version: {tf.__version__}")

System version: 3.7.13 (default, Mar 28 2022, 08:03:21) [MSC v.1916 64 bit (AMD64)]
Pandas version: 1.3.5
Tensorflow version: 2.7.4


In [2]:
# Number of items to recommend per user; also used as k in the ranking metrics
TOP_K = 10

# NOTE(review): leftover from the MovieLens quickstart this notebook appears to be
# based on; the data here is read from an Excel workbook instead.
#MOVIELENS_DATA_SIZE = '100k'

# Model parameters
EPOCHS = 50  # passed to NCF as n_epochs
BATCH_SIZE = 256  # passed to NCF as batch_size

# Passed as `seed` to both NCFDataset and NCF
SEED = 42

## 1. Load dataset

In [15]:
# Load the interaction data from the workbook in the working directory
# (no sheet is named, so pandas reads its default sheet).
df = pd.read_excel('Rec_sys_data.xlsx')

In [16]:
# Keep only the columns that are later renamed to userID / itemID / rating / timestamp.
# The explicit .copy() makes `df` an independent frame, so the column assignment in
# the next cell does not raise pandas' SettingWithCopyWarning.
df = df[['CustomerID', 'StockCode', 'Quantity', 'DeliveryDate']].copy()

In [17]:
# Convert stock codes to numbers in one vectorized call; values that cannot be parsed
# as numbers become NaN and are removed by the dropna() in the next cell.
# The previous trailing .dropna() had no effect: assigning the shortened Series back
# to the column re-aligns on the index and restores the NaN rows.
df = df.assign(StockCode=pd.to_numeric(df["StockCode"], errors="coerce"))

In [34]:
# Remove every row that has a missing value in any column, then inspect the result.
df = df.dropna(axis="index", how="any")
print(df.shape)
df

(246706, 4)


Unnamed: 0,userID,itemID,rating,timestamp
1,17850,71053.0,6,2010-12-02 08:26:00
2,17850,21730.0,6,2010-12-03 08:26:00
4,17850,22752.0,2,2010-12-04 08:26:00
7,17850,22633.0,6,2010-12-04 08:28:00
8,17850,22632.0,6,2010-12-03 08:28:00
...,...,...,...,...
272399,15249,23399.0,12,2011-10-08 11:37:00
272400,15249,22727.0,4,2011-10-08 11:37:00
272401,15249,23434.0,12,2011-10-08 11:37:00
272402,15249,23340.0,12,2011-10-07 11:37:00


In [35]:
# Target schema: userID, itemID, rating, timestamp
df = df.rename(
    columns={
        'CustomerID': "userID",
        'StockCode': "itemID",
        'Quantity': "rating",
        'DeliveryDate': "timestamp",
    }
)

# Store both identifier columns as integers.
df = df.astype({"userID": int, "itemID": int})

## 2. Split the data using the Python chronological splitter provided in utilities

In [36]:
# Chronological split: 75% of the interactions go to train, the remainder to test.
train, test = python_chrono_split(df, ratio=0.75)

In [37]:
# Write both splits to CSV without the index column; NCFDataset reads these paths.
train_file, test_file = "./train.csv", "./test.csv"
train.to_csv(train_file, index=False)
test.to_csv(test_file, index=False)

In [38]:
# Sanity-check the training split: row count, column dtypes and non-null counts.
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 185041 entries, 37126 to 137334
Data columns (total 4 columns):
 #   Column     Non-Null Count   Dtype         
---  ------     --------------   -----         
 0   userID     185041 non-null  int32         
 1   itemID     185041 non-null  int32         
 2   rating     185041 non-null  int64         
 3   timestamp  185041 non-null  datetime64[ns]
dtypes: datetime64[ns](1), int32(2), int64(1)
memory usage: 5.6 MB


## 3. Train the NCF model on the training data, and get the top-k recommendations for our testing data
NCF accepts implicit feedback and generates the propensity of items to be recommended to users on a scale of 0 to 1. A recommended item list can then be generated based on the scores. Note that this quickstart notebook uses a smaller number of epochs to reduce training time. As a consequence, the model performance will be slightly degraded.

In [39]:
# Build the NCF dataset from the two CSV splits; SEED is passed as `seed`.
data = NCFDataset(train_file=train_file, test_file=test_file, seed=SEED)

INFO:recommenders.models.ncf.dataset:Indexing ./train.csv ...
INFO:recommenders.models.ncf.dataset:Indexing ./test.csv ...
INFO:recommenders.models.ncf.dataset:Creating full leave-one-out test file ./test_full.csv ...
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3498/3498 [01:03<00:00, 54.95it/s]
INFO:recommenders.models.ncf.dataset:Indexing ./test_full.csv ...


In [40]:
# NCF model of type "NeuMF", sized from the user/item counts of the dataset.
model = NCF(
    n_users=data.n_users,
    n_items=data.n_items,
    model_type="NeuMF",
    n_factors=4,
    layer_sizes=[16, 8, 4],
    learning_rate=1e-3,
    n_epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    seed=SEED,
    verbose=10,
)

ImportError: DLL load failed: The specified procedure could not be found.

In [None]:
# Fit the model on the prepared dataset and report the elapsed time.
with Timer() as train_time:
    model.fit(data)

print(f"Took {train_time} seconds for training.")

Took 842.6078 seconds for training.


In [None]:
with Timer() as test_time:
    # Score every (user, item) pair formed from the users and items seen in training.
    candidate_items = list(train.itemID.unique())
    n_candidates = len(candidate_items)

    users, items, preds = [], [], []
    for user_id in train.userID.unique():
        repeated_user = [user_id] * n_candidates
        users += repeated_user
        items += candidate_items
        preds += list(model.predict(repeated_user, candidate_items, is_list=True))

    scored_pairs = pd.DataFrame({"userID": users, "itemID": items, "prediction": preds})

    # Outer-join with the training interactions: pairs present in train carry a rating,
    # so keeping only the null ratings leaves the pairs that are not in train.
    merged = pd.merge(train, scored_pairs, on=["userID", "itemID"], how="outer")
    all_predictions = merged.loc[merged["rating"].isnull()].drop(columns="rating")

print(f"Took {test_time} seconds for prediction.")

Took 24.8943 seconds for prediction.


## 4. Evaluate how well NCF performs
Ranking metrics (MAP, NDCG, Precision@K and Recall@K) are used for evaluation.

In [None]:
# All four metrics rank by the "prediction" column and look at the top TOP_K items.
ranking_kwargs = {"col_prediction": 'prediction', "k": TOP_K}

eval_map = map_at_k(test, all_predictions, **ranking_kwargs)
eval_ndcg = ndcg_at_k(test, all_predictions, **ranking_kwargs)
eval_precision = precision_at_k(test, all_predictions, **ranking_kwargs)
eval_recall = recall_at_k(test, all_predictions, **ranking_kwargs)

print(
    "MAP:\t%f" % eval_map,
    "NDCG:\t%f" % eval_ndcg,
    "Precision@K:\t%f" % eval_precision,
    "Recall@K:\t%f" % eval_recall,
    sep='\n',
)

MAP:	0.020692
NDCG:	0.064364
Precision@K:	0.047777
Recall@K:	0.051526


In [None]:
# Read the three sheets in a single pass over the workbook instead of re-opening and
# re-parsing the same file once per sheet. With a list of sheet names, read_excel
# returns a dict that maps each sheet name to its DataFrame.
# NOTE(review): this is an absolute Google Drive mount path, whereas the interaction
# data is loaded from the relative path 'Rec_sys_data.xlsx' — confirm which location
# is intended so the notebook runs end-to-end in one environment.
workbook_path = '/content/drive/My Drive/Rec_sys_data.xlsx'
sheets = pd.read_excel(workbook_path, sheet_name=['order', 'customer', 'product'])

df_order = sheets['order']
df_customer = sheets['customer']
df_product = sheets['product']

In [None]:
# Five highest-scoring predictions for one example user.
all_predictions.loc[all_predictions['userID'].eq(12346)].nlargest(n=5, columns='prediction')

Unnamed: 0,userID,itemID,timestamp,prediction
206767,12346,22678,NaT,0.973725
204556,12346,22193,NaT,0.97297
206766,12346,22676,NaT,0.97128
206765,12346,22683,NaT,0.962661
206763,12346,22680,NaT,0.962542


In [None]:
# Keep only the user, item and score columns (this drops the timestamp column).
all_predictions = all_predictions.loc[:, ['userID', 'itemID', 'prediction']]

In [None]:
# Rename to the column names recommend_product() filters on (CustomerID, StockCode, probability).
# Mapping entries whose source column is absent are ignored by rename().
all_predictions = all_predictions.rename(
    columns={
        "userID": 'CustomerID',
        "itemID": 'StockCode',
        "rating": 'Quantity',
        'prediction': 'probability',
    }
)

In [None]:
def recommend_product(customer_id, top_n=5):
  """Print a customer's largest purchases next to the model's top recommendations.

  Reads the notebook-level frames ``df_order``, ``df_product`` and
  ``all_predictions``. Nothing is returned; the report is written to stdout.

  Args:
    customer_id: value compared against the ``CustomerID`` column of
      ``df_order`` and ``all_predictions``.
    top_n: number of purchases and recommendations to show. Defaults to 5,
      the value that was previously hard-coded.
  """
  # Each selection is computed once and reused for both the table and the
  # product-name lookup (previously every filter + nlargest ran twice).
  print(" \n---------- Top {} Bought StockCodes -----------\n".format(top_n))

  top_bought = (
      df_order.loc[df_order['CustomerID'] == customer_id, ['CustomerID', 'StockCode', 'Quantity']]
      .nlargest(top_n, 'Quantity')
  )
  print(top_bought)

  print('\n-------Product Name of bought StockCodes ------\n')

  # isin() keeps df_product's own row order, so names are not listed in rank order.
  print(df_product[df_product.StockCode.isin(top_bought.StockCode)]['Product Name'])

  print("\n --------- Top {} Recommendations ------------ \n".format(top_n))

  recommend = (
      all_predictions.loc[all_predictions['CustomerID'] == customer_id]
      .nlargest(top_n, 'probability')
  )
  print(recommend)

  print('\n-------Product Name of Recommendations ------\n')

  print(df_product[df_product.StockCode.isin(recommend.StockCode)]['Product Name'])


## 5. Recommendations

In [None]:
# Print the purchase history and recommendations for customer 13137.
recommend_product(13137)

 
---------- Top 5 Bought StockCodes -----------

        CustomerID StockCode  Quantity
234414       13137     84077        48
234443       13137     23321        13
50797        13137     21985        12
234404       13137     22296        12
234418       13137     22297        12

-------Product Name of bought StockCodes ------

70      MightySkins Skin Decal Wrap Compatible with Li...
490           Window Tint Film Mitsubishi (all doors) DIY
694     Harriton Men's Paradise Short-Sleeve Performan...
1065    MightySkins Skin For Samsung Galaxy J3 (2016),...
1339    MightySkins Skin Decal Wrap Compatible with Le...
Name: Product Name, dtype: object

 --------- Top 5 Recommendations ------------ 

         CustomerID StockCode  probability
1951608       13137    85123A     0.975194
1952595       13137     21034     0.971388
1951667       13137     22197     0.960145
1951758       13137    85099F     0.929778
1952914       13137     22766     0.917395

-------Product Name of Recommendat

In [None]:
# Print the purchase history and recommendations for customer 15127.
recommend_product(15127)


 
---------- Top 5 Bought StockCodes -----------

        CustomerID StockCode  Quantity
272296       15127     23263        48
272287       15127     23354        24
272288       15127     22813        24
272289       15127     23096        24
272285       15127     21181        12

-------Product Name of bought StockCodes ------

13                billyboards Porcelain School Chalkboard
374     MightySkins Protective Vinyl Skin Decal for Po...
923     Zoan Synchrony Duo Sport Electric Snow Helmet ...
952     MightySkins Skin Decal Wrap Compatible with Sm...
1576    EMPIRE KLIX Klutch Designer Wallet Case for LG G2
Name: Product Name, dtype: object

 --------- Top 5 Recommendations ------------ 

         CustomerID StockCode  probability
6135734       15127     84879     0.973742
6137006       15127     35970     0.935546
6136832       15127     21034     0.931347
6137564       15127     23356     0.925915
6137220       15127    85049A     0.922400

-------Product Name of Recommendat