**Installing the DeepCTR library (provides DeepFM and xDeepFM)**

In [None]:
!pip install deepctr



**Importing the Library**

In [None]:
#Importing required Libraries
import pandas as pd
import torch
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from deepctr.models import DeepFM
from deepctr.feature_column import SparseFeat, VarLenSparseFeat,get_feature_names

**Reading the data**

In [None]:
# Upload the transactions CSV through the Colab file picker and load it with pandas.
from google.colab import files
import io

EXPECTED_UPLOAD_NAME = 'transaction_data.csv'

uploaded = files.upload()
if EXPECTED_UPLOAD_NAME in uploaded:
    csv_bytes = uploaded[EXPECTED_UPLOAD_NAME]
elif len(uploaded) == 1:
    # Colab stores a re-uploaded file under a suffixed name such as
    # "transaction_data (2).csv". NOTE(review): whether the returned key is the
    # original or the suffixed name appears to depend on the Colab version, so
    # fall back to the only uploaded file instead of failing with a KeyError.
    csv_bytes = next(iter(uploaded.values()))
else:
    raise KeyError(
        'Expected an upload named {!r}, got: {}'.format(EXPECTED_UPLOAD_NAME, sorted(uploaded))
    )
data = pd.read_csv(io.BytesIO(csv_bytes))

Saving transaction_data.csv to transaction_data (2).csv


**Find out the shape of the data**

In [None]:
# (number of rows, number of columns) of the loaded transaction table.
data.shape

(403046, 6)

In [None]:
# ID columns that are label-encoded and fed to the models as sparse features.
sparse_features = [
    "merchant_id",
    "customer_id",
]
# Label column, kept as a one-element list so that frame[target] selects a
# DataFrame (2-D values) rather than a Series.
target = ["rating"]

**Label Encoder**

In [None]:
# Replace the raw IDs in every sparse column with integer codes, using one
# freshly fitted encoder per column.
for feat in sparse_features:
    lbe = LabelEncoder()
    lbe.fit(data[feat])
    data[feat] = lbe.transform(data[feat])

**count unique features for each sparse field**

In [None]:
# One SparseFeat per ID column; the vocabulary size is the number of distinct
# encoded values and each ID is embedded in 4 dimensions.
fixlen_feature_columns = [
    SparseFeat(feat, data[feat].nunique(), embedding_dim=4)
    for feat in sparse_features
]
# The linear part and the deep part of the model share the same feature columns.
linear_feature_columns = fixlen_feature_columns
dnn_feature_columns = fixlen_feature_columns
feature_names = get_feature_names(linear_feature_columns + dnn_feature_columns)

**Input data for model**

In [None]:
# Fix the seed so that the split, and therefore every metric and
# recommendation derived from it, is reproducible across kernel restarts.
SPLIT_SEED = 42

# Hold out 20% of the rows for testing.
train, test = train_test_split(data, test_size=0.2, random_state=SPLIT_SEED)

# The models take a dict that maps each feature name to its column of values.
train_model_input = {name: train[name].values for name in feature_names}
test_model_input = {name: test[name].values for name in feature_names}

**Model Building**

In [None]:
# DeepFM regressor over the shared feature columns, trained with Adam on
# mean squared error (also reported as the metric).
model = DeepFM(
    linear_feature_columns,
    dnn_feature_columns,
    task='regression',
)
model.compile("adam", "mse", metrics=['mse'])

**Fitting, Predicting and Print**

In [None]:
# Train for 5 epochs, keeping 20% of the training rows for validation.
history = model.fit(
    train_model_input,
    train[target].values,
    batch_size=256,
    epochs=5,
    verbose=2,
    validation_split=0.2,
)

# Score the held-out test rows and report the test MSE rounded to 4 decimals.
pred_ans = model.predict(test_model_input, batch_size=256)
test_mse = mean_squared_error(test[target].values, pred_ans)
print("test MSE", round(test_mse, 4))

Epoch 1/5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


1008/1008 - 5s - loss: 0.0334 - mse: 0.0333 - val_loss: 0.0149 - val_mse: 0.0147
Epoch 2/5
1008/1008 - 5s - loss: 0.0134 - mse: 0.0131 - val_loss: 0.0149 - val_mse: 0.0145
Epoch 3/5
1008/1008 - 4s - loss: 0.0123 - mse: 0.0119 - val_loss: 0.0149 - val_mse: 0.0145
Epoch 4/5
1008/1008 - 4s - loss: 0.0118 - mse: 0.0113 - val_loss: 0.0154 - val_mse: 0.0148
Epoch 5/5
1008/1008 - 4s - loss: 0.0116 - mse: 0.0110 - val_loss: 0.0159 - val_mse: 0.0152
test MSE 0.0153


**Predictions using DeepFM Library**

In [None]:
# DeepFM predictions for the held-out test rows.
pred_ans

array([[1.0373538 ],
       [1.0156748 ],
       [0.99645656],
       ...,
       [1.102609  ],
       [1.0622998 ],
       [1.1202776 ]], dtype=float32)

**Importing the relevant libraries**

In [None]:
from deepctr.models import xDeepFM

**Model Building**

In [None]:
# xDeepFM regressor built on the same feature columns and compiled the same
# way as the DeepFM model (Adam, mean squared error).
xDeepFMmodel = xDeepFM(
    linear_feature_columns,
    dnn_feature_columns,
    task='regression',
)
xDeepFMmodel.compile("adam", "mse", metrics=['mse'])

**Fitting, Predicting and Evaluating the Model**

In [None]:
# Train xDeepFM for 5 epochs, keeping 20% of the training rows for validation.
historyxDeepFM = xDeepFMmodel.fit(
    train_model_input,
    train[target].values,
    batch_size=256,
    epochs=5,
    verbose=2,
    validation_split=0.2,
)

# Score the held-out test rows and report the test MSE rounded to 4 decimals.
pred_ans_xDeepFM = xDeepFMmodel.predict(test_model_input, batch_size=256)
test_mse_xDeepFM = mean_squared_error(test[target].values, pred_ans_xDeepFM)
print("test MSE", round(test_mse_xDeepFM, 4))

Epoch 1/5


  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


1008/1008 - 17s - loss: 0.0231 - mse: 0.0229 - val_loss: 0.0152 - val_mse: 0.0149
Epoch 2/5
1008/1008 - 17s - loss: 0.0135 - mse: 0.0131 - val_loss: 0.0150 - val_mse: 0.0146
Epoch 3/5
1008/1008 - 17s - loss: 0.0124 - mse: 0.0119 - val_loss: 0.0153 - val_mse: 0.0148
Epoch 4/5
1008/1008 - 17s - loss: 0.0118 - mse: 0.0112 - val_loss: 0.0153 - val_mse: 0.0147
Epoch 5/5
1008/1008 - 17s - loss: 0.0114 - mse: 0.0108 - val_loss: 0.0157 - val_mse: 0.0151
test MSE 0.0149


**Predictions using xDeepFM**

In [None]:
# xDeepFM predictions for the held-out test rows.
pred_ans_xDeepFM

array([[1.0461034 ],
       [0.98672485],
       [0.99297035],
       ...,
       [1.0994622 ],
       [1.0628743 ],
       [1.0721788 ]], dtype=float32)

**Find the Shape of the Predictions**

In [None]:
# Shape of the xDeepFM prediction array.
pred_ans_xDeepFM.shape

(80610, 1)

**Reshaping the predictions using xDeepFM**

In [None]:
import numpy as np

# Flatten the single-column prediction array into a 1-D vector. Passing -1 lets
# NumPy infer the length, so the cell keeps working when the test set size
# differs from the 80610 rows of the original run.
predicted_rating = np.reshape(pred_ans_xDeepFM, -1)
predicted_rating

array([1.0494889 , 1.2944921 , 1.0605603 , ..., 1.114514  , 1.143002  ,
       0.96763444], dtype=float32)

**Creating a Dataframe with the test dictionary**

In [None]:
# Tabular view of the test inputs: one column per model feature.
df = pd.DataFrame.from_dict(test_model_input)

**Find out the shape of the df**

In [None]:
# Shape of the frame built from the test inputs.
df.shape

(80610, 2)

In [None]:
# Keep using the train/test split created before the models were fitted.
# `predicted_rating` holds one prediction per row of that `test` frame, in the
# same row order; drawing a new random split here would pair every prediction
# with the customer, merchant and actual rating of an unrelated row.
assert len(test) == len(predicted_rating), (
    "test rows and predictions are out of sync; re-run the prediction cells"
)

**Creating a DataFrame with the Actual Rating and Predicted Rating for comparison**

In [None]:
# Put the observed rating of each test row next to its predicted rating.
# The frame takes the row index of `test`; the prediction array is matched to
# those rows by position.
dictionary = dict(
    Actual_Rating=test["rating"],
    Predicted_Rating=predicted_rating,
)
comparison = pd.DataFrame(data=dictionary)
comparison

Unnamed: 0,Actual_Rating,Predicted_Rating
145745,1.000000,1.049489
366256,1.000000,1.294492
56201,1.000000,1.060560
48123,1.121951,1.114017
54524,1.000000,1.174566
...,...,...
348755,1.000000,1.047130
117593,1.121951,1.087824
195411,1.000000,1.114514
40218,1.121951,1.143002


**Data Preparation for Building Recommendation Engine**

In [None]:
# Long-format (customer, merchant, predicted rating) table that feeds the
# recommendation steps.
dict1 = dict(
    CustId=test['customer_id'],
    MerchantId=test['merchant_id'],
    Predicted_Rating=comparison['Predicted_Rating'],
)
recommendation_df = pd.DataFrame(dict1)
recommendation_df

Unnamed: 0,CustId,MerchantId,Predicted_Rating
145745,6749,2846,1.049489
366256,5595,3043,1.294492
56201,4254,3683,1.060560
48123,3872,876,1.114017
54524,2506,3257,1.174566
...,...,...,...
348755,4679,7230,1.047130
117593,4223,7537,1.087824
195411,5050,5619,1.114514
40218,747,8163,1.143002


**Missing Value Analysis**

In [None]:
# Number of missing values in each column.
recommendation_df.isnull().sum()

CustId              0
MerchantId          0
Predicted_Rating    0
dtype: int64

**Creation of Pivot Table using the Recommendation data**

In [None]:
# Customer x merchant matrix of predicted ratings. pivot_table averages
# repeated (customer, merchant) pairs by default; pairs that never occur are
# filled with 0.
piv = pd.pivot_table(
    recommendation_df,
    values='Predicted_Rating',
    index=['CustId'],
    columns=['MerchantId'],
)
piv = piv.fillna(0)

piv.head(10)

MerchantId,0,1,2,3,4,5,6,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,...,8498,8499,8500,8501,8502,8503,8504,8505,8506,8507,8508,8509,8510,8511,8512,8513,8514,8515,8516,8518,8519,8520,8521,8522,8523,8524,8525,8526,8527,8528,8529,8530,8531,8532,8533,8535,8536,8537,8538,8539
CustId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.047565,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.006243,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.010158,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.008425,0.0,1.054565,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.029526,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.011462,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Shape of the pivot table: (number of customers, number of merchants).

piv.shape

(8582, 7909)

**Normalization**

In [None]:
def _center_and_scale(row):
    """Subtract the row mean, then divide by the row's range (max - min)."""
    spread = np.max(row) - np.min(row)
    return (row - np.mean(row)) / spread


# Normalise every customer's row of the pivot table.
piv_norm = piv.apply(_center_and_scale, axis=1)

# Rows with zero range produce NaN (0/0); set those to 0, then transpose so
# that merchants become rows and customers become columns.
piv_norm = piv_norm.fillna(0).T

# Keep only the columns (customers) that have at least one non-zero value.
has_signal = (piv_norm != 0).any(axis=0)
piv_norm = piv_norm.loc[:, has_signal]

**Sparse Matrix Creation**

In [None]:
# Store the normalised matrix in compressed sparse row (CSR) form before
# computing similarities.
import scipy as sp
from sklearn.metrics.pairwise import cosine_similarity

piv_sparse = sp.sparse.csr_matrix(piv_norm.to_numpy())

**Cosine Similarity Matrices for Merchants and Customers**

In [None]:
# Rows of piv_sparse are merchants, so this compares merchants with each other.
item_similarity = cosine_similarity(piv_sparse)
# Transposing puts customers in the rows, giving customer-to-customer similarity.
user_similarity = cosine_similarity(piv_sparse.transpose())

**Creating the DataFrame**

In [None]:
# Wrap the similarity matrices in DataFrames so they can be looked up by ID:
# piv_norm's index holds the merchant IDs, its columns the customer IDs.
merchant_labels = piv_norm.index
customer_labels = piv_norm.columns

item_sim_df = pd.DataFrame(item_similarity, index=merchant_labels, columns=merchant_labels)
user_sim_df = pd.DataFrame(user_similarity, index=customer_labels, columns=customer_labels)

**Defining a function that displays the 10 users with the highest cosine similarity**

In [None]:
# Prints the users most similar to a given user (10 by default).

def top_users(user, n=10):
    """Print the ``n`` users whose cosine similarity to ``user`` is highest.

    Reads the module-level ``piv_norm`` (to check that the user is known) and
    ``user_sim_df`` (the user-to-user similarity table).

    Parameters
    ----------
    user : label of a column of ``piv_norm`` / ``user_sim_df``.
    n : int, default 10
        How many similar users to print.

    Returns
    -------
    str or None
        A "No data available" message when ``user`` is not a column of
        ``piv_norm``; otherwise ``None`` (the ranking is printed).
    """
    if user not in piv_norm.columns:
        return('No data available on user {}'.format(user))

    print('Most Similar Users:\n')
    # Sort once, and remove the user's own entry by label rather than by
    # assuming it is the first row after sorting (another user can tie at 1.0).
    ranked = (
        user_sim_df[user]
        .drop(labels=user)
        .sort_values(ascending=False)
        .head(n)
    )
    # A separate loop name keeps the `user` argument from being overwritten.
    for other_user, sim in ranked.items():
        print('User #{0}, Similarity value: {1:.2f}'.format(other_user, sim))

In [None]:
# Print the users most similar to user 6749 (presumably a label-encoded
# customer ID; confirm against the encoding step).
top_users(6749)

Most Similar Users:

User #4224, Similarity value: 0.79
User #858, Similarity value: 0.71
User #1393, Similarity value: 0.67
User #2787, Similarity value: 0.66
User #2337, Similarity value: 0.65
User #6097, Similarity value: 0.63
User #6416, Similarity value: 0.63
User #6456, Similarity value: 0.62
User #8202, Similarity value: 0.62
User #2863, Similarity value: 0.61


**Installing the Surprise library**

In [None]:
!pip install surprise

Collecting surprise
  Downloading https://files.pythonhosted.org/packages/61/de/e5cba8682201fcf9c3719a6fdda95693468ed061945493dea2dd37c5618b/surprise-0.1-py2.py3-none-any.whl
Collecting scikit-surprise
[?25l  Downloading https://files.pythonhosted.org/packages/97/37/5d334adaf5ddd65da99fc65f6507e0e4599d092ba048f4302fe8775619e8/scikit-surprise-1.1.1.tar.gz (11.8MB)
[K     |████████████████████████████████| 11.8MB 334kB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp36-cp36m-linux_x86_64.whl size=1670931 sha256=6f3ba2be69914d8748868f034281277eead78ef8e61470f24afa0a9dfcf394db
  Stored in directory: /root/.cache/pip/wheels/78/9c/3d/41b419c9d2aff5b6e2b4c0fc8d25c538202834058f9ed110d0
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.1 surprise-0.1


**Importing the required libraries**

In [None]:
from surprise import Reader
from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise import SVD, SlopeOne, NMF
from surprise.accuracy import rmse
from surprise import accuracy
from surprise.model_selection import train_test_split

In [None]:
# Keep only merchants and customers that occur in more than the given number
# of rows of the recommendation table.
min_merchant_ratings = 10
merchant_counts = recommendation_df['MerchantId'].value_counts()
filter_merchant = merchant_counts[merchant_counts > min_merchant_ratings].index.tolist()

min_user_ratings = 10
user_counts = recommendation_df['CustId'].value_counts()
filter_users = user_counts[user_counts > min_user_ratings].index.tolist()

# A row survives only when both its merchant and its customer pass the filter.
keeps_merchant = recommendation_df['MerchantId'].isin(filter_merchant)
keeps_user = recommendation_df['CustId'].isin(filter_users)
df_new = recommendation_df[keeps_merchant & keeps_user]

print('The original data frame shape:\t{}'.format(recommendation_df.shape))
print('The new data frame shape:\t{}'.format(df_new.shape))

The original data frame shape:	(80610, 3)
The new data frame shape:	(26708, 3)


**Data Preparation for Recommendation Engine**

In [None]:
# Build the surprise dataset from the filtered table. load_from_df reads the
# columns in the order user, item, rating; the Reader declares a 0-10 scale.
reader = Reader(rating_scale=(0, 10))
rating_columns = ['CustId', 'MerchantId', 'Predicted_Rating']
data1 = Dataset.load_from_df(df_new[rating_columns], reader)

In [None]:
# Compare three surprise algorithms by 5-fold cross-validated RMSE.
benchmark = []
for algorithm in [SVD(biased=False), SlopeOne(), NMF()]:
    # cross_validate needs the surprise Dataset (`data1`), not the pandas
    # frame `data`.
    results = cross_validate(algorithm, data1, measures=['RMSE'], cv=5, verbose=False)

    # Average every metric over the folds and tag the row with the algorithm's
    # class name. pd.concat replaces Series.append, which pandas 2.0 removed.
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = pd.concat([tmp, pd.Series([type(algorithm).__name__], index=['Algorithm'])])
    benchmark.append(tmp)

In [None]:
%time
surprise_results = pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse')

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 5.72 µs


In [None]:
# Benchmark table, ordered by mean cross-validated test RMSE (lowest first).
surprise_results

Unnamed: 0_level_0,test_rmse,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
NMF,0.129569,1.681182,0.071642
SlopeOne,0.207334,0.080936,0.080829
SVD,0.99458,1.371554,0.073767


**Model Creation using SVD**

In [None]:
%time
algo = SVD(biased=False)
cross_validate(algo, data1, measures=['RMSE'], cv=5, verbose=False)

CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 4.77 µs


{'fit_time': (1.3719799518585205,
  1.3787424564361572,
  1.3798658847808838,
  1.3667173385620117,
  1.3857371807098389),
 'test_rmse': array([0.99918794, 0.9985056 , 1.00006245, 0.99260821, 0.98959826]),
 'test_time': (0.03465461730957031,
  0.034332990646362305,
  0.03411984443664551,
  0.03354072570800781,
  0.21748113632202148)}

**Fitting, Predicting and Evaluating the Data**

In [None]:
#Fitting
%time
trainset, testset = train_test_split(data, test_size=0.20)
predictions = algo.test(testset)
accuracy.rmse(predictions)

CPU times: user 0 ns, sys: 4 µs, total: 4 µs
Wall time: 7.87 µs
RMSE: 0.8477


0.8476802120533318

**Predictions Results**

In [None]:
# Show a small sample instead of dumping every Prediction record into the
# notebook output.
predictions[:10]

[Prediction(uid=741, iid=5345, r_ui=1.0882441997528076, est=0.09391645404998572, details={'was_impossible': False}),
 Prediction(uid=586, iid=6533, r_ui=1.0839083194732666, est=0.4011275321541241, details={'was_impossible': False}),
 Prediction(uid=808, iid=1296, r_ui=1.084241509437561, est=0.2815304938052234, details={'was_impossible': False}),
 Prediction(uid=1526, iid=1430, r_ui=1.0516453981399536, est=0.3509455897334868, details={'was_impossible': False}),
 Prediction(uid=85, iid=2314, r_ui=1.1364225149154663, est=0.19359411799215467, details={'was_impossible': False}),
 Prediction(uid=819, iid=3849, r_ui=1.0109783411026, est=0.121231393732014, details={'was_impossible': False}),
 Prediction(uid=3801, iid=6828, r_ui=1.0063700675964355, est=0.12103919967725113, details={'was_impossible': False}),
 Prediction(uid=5, iid=2326, r_ui=1.0268950462341309, est=0.1701879253836916, details={'was_impossible': False}),
 Prediction(uid=1131, iid=855, r_ui=0.9665343165397644, est=0.0219678073538

In [None]:
# First Prediction record: user id, item id, true rating, estimate and details.
predictions[0]

Prediction(uid=741, iid=5345, r_ui=1.0882441997528076, est=0.09391645404998572, details={'was_impossible': False})

In [None]:
# Lookup table from merchant id to merchant name. When an id occurs more than
# once, the name from its last row wins.
merchantid = data['merchant_id'].tolist()
merchant_name_new = data['merchant_name_new'].tolist()
data_dict = {
    merchant: name
    for merchant, name in zip(merchantid, merchant_name_new)
}

**Recommending the Top 20 Merchants per Customer based on Predicted Rating**

In [None]:
def get_top_n(predictions, n):
    """Return the ``n`` highest-estimated items for every user.

    Parameters
    ----------
    predictions : iterable of 5-tuples
        ``(uid, iid, true_rating, estimate, details)`` records, such as the
        Prediction objects displayed earlier in this notebook.
    n : int
        Maximum number of items to keep per user.

    Returns
    -------
    collections.defaultdict
        Maps each uid to a list of ``(iid, estimate)`` pairs sorted by
        estimate, highest first, and truncated to ``n`` entries.
    """
    # Imported here because the notebook never imports defaultdict, which made
    # this function raise NameError on a fresh kernel.
    from collections import defaultdict

    # First map the predictions to each user.
    top_n = defaultdict(list)
    for uid, iid, true_r, est, _ in predictions:
        top_n[uid].append((iid, est))

    # Then sort the predictions for each user and keep the n highest ones.
    for uid, user_ratings in top_n.items():
        user_ratings.sort(key=lambda x: x[1], reverse=True)
        top_n[uid] = user_ratings[:n]

    return top_n

In [None]:
# Up to 20 highest-estimated items per user from the SVD test predictions.
get_top_n(predictions,20)

defaultdict(list,
            {741: [(4114, 0.1352668880214491), (5345, 0.09391645404998572)],
             586: [(6533, 0.4011275321541241)],
             808: [(1296, 0.2815304938052234), (261, 0.16298566271188264)],
             1526: [(6059, 0.4409929891975926),
              (1430, 0.3509455897334868),
              (387, 0.26065504240424353),
              (4160, 0.25792298788492846)],
             85: [(7660, 0.6139350840238379),
              (4194, 0.414288364744663),
              (6094, 0.22845479709211033),
              (2314, 0.19359411799215467)],
             819: [(3849, 0.121231393732014)],
             3801: [(6828, 0.12103919967725113), (2951, 0), (332, 0)],
             5: [(3849, 0.3183826432271296),
              (8076, 0.281822216047157),
              (6886, 0.20316719583481843),
              (2326, 0.1701879253836916),
              (7040, 0.1498016590182486),
              (3992, 0)],
             1131: [(1327, 0.2889355982543938), (855, 0.021967807353812006

**Recommendations based on Actual Rating**

In [None]:
# Observed ratings only, with the columns in customer, merchant, rating order.
rating_columns_ar = ["customer_id", "merchant_id", "rating"]
df_actualrating = data[rating_columns_ar]
df_actualrating.head()

Unnamed: 0,customer_id,merchant_id,rating
0,1000000,1000004,1.0
1,1000010,1000004,1.0
2,1000041,1000004,1.0
3,1000111,1000004,1.0
4,1000135,1000004,1.0


In [None]:
# Count the distinct merchants and distinct customers in the actual-rating data.
n_merchants_ar, n_customers_ar = (
    df_actualrating[column].nunique()
    for column in ("merchant_id", "customer_id")
)
print(n_merchants_ar)
print(n_customers_ar)

8540
8699


In [None]:
# Sparsity of the customer x merchant rating matrix for the actual ratings:
# the percentage of cells that have no rating.
# NOTE(review): available_rating_ar counts rows, so repeated
# (customer, merchant) pairs would be counted more than once; confirm.
available_rating_ar = df_actualrating['rating'].count()
total_rating_ar = n_merchants_ar * n_customers_ar
n_missing_rating_ar = total_rating_ar - available_rating_ar
# Missing over total. The inverse ratio (total / missing) is always above
# 100%, which is how the original run reported 100.5%.
sparsity_ar = (n_missing_rating_ar / total_rating_ar) * 100

In [None]:
# Print the sparsity figure computed for the actual ratings.
print(sparsity_ar)

100.5454940606537


In [None]:
# Surprise dataset for the actual ratings; the Reader declares a 0.5-5 scale.
reader_ar = Reader(rating_scale=(0.5, 5))
final_data_ar = Dataset.load_from_df(df_actualrating, reader=reader_ar)

In [None]:
%time
#Fitting
trainset_ar ,testset_ar = train_test_split(final_data_ar, test_size=0.20)
predictions_ar = algo.test(testset_ar)
accuracy.rmse(predictions_ar)

CPU times: user 4 µs, sys: 0 ns, total: 4 µs
Wall time: 8.11 µs
RMSE: 0.1550


0.15501821770965338

**Recommendations with Actual Rating**

In [None]:
# Up to 20 highest-estimated items per user from the actual-rating predictions.
get_top_n(predictions_ar,20)

defaultdict(list,
            {1000098: [(1008248, 1.0615722612303187),
              (1006052, 1.0615722612303187),
              (1000195, 1.0615722612303187),
              (1003226, 1.0615722612303187),
              (1003961, 1.0615722612303187),
              (1006725, 1.0615722612303187),
              (1001898, 1.0615722612303187),
              (1006723, 1.0615722612303187),
              (1006832, 1.0615722612303187),
              (1004781, 1.0615722612303187),
              (1006044, 1.0615722612303187),
              (1003648, 1.0615722612303187),
              (1004749, 1.0615722612303187),
              (1004647, 1.0615722612303187),
              (1005470, 1.0615722612303187),
              (1002777, 1.0615722612303187),
              (1000750, 1.0615722612303187),
              (1006527, 1.0615722612303187),
              (1000820, 1.0615722612303187),
              (1003481, 1.0615722612303187)],
             1000461: [(1008516, 1.0615722612303187),
              (100