<a href="https://colab.research.google.com/github/AbdelMoumene-Hadfi/Product_Recommendation/blob/master/PFA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!pip install surprise



In [0]:
import pandas as pd
import numpy as np
from surprise import *
from surprise.model_selection import cross_validate
from surprise.model_selection import train_test_split

In [0]:
file = pd.read_csv("/content/out.csv",names=['userId', 'productId','Rating','timestamp'])
file = file.drop(columns=['timestamp'])
file.head()

Unnamed: 0,userId,productId,Rating
631,A3TAS1AG6FMBQW,0972683275,5.0
2162,A5JLAU2ARJ0BO,1400532655,1.0
6268,A18HE80910BTZI,9983891212,5.0
8147,A1T1YSCDW0PD25,B00000DM9W,5.0
9805,A2HMF8ZR67BNZS,B00000J1EQ,5.0


In [0]:
print("Total data ")
print("-"*50)
print("Total No of ratings :",file.shape[0])
print("Total No of Unique Users   :", len(np.unique(file.userId)))
print("Total No of Unique products  :", len(np.unique(file.productId)))

Total data 
--------------------------------------------------
Total No of ratings : 9472
Total No of Unique Users   : 85
Total No of Unique products  : 4450


In [0]:
reader = Reader(rating_scale=(0, 9))
data = Dataset.load_from_df(file[["userId","productId","Rating"]], reader)

In [0]:
benchmark = []
# Iterate over all algorithms
for algorithm in [SVD(), SVDpp(), SlopeOne(), NMF(), NormalPredictor(), KNNBaseline(), KNNBasic(), KNNWithMeans(), KNNWithZScore(), BaselineOnly(), CoClustering()]:
    # Perform cross validation
    results = cross_validate(algorithm, data, measures=['RMSE'], cv=3, verbose=False)
    
    # Get results & append algorithm name
    tmp = pd.DataFrame.from_dict(results).mean(axis=0)
    tmp = tmp.append(pd.Series([str(algorithm).split(' ')[0].split('.')[-1]], index=['Algorithm']))
    benchmark.append(tmp)
    
pd.DataFrame(benchmark).set_index('Algorithm').sort_values('test_rmse')

Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Estimating biases using als...


Unnamed: 0_level_0,test_rmse,fit_time,test_time
Algorithm,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
BaselineOnly,0.878134,0.015024,0.01522
SVDpp,0.881788,6.293937,0.18791
SVD,0.881895,0.36655,0.017752
KNNBaseline,0.959964,0.014482,0.036462
KNNWithMeans,0.98495,0.003938,0.03213
KNNWithZScore,0.986494,0.008524,0.034048
CoClustering,1.00131,0.384176,0.015362
KNNBasic,1.003674,0.002176,0.05255
SlopeOne,1.043512,0.314046,0.09324
NMF,1.072655,0.552993,0.016278


In [0]:
trainset, testset = train_test_split(data, test_size=0.3,random_state=10)

In [0]:
algo = KNNWithMeans(k=4, sim_options={'name': 'cosine', 'user_based': False})
train_pred = algo.fit(trainset)
test_pred = train_pred.test(testset)

Computing the cosine similarity matrix...
Done computing similarity matrix.


In [0]:
print("Item-based Model : Recommander")
accuracy.rmse(test_pred, verbose=True)

Item-based Model : Recommander
RMSE: 1.0107


1.0106502286432053

In [0]:
def get_Iu(uid):
    """ return the number of items rated by given user
    args: 
      uid: the id of the user
    returns: 
      the number of items rated by the user
    """
    try:
        return len(trainset.ur[trainset.to_inner_uid(uid)])
    except ValueError: # user was not part of the trainset
        return 0
    
def get_Ui(iid):
    """ return number of users that have rated given item
    args:
      iid: the raw id of the item
    returns:
      the number of users that have rated the item.
    """
    try: 
        return len(trainset.ir[trainset.to_inner_iid(iid)])
    except ValueError:
        return 0
    
df = pd.DataFrame(test_pred, columns=['uid', 'iid', 'r_ui', 'est', 'details'])
df['Iu'] = df.uid.apply(get_Iu)
df['Ui'] = df.iid.apply(get_Ui)
df['err'] = abs(df.est - df.r_ui)
best_predictions = df.sort_values(by='err')[:10]
worst_predictions = df.sort_values(by='err')[-10:]
df

Unnamed: 0,uid,iid,r_ui,est,details,Iu,Ui,err
0,A3W4D8XOGLWUN5,B0009WKBGC,4.0,5.000000,"{'actual_k': 0, 'was_impossible': False}",53,1,1.000000
1,A3J8A5L5AF5TX9,B004M8SCHO,4.0,4.352338,"{'was_impossible': True, 'reason': 'User and/o...",80,0,0.352338
2,A3NHUQ33CFH3VM,B00BOHNYTW,5.0,4.484347,"{'actual_k': 4, 'was_impossible': False}",79,16,0.515653
3,A1T1YSCDW0PD25,B001QUA6R0,5.0,5.000000,"{'actual_k': 0, 'was_impossible': False}",90,1,0.000000
4,A3R4794K2RVU1S,B007KZQM9G,5.0,4.500000,"{'actual_k': 0, 'was_impossible': False}",63,2,0.500000
...,...,...,...,...,...,...,...,...
2837,A2BMZRO0H7TFCS,B00HPM1FZ0,2.0,4.000000,"{'actual_k': 0, 'was_impossible': False}",84,3,2.000000
2838,AEJAGHLC675A7,B00834SJNA,5.0,5.000000,"{'actual_k': 2, 'was_impossible': False}",78,11,0.000000
2839,A3CG93783LP0FO,B000JMJWV2,5.0,3.964170,"{'actual_k': 4, 'was_impossible': False}",78,10,1.035830
2840,A1EVV74UQYVKRY,B00834SJNA,5.0,4.710071,"{'actual_k': 4, 'was_impossible': False}",57,11,0.289929


In [0]:
df_true = df[df.details == {'actual_k': 1, 'was_impossible': False}]
df_true

In [0]:
algo.predict(uid='A20DZX38KRBIT8',iid='B00834SJNA').est 

4.990530303030304

In [0]:
import pickle

In [0]:
model_file = 'knnmean.sav'
pickle.dump(algo,open(model_file,'wb'))