In [1]:
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import string
import pickle

from sklearn.model_selection import train_test_split
import datetime
import math

from recommenders.utils.timer import Timer
from recommenders.datasets.python_splitters import python_random_split
from recommenders.evaluation.python_evaluation import rmse, mae, rsquared, exp_var, map_at_k, ndcg_at_k, precision_at_k, recall_at_k, get_top_k_items
from recommenders.models.surprise.surprise_utils import predict, compute_ranking_predictions


from surprise import Dataset
from surprise import Reader
from surprise import SVDpp
from surprise.model_selection import cross_validate, train_test_split
from surprise.model_selection import KFold
from surprise.model_selection import GridSearchCV
from surprise import accuracy

In [2]:
similaritiy = pickle.load(open("content_based_similaritiy-AMAZON.pkl","rb" ))
new_df = pickle.load(open("content_based_data-AMAZON.pkl", "rb")) 
ratings = pickle.load(open("ratings-amazon.pkl","rb" ))

In [3]:
ratings_data=ratings[["userId", "itemId","ratings", "title", "brand", "category", "main_cat", "also_buy"]]

In [4]:
ratings_data = ratings_data[ratings_data["main_cat"]=="Home Audio & Theater"]
ratings_data.reset_index(drop=True, inplace=True)

In [5]:
ratings_data

Unnamed: 0,userId,itemId,ratings,title,brand,category,main_cat,also_buy
0,435,10,5.0,Advanced High Speed Digital 6 feet Hdmi 24k Go...,LE,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,"[B000R9KYZA, B00068NUO4, B001CHTJ8O, B003XTWJM..."
1,436,10,5.0,Advanced High Speed Digital 6 feet Hdmi 24k Go...,LE,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,"[B000R9KYZA, B00068NUO4, B001CHTJ8O, B003XTWJM..."
2,437,10,5.0,Advanced High Speed Digital 6 feet Hdmi 24k Go...,LE,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,"[B000R9KYZA, B00068NUO4, B001CHTJ8O, B003XTWJM..."
3,438,10,5.0,Advanced High Speed Digital 6 feet Hdmi 24k Go...,LE,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,"[B000R9KYZA, B00068NUO4, B001CHTJ8O, B003XTWJM..."
4,439,10,5.0,Advanced High Speed Digital 6 feet Hdmi 24k Go...,LE,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,"[B000R9KYZA, B00068NUO4, B001CHTJ8O, B003XTWJM..."
...,...,...,...,...,...,...,...,...
199190,101147,40003,5.0,Brainwavz S5 IEM Noise Isolating Earphones wit...,BRAINWAVZ,"[Electronics, Headphones]",Home Audio & Theater,[]
199191,78955,40003,5.0,Brainwavz S5 IEM Noise Isolating Earphones wit...,BRAINWAVZ,"[Electronics, Headphones]",Home Audio & Theater,[]
199192,8613,40003,5.0,Brainwavz S5 IEM Noise Isolating Earphones wit...,BRAINWAVZ,"[Electronics, Headphones]",Home Audio & Theater,[]
199193,29769,40003,5.0,Brainwavz S5 IEM Noise Isolating Earphones wit...,BRAINWAVZ,"[Electronics, Headphones]",Home Audio & Theater,[]


In [6]:
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(ratings_data[["userId", "itemId", "ratings"]], reader=reader)

train_set, test_set = train_test_split(data, test_size=.20)

In [7]:
from surprise.model_selection import KFold
from collections import defaultdict

def precision_recall_at_k(predictions, k=5, threshold=3.5):

    user_est_true = defaultdict(list)
    for uid, _, true_r, est, _ in predictions:
        user_est_true[uid].append((est, true_r))

    precisions = dict()
    recalls = dict()
    for uid, user_ratings in user_est_true.items():

        user_ratings.sort(key=lambda x: x[0], reverse=True)

        n_rel = sum((true_r >= threshold) for (_, true_r) in user_ratings)

        n_rec_k = sum((est >= threshold) for (est, _) in user_ratings[:k])

        n_rel_and_rec_k = sum(
            ((true_r >= threshold) and (est >= threshold))
            for (est, true_r) in user_ratings[:k]
        )

        precisions[uid] = n_rel_and_rec_k / n_rec_k if n_rec_k != 0 else 0

        recalls[uid] = n_rel_and_rec_k / n_rel if n_rel != 0 else 0

    return precisions, recalls




algo = SVDpp(random_state=0, n_factors=1000,reg_all=0.3, n_epochs=100, lr_all= 0.006)

algo.fit(train_set)
predictions = algo.test(test_set)
precisions, recalls = precision_recall_at_k(predictions, threshold=4)

print("RMSE: {}".format(accuracy.rmse(predictions)))
print("MAE: {}".format(accuracy.mae(predictions)))
print("Precision value : {}".format(sum(prec for prec in precisions.values()) / len(precisions)))
print("Recalls value : {}".format(sum(rec for rec in recalls.values()) / len(recalls)))


RMSE: 1.0676
RMSE: 1.0675544043794454
MAE:  0.7922
MAE: 0.7921975883660387
Precision value : 0.6752271493398071
Recalls value : 0.6781397320809139


In [8]:
pickle.dump(algo, open("model_svdppLAST-Amazon.pkl","wb" ))

In [6]:
algo= pickle.load(open("model_svdppLAST-Amazon.pkl","rb" ))

In [10]:
def reccomend(movie, userId):
    movie_index = new_df[new_df['title'] == movie].index[0]
    distances = similaritiy[movie_index]
    movies_list = sorted(list(enumerate(distances)), reverse=True, key=lambda x:x[1])[1:200]
    
    item=[]
    
    for i in movies_list:
        item.append(new_df.iloc[i[0]].itemId)
    
    rect= pd.DataFrame(item, columns={"itemId"})
    rect["userId"]= userId
    predictions = predict(algo, rect, usercol='userId', itemcol='itemId').sort_values("prediction", ascending=False)[:10]
    predictions = pd.merge(predictions, new_df[["title", "itemId", "category", "main_cat", "brand"]], how="inner", on="itemId")
    
    return predictions


## Recommend to item for  user 437

In [8]:
a =reccomend("Advanced High Speed Digital 6 feet Hdmi 24k Gold Sealed Connector Cable", 437)
a

Unnamed: 0,userId,itemId,prediction,title,category,main_cat,brand
0,437,31348,4.715639,BlueRigger High Speed HDMI Cable with Ethernet...,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,BlueRigger
1,437,30979,4.682983,PNY C-H-P10-A06-H 6-Feet HDMI to HDMI Cable (A...,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,PNY
2,437,7883,4.647315,Belkin PureAV AV22300-03 3-Foot HDMI-to-HDMI A...,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,Belkin
3,437,17066,4.622445,"High Speed HDMI Cable, 6 feet (Colors may vary)","[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,MIG
4,437,33094,4.619798,Belkin Pro 2000 3D High Speed HDMI Cable (Supp...,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,Belkin
5,437,25058,4.617324,eForCity TOTHHDMHMF03 10-Feet Male to Female H...,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,eForCity
6,437,19900,4.614761,2 Gold HIGH SPEED HDMI WITH ETHERNET Cables fo...,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,eForCity
7,437,28202,4.612092,BlueRigger High Speed HDMI to DVI Adapter Cabl...,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,BlueRigger
8,437,7671,4.597831,Panasonic RP-CDHG15 High Speed HDMI Cable - 4....,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,Panasonic
9,437,20903,4.597085,Philips SWV3474S/27 Swivel High Speed HDMI Cab...,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,Philips


## Which item bought the user 437 ?

In [9]:
ratings_data[ratings_data["userId"]==437]

Unnamed: 0,userId,itemId,ratings,title,brand,category,main_cat,also_buy
2,437,10,5.0,Advanced High Speed Digital 6 feet Hdmi 24k Go...,LE,"[Electronics, Accessories & Supplies, Audio & ...",Home Audio & Theater,"[B000R9KYZA, B00068NUO4, B001CHTJ8O, B003XTWJM..."
