In [85]:
import pandas as pd
from surprise import Dataset, Reader , SVD , accuracy
from surprise.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import TruncatedSVD
from sklearn.neighbors import NearestNeighbors
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error , mean_absolute_error
from tensorflow.keras.layers import Input , Embedding , Flatten , Dense , Concatenate
from tensorflow.keras.models import Model 
from tensorflow.keras.optimizers import Adam
import warnings
warnings.filterwarnings('ignore')
from collections import defaultdict

In [86]:
import pandas as pd
from surprise import Dataset , SVD , Reader , accuracy


In [87]:
from scipy.sparse import csr_matrix


In [88]:
data = Dataset.load_builtin('ml-100k')
raw_ratings = data.raw_ratings
df = pd.DataFrame(raw_ratings , columns=["userId" , "itemId" , "rating","timestamp"])
df.head()

Unnamed: 0,userId,itemId,rating,timestamp
0,196,242,3.0,881250949
1,186,302,3.0,891717742
2,22,377,1.0,878887116
3,244,51,2.0,880606923
4,166,346,1.0,886397596


In [89]:
user_item_matrix = df.pivot(index="userId", columns="itemId", values="rating").fillna(0)
csr_data = csr_matrix(user_item_matrix.values)

In [90]:
#Tuning HyperParameters
from surprise.model_selection import GridSearchCV
param_grid = {
    'n_factors': [20, 50, 100],  # no  of latent features
    'lr_all': [0.002, 0.005, 0.01], #learrning Rate
    'reg_all': [0.02, 0.1, 0.2] #Rgularization strrength penalizes large weights in latent features or biases
}


In [91]:
cv = GridSearchCV(SVD , param_grid, measures=["rmse","mae"] , cv= 3 , n_jobs=-1 , joblib_verbose=1 )

In [92]:
cv.fit(data)

[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    8.4s
[Parallel(n_jobs=-1)]: Done  81 out of  81 | elapsed:   19.5s finished


In [93]:
print(cv.best_score["rmse"])
print(cv.best_params["rmse"])

0.9281127890192854
{'n_factors': 100, 'lr_all': 0.01, 'reg_all': 0.1}


In [94]:
from surprise.model_selection import train_test_split
trainset , testset = train_test_split(data , test_size=0.2 , random_state=42)

In [95]:
best_params = cv.best_params["rmse"]
model_svd = SVD(n_factors = best_params["n_factors"],
                lr_all = best_params["lr_all"], 
                reg_all = best_params["reg_all"])
model_svd.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.SVD at 0x1662350a9c0>

In [96]:
pred = model_svd.test(testset)

In [97]:
print(accuracy.rmse(pred))
print(accuracy.mae(pred))

RMSE: 0.9198
0.91978849480023
MAE:  0.7272
0.7271823475164751


In [98]:
prediction = model_svd.predict(uid='196',iid=302)
print(prediction)

user: 196        item: 302        r_ui = None   est = 3.58   {'was_impossible': False}


In [99]:
print(f"\nPredicted rating by user 196 for item 302: {prediction.est:.2f}")


Predicted rating by user 196 for item 302: 3.58


In [100]:
#For getting a List of all user and item id
all_users = set([str(uid) for (uid, _, _, _) in data.raw_ratings])
all_items = set([str(iid) for (_, iid, _, _) in data.raw_ratings])

In [101]:
train_set_items = set((trainset.to_raw_uid(u) , trainset.to_raw_iid(i)) for (u ,i , _) in trainset.all_ratings())
test_candidates = [(u,i) for u in all_users for i in all_items if (u,i) not in train_set_items]

In [102]:
predictions = model_svd.test([(u,i,0) for (u,i) in test_candidates])

In [106]:
# function for top n recommendation
def get_top_n (predictions ,  n=5 ): 
    top_n = defaultdict(list)
    
    for pred in predictions:
        top_n[pred.uid].append((pred.iid, pred.est))

# sort prediction for each user
    for uid , user_ratings in top_n.items():
        user_ratings.sort(key = lambda x:x[1], reverse = True)
        top_n[uid] = user_ratings[:n]

    return top_n

top_n_recs = get_top_n(predictions , n=5)


# Example: print recommendations for user '196'
print("Top 5 recommendations for user 196:")
for item_id, est_rating in top_n_recs['196']:
    print(f"Item {item_id}: Predicted rating {est_rating:.2f}")

Top 5 recommendations for user 196:
Item 318: Predicted rating 4.46
Item 603: Predicted rating 4.41
Item 483: Predicted rating 4.40
Item 488: Predicted rating 4.39
Item 64: Predicted rating 4.38
