In [1]:
import pandas as pd
import numpy as np
from surprise import KNNWithMeans
from surprise import Dataset
from surprise import accuracy
from surprise import Reader
from surprise.model_selection import train_test_split

In [2]:
ratings = pd.read_csv('./Data/rating.csv')
ratings.head()

Unnamed: 0,User ID,Movie ID,Rating
0,1,1,5
1,1,2,6
2,1,3,7
3,1,4,4
4,1,5,3


In [3]:
reader = Reader(rating_scale = (1,10))
data = Dataset.load_from_df(ratings[['User ID','Movie ID','Rating']],reader)

In [4]:
[trainset, testset] = train_test_split(data,test_size=.15,shuffle=True)

In [5]:
recom = KNNWithMeans(k=50, sim_options={'name':'cosine','user_based':True})
recom.fit(data.build_full_trainset()) #Using full training set and not doing k
test_pred = recom.test(testset)
RMSE = accuracy.rmse(test_pred)

Computing the cosine similarity matrix...
Done computing similarity matrix.
RMSE: 0.4182


In [6]:
print(ratings['Movie ID'].unique())
ratings_Gp = ratings.groupby('User ID')['Movie ID'].unique().reset_index()
ratings_Gp

[1 2 3 4 5 6]


Unnamed: 0,User ID,Movie ID
0,1,"[1, 2, 3, 4, 5]"
1,2,"[1, 3, 5, 6]"
2,3,"[2, 3, 4, 5]"
3,4,"[1, 2, 3, 4, 6]"
4,5,"[1, 3, 4, 5, 6]"


In [7]:
pre=recom.predict(1,6)
print("The rating value for item 6 by user 1 is", pre[3])
pre=recom.predict(2,2)
print("The rating value for item 2 by user 2 is", pre[3])
pre=recom.predict(2,4)
print("The rating value for item 4 by user 2 is", pre[3])
pre=recom.predict(3,1)
print("The rating value for item 1 by user 3 is", pre[3])
pre=recom.predict(3,6)
print("The rating value for item 6 by user 3 is", pre[3])
pre=recom.predict(4,5)
print("The rating value for item 5 by user 4 is", pre[3])
pre=recom.predict(5,2)
print("The rating value for item 2 by user 5 is", pre[3])

The rating value for item 6 by user 1 is 5.394185729496399
The rating value for item 2 by user 2 is 4.817930889083921
The rating value for item 4 by user 2 is 4.095496050876971
The rating value for item 1 by user 3 is 2.182836935492127
The rating value for item 6 by user 3 is 2.7108251104856285
The rating value for item 5 by user 4 is 3.9781864004198617
The rating value for item 2 by user 5 is 3.0035648451555907


In [8]:
pre

Prediction(uid=5, iid=2, r_ui=None, est=3.0035648451555907, details={'actual_k': 3, 'was_impossible': False})

In [9]:
#### Keep a threshold of 4.5 and then display recommendation
for i in range(1, 6):
    movies_notwatch = list(set(ratings['Movie ID'].unique())-set(ratings_Gp['Movie ID'][i-1]))
    for j in movies_notwatch:
        pre=recom.predict(i, j)
        if (pre[3]>4.5):
            print(f"The recommending item {j} to user {i} and the predicted rating is {pre[3]}" )

The recommending item 6 to user 1 and the predicted rating is 5.394185729496399
The recommending item 2 to user 2 and the predicted rating is 4.817930889083921
