In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")

In [2]:
data = {
    "user_id":[1, 1, 1, 2, 2, 3, 3, 4],
    "movie_id":[101, 102, 103, 101, 103, 101, 104, 102],
    "ratings":[5, 3, 4, 4, 5, 3, 4, 2]
}

In [3]:
ratings = pd.DataFrame(data)

ratings

In [4]:
user_items_matrix = ratings.pivot(index="user_id", columns="movie_id", values="ratings")

In [5]:
uim = user_items_matrix.copy()

In [6]:
user_items_matrix

movie_id,101,102,103,104
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,5.0,3.0,4.0,
2,4.0,,5.0,
3,3.0,,,4.0
4,,2.0,,


In [7]:
user_items_matrix.fillna(0, inplace=True)

In [8]:
user_items_matrix

movie_id,101,102,103,104
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,5.0,3.0,4.0,0.0
2,4.0,0.0,5.0,0.0
3,3.0,0.0,0.0,4.0
4,0.0,2.0,0.0,0.0


In [9]:
uim

movie_id,101,102,103,104
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,5.0,3.0,4.0,
2,4.0,,5.0,
3,3.0,,,4.0
4,,2.0,,


In [10]:
user_similarity = cosine_similarity(user_items_matrix)

In [11]:
user_similarity

array([[1.        , 0.88345221, 0.42426407, 0.42426407],
       [0.88345221, 1.        , 0.37481703, 0.        ],
       [0.42426407, 0.37481703, 1.        , 0.        ],
       [0.42426407, 0.        , 0.        , 1.        ]])

In [12]:
user_similarity_df = pd.DataFrame(user_similarity, index=user_items_matrix.index, 
                                  columns=user_items_matrix.index)

In [13]:
user_similarity_df

user_id,1,2,3,4
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1.0,0.883452,0.424264,0.424264
2,0.883452,1.0,0.374817,0.0
3,0.424264,0.374817,1.0,0.0
4,0.424264,0.0,0.0,1.0


In [14]:
def predict_ratings(user_id, user_items_matrix, user_similarity_df):
    user_ratings = user_items_matrix.iloc[user_id].values
    similarity_score = user_similarity_df[user_id].values
    weighted_sum = np.dot(similarity_score, user_items_matrix.fillna(0))
    similarity_sum = np.sum(np.abs(similarity_score))
    predicted_ratings = weighted_sum / (similarity_sum+1e-10)
    return predicted_ratings

In [15]:
user_id = 1
predicted_ratings = predict_ratings(user_id, uim, user_similarity_df)

In [16]:
predicted_ratings

array([3.58955768, 1.4086954 , 3.08101083, 0.62118173])

In [17]:
movie_ids = user_items_matrix.columns

In [18]:
unrated_movies = uim.loc[user_id].isna()

In [19]:
unrated_movies

movie_id
101    False
102    False
103    False
104     True
Name: 1, dtype: bool

In [20]:
recomended_movies = movie_ids[unrated_movies].values

In [21]:
recomended_movies

array([104], dtype=int64)

In [22]:
recommended_ratings = predicted_ratings[unrated_movies]

In [23]:
recommended_ratings

array([0.62118173])

In [24]:
recommendations = sorted(zip(recomended_movies, recommended_ratings), 
                         key=lambda x: x[1], reverse=True)

In [25]:
for i, j in recommendations:
    print(f"Movie ID : {i}  Ratings : {j}" )

Movie ID : 104  Ratings : 0.6211817289448168
