In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity


In [3]:
# One (user, movie, rating) triple per observed rating.
rating_records = [
    ('A', 'M1', 5),
    ('A', 'M2', 4),
    ('A', 'M3', 3),
    ('B', 'M1', 5),
    ('B', 'M4', 4),
    ('C', 'M2', 4),
    ('C', 'M3', 2),
    ('D', 'M4', 5),
]

# Transpose the records into the column-oriented dict the DataFrame is built from.
data = {
    column: list(values)
    for column, values in zip(('User', 'Movie', 'Rating'), zip(*rating_records))
}

df = pd.DataFrame(data)
df


Unnamed: 0,User,Movie,Rating
0,A,M1,5
1,A,M2,4
2,A,M3,3
3,B,M1,5
4,B,M4,4
5,C,M2,4
6,C,M3,2
7,D,M4,5


In [4]:
# Reshape the long ratings table to one row per user and one column per movie.
ratings_wide = pd.pivot_table(df, values='Rating', index='User', columns='Movie')

# A (user, movie) pair with no rating comes out of the pivot as NaN; store it as 0.
user_item_matrix = ratings_wide.fillna(0)

user_item_matrix


Movie,M1,M2,M3,M4
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,5.0,4.0,3.0,0.0
B,5.0,0.0,0.0,4.0
C,0.0,4.0,2.0,0.0
D,0.0,0.0,0.0,5.0


In [5]:
# The same user labels are used for both axes of the similarity table.
user_ids = user_item_matrix.index

# Pairwise cosine similarity between the users' rating rows.
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(data=user_similarity, index=user_ids, columns=user_ids)

user_similarity_df


User,A,B,C,D
User,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,1.0,0.552158,0.695701,0.0
B,0.552158,1.0,0.0,0.624695
C,0.695701,0.0,1.0,0.0
D,0.0,0.624695,0.0,1.0


In [6]:
def recommend_movies(user, matrix, similarity_df, *, weighted=False):
    """Score the movies `user` has not rated using the other users' ratings.

    A movie counts as unrated when its entry for `user` in `matrix` is 0.
    Each unrated movie's score is the sum of the values found in the other
    users' rows for that movie.

    Parameters
    ----------
    user : label
        Row label in `matrix` and column label in `similarity_df`.
    matrix : pandas.DataFrame
        User x movie ratings, with 0 marking "not rated".
    similarity_df : pandas.DataFrame
        User x user similarity scores; only the column for `user` is read.
    weighted : bool, default False
        When False, every other user's rating is added as-is, so the
        similarity values only decide the order in which users are visited.
        When True, each rating is multiplied by that user's similarity to
        `user` before being added, so a user with similarity 0 contributes
        nothing.

    Returns
    -------
    list of (movie, score) tuples
        Sorted by score, highest first. Empty when `user` has no unrated
        movie or when there is no other user.

    Raises
    ------
    KeyError
        If `user` is not a column of `similarity_df`, or is looked up in
        `matrix` and is not one of its rows.
    """
    # Remove the target user by label. Slicing off the first sorted entry is
    # only correct when the user's self-similarity sorts strictly first; with
    # ties (e.g. an all-zero rating row, whose similarity to everyone is 0)
    # a different user would be discarded and the target kept instead.
    similar_users = (
        similarity_df[user]
        .sort_values(ascending=False)
        .drop(labels=user, errors="ignore")
    )

    # Which movies are unrated depends only on `user`, so work it out once
    # instead of re-checking inside the per-user loop.
    unseen_movies = [movie for movie in matrix.columns if matrix.loc[user, movie] == 0]

    recommendations = {}
    for sim_user, similarity in similar_users.items():
        weight = similarity if weighted else 1
        for movie in unseen_movies:
            recommendations[movie] = (
                recommendations.get(movie, 0) + weight * matrix.loc[sim_user, movie]
            )

    return sorted(recommendations.items(), key=lambda x: x[1], reverse=True)

# Score the movies user 'A' has a 0 entry for in user_item_matrix, using the
# other users' rows, and show them highest score first.
recommend_movies('A', user_item_matrix, user_similarity_df)


[('M4', np.float64(9.0))]

In [7]:
# Mean squared error on three hand-written rating/prediction pairs. These
# literals are illustrative inputs, not values computed earlier in the notebook.
actual = [5, 4, 3]
predicted = [4.5, 4, 3.2]

mse = mean_squared_error(actual, predicted)
mse


0.09666666666666669