In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.model_selection import train_test_split


In [2]:
data = {
    "user_id": [1,1,1,2,2,2,3,3,3,4,4],
    "item_id": ["Movie A","Movie B","Movie C",
                "Movie A","Movie B","Movie D",
                "Movie B","Movie C","Movie D",
                "Movie A","Movie D"],
    "rating": [5,4,3,4,5,2,4,5,3,5,4]
}

df = pd.DataFrame(data)
df


Unnamed: 0,user_id,item_id,rating
0,1,Movie A,5
1,1,Movie B,4
2,1,Movie C,3
3,2,Movie A,4
4,2,Movie B,5
5,2,Movie D,2
6,3,Movie B,4
7,3,Movie C,5
8,3,Movie D,3
9,4,Movie A,5


In [3]:
user_item_matrix = df.pivot_table(
    index="user_id",
    columns="item_id",
    values="rating"
).fillna(0)

user_item_matrix


item_id,Movie A,Movie B,Movie C,Movie D
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,5.0,4.0,3.0,0.0
2,4.0,5.0,0.0,2.0
3,0.0,4.0,5.0,3.0
4,5.0,0.0,0.0,4.0


In [4]:
user_similarity = cosine_similarity(user_item_matrix)
user_similarity_df = pd.DataFrame(
    user_similarity,
    index=user_item_matrix.index,
    columns=user_item_matrix.index
)

user_similarity_df


user_id,1,2,3,4
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1,1.0,0.843274,0.62,0.552158
2,0.843274,1.0,0.548128,0.651868
3,0.62,0.548128,1.0,0.265036
4,0.552158,0.651868,0.265036,1.0


In [5]:
def recommend_items(user_id, num_recommendations=2):
    similar_users = user_similarity_df[user_id].sort_values(ascending=False)
    similar_users = similar_users.drop(user_id)

    weighted_scores = pd.Series(dtype=float)

    for sim_user, similarity in similar_users.items():
        user_ratings = user_item_matrix.loc[sim_user]
        weighted_scores = weighted_scores.add(
            user_ratings * similarity, fill_value=0
        )

    user_seen_items = user_item_matrix.loc[user_id]
    weighted_scores = weighted_scores[user_seen_items > 0].drop(user_seen_items.index[user_seen_items > 0])

    return weighted_scores.sort_values(ascending=False).head(num_recommendations)


In [6]:
recommend_items(user_id=1)


Unnamed: 0_level_0,0
item_id,Unnamed: 1_level_1


In [7]:
from sklearn.metrics import mean_squared_error

train, test = train_test_split(df, test_size=0.2, random_state=42)

def predict_rating(user_id, item_id):
    if item_id in user_item_matrix.columns:
        user_index = user_item_matrix.index.get_loc(user_id)
        item_index = user_item_matrix.columns.get_loc(item_id)
        return user_item_matrix.iloc[user_index, item_index]
    return 0

y_true = []
y_pred = []

for _, row in test.iterrows():
    y_true.append(row["rating"])
    y_pred.append(predict_rating(row["user_id"], row["item_id"]))

rmse = np.sqrt(mean_squared_error(y_true, y_pred))
print("RMSE:", rmse)


RMSE: 0.0
