In [None]:
import numpy as np
import pandas as pd
from pandas import DataFrame
from tqdm import tqdm


In [None]:
import sys

sys.path.append("../")

from src.constants import MOVIE_PATH_SANDBOX, RATINGS_PATH_SANDBOX
from src.utils import (
    MovieEncoder,
    mean_average_precision,
    normalized_average_precision,
    train_test_split,
)

In [None]:
# Read the ratings table from RATINGS_PATH_SANDBOX and preview its first rows.
ratings = pd.read_csv(RATINGS_PATH_SANDBOX)
ratings.head()

# Train Test split

In [None]:
%time
train_ratings, test_ratings = train_test_split(ratings)

# Dummy baseline

In [None]:
def calculate_top_movies(df: DataFrame) -> dict:
    """Rank movies by popularity, measured as the number of ratings received.

    Args:
        df: Ratings table with at least ``movieId`` and ``rating`` columns.

    Returns:
        Mapping ``movieId -> number of non-null ratings``, with keys inserted
        in descending order of that count (most-rated movie first).
    """
    # Popularity is the *count* of ratings per movie, not the sum of their values.
    ratings_per_movie = df.groupby("movieId")["rating"].count()
    ranked = ratings_per_movie.sort_values(ascending=False)
    return ranked.to_dict()

In [None]:
# Build the popularity ranking from the training split only.
top = calculate_top_movies(train_ratings)

In [None]:
encoder = MovieEncoder(movie_csv_path=MOVIE_PATH_SANDBOX)

# Show exactly the 10 most-rated movies. The previous `idx >= 10` check ran
# after `print`, so 11 rows were shown. `top` maps movieId -> number of
# ratings (a count, not a sum), and the message is worded accordingly.
for movie_id, n_ratings in list(top.items())[:10]:
    print(f"{encoder.to_title(movie_id)} с количеством оценок {n_ratings}")

# Check Hit Ratio @ 10

In [None]:
# Every distinct movie id present in the full ratings table.
all_movieIds = ratings["movieId"].unique()
# Movie ids in ranking order: iterating `top` yields its keys in insertion order.
top_movieIDs = list(top)

In [None]:
"""
For each user, check whether the top-10 movies contain at least one movie that user watched => the recommendation counts as a HIT.
The Hit Ratio is the mean over all hits.
"""
# Map every test user to the list of movie ids they interacted with.
test_user_item_dict = test_ratings.groupby("userId")["movieId"].apply(list).to_dict()

# The same 10 most-rated movies are recommended to everyone. Build the set
# once, outside the loop, for constant-time membership checks.
top_10_movie_ids = set(top_movieIDs[:10])

hits = []

for user_id, watched_movie_ids in tqdm(test_user_item_dict.items()):
    # A user is a hit when at least one of their test movies is in the top-10.
    # `watched_movie_ids` is a list, so it must be intersected with the top-10:
    # testing `list in top_10` is never true and always produced 0.0.
    hits.append(0 if top_10_movie_ids.isdisjoint(watched_movie_ids) else 1)
print(f"The Hit Ratio @ 10 is {np.average(hits)}")

In [None]:
# The Hit Ratio @ 10 is 0.0

In [None]:
# Every test user receives the same recommendation list: the 10 most-rated
# movies. Slicing per user gives each entry its own independent list object.
top_10_recommended_dict = {user: top_movieIDs[:10] for user in test_user_item_dict}

# Ground truth: shallow copy of the user -> test movie ids mapping.
actual_dict = dict(test_user_item_dict)

# NOTE(review): the labels say "@6" while 10 items are recommended per user —
# confirm which cutoff the metric helpers actually apply.
map_score = mean_average_precision(actual_dict, top_10_recommended_dict)
print(f"MAP@6 is {map_score}")
nap_score = normalized_average_precision(actual_dict, top_10_recommended_dict)
print(f"NAP@6 is {nap_score}")


In [None]:
# MAP@6 is 0.008033498684650784
# NAP@6 is 0.008033498684650784