In [None]:
"""# Import Libraries"""

import math
import warnings
import pandas as pd
import numpy as np

warnings.simplefilter(action='ignore', category=FutureWarning)  # skip Future Warnings for a more clean log
warnings.simplefilter(action='ignore', category=RuntimeWarning)  # skip Runtime Warnings for a more clean log


In [None]:
"""# Load Datasets"""

# Raw inputs: one row per (user, movie) rating, and one row per movie.
ratings = pd.read_csv('ml-latest-small/ratings.csv')
movies = pd.read_csv('ml-latest-small/movies.csv')

# Attach each movie's metadata to its ratings, then keep only the first 20000 rows
# (presumably to keep the pure-Python prediction loops tractable -- TODO confirm).
dataset = ratings.merge(movies, on='movieId', how='inner').iloc[:20000]



In [None]:
"""# User Based Recommendation Systems"""


def user_based_sim(rates_by_a, rates_by_b):
    """Pearson-style similarity between two users over their co-rated movies.

    Each user's deviations are taken from that user's mean over *all* of their
    ratings (not only the co-rated ones), then correlated over the movies both
    users rated.

    :param rates_by_a: DataFrame with ``movieId`` and ``rating`` columns for user a
    :param rates_by_b: DataFrame with ``movieId`` and ``rating`` columns for user b
    :return: ``0`` when the users share no movie; ``1`` when the correlation is
        undefined (a zero denominator, i.e. at least one user shows no deviation
        on the shared movies); otherwise the correlation value
    """
    rate_avg_a = rates_by_a.rating.mean()
    rate_avg_b = rates_by_b.rating.mean()

    # One rating per movie per user; the mean collapses duplicate rows for a movie.
    per_movie_a = rates_by_a.groupby('movieId').rating.mean()
    per_movie_b = rates_by_b.groupby('movieId').rating.mean()

    similar_rated_movies = per_movie_a.index.intersection(per_movie_b.index)
    if len(similar_rated_movies) == 0:
        return 0

    # Vectorised deviations replace the per-movie boolean filtering of both frames.
    deviation_a = per_movie_a.loc[similar_rated_movies].to_numpy() - rate_avg_a
    deviation_b = per_movie_b.loc[similar_rated_movies].to_numpy() - rate_avg_b

    fraction_top = float(np.sum(deviation_a * deviation_b))
    fraction_bottom = (math.sqrt(float(np.sum(deviation_a ** 2)))
                       * math.sqrt(float(np.sum(deviation_b ** 2))))

    # Undefined correlation: handled explicitly instead of relying on a 0/0 -> NaN
    # division (and on RuntimeWarning being globally silenced).
    # NOTE(review): 1 is kept for backward compatibility; 0 may be a more
    # conservative weight for an uninformative neighbour -- confirm intent.
    if fraction_bottom == 0:
        return 1

    frac = fraction_top / fraction_bottom
    return 1 if np.isnan(frac) else frac


def user_based_pred(rates_by_a, p):
    """Predict the rating a user would give movie ``p``.

    The prediction is the user's mean rating plus the similarity-weighted
    average of how far every other rater of ``p`` deviated from their own mean.

    :param rates_by_a: DataFrame of the target user's ratings (``movieId``, ``rating``)
    :param p: movie id to predict
    :return: ``0`` for a user without ratings; the user's own (mean) rating of
        ``p`` if they already rated it; otherwise the prediction capped at 5
    """
    # Cold start: nothing is known about this user.
    if rates_by_a.empty:
        return 0

    # The user already rated this movie: reuse their own rating.
    own_rating = rates_by_a.loc[rates_by_a.movieId == p, 'rating']
    if not own_rating.empty:
        return own_rating.mean()

    weighted_deviation = 0
    weight_total = 0

    # Every user in the dataset who rated p acts as a neighbour.
    for neighbour_id in dataset.loc[dataset.movieId == p, 'userId']:
        neighbour_rates = dataset[dataset.userId == neighbour_id]
        neighbour_avg = neighbour_rates.rating.mean()
        neighbour_rate = neighbour_rates.loc[neighbour_rates.movieId == p, 'rating'].mean()

        weight = user_based_sim(rates_by_a, neighbour_rates)
        weighted_deviation += weight * (neighbour_rate - neighbour_avg)
        weight_total += abs(weight)

    try:
        offset = weighted_deviation / weight_total
    except ZeroDivisionError:
        # No neighbours at all: both accumulators are still the int 0.
        offset = 0
    else:
        # Neighbours exist but every weight is zero: the division yields NaN.
        if np.isnan(offset):
            offset = 0

    # NOTE(review): only the upper bound is clamped; a strongly negative offset
    # can push the prediction below the rating scale -- confirm this is intended.
    return min([5, rates_by_a.rating.mean() + offset])


def user_preferences(a):
    """Score every movie in ``dataset`` for user ``a`` and rank them.

    Movies the user has not rated are scored with ``user_based_pred``. Movies
    the user already rated receive the user's overall mean rating.

    :param a: user id
    :return: DataFrame of ``movieId``, ``rating`` plus the columns of ``movies``
        (with ``genres`` split on ``|`` into a list), sorted by ``rating``
        in descending order
    """
    rates_by_a = dataset[dataset.userId == a]
    avg_rating = rates_by_a.rating.mean()

    # Set membership replaces a boolean filter of the user's frame per movie.
    rated_movie_ids = set(rates_by_a.movieId)

    scores = {}
    for movieId in dataset.movieId.unique():
        if movieId in rated_movie_ids:
            # NOTE(review): already-rated movies get the user's *overall* mean,
            # not their rating of this movie -- confirm this is intended.
            scores[movieId] = avg_rating
        else:
            scores[movieId] = user_based_pred(rates_by_a, movieId)

    # Convert dictionary to pandas dataframe
    scores = pd.DataFrame(scores.items(), columns=['movieId', 'rating'])
    scores = pd.merge(scores, movies, on='movieId', how='inner')
    scores["genres"] = scores["genres"].str.split("|", expand=False)

    scores = scores.sort_values(by='rating', ascending=False)

    return scores


In [None]:
"""# Group Based Recommendation Systems"""


def group_based_pred(preferences_array):
    """Average the members' predicted ratings into one group ranking.

    For every movie in ``dataset`` the group score is the plain mean of the
    members' scores. A movie missing from any member's frame gets NaN.

    :param preferences_array: iterable of per-user DataFrames with ``movieId``
        and ``rating`` columns (e.g. the values of the dict built by
        ``select_group_interface``)
    :return: DataFrame of ``movieId``, ``rating`` plus the columns of ``movies``
        (with ``genres`` split on ``|`` into a list), sorted by ``rating``
        in descending order
    :raises ValueError: if ``preferences_array`` is empty
    """
    # Materialise once: the input may be a one-shot iterable.
    preferences = list(preferences_array)
    if not preferences:
        raise ValueError("group_based_pred needs the preferences of at least one user")

    movie_ids = dataset.movieId.unique()

    # One column per member, aligned on the dataset's movie order; reindex leaves
    # NaN where a member has no score for a movie.
    per_user_rates = [
        user_pref.groupby('movieId').rating.mean().reindex(movie_ids)
        for user_pref in preferences
    ]
    # skipna=False keeps the "any missing score -> NaN" behaviour of a plain sum.
    group_rates = pd.concat(per_user_rates, axis=1).sum(axis=1, skipna=False) / len(preferences)

    scores = pd.DataFrame({'movieId': movie_ids, 'rating': group_rates.to_numpy()})
    scores = pd.merge(scores, movies, on='movieId', how='inner')
    scores["genres"] = scores["genres"].str.split("|", expand=False)

    scores = scores.sort_values(by='rating', ascending=False)

    return scores



In [None]:
"""# Interface """


def select_group_interface():
    """Ask for a group of user ids and compute the group's recommendations.

    Re-prompts until at least one whole-number id is entered. Duplicate ids are
    collapsed because the per-user predictions are keyed by id.

    :return: tuple ``(user_top_preferences, group_top_preferences)`` where the
        first is a dict mapping user id to that user's ranked predictions and
        the second is the group ranking with a fresh 0-based index
    """
    userIds = dataset.userId.unique()
    print(f"Dataset UserIds: {min(userIds)}-{max(userIds)}")

    while True:
        raw_ids = input("Enter Group User Ids seperated by space: ")
        try:
            # split() without arguments tolerates repeated, leading and trailing spaces.
            group_users = [int(token) for token in raw_ids.split()]
        except ValueError:
            print("User ids must be whole numbers, please try again.")
            continue
        if group_users:
            break
        print("Please enter at least one user id.")

    print("Making Predictions, may take a few minutes...")
    user_top_preferences = {uid: user_preferences(uid) for uid in group_users}
    # After reset_index the row position in the frame equals (rank - 1).
    group_top_preferences = group_based_pred(user_top_preferences.values()).reset_index()

    print("Top 20 Recommendation for Selected Group:")
    print(group_top_preferences.head(20))

    return user_top_preferences, group_top_preferences


def why_not_interface(user_top_preferences=None, group_top_preferences=None):
    """Interactive console answering "why not?" questions about the group list.

    Loops over a text menu until option 5 is chosen. The recommendation list is
    the first 20 rows of ``group_top_preferences``; the first 100 rows are used
    to estimate the group's interest per genre.

    :param user_top_preferences: dict mapping user id to that user's ranked
        predictions, or None to ask for a group first
    :param group_top_preferences: group ranking ordered best-first, or None to
        ask for a group first
    :return: None
    """
    while True:
        if user_top_preferences is None or group_top_preferences is None:  # no group selected
            # send to select a group
            user_top_preferences, group_top_preferences = select_group_interface()

        top_20 = group_top_preferences[:20]
        top_100 = group_top_preferences[:100]

        print("\n \n \n \n ------ ")
        print("Select Question Type")
        print("1. Why Not a specific film?")
        print("2. Why Not a specific genre?")
        print("3. Why Not a specific film is not n-th?")
        print("4. Change The User Group")
        print("5. Exit")
        q_type = input()

        if q_type == "1":
            film_name = input("Enter Film Name: ")

            # Positions (0-based, best first) of the rows carrying this title.
            film_positions = np.flatnonzero((group_top_preferences.title == film_name).to_numpy())

            # check if it is not included in dataset (or has no group score at all)
            if dataset[dataset.title == film_name].empty or len(film_positions) == 0:
                print(f"Because Film *{film_name}* is not included in our database yet!")
                continue

            # Titles are not guaranteed unique; use the best-ranked match instead of
            # .item(), which raises when several rows share the title.
            film_position = int(film_positions[0])
            film_rate = float(group_top_preferences.rating.iloc[film_position])
            film_rank = film_position + 1

            # check if it has low rank in group recommendations
            if film_rank <= 50:
                print(f"Because Film *{film_name}* is ranked as {film_rank}th item of our recommendations "
                      f"with rate {round(film_rate, 5)}!")
                continue

            # check if no one likes this film: is it in any member's personal top 50?
            # (A boolean mask is required here; DataFrame.where keeps every row, so
            # testing its result with .empty can never detect "not found".)
            liked_by_someone = any(
                (user_pref[:50].title == film_name).any()
                for user_pref in user_top_preferences.values()
            )
            if not liked_by_someone:
                print(f"Because No one of users likes *{film_name}*!")
                continue

            # finally
            recommended_max_rate = top_20.rating.max()
            recommended_min_rate = top_20.rating.min()
            print(f"Because Film *{film_name}* has a low rating: {round(film_rate, 5)}, "
                  f"but rating range of recommended list is between "
                  f"[{round(recommended_min_rate, 5)}, {round(recommended_max_rate, 5)}].")
            continue

        if q_type == "2":
            # dataset.genres still holds the raw "A|B|C" strings, so split before exploding;
            # the prediction frames already carry genres as lists.
            dataset_genres = set(dataset.genres.str.split("|").explode())
            # Only the displayed list counts as "the recommendation".
            recommended_genres = set(top_20.genres.explode())
            genre_name = input("Enter Film Genre: ")

            # check if the genre is already present in the recommendation list
            if genre_name in recommended_genres:
                print(f"Wrong! there is some *{genre_name}* films in the recommendation!")
                continue

            # check if it is not included in dataset
            if genre_name not in dataset_genres:
                print(f"Because there is no film with genre *{genre_name}* in our database yet!")
                continue

            # Accumulate the group's score mass per genre over the top 100 films.
            likes = {genre: 0 for genre in dataset_genres}
            for movie_rate, movie_genres in zip(top_100.rating, top_100.genres):
                for genre in movie_genres:
                    likes[genre] = likes.get(genre, 0) + movie_rate

            all_genres_likes = sum(likes.values())
            # Guard the normalisation: an all-zero ranking would divide by zero.
            genre_share = likes[genre_name] / all_genres_likes if all_genres_likes else 0

            print(f"Because interest rate of group about genre *{genre_name}* is {round(100 * genre_share, 3)}%")
            continue

        if q_type == "3":
            film_name = input("Enter Film Name: ")

            # check if it is not included in recommendations
            film_rows = top_20[top_20.title == film_name]
            if film_rows.empty:
                print(f"Wrong! Film *{film_name}* is not included in the recommendation list!")
                continue

            # check if requested rank is wrong
            try:
                film_rank = int(input(f"Why *{film_name}* is not in which rank? "))
            except ValueError:
                print("Wrong! the requested rank must be a whole number!")
                continue
            if film_rank > 20:
                print(f"Wrong! our recommendation list size is {20} and you requested for {film_rank}th item!")
                continue
            # Ranks below 1 would index from the end of the frame; ranks past a short
            # list would fall outside it.
            if film_rank < 1 or film_rank > len(top_20):
                print(f"Wrong! the requested rank must be between 1 and {len(top_20)}!")
                continue

            requested_rank_rate = float(film_rows.rating.iloc[0])
            our_suggestion_film_name = group_top_preferences.iloc[film_rank - 1].title
            our_suggestion_film_rate = float(group_top_preferences.rating.iloc[film_rank - 1])

            if our_suggestion_film_rate >= requested_rank_rate:
                # Relative gain; infinite when the baseline rate is zero.
                how_much_is_better = ((our_suggestion_film_rate - requested_rank_rate) / requested_rank_rate
                                      if requested_rank_rate else float("inf"))
                print(f"Because our {film_rank}th suggestion, *{our_suggestion_film_name}*, is "
                      f"{round(100 * how_much_is_better, 3)}% better than *{film_name}*.")
            else:
                how_much_is_better = ((requested_rank_rate - our_suggestion_film_rate) / our_suggestion_film_rate
                                      if our_suggestion_film_rate else float("inf"))
                print(f"Because rate of {film_rank}nth *{film_name}* is {round(requested_rank_rate, 5)} and it is "
                      f"{round(100 * how_much_is_better, 3)}% better than *{our_suggestion_film_name}* with rank "
                      f"{round(our_suggestion_film_rate, 5)}.")

            continue

        if q_type == "4":
            # Dropping both frames makes the next iteration ask for a new group.
            user_top_preferences = None
            group_top_preferences = None

        if q_type == "5":
            return



In [None]:
"""# Run"""
# Start the interactive console; called without arguments, it first asks for a user group.
why_not_interface()
