# _Imports and Opening Datasets_


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Input file locations, given relative to the notebook's working directory.
animes_path, users_path, reviews_path = (
    "../data/AnimeList.csv",
    "../data/UserList.csv",
    "../data/UserAnimeList.parquet",
)


In [None]:
# The two CSV tables are loaded with every column.
animes_df = pd.read_csv(filepath_or_buffer=animes_path)
users_df = pd.read_csv(filepath_or_buffer=users_path)

# Only these three columns are read from the parquet file.
review_columns = ["username", "anime_id", "my_score"]
reviews_df = pd.read_parquet(path=reviews_path, columns=review_columns)


In [None]:
# Display every animes_df row whose English title is exactly "Bleach",
# rendered as a list with one dict per row.
bleach_rows = animes_df.loc[animes_df["title_english"] == "Bleach"]
bleach_rows.to_dict(orient="records")

# reviews_df Exploration & Preparation


In [None]:
# Preview the first five review rows.
reviews_df.head(n=5)


In [None]:
# Attach each anime's title to its review rows. The left join keeps every
# review row, including those whose anime_id has no match in animes_df.
title_lookup = animes_df[["anime_id", "title"]]
all_df = reviews_df.merge(title_lookup, how="left", on="anime_id")
all_df.head(n=5)


In [None]:
# Number of review rows per title.
count_series = all_df["title"].value_counts()

# Keep only the rows belonging to titles with more than 2000 review rows.
popular_titles = count_series.loc[count_series > 2000].index
all_df = all_df[all_df["title"].isin(popular_titles)]
all_df.head(n=5)


In [None]:
# For each (username, anime_id) pair keep only the last row, modifying both
# frames in place.
for frame in (reviews_df, all_df):
    frame.drop_duplicates(
        subset=["username", "anime_id"], keep="last", inplace=True
    )

# One row per username, one column per title; each cell is the mean my_score
# for that pair (mean is pivot_table's default aggregation).
score_matrix_df = pd.pivot_table(
    all_df,
    index="username",
    columns="title",
    values="my_score",
)


In [None]:
# On-disk location of the cached score matrix.
score_matrix_path = "../data/score_matrix.parquet"

# Uncomment to write the pivoted matrix to the cache file:
# score_matrix_df.to_parquet(score_matrix_path)

# Rebind score_matrix_df to the copy stored in the cache file.
score_matrix_df = pd.read_parquet(score_matrix_path)

In [None]:
def recommendation_system(anime_name, top_n=10, score_matrix=None):
    """Return the titles whose user scores correlate best with ``anime_name``.

    Each column of the score matrix is correlated (pairwise, via
    ``DataFrame.corrwith``) with the column for ``anime_name``. Titles whose
    correlation is undefined (NaN) and the queried title itself are removed
    before ranking, so the result contains only other titles.

    Args:
        anime_name: Column label of the reference anime in the score matrix.
        top_n: Maximum number of rows to return. Defaults to 10.
        score_matrix: DataFrame with one column per title. When ``None``,
            the module-level ``score_matrix_df`` is used.

    Returns:
        A DataFrame with a fresh integer index, a first column holding the
        title labels and a ``"Correlation"`` column, sorted by correlation in
        descending order and truncated to ``top_n`` rows.

    Raises:
        KeyError: If ``anime_name`` is not a column of the score matrix.
    """
    if score_matrix is None:
        score_matrix = score_matrix_df

    if anime_name not in score_matrix.columns:
        raise KeyError(f"{anime_name!r} is not a column of the score matrix")

    # Scores given to the reference anime.
    anime_user_ratings = score_matrix[anime_name]

    # Correlation of every title's score column with the reference column.
    similar_to_anime = score_matrix.corrwith(anime_user_ratings)
    corr_anime = similar_to_anime.to_frame(name="Correlation")

    # Discard titles with no defined correlation, then the queried title
    # itself: it always correlates perfectly with itself and is not a useful
    # recommendation. errors="ignore" covers the case where dropna() already
    # removed it.
    corr_anime = corr_anime.dropna().drop(index=anime_name, errors="ignore")

    ranked = corr_anime.sort_values("Correlation", ascending=False)
    return ranked.head(top_n).reset_index()


In [None]:
# Titles most correlated with "Bleach".
recommendation_system(anime_name="Bleach")


In [None]:
# Titles most correlated with "Dragon Ball".
recommendation_system(anime_name="Dragon Ball")


In [None]:
# Titles most correlated with "One Outs".
recommendation_system(anime_name="One Outs")
