# MyAnimeList User Recommendations
* This signal is based on MyAnimeList user-submitted
  recommendations. (See https://myanimelist.net/recommendations.php?s=recentrecs&t=anime)
* The predicted score for a series is the weighted average of the user's scores over all
  series the user has seen that are connected to it by recommendations
* To get the weight between two series, we first construct the undirected adjacency graph of recommendations
* Then, we normalize each edge (i->j) by dividing by the degree of i and the degree of j.
* We raise the adjacency matrix to a given power, normalizing at each step, to reduce sparsity
* Finally, we apply the transformation weight -> weight^alpha for some fixed alpha

In [1]:
import os
import pickle

import numpy as np
import pandas as pd
from tqdm import tqdm

In [2]:
# Hyperparameters selected by cross-validation.
symmetric_recommendations = True
α = 1
power = 3

# Target user; edit this to produce recommendations for someone else.
recommendee = "Fro116"

# Name under which this signal is saved; it embeds the power setting.
# TODO: load these settings from a file instead of hard-coding them.
signal_name = f"maluserrec{power}"

In [3]:
# Remember the directory the kernel started in. Without this anchor, running
# this cell a second time would resolve the relative path against the
# already-changed working directory and end up in the wrong place (or fail).
_notebook_dir = globals().get("_notebook_dir", os.getcwd())

# All later relative paths in this notebook are resolved from this directory.
outdir = f"../../data/recommendations/{recommendee}"
os.chdir(os.path.join(_notebook_dir, outdir))

In [4]:
# Read the cleaned anime table (path is relative to the working directory).
anime_csv_path = "../../cleaned_data/anime.csv"
anime = pd.read_csv(anime_csv_path)

In [5]:
# Load the pickled user list from the working directory. Later cells read its
# "anime_id", "score" and "score_var" columns.
# The context manager closes the file handle deterministically instead of
# leaving it open until garbage collection.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open("user_anime_list.pkl", "rb") as user_list_file:
    user_df = pickle.load(user_list_file)

In [6]:
# Load the precomputed recommendation weights for the configured power.
# Later cells read its "source", "target" and "weight" columns.
# The context manager closes the file handle deterministically instead of
# leaving it open until garbage collection.
# NOTE: pickle.load can execute arbitrary code; only load files you trust.
with open(f"../../processed_data/mal_user_recs_pow_{power}.pkl", "rb") as rec_file:
    rec_df = pickle.load(rec_file)

In [7]:
# Apply the weight -> weight^α transformation.
# NOTE: this overwrites the column in place, so running the cell again
# applies the exponent a second time (harmless only while α == 1).
scaled_weight = rec_df["weight"] ** α
rec_df["weight"] = scaled_weight

In [8]:
# Pair each entry of the user's list with the recommendation edges that start
# at that series ("source"). After the join, "target" is the series being
# predicted, so it is renamed to "anime_id" for the group-bys that follow.
user_by_anime = user_df.set_index("anime_id")
user_edges = user_by_anime.merge(rec_df, left_on=["anime_id"], right_on=["source"])
user_recs = user_edges.drop("source", axis=1).rename({"target": "anime_id"}, axis=1)

In [9]:
def _weighted_mean_score(group):
    # Weighted average of the user's scores, using the edge weights.
    weights = group["weight"]
    return np.dot(group["score"], weights) / weights.sum()


# One predicted score per target series.
pred_scores = user_recs.groupby("anime_id").apply(_weighted_mean_score)

In [10]:
def _weighted_mean_variance(group):
    # sum(var_i * w_i^2) / (sum(w_i))^2: the per-score variances combined
    # with squared weights and normalised by the squared weight total.
    weights = group["weight"]
    return np.dot(group["score_var"], weights ** 2) / weights.sum() ** 2


# One predicted variance per target series.
# TODO apply a bessel correction
pred_vars = user_recs.groupby("anime_id").apply(_weighted_mean_variance)

In [11]:
# Display rec_df (columns: source, target, weight) for inspection.
rec_df

Unnamed: 0,source,target,weight
0,1,5,5.115315e-08
1,6,5,2.733039e-09
2,7,5,8.681316e-09
3,8,5,6.587286e-10
4,15,5,9.044291e-11
...,...,...,...
18052357,39113,39511,2.797939e-02
18052358,39114,39511,1.980068e-02
18052359,39113,39112,1.980068e-02
18052360,39114,39112,2.797939e-02


In [12]:
# Collect the per-series predictions into one table: "delta" holds the
# predicted score and "delta_var" its variance. Both series come from
# grouping user_recs by "anime_id".
signal = pd.DataFrame({"delta": pred_scores, "delta_var": pred_vars})

In [13]:
# The prediction for an item never uses the user's own rating of that item,
# so there are no overfitting concerns and the same table can be written
# as both the leave-one-out file and the regular file.
for signal_path in (f"{signal_name}_loocv.pkl", f"{signal_name}.pkl"):
    signal.to_pickle(signal_path)

In [14]:
# Number of rows in the signal, i.e. anime_id groups that received a prediction.
len(signal)

5944

In [15]:
import functools
import scipy.stats as st
import statsmodels.formula.api as smf
@functools.wraps(smf.ols)
def lm(*args, **kwargs):
    # Convenience wrapper: build the OLS model from the given arguments and
    # return the fitted result rather than the unfitted model.
    # functools.wraps copies smf.ols's name and docstring onto this function,
    # so a docstring written here would be replaced.
    model = smf.ols(*args, **kwargs)
    return model.fit()

In [16]:
# Attach the signal to the user's list. Series without a prediction come out
# of the left join as NaN and are then set to 0. Note that fillna(0) applies
# to every column of the merged frame, not only the signal columns.
user_with_signal = user_df.merge(signal, on='anime_id', how='left')
pred_df = user_with_signal.fillna(0)

In [17]:
# Regress the user's score on the signal and print the fit summary.
delta_fit = lm('score ~ delta ', pred_df)
print(delta_fit.summary())

                            OLS Regression Results                            
Dep. Variable:                  score   R-squared:                       0.160
Model:                            OLS   Adj. R-squared:                  0.158
Method:                 Least Squares   F-statistic:                     66.16
Date:                Wed, 26 May 2021   Prob (F-statistic):           7.47e-15
Time:                        22:03:41   Log-Likelihood:                -646.19
No. Observations:                 349   AIC:                             1296.
Df Residuals:                     347   BIC:                             1304.
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept      0.0584      0.094      0.621      0.5

In [18]:
# Row count of the signal table.
len(signal)

5944

In [19]:
# Display the signal table (delta and delta_var, indexed by anime_id).
signal

Unnamed: 0_level_0,delta,delta_var
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1
1,-0.796281,0.160605
5,-0.857400,0.105230
6,-1.032556,0.101370
7,-1.424144,0.133403
8,-0.476658,0.112314
...,...,...
48759,-0.898856,0.197013
48869,1.223411,1.327175
48961,-0.148136,2.035165
48971,-0.148136,2.035165


### 