In [1]:
import functools
import os
import pickle
import random

import numpy as np
import pandas as pd
import scipy.stats as st
import statsmodels.formula.api as smf


@functools.wraps(smf.ols)
def lm(*args, **kwargs):
    # Convenience wrapper: build the formula-based OLS model with whatever
    # arguments were given, then return the fitted result instead of the
    # unfitted model. (functools.wraps copies smf.ols's metadata, including
    # its docstring, onto this function, so the notes live in comments.)
    unfitted_model = smf.ols(*args, **kwargs)
    return unfitted_model.fit()

In [2]:
# Name interpolated into the data directory path that the notebook changes into.
recommendee = "mpfei"
# Two-sided confidence level used when turning the predicted std into
# score_lower_bound / score_upper_bound.
confidence_interval = 0.99
# Filename prefixes of the delta pickles to load ("<source>_oos.pkl" / "<source>_is.pkl").
delta_sources = ["item", "user"]  # "item", "user"
cross_validate = True  # if true, train linear model on out of sample data

In [3]:
# Move into this recommendee's data directory.
# The target is a relative path, so a bare os.chdir would descend again every
# time the cell is re-run. Remember the directory the kernel started in once,
# and always resolve the relative path from there so the cell is idempotent.
if "_notebook_start_dir" not in globals():
    _notebook_start_dir = os.getcwd()
os.chdir(os.path.join(_notebook_start_dir, f"../../data/deltas/{recommendee}"))

In [4]:
def get_deltas(sources):
    """Load pickled delta frames and join them side by side.

    Each path in ``sources`` is unpickled, every column of the i-th frame is
    renamed by appending ``_{i}`` (its position in ``sources``), and the
    renamed frames are concatenated column-wise.

    Parameters
    ----------
    sources : iterable of str
        Paths of the pickle files to load, in the order that determines the
        numeric column suffix.

    Returns
    -------
    pandas.DataFrame
        Column-wise concatenation of all loaded frames.
    """
    deltas = []
    for i, source in enumerate(sources):
        # Use a context manager so the file handle is closed deterministically
        # instead of being left to the garbage collector.
        # NOTE: pickle.load executes arbitrary code from the file; only use
        # this on pickles produced by a trusted pipeline.
        with open(source, "rb") as source_file:
            delta = pickle.load(source_file)
        delta = delta.rename({x: x + f"_{i}" for x in delta.columns}, axis=1)
        deltas.append(delta)
    return pd.concat(deltas, axis=1)

In [5]:
def clean_data(df):
    """Fill gaps in the delta columns of ``df`` and return the same frame.

    The number of sources is the count of columns whose name contains
    "delta_var". For each source index ``k`` in that range, missing
    ``delta_k`` values become 0 and missing ``delta_var_k`` values become the
    largest variance present in that column. The frame is modified in place.
    """
    variance_columns = [name for name in df.columns if "delta_var" in name]
    for source_idx, _ in enumerate(variance_columns):
        delta_name = f"delta_{source_idx}"
        variance_name = f"delta_var_{source_idx}"
        # No estimate available: treat the delta as zero.
        df[delta_name] = df[delta_name].fillna(0)
        # No variance available: fall back to the largest observed variance.
        largest_variance = df[variance_name].max()
        df[variance_name] = df[variance_name].fillna(largest_variance)
    return df

In [6]:
# Choose which set of delta pickles the linear model is trained on:
# the "_oos" files when cross-validating, the "_is" files otherwise.
train_files = (
    [f"{x}_oos.pkl" for x in delta_sources]
    if cross_validate
    else [f"{x}_is.pkl" for x in delta_sources]
)
train_df = get_deltas(train_files)

# Correlation matrix between the per-source delta columns of the training frame.
train_delta_columns = [f"delta_{i}" for i, _ in enumerate(delta_sources)]
delta_corrs = train_df[train_delta_columns].corr()

In [7]:
# Load the labelled rows and attach the training deltas by anime_id.
# The file is opened with a context manager so the handle is closed right
# after unpickling instead of being leaked.
# NOTE: pickle.load executes arbitrary code from the file; only use it on
# pickles produced by a trusted pipeline.
with open("recommendee.pkl", "rb") as labelled_file:
    labelled_data = pickle.load(labelled_file)
labelled_data = clean_data(labelled_data.merge(train_df, on="anime_id"))

In [8]:
# Display the correlation matrix between the per-source delta columns.
delta_corrs

Unnamed: 0,delta_0,delta_1
delta_0,1.0,0.115057
delta_1,0.115057,1.0


In [9]:
# get model
delta_cols = [f"delta_{i}" for i in range(len(delta_sources))]
formula = "normalized_score ~ 0 +" + " + ".join(delta_cols)
model = lm(formula, labelled_data)
print(model.summary())

                                 OLS Regression Results                                
Dep. Variable:       normalized_score   R-squared (uncentered):                   0.328
Model:                            OLS   Adj. R-squared (uncentered):              0.309
Method:                 Least Squares   F-statistic:                              17.29
Date:                Mon, 03 May 2021   Prob (F-statistic):                    7.62e-07
Time:                        01:45:42   Log-Likelihood:                         -107.84
No. Observations:                  73   AIC:                                      219.7
Df Residuals:                      71   BIC:                                      224.3
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [10]:
# Build the frame that will be scored: always the "_is" pickles, with
# missing deltas/variances filled by clean_data.
scoring_files = [f"{x}_is.pkl" for x in delta_sources]
scoring_deltas = get_deltas(scoring_files)
df = clean_data(scoring_deltas)

In [11]:
# Load the per-row baseline stored in blp.pkl and add the model's predicted
# delta on top of it to obtain the final score.
# The file is opened with a context manager so the handle is closed right
# after unpickling instead of being leaked.
# NOTE: pickle.load executes arbitrary code from the file; only use it on
# pickles produced by a trusted pipeline.
with open("blp.pkl", "rb") as blp_file:
    blp = pickle.load(blp_file)
df["blp"] = blp
df["score"] = model.predict(df) + df["blp"]
# Model contribution alone, i.e. how far the score moved from the baseline.
df["delta"] = df["score"] - df["blp"]

In [12]:
# compute error bars
model_vars = pd.DataFrame()
for col in delta_cols:
    i = col.split("_")[1]
    model_vars[f"model_delta_var_{i}"] = (
        (df[f"delta_var_{i}"] + df[f"delta_{i}"] ** 2)
        * (model.bse[f"delta_{i}"] ** 2 + model.params[f"delta_{i}"] ** 2)
    ) - df[f"delta_{i}"] ** 2 * model.params[f"delta_{i}"] ** 2
model_stds = np.sqrt(model_vars)

delta_corrs = delta_corrs.loc[lambda x: (x.index.isin(delta_cols)), delta_cols]
delta_variance = np.sum(
    (model_stds.values @ delta_corrs.values) * model_stds.values, axis=1
)
intercept_variance = 0
if "Intercept" in model.bse:
    intercept_variance = model.bse["Intercept"] ** 2
df["std"] = np.sqrt(delta_variance + intercept_variance)

zscore = st.norm.ppf(1 - (1 - confidence_interval) / 2)
df["score_lower_bound"] = df["score"] - df["std"] * zscore
df["score_upper_bound"] = df["score"] + df["std"] * zscore

In [13]:
# Attach title and type to every scored row, then index the frame by anime_id.
anime_columns = ["anime_id", "title", "type"]
anime = pd.read_csv("../../AnimeList.csv")[anime_columns]
df = df.merge(anime, on="anime_id")
df = df.set_index("anime_id")

In [14]:
# Preview the first rows of the scored frame after the metadata merge.
df.head()

Unnamed: 0_level_0,delta_0,delta_var_0,delta_1,delta_var_1,blp,score,delta,std,score_lower_bound,score_upper_bound,title,type
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,-0.94887,0.074116,-0.548368,0.047713,9.213037,8.286692,-0.926345,0.387905,7.287514,9.285869,Cowboy Bebop,TV
5,-1.039187,0.107591,-0.554419,0.236031,8.870691,7.879783,-0.990908,0.508212,6.570716,9.18885,Cowboy Bebop: Tengoku no Tobira,Movie
6,-0.93861,0.111449,-0.423401,0.104371,8.736196,7.880773,-0.855423,0.424287,6.787882,8.973664,Trigun,TV
7,-0.375174,221553.796517,0.419902,2.83801,7.819196,7.778708,-0.040487,339.145908,-865.80326,881.360676,Witch Hunter Robin,TV
8,0.0,221553.796517,0.700584,54.99451,7.513726,7.872182,0.358456,339.569193,-866.800095,882.544458,Beet the Vandel Buster,TV


In [15]:
# reorder the columns
cols = [
    "title",
    "type",
    "score",
    "score_lower_bound",
    "score_upper_bound",
    "delta",
    "std",
] + delta_cols
df = df[cols + [x for x in df.columns if x not in cols]]

In [16]:
# Candidate recommendations: TV entries whose anime_id is not among the
# labelled rows.
not_labelled = ~df.index.isin(labelled_data.anime_id)
is_tv = df["type"] == "TV"
new_recs = df.loc[not_labelled & is_tv]

In [17]:
# Top 20 rows by lower score bound among those with a positive overall delta
# where the per-source deltas are not all negative.
has_positive_delta = df["delta"] > 0
all_sources_negative = (df[delta_cols] < 0).all(axis=1)
df.loc[has_positive_delta & ~all_sources_negative].sort_values(
    "score_lower_bound", ascending=False
).head(20)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
34798,Yuru Camp△,TV,8.743503,7.300025,10.18698,0.004963,0.560393,0.073089,-0.087519,0.243524,0.427565,8.73854
36563,Megalo Box,TV,8.912101,7.282019,10.542183,0.100235,0.632838,0.466534,-0.424649,0.27162,0.522436,8.811866
34612,Saiki Kusuo no Ψ-nan (TV) 2,TV,9.292166,7.216249,11.368082,0.158399,0.805922,0.127687,0.139742,0.254632,1.267454,9.133766
35838,Shoujo Shuumatsu Ryokou,TV,8.646119,7.180949,10.111289,0.135608,0.568815,0.006024,0.257026,0.148809,0.581654,8.510511
34902,Tsurezure Children,TV,8.361045,7.058057,9.664033,0.059873,0.505852,0.124756,-0.048924,0.1357,0.442567,8.301171
9379,Denpa Onna to Seishun Otoko,TV,8.181758,6.991452,9.372064,0.34231,0.462106,0.242306,0.346729,0.230737,0.158929,7.839448
32491,Kanojo to Kanojo no Neko: Everything Flows,TV,8.51137,6.916675,10.106065,0.326387,0.6191,0.532546,-0.070451,0.471987,0.24188,8.184983
6862,K-On!: Live House!,Special,8.491352,6.909876,10.072828,0.120274,0.613968,0.216915,-0.053457,0.467051,0.266839,8.371078
376,Elfen Lied Special,Special,9.372463,6.8744,11.870525,1.607327,0.969809,2.139781,0.295239,0.765826,0.542522,7.765135
4472,Lucky☆Star: Original na Visual to Animation,OVA,8.583881,6.857254,10.310509,0.122258,0.670319,0.275881,-0.128014,0.516358,0.364862,8.461624


In [18]:
# Top 30 candidate recommendations with a positive overall delta, ranked by
# lower score bound.
positive_recs = new_recs.loc[new_recs["delta"] > 0]
positive_recs.sort_values("score_lower_bound", ascending=False).head(30)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
34798,Yuru Camp△,TV,8.743503,7.300025,10.18698,0.004963,0.560393,0.073089,-0.087519,0.243524,0.427565,8.73854
36563,Megalo Box,TV,8.912101,7.282019,10.542183,0.100235,0.632838,0.466534,-0.424649,0.27162,0.522436,8.811866
34612,Saiki Kusuo no Ψ-nan (TV) 2,TV,9.292166,7.216249,11.368082,0.158399,0.805922,0.127687,0.139742,0.254632,1.267454,9.133766
35838,Shoujo Shuumatsu Ryokou,TV,8.646119,7.180949,10.111289,0.135608,0.568815,0.006024,0.257026,0.148809,0.581654,8.510511
34902,Tsurezure Children,TV,8.361045,7.058057,9.664033,0.059873,0.505852,0.124756,-0.048924,0.1357,0.442567,8.301171
9379,Denpa Onna to Seishun Otoko,TV,8.181758,6.991452,9.372064,0.34231,0.462106,0.242306,0.346729,0.230737,0.158929,7.839448
32491,Kanojo to Kanojo no Neko: Everything Flows,TV,8.51137,6.916675,10.106065,0.326387,0.6191,0.532546,-0.070451,0.471987,0.24188,8.184983
6707,Kuroshitsuji II,TV,8.264492,6.787449,9.741536,0.278832,0.573425,0.406391,0.004407,0.296407,0.365819,7.985661
35968,Wotaku ni Koi wa Muzukashii,TV,8.575313,6.683992,10.466634,0.161528,0.734257,0.657243,-0.558524,0.494655,0.477769,8.413785
4214,Rosario to Vampire Capu2,TV,8.751299,6.635,10.867599,1.027466,0.821599,1.446213,0.08447,0.671285,0.412767,7.723833


In [19]:
# Top 20 candidates where the first source's delta is negative but the second
# source's delta is positive, ranked by lower score bound.
first_source_negative = new_recs["delta_0"] < 0
second_source_positive = new_recs["delta_1"] > 0
new_recs.loc[first_source_negative & second_source_positive].sort_values(
    "score_lower_bound", ascending=False
).head(20)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
35843,Gintama.: Porori-hen,TV,8.923681,7.363405,10.483958,-0.07288,0.605738,-0.342207,0.312741,0.318674,0.408597,8.996561
10357,Jinrui wa Suitai Shimashita,TV,8.083897,6.860119,9.307675,-0.225007,0.475101,-0.473961,0.190667,0.143706,0.310119,8.308904
5420,Kemono no Souja Erin,TV,8.484468,6.690762,10.278174,-0.339531,0.696361,-0.557663,0.078172,0.19115,0.894963,8.823999
861,xxxHOLiC,TV,8.183243,6.608194,9.758291,-0.370002,0.611473,-0.544316,0.000863,0.285015,0.473521,8.553245
30694,Dragon Ball Super,TV,7.987599,6.607171,9.368027,0.126656,0.535916,-0.017846,0.27128,0.302624,0.259981,7.860943
3091,xxxHOLiC Kei,TV,8.25903,6.584185,9.933874,-0.643898,0.650216,-0.960647,0.019325,0.244099,0.557292,8.902928
3701,Kaiba,TV,8.041358,6.554854,9.527862,-0.577573,0.577097,-0.95779,0.145152,0.133737,0.489569,8.618932
20057,Space☆Dandy,TV,7.67169,6.554362,8.789017,-0.455388,0.433774,-0.779558,0.146885,0.11089,0.208631,8.127078
23277,Saenai Heroine no Sodatekata,TV,7.577628,6.516233,8.639023,-0.528702,0.41206,-0.784151,0.009706,0.154881,0.109984,8.106329
6802,So Ra No Wo To,TV,8.091471,6.513315,9.669627,0.089606,0.612679,-0.0874,0.291384,0.238775,0.568141,8.001865


In [20]:
# Top 30 candidates whose overall delta is positive even though every
# per-source delta is negative, ranked by lower score bound.
overall_positive = new_recs["delta"] > 0
every_source_negative = (new_recs[delta_cols] < 0).all(axis=1)
new_recs.loc[overall_positive & every_source_negative].sort_values(
    "score_lower_bound", ascending=False
).head(30)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1


In [21]:
# Look up a specific title among the candidate recommendations.
title_matches = new_recs["title"].str.contains("Kimi ga Nozomu Eien")
new_recs.loc[title_matches]

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
147,Kimi ga Nozomu Eien,TV,7.532449,5.420374,9.644524,-0.390958,0.819959,-0.872144,0.395962,0.714391,0.494816,7.923407


In [22]:
# Look up every candidate recommendation whose title contains "Panzer".
panzer_matches = new_recs["title"].str.contains("Panzer")
new_recs.loc[panzer_matches]

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
14131,Girls & Panzer,TV,7.541603,5.015969,10.067237,-0.558896,0.980513,-0.62348,-0.26302,1.374374,0.397245,8.100499


In [23]:
# NOTE(review): this cell originally contained the printed form of an
# anime_id -> title listing pasted in as if it were code, which raised a
# SyntaxError and stopped Restart & Run All. The same 40 entries are kept
# here as data so the cell executes. Confirm whether the list is still
# needed; if it is only a note, move it to a markdown cell.
# The title for 8861 is truncated ("...") exactly as it was in the paste.
pasted_titles = pd.Series(
    {
        33089: "Kemono Friends",
        32681: "Uchuu Patrol Luluco",
        26: "Texhnolyze",
        26165: "Yuri Kuma Arashi",
        14131: "Girls & Panzer",
        2403: "Kodomo no Jikan (TV)",
        593: "Mugen no Ryvius",
        3604: "Hidamari Sketch x 365",
        147: "Kimi ga Nozomu Eien",
        182: "Tenkuu no Escaflowne",
        1088: "Macross",
        8726: "Soredemo Machi wa Mawatteiru",
        80: "Mobile Suit Gundam",
        1454: "Kemonozume",
        85: "Mobile Suit Zeta Gundam",
        634: "Koi Kaze",
        31771: "Amanchu!",
        4981: "Casshern Sins",
        2402: "Ashita no Joe",
        1852: "Hidamari Sketch",
        34973: "Love Live! Sunshine!! 2nd Season",
        1254: "Saint Seiya",
        35240: "Princess Principal",
        32526: "Love Live! Sunshine!!",
        21273: "Gochuumon wa Usagi desu ka?",
        8861: "Yosuga no Sora: In Solitude, Where We Are Leas...",
        444: "Maria-sama ga Miteru: Haru",
        36259: "Pingu in the City",
        18121: "Teekyuu 2",
        23269: "Hello!! Kiniro Mosaic",
        158: "Maria-sama ga Miteru",
        8424: "MM!",
        6165: "White Album 2nd Season",
        32013: "Oshiete! Galko-chan",
        874: "Digimon Tamers",
        16051: "Ro-Kyu-Bu! SS",
        20473: "Teekyuu 3",
        31706: "Fate/kaleid liner Prisma☆Illya 3rei!!",
        12149: "AKB0048",
        21435: "Yama no Susume: Second Season",
    },
    name="title",
).rename_axis("anime_id")
pasted_titles