In [1]:
import functools
import os
import pickle
import random

import numpy as np
import pandas as pd
import scipy.stats as st
import statsmodels.formula.api as smf


# Fit-in-one-call helper: forwards every argument to smf.ols and returns the
# result of calling .fit() on the model it builds. functools.wraps copies the
# metadata (name, docstring, ...) of smf.ols onto this wrapper, so a docstring
# written here would be replaced.
@functools.wraps(smf.ols)
def lm(*args, **kwargs):
    unfitted = smf.ols(*args, **kwargs)
    return unfitted.fit()

In [2]:
# user whose data directory (../../data/deltas/<recommendee>) is processed
recommendee = "Fro116"
# two-sided confidence level used to turn the predicted std into score bounds
confidence_interval = 0.99
# prefixes of the "<source>_is.pkl" / "<source>_oos.pkl" delta files to combine
delta_sources = ["item", "user"]  # "item", "user"
# if true, train the linear model on the out-of-sample ("_oos") deltas,
# otherwise on the "_is" files (presumably in-sample -- confirm)
cross_validate = True  # if true, train linear model on out of sample data

In [3]:
# All later file reads use paths relative to this directory.
# NOTE: the path is relative, so this cell is not idempotent -- running it a
# second time resolves it against the already-changed working directory.
os.chdir(f"../../data/deltas/{recommendee}")

In [4]:
def get_deltas(sources):
    """Load pickled delta frames and join them side by side.

    Every column of the i-th frame is suffixed with ``_{i}`` (its position in
    ``sources``) so that identically named columns coming from different
    sources stay distinguishable after the concatenation.

    Args:
        sources: iterable of paths to pickle files; each one is expected to
            unpickle to a pandas DataFrame with string column names.

    Returns:
        A DataFrame holding the renamed columns of all sources, concatenated
        along axis 1 (rows are aligned on the index by ``pd.concat``).

    Raises:
        ValueError: propagated from ``pd.concat`` when ``sources`` is empty.
    """
    deltas = []
    for i, source in enumerate(sources):
        # Use a context manager so the file handle is closed deterministically
        # instead of being leaked until garbage collection.
        # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
        with open(source, "rb") as source_file:
            delta = pickle.load(source_file)
        delta = delta.rename({x: x + f"_{i}" for x in delta.columns}, axis=1)
        deltas.append(delta)
    return pd.concat(deltas, axis=1)

In [5]:
def clean_data(df):
    """Return a copy of ``df`` with missing delta estimates filled in.

    The number of delta sources is taken from the count of columns whose name
    contains ``"delta_var"``. For each source index ``i``:

    * missing ``delta_{i}`` values become 0;
    * missing ``delta_var_{i}`` values become the largest value present in
      that column.

    Args:
        df: DataFrame containing ``delta_{i}`` / ``delta_var_{i}`` column pairs.

    Returns:
        A new DataFrame with the gaps filled; the input frame is not modified.

    Raises:
        KeyError: if a ``delta_{i}`` or ``delta_var_{i}`` column implied by
            the count is absent.
    """
    # Work on a copy so the caller's frame is not silently mutated (re-running
    # a notebook cell on the same input then gives the same result).
    cleaned = df.copy()
    # fill missing data with reasonable defaults
    num_deltas = sum("delta_var" in x for x in cleaned.columns)
    for i in range(num_deltas):
        delta_col = f"delta_{i}"
        var_col = f"delta_var_{i}"
        cleaned[delta_col] = cleaned[delta_col].fillna(0)
        cleaned[var_col] = cleaned[var_col].fillna(cleaned[var_col].max())
    return cleaned

In [6]:
# pick the delta files used for fitting: "_oos" when cross-validating, "_is" otherwise
train_sources = (
    [f"{x}_oos.pkl" for x in delta_sources]
    if cross_validate
    else [f"{x}_is.pkl" for x in delta_sources]
)
train_df = get_deltas(train_sources)
# correlation between the per-source delta columns of the training frame
train_delta_cols = [f"delta_{i}" for i in range(len(delta_sources))]
delta_corrs = train_df[train_delta_cols].corr()

In [7]:
# Load the recommendee's labelled rows. The context manager closes the file
# handle instead of leaking it.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open("recommendee.pkl", "rb") as labelled_file:
    labelled_data = pickle.load(labelled_file)
# attach the training deltas by anime_id, then fill the gaps in the delta columns
labelled_data = clean_data(labelled_data.merge(train_df, on="anime_id"))

In [8]:
# show the correlation matrix of the delta columns in the training frame
delta_corrs

Unnamed: 0,delta_0,delta_1
delta_0,1.0,0.552765
delta_1,0.552765,1.0


In [9]:
# get model: regress normalized_score on the delta columns; the "0 +" term
# in the formula removes the intercept
delta_cols = [f"delta_{i}" for i, _ in enumerate(delta_sources)]
regressors = " + ".join(delta_cols)
formula = "normalized_score ~ 0 +" + regressors
model = lm(formula, data=labelled_data)
print(model.summary())

                                 OLS Regression Results                                
Dep. Variable:       normalized_score   R-squared (uncentered):                   0.247
Model:                            OLS   Adj. R-squared (uncentered):              0.243
Method:                 Least Squares   F-statistic:                              61.14
Date:                Thu, 06 May 2021   Prob (F-statistic):                    1.08e-23
Time:                        23:00:42   Log-Likelihood:                         -658.60
No. Observations:                 375   AIC:                                      1321.
Df Residuals:                     373   BIC:                                      1329.
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [10]:
# frame to score: always built from the "_is" delta files, whatever cross_validate is
scoring_sources = [f"{x}_is.pkl" for x in delta_sources]
df = clean_data(get_deltas(scoring_sources))

In [11]:
# Load the "blp" values. The context manager closes the file handle instead
# of leaking it.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open("blp.pkl", "rb") as blp_file:
    blp = pickle.load(blp_file)
df["blp"] = blp
# score = blp plus the linear model's prediction from the delta columns;
# delta is the part of the score contributed by the model
df["score"] = model.predict(df) + df["blp"]
df["delta"] = df["score"] - df["blp"]

In [12]:
# compute error bars
# Per-source variance of (coefficient * delta). For each delta column this
# evaluates (var_d + d^2) * (se^2 + b^2) - d^2 * b^2, where d / var_d come
# from df and b / se are the fitted coefficient and its standard error.
# NOTE(review): this has the form of the variance of a product of two
# independent random variables -- confirm that independence is intended.
# An infinite delta_var propagates to an infinite std and infinite bounds.
model_vars = pd.DataFrame()
for col in delta_cols:
    i = col.split("_")[1]  # source index as a string, e.g. the "0" in "delta_0"
    model_vars[f"model_delta_var_{i}"] = (
        (df[f"delta_var_{i}"] + df[f"delta_{i}"] ** 2)
        * (model.bse[f"delta_{i}"] ** 2 + model.params[f"delta_{i}"] ** 2)
    ) - df[f"delta_{i}"] ** 2 * model.params[f"delta_{i}"] ** 2
model_stds = np.sqrt(model_vars)

# keep only the rows/columns of the correlation matrix for the modelled deltas
# (this rebinds delta_corrs)
delta_corrs = delta_corrs.loc[lambda x: (x.index.isin(delta_cols)), delta_cols]
# row-wise quadratic form s @ C @ s: combines the per-source stds of each row
# using the correlations between the delta sources
delta_variance = np.sum(
    (model_stds.values @ delta_corrs.values) * model_stds.values, axis=1
)
# only non-zero when the fitted model has an "Intercept" term
intercept_variance = 0
if "Intercept" in model.bse:
    intercept_variance = model.bse["Intercept"] ** 2
df["std"] = np.sqrt(delta_variance + intercept_variance)

# two-sided standard-normal quantile for the configured confidence level
zscore = st.norm.ppf(1 - (1 - confidence_interval) / 2)
df["score_lower_bound"] = df["score"] - df["std"] * zscore
df["score_upper_bound"] = df["score"] + df["std"] * zscore

  (model_stds.values @ delta_corrs.values) * model_stds.values, axis=1


In [13]:
# attach title/type metadata to the scored rows and index the result by anime_id
anime = pd.read_csv("../../AnimeList.csv").loc[:, ["anime_id", "title", "type"]]
df = pd.merge(df, anime, on="anime_id").set_index("anime_id")

In [14]:
# preview the first rows of the scored frame
df.head()

Unnamed: 0_level_0,delta_0,delta_var_0,delta_1,delta_var_1,blp,score,delta,std,score_lower_bound,score_upper_bound,title,type
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1,-0.467249,0.085123,-0.181562,0.003305,7.646973,7.076186,-0.570787,0.282601,6.348255,7.804117,Cowboy Bebop,TV
5,-0.58307,0.113083,-0.220424,0.006535,7.304627,6.60057,-0.704057,0.341563,5.720761,7.480379,Cowboy Bebop: Tengoku no Tobira,Movie
6,-0.374968,0.13705,-0.389742,0.00529,7.170132,6.385725,-0.784407,0.384008,5.396586,7.374863,Trigun,TV
7,-0.071229,0.53029,-0.258158,0.029067,6.253132,5.857901,-0.395231,0.689256,4.082496,7.633305,Witch Hunter Robin,TV
8,0.0,inf,0.306084,0.370442,5.947662,6.356984,0.409323,inf,-inf,inf,Beet the Vandel Buster,TV


In [15]:
# reorder the columns: descriptive and score columns first, then the deltas,
# then whatever else the frame contains in its existing order
cols = [
    "title",
    "type",
    "score",
    "score_lower_bound",
    "score_upper_bound",
    "delta",
    "std",
    *delta_cols,
]
trailing_cols = [x for x in df.columns if x not in cols]
df = df[cols + trailing_cols]

In [16]:
# TV entries whose anime_id does not appear in the labelled data
not_labelled = ~df.index.isin(labelled_data.anime_id)
is_tv = df["type"] == "TV"
new_recs = df.loc[not_labelled & is_tv]

In [17]:
# top 20 rows with a positive delta, ranked by the lower bound of the score
df[df["delta"] > 0].sort_values("score_lower_bound", ascending=False).head(20)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3297,Aria The Origination,TV,8.884612,7.513086,10.256138,1.239079,0.53246,0.451652,0.689486,0.151982,0.025926,7.645533
32,Neon Genesis Evangelion: The End of Evangelion,Movie,8.793918,7.401522,10.186314,1.475354,0.540562,-0.000605,1.103561,0.099345,0.004343,7.318564
3784,Evangelion: 2.0 You Can (Not) Advance,Movie,8.498427,7.381906,9.614949,0.95814,0.433461,0.214687,0.603789,0.141216,0.004275,7.540287
30,Neon Genesis Evangelion,TV,8.435128,7.25532,9.614936,1.249445,0.45803,0.174456,0.842739,0.09751,0.003333,7.185683
962,Aria The Natural,TV,8.376139,7.127071,9.625206,1.119169,0.484919,0.713016,0.462627,0.178777,0.016665,7.25697
2001,Tengen Toppa Gurren Lagann,TV,7.765908,7.077825,8.453991,0.109675,0.267131,-0.145418,0.158344,0.081367,0.003116,7.656233
11981,Mahou Shoujo Madoka★Magica Movie 3: Hangyaku n...,Movie,8.064925,7.067232,9.062619,0.661099,0.387329,-0.071634,0.531959,0.105331,0.005885,7.403826
820,Ginga Eiyuu Densetsu,OVA,8.203197,7.022287,9.384106,0.336414,0.458458,-0.521241,0.525167,0.117989,0.021371,7.866783
2759,Evangelion: 1.0 You Are (Not) Alone,Movie,8.103999,7.007976,9.200022,0.958639,0.425503,0.362819,0.526407,0.153981,0.004115,7.14536
2563,Aria The OVA: Arietta,OVA,8.309152,6.981204,9.637099,1.238103,0.515542,0.861488,0.473631,0.182815,0.023215,7.071048


In [18]:
# top 30 new recommendations with a positive delta, ranked by the score lower bound
new_recs[new_recs["delta"] > 0].sort_values(
    "score_lower_bound", ascending=False
).head(30)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
444,Maria-sama ga Miteru: Haru,TV,7.769043,6.290092,9.247994,0.915025,0.574165,1.05839,0.128683,0.17711,0.055514,6.854018
3604,Hidamari Sketch x 365,TV,7.445105,6.178174,8.712037,0.580521,0.491854,0.63563,0.100457,0.183315,0.030554,6.864584
593,Mugen no Ryvius,TV,7.686111,6.164083,9.208139,1.135475,0.590889,0.944226,0.353457,0.179988,0.05663,6.550637
153,Juuni Kokuki,TV,7.064283,6.067965,8.060601,0.060991,0.386795,0.424859,-0.177403,0.107751,0.019091,7.003291
158,Maria-sama ga Miteru,TV,7.399714,6.05889,8.740538,0.988115,0.520541,1.051517,0.186946,0.148513,0.042268,6.411599
3750,Maria-sama ga Miteru 4th,TV,7.694776,6.03605,9.353503,0.688146,0.643958,1.128924,-0.077997,0.26904,0.059885,7.00663
488,Ichigo Mashimaro,TV,7.085163,6.004322,8.166003,0.339213,0.419609,0.490848,-0.003993,0.144166,0.020661,6.74595
21,One Piece,TV,7.44373,5.97997,8.90749,0.021837,0.568268,0.283343,-0.132399,0.455055,0.008786,7.421893
7062,Hidamari Sketch x ☆☆☆,TV,7.308623,5.969946,8.647301,0.315488,0.519707,0.395628,0.028248,0.208271,0.035696,6.993135
182,Tenkuu no Escaflowne,TV,6.984104,5.954302,8.013906,0.296086,0.399794,0.31314,0.057038,0.118342,0.022051,6.688018


In [19]:
# new recommendations where delta_0 is negative but delta_1 is positive
neg0_pos1 = (new_recs["delta_0"] < 0) & (new_recs["delta_1"] > 0)
new_recs[neg0_pos1].sort_values("score_lower_bound", ascending=False).head(20)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
32681,Uchuu Patrol Luluco,TV,6.711765,5.596783,7.826746,0.338276,0.432863,-0.150064,0.331727,0.146705,0.018229,6.373488
26,Texhnolyze,TV,6.516369,5.595586,7.437152,0.007943,0.357471,-0.380866,0.205859,0.095917,0.014538,6.508426
634,Koi Kaze,TV,6.481841,5.467052,7.49663,0.213504,0.393966,-0.077875,0.200532,0.10514,0.022156,6.268336
147,Kimi ga Nozomu Eien,TV,6.319061,5.287068,7.351054,-0.038282,0.400645,-0.184693,0.06832,0.177188,0.011066,6.357343
33089,Kemono Friends,TV,7.020587,5.24722,8.793955,0.613971,0.688465,-0.060238,0.490736,0.419147,0.038355,6.406617
4981,Casshern Sins,TV,6.199618,5.211671,7.187565,-0.311254,0.383545,-0.539347,0.050357,0.11255,0.01829,6.510872
5074,Tetsuwan Birdy Decode:02,TV,6.782362,5.207405,8.35732,0.003853,0.611437,-0.076359,0.042962,0.247056,0.060703,6.77851
21273,Gochuumon wa Usagi desu ka?,TV,6.442806,5.186922,7.698691,0.004978,0.487565,-0.025788,0.017259,0.272714,0.015391,6.437828
1454,Kemonozume,TV,6.25714,5.024481,7.4898,-0.107635,0.478549,-0.666587,0.269408,0.139742,0.032858,6.364776
2402,Ashita no Joe,TV,7.256177,5.017904,9.494451,0.275728,0.868953,-0.299183,0.363228,0.392872,0.144073,6.980449


In [20]:
# new recommendations where delta_0 is positive but delta_1 is negative
pos0_neg1 = (new_recs["delta_0"] > 0) & (new_recs["delta_1"] < 0)
new_recs[pos0_neg1].sort_values("score_lower_bound", ascending=False).head(20)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2164,Dennou Coil,TV,7.020057,6.215939,7.824175,-0.06687,0.312178,0.016037,-0.058422,0.076601,0.012704,7.086927
18195,Little Busters!: Refrain,TV,7.091152,6.08932,8.092983,-0.164334,0.388935,0.183485,-0.219199,0.14479,0.011643,7.255486
153,Juuni Kokuki,TV,7.064283,6.067965,8.060601,0.060991,0.386795,0.424859,-0.177403,0.107751,0.019091,7.003291
3750,Maria-sama ga Miteru 4th,TV,7.694776,6.03605,9.353503,0.688146,0.643958,1.128924,-0.077997,0.26904,0.059885,7.00663
488,Ichigo Mashimaro,TV,7.085163,6.004322,8.166003,0.339213,0.419609,0.490848,-0.003993,0.144166,0.020661,6.74595
21,One Piece,TV,7.44373,5.97997,8.90749,0.021837,0.568268,0.283343,-0.132399,0.455055,0.008786,7.421893
16706,Kami nomi zo Shiru Sekai: Megami-hen,TV,7.016893,5.946582,8.087204,-0.246828,0.415521,0.065404,-0.218904,0.205745,0.007176,7.263721
23623,Non Non Biyori Repeat,TV,7.165178,5.943401,8.386955,-0.016479,0.474324,0.084564,-0.056711,0.274946,0.011924,7.181657
2158,Terra e... (TV),TV,7.080248,5.88436,8.276136,0.051821,0.464273,0.324454,-0.131557,0.131132,0.036298,7.028426
427,Kaleido Star,TV,7.471613,5.820461,9.122764,0.53299,0.641017,0.969124,-0.11014,0.284353,0.056815,6.938623


In [21]:
# new recommendations where both delta_0 and delta_1 are positive
pos0_pos1 = (new_recs["delta_0"] > 0) & (new_recs["delta_1"] > 0)
new_recs[pos0_pos1].sort_values("score_lower_bound", ascending=False).head(20)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
444,Maria-sama ga Miteru: Haru,TV,7.769043,6.290092,9.247994,0.915025,0.574165,1.05839,0.128683,0.17711,0.055514,6.854018
3604,Hidamari Sketch x 365,TV,7.445105,6.178174,8.712037,0.580521,0.491854,0.63563,0.100457,0.183315,0.030554,6.864584
593,Mugen no Ryvius,TV,7.686111,6.164083,9.208139,1.135475,0.590889,0.944226,0.353457,0.179988,0.05663,6.550637
158,Maria-sama ga Miteru,TV,7.399714,6.05889,8.740538,0.988115,0.520541,1.051517,0.186946,0.148513,0.042268,6.411599
7062,Hidamari Sketch x ☆☆☆,TV,7.308623,5.969946,8.647301,0.315488,0.519707,0.395628,0.028248,0.208271,0.035696,6.993135
182,Tenkuu no Escaflowne,TV,6.984104,5.954302,8.013906,0.296086,0.399794,0.31314,0.057038,0.118342,0.022051,6.688018
11239,Hidamari Sketch x Honeycomb,TV,7.403412,5.877084,8.92974,0.461769,0.592558,0.495251,0.085342,0.219284,0.058336,6.941642
1852,Hidamari Sketch,TV,6.936798,5.833046,8.04055,0.547802,0.428504,0.522064,0.135601,0.143081,0.02188,6.388996
1088,Macross,TV,7.454183,5.792012,9.116355,0.523765,0.645296,0.618471,0.067022,0.423378,0.033856,6.930418
34973,Love Live! Sunshine!! 2nd Season,TV,7.83016,5.766042,9.894279,1.292478,0.801341,1.304518,0.281741,0.60018,0.0519,6.537682
