In [1]:
import functools
import os
import pickle
import random

import numpy as np
import pandas as pd
import scipy.stats as st
import statsmodels.formula.api as smf


# Convenience wrapper: build an OLS model through smf.ols and fit it in one
# call, returning the fitted result. functools.wraps copies smf.ols' metadata
# (including its docstring) onto this helper, so the description lives in this
# comment rather than in a docstring that would be overwritten.
@functools.wraps(smf.ols)
def lm(*args, **kwargs):
    unfitted_model = smf.ols(*args, **kwargs)
    return unfitted_model.fit()

In [2]:
# User whose data directory is opened and for whom scores are produced.
recommendee = "Fro116"
# Two-sided confidence level used to derive the z-score for the score bounds.
confidence_interval = 0.95
# Each entry selects a pair of pickles, "<name>_is.pkl" and "<name>_oos.pkl";
# its position in the list becomes the numeric suffix of its delta columns.
delta_sources = ["item", "user"]  # "item", "user"
cross_validate = True  # if true, train linear model on out of sample data

In [3]:
# Remember the directory the kernel started in, so that re-running this cell
# resolves the relative path from the same place instead of from the directory
# a previous run already changed into (a bare relative chdir is not idempotent).
if "_NOTEBOOK_START_DIR" not in globals():
    _NOTEBOOK_START_DIR = os.getcwd()
os.chdir(os.path.join(_NOTEBOOK_START_DIR, "..", "..", "data", "deltas", recommendee))

In [4]:
def get_deltas(sources):
    """Load one pickled frame per source and join them column-wise.

    Every column of the i-th frame gets a ``_{i}`` suffix (``i`` is the
    position of the file in ``sources``) so that columns coming from different
    sources stay distinct after concatenation.

    Args:
        sources: iterable of paths to pickle files, each holding a DataFrame.

    Returns:
        The renamed frames concatenated with ``pd.concat(..., axis=1)``.

    Raises:
        ValueError: raised by ``pd.concat`` when ``sources`` is empty.
    """
    deltas = []
    for i, source in enumerate(sources):
        # Close the file deterministically instead of leaking the handle.
        # NOTE: pickle can execute arbitrary code on load; only open trusted files.
        with open(source, "rb") as f:
            delta = pickle.load(f)
        delta = delta.rename({x: x + f"_{i}" for x in delta.columns}, axis=1)
        deltas.append(delta)
    return pd.concat(deltas, axis=1)

In [5]:
def clean_data(df):
    """Fill gaps in the delta columns of ``df`` in place and return ``df``.

    The number of sources is the count of column names containing
    "delta_var". For each source index ``i``, missing ``delta_{i}`` values
    become 0 and missing ``delta_var_{i}`` values become that column's maximum.
    """
    source_count = sum(1 for name in df.columns if "delta_var" in name)
    for idx in range(source_count):
        mean_col, var_col = f"delta_{idx}", f"delta_var_{idx}"
        df[mean_col] = df[mean_col].fillna(0)
        largest_var = df[var_col].max()
        df[var_col] = df[var_col].fillna(largest_var)
    return df

In [6]:
# Pick the files the linear model is trained on: the "_oos" pickles when
# cross_validate is set, the "_is" pickles otherwise.
train_suffix = "oos" if cross_validate else "is"
train_df = get_deltas([f"{x}_{train_suffix}.pkl" for x in delta_sources])
# Pairwise correlations between the per-source delta columns.
train_delta_names = [f"delta_{i}" for i, _ in enumerate(delta_sources)]
delta_corrs = train_df[train_delta_names].corr()

In [7]:
# Load the recommendee's pickled frame, attach the training deltas by
# anime_id, and fill missing delta values.
# The context manager closes the file instead of leaking the handle.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open("recommendee.pkl", "rb") as f:
    labelled_data = pickle.load(f)
labelled_data = clean_data(labelled_data.merge(train_df, on="anime_id"))

In [8]:
# Display the correlation matrix between the training delta columns.
delta_corrs

Unnamed: 0,delta_0,delta_1
delta_0,1.0,0.603876
delta_1,0.603876,1.0


In [9]:
# Regress normalized_score on one delta column per source and show the fit.
delta_cols = [f"delta_{i}" for i, _ in enumerate(delta_sources)]
formula = f"normalized_score ~ 0 +{' + '.join(delta_cols)}"
model = lm(formula, labelled_data)
print(model.summary())

                                 OLS Regression Results                                
Dep. Variable:       normalized_score   R-squared (uncentered):                   0.213
Model:                            OLS   Adj. R-squared (uncentered):              0.209
Method:                 Least Squares   F-statistic:                              50.18
Date:                Sun, 16 May 2021   Prob (F-statistic):                    5.16e-20
Time:                        10:41:42   Log-Likelihood:                         -675.07
No. Observations:                 373   AIC:                                      1354.
Df Residuals:                     371   BIC:                                      1362.
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [10]:
# Build the frame that gets scored from the "_is" pickle of every source,
# with missing delta values filled.
in_sample_paths = [f"{x}_is.pkl" for x in delta_sources]
df = clean_data(get_deltas(in_sample_paths))

In [11]:
# The context manager closes the file instead of leaking the handle.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open("blp.pkl", "rb") as f:
    blp = pickle.load(f)
df["blp"] = blp
# Score is the blp value plus the linear model's prediction from the deltas;
# "delta" recovers the model's contribution on its own.
df["score"] = model.predict(df) + df["blp"]
df["delta"] = df["score"] - df["blp"]

In [12]:
# compute error bars
# For every source, estimate the variance of (coefficient * delta). The
# expression has the form of the product-variance identity
#   Var(XY) = (Var X + E[X]^2) * (Var Y + E[Y]^2) - E[X]^2 * E[Y]^2,
# with delta_var_i / delta_i in the role of the delta's variance / mean and
# bse^2 / params in the role of the coefficient's variance / mean.
# NOTE(review): that identity holds for independent X and Y - confirm the
# independence assumption is intended here.
model_vars = pd.DataFrame()
for col in delta_cols:
    # Numeric suffix of the column name, e.g. "delta_0" -> "0".
    i = col.split("_")[1]
    model_vars[f"model_delta_var_{i}"] = (
        (df[f"delta_var_{i}"] + df[f"delta_{i}"] ** 2)
        * (model.bse[f"delta_{i}"] ** 2 + model.params[f"delta_{i}"] ** 2)
    ) - df[f"delta_{i}"] ** 2 * model.params[f"delta_{i}"] ** 2
model_stds = np.sqrt(model_vars)

# Keep only the rows/columns of the correlation matrix that belong to the
# model's delta columns. Rows keep the matrix's existing order while columns
# follow delta_cols, so both orders must agree with model_stds' columns.
delta_corrs = delta_corrs.loc[lambda x: (x.index.isin(delta_cols)), delta_cols]
# Row-wise quadratic form s @ C @ s, where s is the row of per-source standard
# deviations and C the correlation matrix: the variance of the summed
# contributions when the sources are correlated according to C.
delta_variance = np.sum(
    (model_stds.values @ delta_corrs.values) * model_stds.values, axis=1
)
# Add the intercept's squared standard error only when the fitted model
# actually has an "Intercept" term.
intercept_variance = 0
if "Intercept" in model.bse:
    intercept_variance = model.bse["Intercept"] ** 2
df["std"] = np.sqrt(delta_variance + intercept_variance)

# Two-sided standard-normal quantile for the configured confidence level.
zscore = st.norm.ppf(1 - (1 - confidence_interval) / 2)
df["score_lower_bound"] = df["score"] - df["std"] * zscore
df["score_upper_bound"] = df["score"] + df["std"] * zscore

  (model_stds.values @ delta_corrs.values) * model_stds.values, axis=1


In [13]:
# Attach title and type from the anime list, then index the rows by anime_id.
anime = pd.read_csv("../../AnimeList.csv")[["anime_id", "title", "type"]]
df = df.merge(anime, on="anime_id")
df = df.set_index("anime_id")

In [14]:
# Put the descriptive and score columns first, followed by the per-source
# deltas, then every remaining column in its current order.
cols = [
    *["title", "type"],
    *["score", "score_lower_bound", "score_upper_bound"],
    *["delta", "std"],
    *delta_cols,
]
trailing_cols = [name for name in df.columns if name not in cols]
df = df[[*cols, *trailing_cols]]

In [15]:
# Attach the related-series frame by anime_id and restore the anime_id index.
# The context manager closes the file instead of leaking the handle.
# NOTE: pickle can execute arbitrary code on load; only open trusted files.
with open("../../anime_facts/related_series.pkl", "rb") as f:
    related_series = pickle.load(f)
df = df.merge(related_series, on="anime_id").set_index("anime_id")

In [16]:
# Candidate recommendations: rows of type "TV" whose anime_id does not appear
# in labelled_data.
is_labelled = df.index.isin(labelled_data.anime_id)
is_tv = df["type"] == "TV"
new_recs = df.loc[~is_labelled & is_tv]

In [17]:
# Top 20 rows with a positive delta, ranked by the lower score bound.
(
    df[df["delta"] > 0]
    .sort_values(by="score_lower_bound", ascending=False)
    .head(20)
)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp,series_id
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
3297,Aria The Origination,TV,8.540956,7.472542,9.60937,0.976059,0.545119,0.024136,0.759445,0.039415,0.020167,7.564898,339
32,Neon Genesis Evangelion: The End of Evangelion,Movie,8.55118,7.316574,9.785786,1.313252,0.629913,-0.075659,1.123164,0.032373,0.003514,7.237928,20
820,Ginga Eiyuu Densetsu,OVA,8.219784,7.265892,9.173676,0.433637,0.486688,-0.239025,0.571503,0.034026,0.02446,7.786147,553
3784,Evangelion: 2.0 You Can (Not) Advance,Movie,8.111472,7.25813,8.964813,0.65182,0.435386,-0.085374,0.602298,0.034534,0.00345,7.459652,1701
34376,Yuuki Yuuna wa Yuusha de Aru: Washio Sumi no S...,Movie,9.421861,7.220235,11.623487,2.414868,1.123299,1.247625,0.765447,0.210185,0.177855,7.006993,8533
30,Neon Genesis Evangelion,TV,8.142163,7.122223,9.162104,1.037116,0.520388,-0.038354,0.866942,0.029934,0.002736,7.105048,20
962,Aria The Natural,TV,8.017385,7.104521,8.930248,0.84105,0.465755,0.155197,0.528418,0.044016,0.013166,7.176334,339
34375,Yuuki Yuuna wa Yuusha de Aru: Washio Sumi no S...,Movie,9.298207,7.097875,11.498539,2.566165,1.122639,1.434469,0.711535,0.234772,0.151562,6.732042,8533
11981,Mahou Shoujo Madoka★Magica Movie 3: Hangyaku n...,Movie,7.877553,7.096853,8.658253,0.554363,0.398324,-0.129674,0.565735,0.024972,0.004683,7.32319,4026
11979,Mahou Shoujo Madoka★Magica Movie 2: Eien no Mo...,Movie,7.688999,7.055306,8.322691,0.373577,0.323318,0.022722,0.278031,0.028575,0.006785,7.315422,4026


In [18]:
# Best candidate per series: rank positive-delta candidates by the lower score
# bound, take the first value of each column within every series_id group, and
# show the 30 highest-ranked series.
(
    new_recs[new_recs["delta"] > 0]
    .sort_values(by="score_lower_bound", ascending=False)
    .groupby("series_id")
    .first()
    .sort_values(by="score_lower_bound", ascending=False)
    .head(30)
)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
series_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1143,Hidamari Sketch x ☆☆☆,TV,7.151099,6.350553,7.951645,0.2386,0.408449,0.070078,0.12549,0.039982,0.025661,6.912499
5698,Gochuumon wa Usagi desu ka??,TV,7.08432,6.249047,7.919592,0.286126,0.426167,0.1921,0.049193,0.057946,0.01908,6.798193
7708,New Game!!,TV,6.928738,6.166266,7.69121,0.162377,0.389023,0.16318,-0.022853,0.046156,0.017397,6.766361
102,Maria-sama ga Miteru: Haru,TV,7.244508,6.159405,8.32961,0.471125,0.553634,0.312245,0.084805,0.082031,0.040952,6.773382
130,Macross,TV,7.056986,6.116359,7.997612,0.207203,0.47992,0.088949,0.082645,0.063236,0.031113,6.849783
4084,Love Live! Sunshine!! 2nd Season,TV,7.353952,6.089932,8.617972,0.896905,0.64492,0.531112,0.220806,0.113491,0.04674,6.457047
346,Ichigo Mashimaro,TV,6.807836,6.069682,7.545989,0.142521,0.376616,0.096119,0.024098,0.043635,0.016405,6.665315
427,Mugen no Ryvius,TV,7.096539,6.046891,8.146188,0.626538,0.535545,0.145769,0.365377,0.059581,0.040354,6.470001
8062,Kemono Friends,TV,7.15709,6.043041,8.271139,0.831109,0.568403,0.214725,0.464653,0.073477,0.033021,6.325981
315,Kaleido Star,TV,7.211081,6.01453,8.407632,0.353094,0.610496,0.402852,-0.094699,0.104251,0.045804,6.857987


In [19]:
# Top 30 candidates with a positive delta, ranked by the lower score bound.
(
    new_recs[new_recs["delta"] > 0]
    .sort_values(by="score_lower_bound", ascending=False)
    .head(30)
)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp,series_id
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
7062,Hidamari Sketch x ☆☆☆,TV,7.151099,6.350553,7.951645,0.2386,0.408449,0.070078,0.12549,0.039982,0.025661,6.912499,1143
3604,Hidamari Sketch x 365,TV,7.070441,6.291381,7.849501,0.286492,0.397487,0.066893,0.16685,0.037203,0.02351,6.783948,1143
29787,Gochuumon wa Usagi desu ka??,TV,7.08432,6.249047,7.919592,0.286126,0.426167,0.1921,0.049193,0.057946,0.01908,6.798193,5698
11239,Hidamari Sketch x Honeycomb,TV,7.117378,6.242418,7.992338,0.256371,0.446416,0.060675,0.148544,0.039488,0.03717,6.861007,1143
34914,New Game!!,TV,6.928738,6.166266,7.69121,0.162377,0.389023,0.16318,-0.022853,0.046156,0.017397,6.766361,7708
444,Maria-sama ga Miteru: Haru,TV,7.244508,6.159405,8.32961,0.471125,0.553634,0.312245,0.084805,0.082031,0.040952,6.773382,102
1088,Macross,TV,7.056986,6.116359,7.997612,0.207203,0.47992,0.088949,0.082645,0.063236,0.031113,6.849783,130
34973,Love Live! Sunshine!! 2nd Season,TV,7.353952,6.089932,8.617972,0.896905,0.64492,0.531112,0.220806,0.113491,0.04674,6.457047,4084
488,Ichigo Mashimaro,TV,6.807836,6.069682,7.545989,0.142521,0.376616,0.096119,0.024098,0.043635,0.016405,6.665315,346
593,Mugen no Ryvius,TV,7.096539,6.046891,8.146188,0.626538,0.535545,0.145769,0.365377,0.059581,0.040354,6.470001,427


In [20]:
# Top 20 candidates where delta_0 is negative and delta_1 is positive, ranked
# by the lower score bound.
(
    new_recs[(new_recs["delta_0"] < 0) & (new_recs["delta_1"] > 0)]
    .sort_values(by="score_lower_bound", ascending=False)
    .head(20)
)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp,series_id
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
85,Mobile Suit Zeta Gundam,TV,6.997794,5.904261,8.091328,0.172207,0.557935,-0.007264,0.14479,0.080153,0.044852,6.825587,51
182,Tenkuu no Escaflowne,TV,6.589693,5.833683,7.345702,-0.01769,0.385726,-0.058731,0.040878,0.046723,0.01666,6.607382,124
1453,Maison Ikkoku,TV,6.983249,5.803496,8.163002,-0.030707,0.601926,-0.06306,0.034506,0.108358,0.044287,7.013956,894
31771,Amanchu!,TV,6.536529,5.76016,7.312898,0.344053,0.396114,-0.000105,0.275771,0.034315,0.020682,6.192476,7657
147,Kimi ga Nozomu Eien,TV,6.414748,5.741225,7.088271,0.13804,0.343641,-0.01502,0.124684,0.043462,0.008324,6.276708,91
2921,Ashita no Joe 2,TV,7.464072,5.731568,9.196576,0.003255,0.883947,-0.299554,0.283397,0.09444,0.205213,7.460817,1482
2402,Ashita no Joe,TV,7.191851,5.702906,8.680797,0.292037,0.75968,-0.172064,0.395281,0.10387,0.107229,6.899814,1482
26,Texhnolyze,TV,6.405625,5.689446,7.121805,-0.022164,0.365405,-0.255894,0.222104,0.037665,0.011384,6.42779,16
80,Mobile Suit Gundam,TV,6.709501,5.686195,7.732806,0.042484,0.522104,-0.121945,0.148346,0.081311,0.031058,6.667017,51
26165,Yuri Kuma Arashi,TV,6.226857,5.381944,7.071771,0.417619,0.431086,-0.05635,0.387438,0.041349,0.017762,5.809238,6441


In [21]:
# Top 20 candidates where delta_0 is positive and delta_1 is negative, ranked
# by the lower score bound.
(
    new_recs[(new_recs["delta_0"] > 0) & (new_recs["delta_1"] < 0)]
    .sort_values(by="score_lower_bound", ascending=False)
    .head(20)
)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp,series_id
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
34914,New Game!!,TV,6.928738,6.166266,7.69121,0.162377,0.389023,0.16318,-0.022853,0.046156,0.017397,6.766361,7708
30279,Yuru Yuri San☆Hai!,TV,6.854327,6.063762,7.644892,-0.018147,0.403357,0.055982,-0.067015,0.055767,0.015284,6.872475,4245
19111,Love Live! School Idol Project 2nd Season,TV,6.959401,6.045935,7.872868,0.120971,0.466063,0.199912,-0.090461,0.09123,0.011599,6.83843,4084
427,Kaleido Star,TV,7.211081,6.01453,8.407632,0.353094,0.610496,0.402852,-0.094699,0.104251,0.045804,6.857987,315
3750,Maria-sama ga Miteru 4th,TV,7.062796,5.890836,8.234757,0.136802,0.59795,0.250062,-0.124785,0.092832,0.050255,6.925994,102
30727,Saenai Heroine no Sodatekata ♭,TV,6.540713,5.815138,7.266288,-0.133184,0.370198,0.003351,-0.109855,0.046328,0.012445,6.673897,6028
31706,Fate/kaleid liner Prisma☆Illya 3rei!!,TV,6.661921,5.736454,7.587389,0.206876,0.472186,0.193754,-0.015857,0.06998,0.024533,6.455045,265
2986,Bamboo Blade,TV,6.507504,5.70755,7.307458,0.137259,0.408147,0.170997,-0.050306,0.052305,0.018056,6.370245,1842
10278,The iDOLM@STER,TV,6.367018,5.648128,7.085908,-0.13262,0.366787,0.015037,-0.120357,0.0395,0.015522,6.499638,1044
1974,Glass no Kamen (2005),TV,7.081965,5.635505,8.528425,0.051013,0.738003,0.101163,-0.053951,0.153183,0.072656,7.030952,1210


In [22]:
# Top 20 candidates where both delta_0 and delta_1 are positive, ranked by the
# lower score bound.
(
    new_recs[(new_recs["delta_0"] > 0) & (new_recs["delta_1"] > 0)]
    .sort_values(by="score_lower_bound", ascending=False)
    .head(20)
)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp,series_id
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
7062,Hidamari Sketch x ☆☆☆,TV,7.151099,6.350553,7.951645,0.2386,0.408449,0.070078,0.12549,0.039982,0.025661,6.912499,1143
3604,Hidamari Sketch x 365,TV,7.070441,6.291381,7.849501,0.286492,0.397487,0.066893,0.16685,0.037203,0.02351,6.783948,1143
29787,Gochuumon wa Usagi desu ka??,TV,7.08432,6.249047,7.919592,0.286126,0.426167,0.1921,0.049193,0.057946,0.01908,6.798193,5698
11239,Hidamari Sketch x Honeycomb,TV,7.117378,6.242418,7.992338,0.256371,0.446416,0.060675,0.148544,0.039488,0.03717,6.861007,1143
444,Maria-sama ga Miteru: Haru,TV,7.244508,6.159405,8.32961,0.471125,0.553634,0.312245,0.084805,0.082031,0.040952,6.773382,102
1088,Macross,TV,7.056986,6.116359,7.997612,0.207203,0.47992,0.088949,0.082645,0.063236,0.031113,6.849783,130
34973,Love Live! Sunshine!! 2nd Season,TV,7.353952,6.089932,8.617972,0.896905,0.64492,0.531112,0.220806,0.113491,0.04674,6.457047,4084
488,Ichigo Mashimaro,TV,6.807836,6.069682,7.545989,0.142521,0.376616,0.096119,0.024098,0.043635,0.016405,6.665315,346
593,Mugen no Ryvius,TV,7.096539,6.046891,8.146188,0.626538,0.535545,0.145769,0.365377,0.059581,0.040354,6.470001,427
33089,Kemono Friends,TV,7.15709,6.043041,8.271139,0.831109,0.568403,0.214725,0.464653,0.073477,0.033021,6.325981,8062
