In [1]:
import functools
import os
import pickle
import random

import numpy as np
import pandas as pd
import scipy.stats as st
import statsmodels.formula.api as smf


# One-call helper: build an OLS model from a formula and fit it immediately.
# functools.wraps copies the metadata (name, docstring, signature hints) of
# smf.ols onto this wrapper, so a docstring written here would be replaced;
# note that, unlike smf.ols itself, the value returned is the result of .fit().
@functools.wraps(smf.ols)
def lm(*args, **kwargs):
    unfitted = smf.ols(*args, **kwargs)
    return unfitted.fit()

In [2]:
# Notebook configuration.
# `recommendee` is also the name of the data directory the notebook switches into.
recommendee = "Fro116"
# Two-sided confidence level used for the score lower/upper bounds.
confidence_interval = 0.99
# Each entry X is loaded from "X_is.pkl" (and "X_oos.pkl" when cross-validating);
# the values used in this notebook are "item" and "user".
delta_sources = ["item", "user"]
# If true, the linear model is fit on the out-of-sample ("*_oos.pkl") deltas,
# otherwise on the in-sample ("*_is.pkl") deltas.
cross_validate = True

In [3]:
# Switch into the per-recommendee data directory; every relative path used
# later (the *.pkl files, "../../AnimeList.csv") is resolved from here.
# NOTE: the path is relative, so re-running this cell in the same kernel
# resolves it against the already-changed directory — restart before re-running.
os.chdir(f"../../data/deltas/{recommendee}")

In [4]:
def get_deltas(sources):
    """Load pickled delta frames and join them side by side.

    Each file in ``sources`` is unpickled, every column of the loaded frame is
    renamed to ``<column>_<i>`` where ``i`` is the position of the file in
    ``sources``, and the frames are concatenated column-wise (aligned on
    their index).

    Args:
        sources: sequence of paths to pickle files. Each file is expected to
            unpickle to a pandas DataFrame whose column names are strings.

    Returns:
        One DataFrame holding the suffixed columns of every source.

    Raises:
        ValueError: raised by ``pd.concat`` when ``sources`` is empty.
    """
    deltas = []
    for i, source in enumerate(sources):
        # SECURITY: pickle.load can execute arbitrary code; only point this at
        # files you produced yourself.
        # The context manager guarantees the file handle is closed even if
        # unpickling fails.
        with open(source, "rb") as handle:
            delta = pickle.load(handle)
        delta = delta.rename(columns={x: x + f"_{i}" for x in delta.columns})
        deltas.append(delta)
    return pd.concat(deltas, axis=1)

In [5]:
def clean_data(df):
    """Fill missing delta values in place and return the same frame.

    The number of delta sources is taken from the count of columns whose name
    contains "delta_var". For each source index ``i``:

    * missing ``delta_{i}`` values become 0;
    * missing ``delta_var_{i}`` values become the largest variance observed
      in that column.

    Args:
        df: DataFrame with ``delta_{i}`` / ``delta_var_{i}`` column pairs.
            It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    variance_columns = [name for name in df.columns if "delta_var" in name]
    for idx, _ in enumerate(variance_columns):
        mean_col = f"delta_{idx}"
        var_col = f"delta_var_{idx}"
        df[mean_col] = df[mean_col].fillna(0)
        # an unknown variance is treated as the worst case seen in the column
        largest_var = df[var_col].max()
        df[var_col] = df[var_col].fillna(largest_var)
    return df

In [6]:
# Training deltas: the out-of-sample files when cross-validating, the
# in-sample files otherwise.
train_df = get_deltas(
    [f"{x}_{'oos' if cross_validate else 'is'}.pkl" for x in delta_sources]
)
# Pairwise correlation between the per-source delta columns of the training data.
delta_corrs = train_df[[f"delta_{i}" for i, _ in enumerate(delta_sources)]].corr()

In [7]:
# Load the labelled rows and attach the training deltas by anime_id, then fill
# the gaps in the delta columns.
# SECURITY: pickle.load can execute arbitrary code; only use trusted files.
# The context manager closes the file handle once the data is loaded.
with open("recommendee.pkl", "rb") as labelled_file:
    labelled_data = pickle.load(labelled_file)
labelled_data = clean_data(labelled_data.merge(train_df, on="anime_id"))

In [8]:
# Display the correlation matrix of the per-source training deltas.
delta_corrs

Unnamed: 0,delta_0,delta_1
delta_0,1.0,0.581461
delta_1,0.581461,1.0


In [9]:
# Fit the linear model: normalized_score regressed on one delta column per
# source; the "0 +" term in the formula removes the intercept.
delta_cols = [f"delta_{i}" for i, _ in enumerate(delta_sources)]
formula = f"normalized_score ~ 0 +{' + '.join(delta_cols)}"
model = lm(formula, labelled_data)
print(model.summary())

                                 OLS Regression Results                                
Dep. Variable:       normalized_score   R-squared (uncentered):                   0.219
Model:                            OLS   Adj. R-squared (uncentered):              0.215
Method:                 Least Squares   F-statistic:                              51.97
Date:                Sat, 15 May 2021   Prob (F-statistic):                    1.27e-20
Time:                        23:32:09   Log-Likelihood:                         -673.66
No. Observations:                 373   AIC:                                      1351.
Df Residuals:                     371   BIC:                                      1359.
Df Model:                           2                                                  
Covariance Type:            nonrobust                                                  
                 coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------

In [10]:
# Prediction frame: always built from the in-sample delta files, then gap-filled.
df = get_deltas([f"{source}_is.pkl" for source in delta_sources])
df = clean_data(df)

In [11]:
# Load the pickled "blp" values and combine them with the model output:
# score = model prediction + blp, delta = score - blp.
# SECURITY: pickle.load can execute arbitrary code; only use trusted files.
# The context manager closes the file handle once the data is loaded.
with open("blp.pkl", "rb") as blp_file:
    blp = pickle.load(blp_file)
df["blp"] = blp
df["score"] = model.predict(df) + df["blp"]
df["delta"] = df["score"] - df["blp"]

In [12]:
# Compute error bars for every predicted score.
#
# Step 1: per-source variance of (coefficient * delta). The expression has the
# form of the variance of a product of two random variables,
#   (Var[d] + E[d]^2) * (Var[b] + E[b]^2) - E[d]^2 * E[b]^2,
# with d taken from delta_{i} / delta_var_{i} and b from the fitted
# coefficient (model.params) and its standard error (model.bse).
# NOTE(review): that identity assumes d and b are independent — confirm.
model_vars = pd.DataFrame()
for col in delta_cols:
    i = col.split("_")[1]  # source index, e.g. "delta_0" -> "0"
    model_vars[f"model_delta_var_{i}"] = (
        (df[f"delta_var_{i}"] + df[f"delta_{i}"] ** 2)
        * (model.bse[f"delta_{i}"] ** 2 + model.params[f"delta_{i}"] ** 2)
    ) - df[f"delta_{i}"] ** 2 * model.params[f"delta_{i}"] ** 2
model_stds = np.sqrt(model_vars)

# Step 2: combine the per-source standard deviations into one variance per row
# using the training-data correlations: sum_jk std_j * corr_jk * std_k.
# The correlation matrix is first restricted to the rows/columns in delta_cols.
delta_corrs = delta_corrs.loc[lambda x: (x.index.isin(delta_cols)), delta_cols]
delta_variance = np.sum(
    (model_stds.values @ delta_corrs.values) * model_stds.values, axis=1
)
# Step 3: add the intercept's variance, but only if the fitted model has an
# "Intercept" term; otherwise it contributes nothing.
intercept_variance = 0
if "Intercept" in model.bse:
    intercept_variance = model.bse["Intercept"] ** 2
df["std"] = np.sqrt(delta_variance + intercept_variance)

# Step 4: two-sided normal quantile for the configured confidence level,
# applied symmetrically around the score.
zscore = st.norm.ppf(1 - (1 - confidence_interval) / 2)
df["score_lower_bound"] = df["score"] - df["std"] * zscore
df["score_upper_bound"] = df["score"] + df["std"] * zscore

  (model_stds.values @ delta_corrs.values) * model_stds.values, axis=1


In [13]:
# Attach title and type to every row, then index the result by anime_id.
anime = pd.read_csv("../../AnimeList.csv")[["anime_id", "title", "type"]]
df = df.merge(anime, on="anime_id")
df = df.set_index("anime_id")

In [15]:
# Put the headline columns first; every other column keeps its relative order
# and follows after them.
cols = [
    "title",
    "type",
    "score",
    "score_lower_bound",
    "score_upper_bound",
    "delta",
    "std",
    *delta_cols,
]
df = df.loc[:, cols + [name for name in df.columns if name not in cols]]

In [16]:
# Candidate recommendations: TV entries whose anime_id is not among the
# labelled rows.
new_recs = df[~df.index.isin(labelled_data["anime_id"]) & (df["type"] == "TV")]

In [17]:
# Top 20 rows with a positive delta, ranked by the lower bound of the score.
df[df["delta"] > 0].sort_values("score_lower_bound", ascending=False).head(20)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
3297,Aria The Origination,TV,8.577507,7.259643,9.89537,1.012609,0.511627,0.024136,0.759445,0.039415,0.020167,7.564898
32,Neon Genesis Evangelion: The End of Evangelion,Movie,8.610783,7.124759,10.096808,1.372855,0.576911,-0.075659,1.123164,0.032373,0.003514,7.237928
3784,Evangelion: 2.0 You Can (Not) Advance,Movie,8.145666,7.104892,9.186441,0.686015,0.404054,-0.085374,0.602298,0.034534,0.00345,7.459652
820,Ginga Eiyuu Densetsu,OVA,8.260103,7.066259,9.453947,0.473956,0.463479,-0.239025,0.571503,0.034026,0.02446,7.786147
11981,Mahou Shoujo Madoka★Magica Movie 3: Hangyaku n...,Movie,7.912137,6.958112,8.866162,0.588947,0.370376,-0.129674,0.565735,0.024972,0.004683,7.32319
30,Neon Genesis Evangelion,TV,8.187171,6.955523,9.418819,1.082123,0.478156,-0.038354,0.866942,0.029934,0.002736,7.105048
9756,Mahou Shoujo Madoka★Magica,TV,7.701197,6.918146,8.484247,0.339171,0.303999,-0.223311,0.454082,0.017092,0.002482,7.362026
11979,Mahou Shoujo Madoka★Magica Movie 2: Eien no Mo...,Movie,7.701688,6.907411,8.495965,0.386266,0.308358,0.022722,0.278031,0.028575,0.006785,7.315422
962,Aria The Natural,TV,8.03592,6.902361,9.16948,0.859586,0.440076,0.155197,0.528418,0.044016,0.013166,7.176334
9617,K-On! Movie,Movie,7.586856,6.788641,8.385071,0.326063,0.309887,0.111689,0.154857,0.038896,0.005676,7.260793


In [18]:
# Top 30 new recommendations with a positive delta, ranked by the lower bound
# of the score.
new_recs[new_recs["delta"] > 0].sort_values(
    "score_lower_bound", ascending=False
).head(30)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
7062,Hidamari Sketch x ☆☆☆,TV,7.153846,6.122063,8.185628,0.241346,0.400563,0.070078,0.12549,0.039982,0.025661,6.912499
3604,Hidamari Sketch x 365,TV,7.075402,6.073713,8.077092,0.291454,0.38888,0.066893,0.16685,0.037203,0.02351,6.783948
29787,Gochuumon wa Usagi desu ka??,TV,7.077194,6.005286,8.149102,0.279001,0.416141,0.1921,0.049193,0.057946,0.01908,6.798193
11239,Hidamari Sketch x Honeycomb,TV,7.121739,5.989686,8.253792,0.260733,0.439491,0.060675,0.148544,0.039488,0.03717,6.861007
34914,New Game!!,TV,6.919471,5.93947,7.899473,0.153111,0.38046,0.16318,-0.022853,0.046156,0.017397,6.766361
488,Ichigo Mashimaro,TV,6.804244,5.855621,7.752868,0.13893,0.368279,0.096119,0.024098,0.043635,0.016405,6.665315
1088,Macross,TV,7.056663,5.845558,8.267767,0.20688,0.47018,0.088949,0.082645,0.063236,0.031113,6.849783
444,Maria-sama ga Miteru: Haru,TV,7.233166,5.835349,8.630983,0.459784,0.542667,0.312245,0.084805,0.082031,0.040952,6.773382
19111,Love Live! School Idol Project 2nd Season,TV,6.944944,5.782591,8.107297,0.106514,0.451254,0.199912,-0.090461,0.09123,0.011599,6.83843
593,Mugen no Ryvius,TV,7.10744,5.766853,8.448026,0.637439,0.520449,0.145769,0.365377,0.059581,0.040354,6.470001


In [19]:
# Top 20 new recommendations where delta_0 is negative but delta_1 is
# positive, ranked by the lower bound of the score.
new_recs[(new_recs["delta_0"] < 0) & (new_recs["delta_1"] > 0)].sort_values(
    "score_lower_bound", ascending=False
).head(20)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
182,Tenkuu no Escaflowne,TV,6.594651,5.623813,7.565488,-0.012732,0.376903,-0.058731,0.040878,0.046723,0.01666,6.607382
85,Mobile Suit Zeta Gundam,TV,7.005354,5.597014,8.413694,0.179767,0.546752,-0.007264,0.14479,0.080153,0.044852,6.825587
147,Kimi ga Nozomu Eien,TV,6.421694,5.564985,7.278404,0.144987,0.332595,-0.01502,0.124684,0.043462,0.008324,6.276708
31771,Amanchu!,TV,6.550243,5.560566,7.53992,0.357767,0.384217,-0.000105,0.275771,0.034315,0.020682,6.192476
26,Texhnolyze,TV,6.429416,5.520616,7.338216,0.001626,0.352818,-0.255894,0.222104,0.037665,0.011384,6.42779
1453,Maison Ikkoku,TV,6.988106,5.470771,8.505441,-0.02585,0.589067,-0.06306,0.034506,0.108358,0.044287,7.013956
80,Mobile Suit Gundam,TV,6.722951,5.410736,8.035166,0.055934,0.509434,-0.121945,0.148346,0.081311,0.031058,6.667017
2402,Ashita no Joe,TV,7.220074,5.299414,9.140735,0.32026,0.745647,-0.172064,0.395281,0.10387,0.107229,6.899814
2921,Ashita no Joe 2,TV,7.493085,5.230003,9.756167,0.032268,0.878584,-0.299554,0.283397,0.09444,0.205213,7.460817
26165,Yuri Kuma Arashi,TV,6.248925,5.184642,7.313207,0.439687,0.413181,-0.05635,0.387438,0.041349,0.017762,5.809238


In [20]:
# Top 20 new recommendations where delta_0 is positive but delta_1 is
# negative, ranked by the lower bound of the score.
new_recs[(new_recs["delta_0"] > 0) & (new_recs["delta_1"] < 0)].sort_values(
    "score_lower_bound", ascending=False
).head(20)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
34914,New Game!!,TV,6.919471,5.93947,7.899473,0.153111,0.38046,0.16318,-0.022853,0.046156,0.017397,6.766361
30279,Yuru Yuri San☆Hai!,TV,6.848207,5.835974,7.860439,-0.024268,0.392973,0.055982,-0.067015,0.055767,0.015284,6.872475
19111,Love Live! School Idol Project 2nd Season,TV,6.944944,5.782591,8.107297,0.106514,0.451254,0.199912,-0.090461,0.09123,0.011599,6.83843
427,Kaleido Star,TV,7.186301,5.646797,8.725806,0.328315,0.597673,0.402852,-0.094699,0.104251,0.045804,6.857987
30727,Saenai Heroine no Sodatekata ♭,TV,6.535085,5.608058,7.462112,-0.138812,0.359894,0.003351,-0.109855,0.046328,0.012445,6.673897
3750,Maria-sama ga Miteru 4th,TV,7.044134,5.534165,8.554103,0.11814,0.586207,0.250062,-0.124785,0.092832,0.050255,6.925994
2986,Bamboo Blade,TV,6.496483,5.469533,7.523434,0.126238,0.398687,0.170997,-0.050306,0.052305,0.018056,6.370245
31706,Fate/kaleid liner Prisma☆Illya 3rei!!,TV,6.65148,5.46271,7.840249,0.196434,0.461509,0.193754,-0.015857,0.06998,0.024533,6.455045
10278,The iDOLM@STER,TV,6.360286,5.438937,7.281634,-0.139352,0.35769,0.015037,-0.120357,0.0395,0.015522,6.499638
1222,Bokura ga Ita,TV,6.421844,5.400334,7.443354,0.057599,0.396575,0.057148,-0.004939,0.060797,0.013973,6.364245


In [21]:
# Top 20 new recommendations where both delta_0 and delta_1 are positive,
# ranked by the lower bound of the score.
new_recs[(new_recs["delta_0"] > 0) & (new_recs["delta_1"] > 0)].sort_values(
    "score_lower_bound", ascending=False
).head(20)

Unnamed: 0_level_0,title,type,score,score_lower_bound,score_upper_bound,delta,std,delta_0,delta_1,delta_var_0,delta_var_1,blp
anime_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
7062,Hidamari Sketch x ☆☆☆,TV,7.153846,6.122063,8.185628,0.241346,0.400563,0.070078,0.12549,0.039982,0.025661,6.912499
3604,Hidamari Sketch x 365,TV,7.075402,6.073713,8.077092,0.291454,0.38888,0.066893,0.16685,0.037203,0.02351,6.783948
29787,Gochuumon wa Usagi desu ka??,TV,7.077194,6.005286,8.149102,0.279001,0.416141,0.1921,0.049193,0.057946,0.01908,6.798193
11239,Hidamari Sketch x Honeycomb,TV,7.121739,5.989686,8.253792,0.260733,0.439491,0.060675,0.148544,0.039488,0.03717,6.861007
488,Ichigo Mashimaro,TV,6.804244,5.855621,7.752868,0.13893,0.368279,0.096119,0.024098,0.043635,0.016405,6.665315
1088,Macross,TV,7.056663,5.845558,8.267767,0.20688,0.47018,0.088949,0.082645,0.063236,0.031113,6.849783
444,Maria-sama ga Miteru: Haru,TV,7.233166,5.835349,8.630983,0.459784,0.542667,0.312245,0.084805,0.082031,0.040952,6.773382
593,Mugen no Ryvius,TV,7.10744,5.766853,8.448026,0.637439,0.520449,0.145769,0.365377,0.059581,0.040354,6.470001
33089,Kemono Friends,TV,7.16949,5.760714,8.578266,0.843509,0.546921,0.214725,0.464653,0.073477,0.033021,6.325981
14131,Girls & Panzer,TV,6.614022,5.735265,7.492778,0.160222,0.341155,0.068775,0.064104,0.042567,0.011149,6.453799
