In [1]:
import functools
from functools import partial
import itertools
import logging
import math
import os
import pickle
import sys
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import scipy as sp
import pingouin as pg
import seaborn as sns
import yaml

%load_ext autoreload
%autoreload 2

%matplotlib inline

# Configure seaborn in a single call. `sns.set(...)` re-applies seaborn's default
# context ("notebook") and style unless they are passed explicitly, so calling
# `sns.set_context("poster")` *before* `sns.set(rc=...)` had no lasting effect.
sns.set(context="poster", style="whitegrid", rc={"figure.figsize": (16, 12.0)})

import numpy as np
import pandas as pd
from scipy.stats import kendalltau, rankdata, spearmanr, pearsonr, ttest_rel
import torch.nn.functional as F

# Let pandas show up to 120 rows and 120 columns before truncating a frame.
for _display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(_display_option, 120)

# Emit INFO-level (and above) log records on stdout so they appear in the cell output.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [2]:
from lda4rec.datasets import Interactions, DataLoader, random_train_test_split, MOVIELENS_1M,MetaData, get_dataset, items_per_user_train_test_split
import lda4rec.evaluations as lda_eval
from lda4rec.estimators import MFEst, PopEst, LDA4RecEst, SNMFEst
from lda4rec.utils import process_ids, cmp_ranks, Config, split_along_dim_apply, plot_cat
from lda4rec import lda

In [3]:
import pyro
import pyro.distributions as dist
import pyro.optim as optim
import torch
from pyro.distributions import constraints
from pyro.infer import SVI, Predictive, Trace_ELBO, TraceEnum_ELBO, config_enumerate

In [4]:
import neptune.new as neptune
# Start a dummy Neptune run in offline mode so that code which logs to Neptune
# does not run into problems.
# NOTE(review): the returned run object is not kept and never stopped explicitly;
# per the message printed below it is only stopped when the kernel terminates.
neptune.init(mode='offline')

offline/eb853f9b-39fc-42db-b963-58e89cfb0691
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


<neptune.new.run.Run at 0x7fe85833b940>

In [5]:
from icecream import ic, install

install()
# configure icecream
def ic_str(obj):
    """Return the text representation of ``obj`` for icecream's output.

    Objects exposing a ``shape`` attribute are formatted via an f-string and
    get a single trailing space appended; everything else goes through ``str``.
    """
    is_shaped = hasattr(obj, "shape")
    return f"{obj} " if is_shaped else str(obj)

In [6]:
# Have icecream convert every inspected argument to text with `ic_str`.
ic.configureOutput(argToStringFunction=ic_str)

## Evaluation of Interpretation

### Calculating the Statistics

In [32]:
# Latent (embedding) dimension of the model per dataset. The statistics loop only
# evaluates runs whose `est_params.embedding_dim` equals the value given here.
ML_DIM = 64  # movielens-1m
GB_DIM = 128  # goodbooks
AM_DIM = 128  # amazon -- TODO: value not verified yet ("Check this!!!")

In [8]:
results = []

# NOTE(review): `get_cfgs_from_path` is neither imported nor defined in any cell
# visible in this notebook -- confirm where it lives and import it in the import
# cell so that this cell survives "Restart Kernel -> Run All".

# Embedding dimension that is evaluated for each dataset; every other run is skipped.
eval_dims = {'movielens-1m': ML_DIM, 'goodbooks': GB_DIM, 'amazon': AM_DIM}

# All experiments use the same paired, one-sided ("greater") t-test.
paired_ttest = partial(pg.ttest, paired=True, alternative='greater')

for cfg in get_cfgs_from_path(Path('../configs')):
    cfg_exp = cfg['experiment']
    dataset = cfg_exp['dataset']
    if dataset not in eval_dims or cfg_exp['est_params']['embedding_dim'] != eval_dims[dataset]:
        continue

    train, test, data_rng = lda_eval.get_train_test_data(cfg)
    est = lda_eval.load_model(Path('../models'), cfg, train)

    v, t, h, b = est.get_lda_params()

    cfg['result'] = {}
    cfg_res = cfg['result']

    # The train split is always handled before the test split. `data_rng` is passed
    # on to the helpers (presumably consumed there), so this order must not change.
    splits = (('train', train), ('test', test))

    # first experiment
    # -> keys 'ttest_cohort_user_interaction_train' / '..._test'
    for split_name, split in splits:
        user_ids, log_probs = lda_eval.cohort_user_interaction_log_probs(split, v, h, rng=data_rng)
        cfg_res[f'ttest_cohort_user_interaction_{split_name}'] = paired_ttest(log_probs[:, 1], log_probs[:, 0])

    # second experiment
    cfg_res['corr_popularity'] = lda_eval.popularity_ranking_corr(train, b)

    # third experiment
    emp_pops = lda_eval.get_empirical_pops(train)
    inv_t = (1/t).numpy()  # computed once and shared by both correlations
    cfg_res['corr_conformity_pop'] = lda_eval.conformity_interaction_pop_ranking_corr(emp_pops, inv_t, train)
    cfg_res['corr_conformity_b'] = lda_eval.conformity_interaction_pop_ranking_corr(b, inv_t, train)

    # fourth experiment
    # -> keys 'ttest_user_interaction_good_bad_{train,test}' and
    #         'ttest_user_interaction_good_rnd_{train,test}'
    user_ids, good_twins, bad_twins, rnd_twins = lda_eval.find_good_bad_rnd_twins(v, n_users=2000, rng=data_rng)
    for split_name, split in splits:
        good_jacs = lda_eval.get_twin_jacs(user_ids, good_twins, split)
        bad_jacs = lda_eval.get_twin_jacs(user_ids, bad_twins, split)
        rnd_jacs = lda_eval.get_twin_jacs(user_ids, rnd_twins, split)
        cfg_res[f'ttest_user_interaction_good_bad_{split_name}'] = paired_ttest(good_jacs, bad_jacs)
        cfg_res[f'ttest_user_interaction_good_rnd_{split_name}'] = paired_ttest(good_jacs, rnd_jacs)

    results.append(cfg)

  bf10 = 1 / ((1 + t**2 / df)**(-(df + 1) / 2) / integr)
  bf10 = 1 / ((1 + t**2 / df)**(-(df + 1) / 2) / integr)
  bf10 = 1 / ((1 + t**2 / df)**(-(df + 1) / 2) / integr)
  bf10 = 1 / ((1 + t**2 / df)**(-(df + 1) / 2) / integr)
  bf10 = 1 / ((1 + t**2 / df)**(-(df + 1) / 2) / integr)
  bf10 = 1 / ((1 + t**2 / df)**(-(df + 1) / 2) / integr)
  bf10 = 1 / ((1 + t**2 / df)**(-(df + 1) / 2) / integr)
  bf10 = 1 / ((1 + t**2 / df)**(-(df + 1) / 2) / integr)
  bf10 = 1 / ((1 + t**2 / df)**(-(df + 1) / 2) / integr)
  bf10 = 1 / ((1 + t**2 / df)**(-(df + 1) / 2) / integr)


In [9]:
# Persist the collected per-run results next to the notebook.
results_file = Path('evaluation_results.pickle')
with results_file.open('wb') as sink:
    pickle.dump(results, sink)

In [10]:
# Read the per-run results back from the pickle file written by the cell above.
with Path('evaluation_results.pickle').open('rb') as source:
    results = pickle.load(source)

### Evaluation of the Statistics

#### Experiment 1

In [50]:
# One row per evaluated run: p-value and Cohen's d of the paired cohort t-tests on
# the train and on the test split. (The original cell never closed the `dict(` call
# and therefore could not be parsed.)
exp1_df = pd.DataFrame([dict(dataset=res['experiment']['dataset'],
                             train_p=res['result']['ttest_cohort_user_interaction_train']['p-val'].loc['T-test'],
                             train_d=res['result']['ttest_cohort_user_interaction_train']['cohen-d'].loc['T-test'],
                             test_p=res['result']['ttest_cohort_user_interaction_test']['p-val'].loc['T-test'],
                             test_d=res['result']['ttest_cohort_user_interaction_test']['cohen-d'].loc['T-test'])
                        for res in results])
# Mean and standard deviation over the runs of each dataset. The string aliases give
# the same result as passing np.mean/np.std, without pandas' deprecation warning.
exp1_df = exp1_df.groupby('dataset').agg(['mean', 'std'])
# NOTE(review): `calc_pairs` is not defined in any visible cell, 'amazon' is not
# normalised, and the output shown below this cell lists correlation-count columns
# rather than t-test columns. This normalisation looks like a leftover from an
# earlier version of the experiment -- confirm whether it should still be applied
# to p-values and effect sizes.
exp1_df.loc['goodbooks'] = exp1_df.loc['goodbooks'] / (2*calc_pairs(GB_DIM))
exp1_df.loc['movielens-1m'] = exp1_df.loc['movielens-1m'] / (2*calc_pairs(ML_DIM))

In [51]:
exp1_df

Unnamed: 0_level_0,corr_cohorts_counts_100,corr_cohorts_counts_100,corr_cohorts_counts_250,corr_cohorts_counts_250,corr_cohorts_counts_1000,corr_cohorts_counts_1000
Unnamed: 0_level_1,mean,std,mean,std,mean,std
dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
goodbooks,0.003396,0.00024,0.104035,0.003396,0.303593,0.003454
movielens-1m,0.023909,0.004051,0.184921,0.009848,0.79127,0.048758


In [97]:
# Sanity check: weighted tau between a ranking and its exact reverse
# (the output below reports a correlation of -1.0).
sp.stats.weightedtau(np.arange(1000), np.arange(1000)[::-1], additive=False)

WeightedTauResult(correlation=-1.0, pvalue=nan)

In [92]:
# Weighted tau between columns 1 and 30 of `h`.
# NOTE(review): `h` is not defined in this cell; it is whatever the last iteration of
# the statistics loop assigned (`v, t, h, b = est.get_lda_params()`), so the result
# depends on which configuration happened to be processed last.
sp.stats.weightedtau(h[:, 1], h[:, 30])

WeightedTauResult(correlation=0.10209813096996548, pvalue=nan)

#### Experiment 2

In [104]:
def _exp2_row(res):
    """Build the experiment-2 record of one run.

    Entries 0 and 1 of ``res['result']['corr_popularity']`` become the columns
    ``corr_pop`` and ``p_val`` (presumably correlation and p-value -- the
    producing helper is not visible here).
    """
    dataset = res['experiment']['dataset']
    corr_popularity = res['result']['corr_popularity']
    return {'dataset': dataset,
            'corr_pop': corr_popularity[0],
            'p_val': corr_popularity[1]}


exp2_df = pd.DataFrame([_exp2_row(res) for res in results])

In [105]:
# Mean and standard deviation of every column over the runs of each dataset.
# String aliases replace np.mean/np.std: passing the numpy callables to `agg` is
# deprecated in recent pandas, while the aliases keep the current result.
# NOTE: this overwrites `exp2_df`; re-run the construction cell before re-running this one.
exp2_df = exp2_df.groupby('dataset').agg(['mean', 'std'])

In [107]:
exp2_df

Unnamed: 0_level_0,corr_pop,corr_pop,p_val,p_val
Unnamed: 0_level_1,mean,std,mean,std
dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
goodbooks,0.224924,0.005144,3.809073e-238,0.0
movielens-1m,0.524872,0.01222,0.0,0.0


#### Experiment 3

In [136]:
def _exp3_row(res):
    """Build the experiment-3 record of one run.

    Entries 0 and 1 of the two conformity results are copied into the
    ``corr_*`` and ``p_val_*`` columns respectively (presumably correlation
    and p-value -- the producing helper is not visible here).
    """
    dataset = res['experiment']['dataset']
    conformity_pop = res['result']['corr_conformity_pop']
    conformity_b = res['result']['corr_conformity_b']
    return {'dataset': dataset,
            'corr_conf_pop': conformity_pop[0],
            'p_val_conf_pop': conformity_pop[1],
            'corr_conf_b': conformity_b[0],
            'p_val_conf_b': conformity_b[1]}


exp3_df = pd.DataFrame([_exp3_row(res) for res in results])

In [137]:
# Mean and standard deviation of every column over the runs of each dataset.
# String aliases replace np.mean/np.std: passing the numpy callables to `agg` is
# deprecated in recent pandas, while the aliases keep the current result.
# NOTE: this overwrites `exp3_df`; re-run the construction cell before re-running this one.
exp3_df = exp3_df.groupby('dataset').agg(['mean', 'std'])

In [138]:
exp3_df

Unnamed: 0_level_0,corr_conf_pop,corr_conf_pop,p_val_conf_pop,p_val_conf_pop,corr_conf_b,corr_conf_b,p_val_conf_b,p_val_conf_b
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std
dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
goodbooks,-0.125499,0.007543,0.0,0.0,-0.102292,0.008147,4.064346e-209,0.0
movielens-1m,0.309979,0.009963,3.070797e-272,0.0,0.428386,0.008681,0.0,0.0


#### Experiment 4

In [147]:
# Output column -> (key of the stored t-test table, column read from its 'T-test' row).
exp4_columns = {
    'good_bad_train_p': ('ttest_user_interaction_good_bad_train', 'p-val'),
    'good_bad_train_d': ('ttest_user_interaction_good_bad_train', 'cohen-d'),
    'good_bad_test_p': ('ttest_user_interaction_good_bad_test', 'p-val'),
    'good_bad_test_d': ('ttest_user_interaction_good_bad_test', 'cohen-d'),
    'good_rnd_train_p': ('ttest_user_interaction_good_rnd_train', 'p-val'),
    'good_rnd_train_d': ('ttest_user_interaction_good_rnd_train', 'cohen-d'),
    'good_rnd_test_p': ('ttest_user_interaction_good_rnd_test', 'p-val'),
    'good_rnd_test_d': ('ttest_user_interaction_good_rnd_test', 'cohen-d'),
}


def _exp4_row(res):
    """Flatten the twin t-tests of one run into a single record."""
    row = {'dataset': res['experiment']['dataset']}
    for column, (test_name, stat) in exp4_columns.items():
        row[column] = res['result'][test_name][stat].loc['T-test']
    return row


exp4_df = pd.DataFrame([_exp4_row(res) for res in results])

In [148]:
# Mean and standard deviation of every column over the runs of each dataset.
# String aliases replace np.mean/np.std: passing the numpy callables to `agg` is
# deprecated in recent pandas, while the aliases keep the current result.
# NOTE: this overwrites `exp4_df`; re-run the construction cell before re-running this one.
exp4_df = exp4_df.groupby('dataset').agg(['mean', 'std'])

In [149]:
exp4_df

Unnamed: 0_level_0,good_bad_train_p,good_bad_train_p,good_bad_train_d,good_bad_train_d,good_bad_test_p,good_bad_test_p,good_bad_test_d,good_bad_test_d,good_rnd_train_p,good_rnd_train_p,good_rnd_train_d,good_rnd_train_d,good_rnd_test_p,good_rnd_test_p,good_rnd_test_d,good_rnd_test_d
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
goodbooks,0.0,0.0,2.855045,0.05383,8.773512000000001e-62,1.391755e-61,0.573157,0.033733,0.0,0.0,2.052336,0.039997,1.738667e-36,3.872404e-36,0.433807,0.028482
movielens-1m,0.0,0.0,2.112473,0.057101,5.639419e-68,1.261012e-67,0.61397,0.027271,1.000312e-302,0.0,1.379008,0.049892,3.304985e-38,6.773357999999999e-38,0.424972,0.016892


In [150]:
# Print the LaTeX table instead of displaying the string's repr, so that line breaks
# and backslashes are shown unescaped and the output can be pasted into a document.
print(exp4_df.to_latex())

'\\begin{tabular}{lrrrrrrrrrrrrrrrr}\n\\toprule\n{} & \\multicolumn{2}{l}{good\\_bad\\_train\\_p} & \\multicolumn{2}{l}{good\\_bad\\_train\\_d} & \\multicolumn{2}{l}{good\\_bad\\_test\\_p} & \\multicolumn{2}{l}{good\\_bad\\_test\\_d} & \\multicolumn{2}{l}{good\\_rnd\\_train\\_p} & \\multicolumn{2}{l}{good\\_rnd\\_train\\_d} & \\multicolumn{2}{l}{good\\_rnd\\_test\\_p} & \\multicolumn{2}{l}{good\\_rnd\\_test\\_d} \\\\\n{} &             mean &  std &             mean &       std &            mean &           std &            mean &       std &             mean &  std &             mean &       std &            mean &           std &            mean &       std \\\\\ndataset      &                  &      &                  &           &                 &               &                 &           &                  &      &                  &           &                 &               &                 &           \\\\\n\\midrule\ngoodbooks    &              0.0 &  0.0 &         2.8550

In [6]:
# Load the "movielens-1m" interactions dataset for inspection.
ml = get_dataset("movielens-1m")

In [14]:
# Load the "amazon" interactions dataset for inspection.
# NOTE(review): `mzn` is displayed twice in this notebook with different sizes
# (38748 users x 27028 items vs. 39071 users x 111645 items) and the execution counts
# are out of order -- re-run on a fresh kernel to confirm which version is current.
mzn = get_dataset("amazon")

In [15]:
mzn

<Interactions dataset (38748 users x 27028 items x 1545278 interactions)>

In [8]:
ml

<Interactions dataset (6040 users x 3706 items x 1000208 interactions)>

In [9]:
mzn

<Interactions dataset (39071 users x 111645 items x 2141749 interactions)>

In [10]:
# Load the "goodbooks" interactions dataset for inspection.
gb = get_dataset("goodbooks")

In [11]:
gb

<Interactions dataset (53424 users x 10000 items x 5976479 interactions)>