# This file contains miscellaneous code.

## Import local packages, e.g., dice, spotlight, and contant variables setting

In [1]:
import os
import sys

for p in ['../spotlight_ext', '../dice_ext']:
    module_path = os.path.abspath(os.path.join(p))
    if module_path not in sys.path:
        sys.path.append(module_path)

In [2]:
import numpy as np
import scipy.stats as st
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import make_scorer

import torch

In [3]:
models_path = '../models'

## Usage example for spotlight

In [4]:
from spotlight.cross_validation import random_train_test_split
from spotlight.datasets.movielens import get_movielens_dataset
# from spotlight.evaluation import mrr_score
# from spotlight.factorization.implicit import ImplicitFactorizationModel

In [6]:
# dataset = get_movielens_dataset(variant='100K')

# train, test = random_train_test_split(dataset)

# model = ImplicitFactorizationModel(n_iter=3, loss='bpr')
# model.fit(train)

# mrr = mrr_score(model, test)

## **Sequential models** (candidate for our problem)

In [5]:
# from spotlight.cross_validation import user_based_train_test_split
# from spotlight.datasets.synthetic import generate_sequential


# dataset = generate_sequential(num_users=100,
#                               num_items=1000,
#                               num_interactions=10000,
#                               concentration_parameter=0.01,
#                               order=3)

# train, test = user_based_train_test_split(dataset)

dataset = get_movielens_dataset(variant='1M')
train, test = random_train_test_split(dataset, random_state=np.random.RandomState(2020))

max_sequence_length = 20
train = train.to_sequence(max_sequence_length=max_sequence_length)
test = test.to_sequence(max_sequence_length=max_sequence_length)

## train model

In [8]:
from spotlight.sequence.implicit import ImplicitSequenceModel
from spotlight.evaluation import sequence_mrr_score

model = ImplicitSequenceModel(
    batch_size=256,
    embedding_dim=32,
    l2=0.0,
    learning_rate=0.05,
    n_iter=11,
    representation='lstm',
    loss='adaptive_hinge',
#     use_cuda=torch.cuda.is_available(),
    random_state=np.random.RandomState(2020)
)
model.fit(train)

mrr = sequence_mrr_score(model, test)

KeyboardInterrupt: 

## save model

In [None]:
ofile = 'entire_model_1m_20interactions.pt'
torch.save(model, os.path.join(models_path, ofile))

## or load a saved model

In [6]:
ofile = 'entire_model_1m.pt'

model = torch.load(os.path.join(models_path, ofile))

In [7]:
items_interacted = test.sequences[test.user_ids==2][0]

In [8]:
predictions = -model.predict(items_interacted[:-1])
print(f'Item to predict: {items_interacted[-1]}')

Item to predict: 127


In [10]:
next_item_pos = st.rankdata(predictions, method='ordinal')[items_interacted[-1]]
next_item_pos

769

: 

In [None]:
sorted(enumerate(predictions), key=lambda x: x[1])[int(next_item_pos) - 1]

(134, -0.9794686)

## Causal convolutions for sequence-based recommendations

In [None]:
hyperparameters = {
    'embedding_dim': 128,
    'kernel_width': 5,
    'dilation': [1, 2, 4],
    'num_layers': 5,
    'nonlinearity': 'relu',
    'residual': True,
    'loss': 'adaptive_hinge',
    'batch_size': 128,
    'learning_rate': 0.01,
    'l2': 0,
    'n_iter': 50
}

In [None]:
import torch

from spotlight.sequence.implicit import ImplicitSequenceModel
from spotlight.sequence.representations import CNNNet
from spotlight.evaluation import sequence_mrr_score


net = CNNNet(train.num_items,
             embedding_dim=hyperparameters['embedding_dim'],
             kernel_width=hyperparameters['kernel_width'],
             dilation=hyperparameters['dilation'],
             num_layers=hyperparameters['num_layers'],
             nonlinearity=hyperparameters['nonlinearity'],
             residual_connections=hyperparameters['residual'])

model = ImplicitSequenceModel(loss=hyperparameters['loss'],
                              representation=net,
                              batch_size=hyperparameters['batch_size'],
                              learning_rate=hyperparameters['learning_rate'],
                              l2=hyperparameters['l2'],
                              n_iter=hyperparameters['n_iter'],
                              use_cuda=torch.cuda.is_available(),
#                               random_state=random_state
                             )

model.fit(train)

test_mrr = sequence_mrr_score(model, test)
# val_mrr = sequence_mrr_score(model, validation)

In [None]:
print(f'Test MRR {test_mrr}')

Test MRR [0.01612903 0.0060241  0.00290698 0.00218818 0.05263158 0.00220751
 0.01052632 0.0052356  0.03333333 0.00108696 0.05       0.33333333
 0.00411523 0.00108696 0.00116009 0.01754386 0.00115875 0.00174216
 1.         0.00273224 0.00103199 0.01149425 0.01470588 0.00150376
 0.00115741 0.00134771 0.00526316 0.00189036 0.00181488 0.00409836
 0.00537634 0.00101833 0.01075269 0.00168067 1.         0.00232019
 0.00315457 0.01020408 0.00111235 0.00290698 0.125      0.00584795
 0.01149425 0.00970874 0.0013624  0.00161812 0.00175439 0.00308642
 0.00275482 1.         0.00137931 0.00423729 0.16666667 0.00187266
 0.00138122 0.00154799 0.00485437 0.00121951 0.00127877 0.125
 0.00117647 0.00507614 0.00564972 1.         0.01960784 0.00169205
 0.00172414 0.00198807 0.01282051 0.0010989  0.00555556 0.00116686
 0.00247525 0.00689655 0.01492537 0.0037037  0.01315789 0.001321
 0.00143472 0.001287   0.00662252 0.00168919 0.00280899 0.00298507
 0.004      0.03333333 0.00204499 0.0013245  0.00181818 0.00

# Brute-force example on Sequential model

In [None]:
# load trained model
ofile = 'entire_model_1m.pt'
model = torch.load(os.path.join(models_path, ofile))

In [None]:
# initialize input parameters
k = 10 #nb of recommended items
no_interactions = 5 #the number of interactions to consider
user_id = 8 #the user for whom recommendations are generated

In [None]:
#The SelectedInteractions class is defined in the code to represent and manage information related to a set of selected interactions
class SelectedInteractions:
    def __init__(self, p=-1, i=None):
        pos = p
        interactions = i
        
    def __str__(self): 
        items_order = [(n[0], n[1].detach().numpy().flatten()[0]) if isinstance(n[1], torch.Tensor) else (n[0], n[1]) for n in self.items_order]
            
        return (f'Found in iter {self.counter_found_best} with score/in pos {self.score} with interactions {self.interactions}\n'
                f'10-best proposed items {items_order}')
    
    score = 0 #The score associated with the selected interactions.
    pos = -1 #The position of the selected item.
    interactions = [] #The interactions selected.
    items_order = [] #A list of items along with their scores, sorted in descending order.
    counter_found_best = -1 #The iteration number when the best interactions were found.

In [None]:
items_interacted = test.sequences[test.user_ids==user_id][0]
predictions = -model.predict(items_interacted[:no_interactions])

print(f'Given the following interactions {items_interacted[:no_interactions]} for user {user_id} the next most {k} possible items'
      f' to interact with are {list(predictions.argsort()[:k])}')
cand = input('Choose one of the above next interacted items that should become less candidate: ')
try:
    cand = int(cand)
except ValueError:
    print("That's not an int!")

print(f'Current pos of selected item {cand} is {st.rankdata(predictions, method="ordinal")[cand]}\n')

Given the following interactions [227 501 492 454 463] for user 8 the next most 10 possible itemsto interact with are [510, 438, 284, 40, 281, 60, 539, 313, 325, 439]


Choose one of the above next interacted items that should become less candidate:  325


Current pos of selected item 325 is 9



In [None]:
#A loop iterates over the permutations of the user's past interactions. 
# For each permutation, it calculates predictions using the model, applies softmax, and ranks the items based on predictions. It tracks the best interactions and their rankings.
from itertools import permutations
import torch.nn.functional as F


counter = 1
best_inter = SelectedInteractions()

for l in range(1, no_interactions + 1):
    perm = permutations(items_interacted[:no_interactions], l)    

    for i in list(perm):        
        preds = model.predict(i) 
        tensor = torch.from_numpy(preds).float()
        preds = F.softmax(tensor, dim=0)        
        item_pos = st.rankdata(-preds, method='ordinal')[cand]
        if item_pos > best_inter.score:
            best_inter.score = item_pos
            best_inter.interactions = i
            best_inter.items_order = sorted(enumerate(preds), key=lambda x: x[1], reverse=True)[:k]
            best_inter.counter_found_best = counter

        counter += 1
    
print(best_inter, f'\nTotal iterations: {counter}')

Found in iter 2 with score/in pos 180 with interactions (501,)
10-best proposed items [(568, 0.0023526023), (306, 0.002310076), (39, 0.002220974), (892, 0.0020520852), (326, 0.0019568491), (1119, 0.0018849755), (466, 0.0017736341), (86, 0.0017688118), (147, 0.0015101795), (918, 0.0014980054)] 
Total iterations: 326


In [None]:
import cvxpy as cp

# Create two scalar optimization variables.
x = cp.Variable()
y = cp.Variable()

# Create two constraints.
constraints = [x + y == 1,
               x - y >= 1]

# Form objective.
obj = cp.Minimize((x - y)**2)

# Form and solve problem.
prob = cp.Problem(obj, constraints)
prob.solve()  # Returns the optimal value.
print("status:", prob.status)
print("optimal value", prob.value)
print("optimal var", x.value, y.value)

status: optimal
optimal value 1.0
optimal var 1.0 1.570086213240983e-22


In [None]:
# Solves a bounded least-squares problem.

import cvxpy as cp
import numpy

# Problem data.
m = 10
n = 5
numpy.random.seed(1)
A = numpy.random.randn(m, n)
b = numpy.random.randn(m)

# Construct the problem.
x = cp.Variable(n)
objective = cp.Minimize(cp.sum_squares(A @ x - b))
constraints = [0 <= x, x <= 1]
prob = cp.Problem(objective, constraints)

print("Optimal value", prob.solve())
print("Optimal var")
print(x.value) # A numpy ndarray.

Optimal value 4.141338603672535
Optimal var
[-4.95922264e-21  6.07571976e-21  1.34643668e-01  1.24976681e-01
 -4.57130806e-21]


In [None]:
import h5py
filename = "~/spotlight_data/movielens/v0.2.0/movielens_movielens_100K.hdf5"

with h5py.File(os.path.expanduser(filename), "r") as f:
    # List all groups
    print("Keys: %s" % f.keys())
    a_group_key = list(f.keys())[0]

    # Get the data
    data = list(f[a_group_key])

Keys: <KeysViewHDF5 ['item_id', 'rating', 'timestamp', 'user_id']>


In [None]:
os.system("jupyter nbconvert misc.ipynb --to slides")

0

In [None]:
!jupyter nbconvert misc.ipynb --to pdf

[NbConvertApp] Converting notebook misc.ipynb to pdf
[NbConvertApp] Writing 63994 bytes to notebook.tex
[NbConvertApp] Building PDF
[NbConvertApp] Running xelatex 3 times: ['xelatex', 'notebook.tex', '-quiet']
[NbConvertApp] Running bibtex 1 time: ['bibtex', 'notebook']
[NbConvertApp] PDF successfully created
[NbConvertApp] Writing 64232 bytes to misc.pdf


In [None]:
!pwd

/home/vkaff/gits/CFExplainability/notebooks


In [None]:
!ls

archive.ics.uci.edu	    matrix_factorization_for_rec_expl.ipynb
brute_force_rec_expl.ipynb  misc.ipynb
budget_strategies.ipynb     misc.pdf
cvxpy_usage_rec_expl.ipynb  pooling_repr_for_rec_expl.ipynb
Dice_test.ipynb		    README.md
fair_rec		    score_preds.ipynb
helpers.ipynb		    torch_rec


### This code was used to debug the "why-not" explainability mode

In [None]:
import numpy as np
from collections import defaultdict
import scipy.stats as st
# get currently working directory
base_dir = os.getcwd()

# load functions from other notebooks
helpers_file = os.path.join(base_dir, 'helpers.ipynb').replace("\\", "/")
%run $helpers_file


FLOAT_MAX = np.finfo(np.float32).max


target_pos = 5
user_id = 1001
top_k = 10
brute_force_specific_cfs = None
i = 0

# while brute_force_specific_cfs == [] or brute_force_specific_cfs is None:
#     i += 1
#     brute_force_specific_cfs = _find_specific_cfs_(test, pretrained_models['lstm'], get_backend_strategy('brute_force'), 7, False, jaccard_sims_matrix, i, 100000, 10)

brute_force_specific_cfs = _find_specific_cfs_(test, pretrained_models['lstm'], get_backend_strategy('brute_force'), target_pos, False, jaccard_sims_matrix, user_id, 1048576, top_k)

user_sequences = test.sequences[test.user_ids == user_id]
user_sequences = [sequence for sequence in user_sequences if all(value > 0 for value in sequence)]

original_interactions = user_sequences[0]
print("original_interactions", original_interactions)

best_interactions = brute_force_specific_cfs[0].interactions['best']
print("best_interactions", best_interactions)

items_removed = np.setdiff1d(original_interactions, best_interactions)
print("items_removed", items_removed)


original_interactions = list(original_interactions)
predictions = -pretrained_models['lstm'].predict(original_interactions)
predictions[original_interactions] = FLOAT_MAX
predictions[0] = FLOAT_MAX
target_item = predictions.argsort()[min(top_k, target_pos)]
print("target_item", target_item)

print("===========================")
original_interaction_test = [2216, 1678, 509, 3016, 929, 801, 881, 2914, 944, 704, 2284, 252, 2902, 1427, 884, 1437, 2640, 1880, 2451, 271]

predictions_test = -pretrained_models['lstm'].predict(original_interaction_test)
predictions_test[original_interaction_test] = FLOAT_MAX
predictions_test[0] = FLOAT_MAX
target_item_test = predictions_test.argsort()[min(top_k, target_pos)]
# rk_data = st.rankdata(-predictions_test, method='ordinal')
print("target_item", target_item)
print("target_item_test", target_item_test)
print("original_interactions", original_interactions)
print("original_interaction_test", original_interaction_test)
print("============================")

predictions_reverse = -pretrained_models['lstm'].predict(best_interactions)
predictions_reverse[best_interactions] = FLOAT_MAX
pos_target_item_reverse = np.where(predictions_reverse.argsort() == target_item)[0][0]
print("position target item in reverse mode", pos_target_item_reverse)

worst_jaccard_sample = find_sample_with_jaccard(target_item, best_interactions, jaccard_sims_matrix, 20, worst_items = True)
print("worst_jacc", worst_jaccard_sample)

jaccard_sample = find_sample_with_jaccard(target_item, best_interactions, jaccard_sims_matrix, 20, worst_items = False)
print("jacc", jaccard_sample)
rs_sample = find_sample_with_recommender(target_item, best_interactions, pretrained_models['lstm'], 20)
if set(items_removed) <= set(jaccard_sample):
    print("jaccard_sample", jaccard_sample)
    search_info = retrieve_solutions_specific_sequence(user_id, test, pretrained_models['lstm'], get_backend_strategy('brute_force'), 100000, top_k, True, jaccard_sims_matrix, best_interactions, target_item, jaccard_sample)

elif set(items_removed) <= set(rs_sample):
    print("rs_sample", rs_sample)
    search_info = retrieve_solutions_specific_sequence(user_id, test, pretrained_models['lstm'], get_backend_strategy('brute_force'), 100000, top_k, True, jaccard_sims_matrix, best_interactions, target_item, rs_sample)
else:
    worst_jaccard_sample[:len(items_removed)] = items_removed[:len(items_removed)]
    print("modified_sample", worst_jaccard_sample)
    search_info = retrieve_solutions_specific_sequence(user_id, test, pretrained_models['lstm'], get_backend_strategy('brute_force'), 1048576, top_k, True, jaccard_sims_matrix, best_interactions, target_item, worst_jaccard_sample)

print("search_info", search_info[0].interactions['best'])

items_removed2 = np.setdiff1d(search_info[0].interactions['best'], best_interactions)
print("items_removed", items_removed2)

result2 = dict.fromkeys([pos_target_item_reverse])
result2[pos_target_item_reverse] = []
result2[pos_target_item_reverse].extend(search_info)

cnt2 = defaultdict(dict)
no_target_achieved_cases2 = defaultdict(list)


cnt2, no_target_achieved_cases2 = convert_res_to_lists(result2, cnt2, no_target_achieved_cases2, "random_0", True)
print(cnt2)
print(no_target_achieved_cases2)

result = dict.fromkeys([target_pos])
result[target_pos] = []
result[target_pos].extend(brute_force_specific_cfs)

cnt = defaultdict(dict)
no_target_achieved_cases = defaultdict(list)

target_item = 0

cnt, no_target_achieved_cases = convert_res_to_lists(result, cnt, no_target_achieved_cases, "random_0", False)
print(cnt)
print(no_target_achieved_cases)

In [None]:
import numpy as np
from collections import defaultdict

# get currently working directory
base_dir = os.getcwd()

# load functions from other notebooks
helpers_file = os.path.join(base_dir, 'helpers.ipynb').replace("\\", "/")
%run $helpers_file


FLOAT_MAX = np.finfo(np.float32).max

target_pos = 1
user_id = 8
top_k = 10
combo_specific_cfs = None
i = 0

while combo_specific_cfs == [] or combo_specific_cfs is None:
    i += 1
    combo_specific_cfs = _find_specific_cfs_(test, pretrained_models['lstm'], get_backend_strategy('combo'), target_pos, False, jaccard_sims_matrix, i, 1000, 10, alpha=0.5, normalization='default')

# combo_specific_cfs = _find_specific_cfs_(test, pretrained_models['lstm'], get_backend_strategy('combo'), target_pos, False, jaccard_sims_matrix, user_id, 1000, top_k)
print("user_id = ", i)
user_sequences = test.sequences[test.user_ids == user_id]
user_sequences = [sequence for sequence in user_sequences if all(value > 0 for value in sequence)]

original_interactions = user_sequences[0]
print("original_interactions", original_interactions)

best_interactions = combo_specific_cfs[0].interactions['best']
print("best_interactions", best_interactions)

items_removed = np.setdiff1d(original_interactions, best_interactions)
print("items_removed", items_removed)

predictions = -pretrained_models['lstm'].predict(original_interactions)
predictions[original_interactions] = FLOAT_MAX
target_item = predictions.argsort()[min(top_k, target_pos)]
print("target_item", target_item)

predictions_reverse = -pretrained_models['lstm'].predict(best_interactions)
predictions_reverse[best_interactions] = FLOAT_MAX
pos_target_item_reverse = np.where(predictions_reverse.argsort() == target_item)[0][0]
print("position target item in reverse mode", pos_target_item_reverse)

worst_jaccard_sample = find_sample_with_jaccard(target_item, best_interactions, jaccard_sims_matrix, 20, worst_items = True)
print("worst_jacc", worst_jaccard_sample)

jaccard_sample = find_sample_with_jaccard(target_item, best_interactions, jaccard_sims_matrix, 20, worst_items = True)
print("jacc", jaccard_sample)
rs_sample = find_sample_with_recommender(target_item, best_interactions, pretrained_models['lstm'], 20)
if set(items_removed) <= set(jaccard_sample):
    print("jaccard_sample", jaccard_sample)
    search_info = retrieve_solutions_specific_sequence(i, test, pretrained_models['lstm'], get_backend_strategy('combo'), 1000, top_k, True, jaccard_sims_matrix, best_interactions, target_item, jaccard_sample)

elif set(items_removed) <= set(rs_sample):
    print("rs_sample", rs_sample)
    search_info = retrieve_solutions_specific_sequence(i, test, pretrained_models['lstm'], get_backend_strategy('combo'), 1000, top_k, True, jaccard_sims_matrix, best_interactions, target_item, rs_sample)
else:
    worst_jaccard_sample[:len(items_removed)] = items_removed[:len(items_removed)]
    print("modified_sample", worst_jaccard_sample)
    search_info = retrieve_solutions_specific_sequence(i, test, pretrained_models['lstm'], get_backend_strategy('combo'), 1000, top_k, True, jaccard_sims_matrix, best_interactions, target_item, worst_jaccard_sample)

print("search_info", search_info[0].interactions['best'])

result2 = dict.fromkeys([pos_target_item_reverse])
result2[pos_target_item_reverse] = []
result2[pos_target_item_reverse].extend(search_info)

cnt2 = defaultdict(dict)
no_target_achieved_cases2 = defaultdict(list)


cnt2, no_target_achieved_cases2 = convert_res_to_lists(result2, cnt2, no_target_achieved_cases2, "random_0", True)
print(cnt2)
print(no_target_achieved_cases2)

result = dict.fromkeys([target_pos])
result[target_pos] = []
result[target_pos].extend(combo_specific_cfs)

cnt = defaultdict(dict)
no_target_achieved_cases = defaultdict(list)

target_item = 0

cnt, no_target_achieved_cases = convert_res_to_lists(result, cnt, no_target_achieved_cases, "random_0", False)
print(cnt)
print(no_target_achieved_cases)