In [None]:
import gzip, json
import math
import random
from collections import Counter

import bottleneck as bn
import fsspec
import numpy as np
import pandas as pd
import pickle5 as pickle
from scipy import sparse
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.linear_model import LinearRegression
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import MultiLabelBinarizer

# Shrinkage term added to the denominator of the cosine similarities.
shrinkage = 20


In [None]:
from tensorflow.keras.layers import Input, Concatenate, Dense, Dropout, Embedding, Flatten, Dot#, MultiHeadAttention, Attention
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import L1L2, l1, l2
from tensorflow.keras.callbacks import Callback
import tensorflow as tf

## MovieLens

#### Movielens Hetrec: 
This dataset is an extension of the MovieLens10M dataset and links the movies of the MovieLens dataset with their corresponding web pages on the Internet Movie Database (IMDb) and Rotten Tomatoes movie review systems. As item features, we kept only movie years, genres, actors, directors, countries, and locations. The preferences have been binarized, considering all ratings greater than or equal to 3.5 as positive preferences.


### All MovieLens Hetrec features processed in the NFC paper are treated as categorical features

In [None]:
RAW_DIR = "data/raw-datasets/MovielensHetrec/hetrec2011-movielens-2k-v2"


def load_dat(path, encoding="latin-1"):
    """Read a tab-separated ``.dat`` file into a DataFrame of strings.

    The first non-empty line provides the column names; every following
    non-empty line becomes one row. Carriage returns are stripped and blank
    lines (typically the one after the final newline) are skipped, so they no
    longer produce a bogus trailing row.

    Parameters
    ----------
    path : str
        Location of the ``.dat`` file.
    encoding : str, default "latin-1"
        Text encoding of the file.
        NOTE(review): latin-1 is assumed for the HetRec dump -- confirm.

    Returns
    -------
    pandas.DataFrame
        One column per header field; all values are kept as strings.
    """
    # newline="" disables newline translation so "\r" is handled explicitly below.
    with open(path, encoding=encoding, newline="") as handle:
        data = handle.read()
    rows = []
    for line in data.split("\n"):
        line = line.replace("\r", "")
        if line:
            rows.append(line.split("\t"))
    return pd.DataFrame(rows[1:], columns=rows[0])


#1- movie tags
df_movie_tags = load_dat(RAW_DIR + "/movie_tags.dat")

#2- movies (year but also anything relevant)
df_movies = load_dat(RAW_DIR + "/movies.dat")

#3- movie genres
df_movie_genres = load_dat(RAW_DIR + "/movie_genres.dat")

#4- movie actors
df_movie_actors = load_dat(RAW_DIR + "/movie_actors.dat")

#5- movie directors
df_movie_directors = load_dat(RAW_DIR + "/movie_directors.dat")

#6- movie countries
df_movie_countries = load_dat(RAW_DIR + "/movie_countries.dat")

#7- movie locations
df_movie_locations = load_dat(RAW_DIR + "/movie_locations.dat")

In [None]:
#get click data
SPLIT_DIR = "data/splits/MovielensHetrec/original/implicit_3.0/kcore_user_5_item_5_5_feature_5_reshaped"
HOLDOUT_DIR = SPLIT_DIR + "/cold_items_holdout_0.80_0.00_0.20_testtreshold_0.0_no_cold_users"

# Item-content matrix.
path = SPLIT_DIR + "/ICM_all.npz"
df_icm = sparse.load_npz(path)

# Training interactions.
path = HOLDOUT_DIR + "/URM_all_0_train.npz"
df_urm = sparse.load_npz(path)

# Test interactions.
# NOTE(review): the original pointed this one file at a "kdd-data/" root while
# every other path uses "data/"; it is read from the same holdout folder as the
# training split here -- confirm the location.
path = HOLDOUT_DIR + "/URM_all_0_test.npz"
df_urm_test = sparse.load_npz(path)


In [None]:
def read_pickle(path):
    """Return the object stored in the pickle file at ``path``.

    NOTE(security): unpickling can execute arbitrary code; only use this on
    files produced by a trusted pipeline.
    """
    with open(path, "rb") as handle:
        return pickle.load(handle)


urm_all = read_pickle("data/splits/MovielensHetrec/original/implicit_3.0/kcore_user_5_item_5_5_feature_5_reshaped/URM_all_mapper")
icm_all = read_pickle("data/splits/MovielensHetrec/original/implicit_3.0/kcore_user_5_item_5_5_feature_5_reshaped/ICM_all_mapper")


In [None]:
# urm_all carries the index mappers of the ratings matrix (nb_users x number of
# movies): entry 0 is used as the user mapper, entry 1 as the movie mapper.
dict_user_indexes = urm_all[0]
dict_movie_indexes = urm_all[1]

# Reverse lookup: matrix index -> movie id.
dict_index_movies = {
    matrix_index: movie_id for movie_id, matrix_index in dict_movie_indexes.items()
}

movieIDs = df_movies["id"]


In [None]:
#prepare ratings and metadata matrices (train, vad and test data)

def group_by_movie(frame, id_column, value_column, unique=False):
    """Collect the values of ``value_column`` for every id in ``id_column``.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table holding one (movie id, value) pair per row.
    id_column, value_column : str
        Names of the id column and of the column to collect.
    unique : bool, default False
        When True a value is appended only if the movie's list does not
        contain it yet; when False every row is appended, duplicates included.

    Returns
    -------
    dict
        Maps each id to the list of its values, in row order.
    """
    grouped = {}
    # Iterating the two columns directly avoids a positional .iloc lookup per row.
    for movie_id, value in zip(frame[id_column], frame[value_column]):
        bucket = grouped.setdefault(movie_id, [])
        if not unique or value not in bucket:
            bucket.append(value)
    return grouped


#tags, years, genres, actors (no filtering for countries, directors)
dict_tags = group_by_movie(df_movie_tags, "movieID", "tagID")
dict_years = group_by_movie(df_movies, "id", "year")
dict_genres = group_by_movie(df_movie_genres, "movieID", "genre")
dict_actors = group_by_movie(df_movie_actors, "movieID", "actorName")
dict_directors = group_by_movie(df_movie_directors, "movieID", "directorName")
dict_countries = group_by_movie(df_movie_countries, "movieID", "country")

# The three location levels are de-duplicated per movie.
dict_location1 = group_by_movie(df_movie_locations, "movieID", "location1", unique=True)
dict_location2 = group_by_movie(df_movie_locations, "movieID", "location2", unique=True)
dict_location3 = group_by_movie(df_movie_locations, "movieID", "location3", unique=True)

In [None]:
# Visit the matrix indexes in ascending order so that position i of every list
# below describes the movie stored at index i, whatever the insertion order of
# the mapper was.
# NOTE(review): assumes the mapper values are integer matrix indexes -- confirm.
ordered_movie_ids = [dict_index_movies[index] for index in sorted(dict_index_movies)]


def collect_features(grouped):
    """Return one feature list per movie, in matrix-index order.

    Movies missing from ``grouped`` get an empty list.
    """
    return [grouped.get(movie_id, []) for movie_id in ordered_movie_ids]


list_tags = collect_features(dict_tags)

# Years are numeric: keep the first recorded year of each movie as an int.
# NOTE(review): a movie without a year yields [], which makes list_years ragged.
list_years = [
    [int(dict_years[movie_id][0])] if movie_id in dict_years else []
    for movie_id in ordered_movie_ids
]

list_genres = collect_features(dict_genres)
list_actors = collect_features(dict_actors)
list_directors = collect_features(dict_directors)
list_countries = collect_features(dict_countries)
list_location1 = collect_features(dict_location1)
list_location2 = collect_features(dict_location2)
list_location3 = collect_features(dict_location3)

In [None]:
def flatten_features(nested, drop_empty=False):
    """Concatenate the per-movie lists of ``nested`` into one flat list.

    With ``drop_empty=True`` every empty string is left out. The previous
    ``list.remove('')`` call dropped only the first one and raised
    ``ValueError`` when there was none.
    """
    return [
        value
        for per_movie in nested
        for value in per_movie
        if not (drop_empty and value == '')
    ]


total_list_actors = flatten_features(list_actors)
total_list_directors = flatten_features(list_directors)
total_list_genres = flatten_features(list_genres)
total_list_tags = flatten_features(list_tags)

# Empty strings are not real categories for the geographic features.
total_list_location1 = flatten_features(list_location1, drop_empty=True)
total_list_location2 = flatten_features(list_location2, drop_empty=True)
total_list_location3 = flatten_features(list_location3, drop_empty=True)
total_list_countries = flatten_features(list_countries, drop_empty=True)


In [None]:
# One space-separated "document" per movie, made of its three location levels.
list_locations = [
    ' '.join((' '.join(level1), ' '.join(level2), ' '.join(level3)))
    for level1, level2, level3 in zip(list_location1, list_location2, list_location3)
]
    

In [None]:
def _frequent_values(counter, column):
    """Turn ``counter`` into a frame sorted by descending count, keeping counts >= 2."""
    frame = pd.DataFrame(list(counter.items()), columns=[column, 'count'])
    ranked = frame.sort_values("count", ascending=False)
    return ranked[ranked["count"] >= 2]


def _multi_hot(label_lists, classes):
    """Fit a MultiLabelBinarizer restricted to ``classes``; return it with the encoding."""
    binarizer = MultiLabelBinarizer(classes=list(classes))
    return binarizer, binarizer.fit_transform(label_lists)


# Occurrence counts per categorical feature; values seen only once are dropped.
counter_actors = Counter(total_list_actors)
df_actors = _frequent_values(counter_actors, 'actors')

counter_directors = Counter(total_list_directors)
df_directors = _frequent_values(counter_directors, 'directors')

counter_genres = Counter(total_list_genres)
df_genres = _frequent_values(counter_genres, 'genres')

counter_tags = Counter(total_list_tags)
df_tags = _frequent_values(counter_tags, 'tags')

counter_countries = Counter(total_list_countries)
df_countries = _frequent_values(counter_countries, 'countries')

counter_location1 = Counter(total_list_location1)
df_location1 = _frequent_values(counter_location1, 'location1')

counter_location2 = Counter(total_list_location2)
df_location2 = _frequent_values(counter_location2, 'location2')

counter_location3 = Counter(total_list_location3)
df_location3 = _frequent_values(counter_location3, 'location3')

# TF-IDF encoding of the concatenated location strings.
max_features = 10000
pipe_feature = Pipeline([
    ('count', CountVectorizer(max_features=max_features)),
    ('tfid', TfidfTransformer()),
]).fit(list_locations)
encoding_locations = pipe_feature.transform(list_locations).toarray()

# Multi-hot encodings restricted to the retained classes.
mlb, encoding_actors = _multi_hot(list_actors, df_actors.actors)
mlb, encoding_directors = _multi_hot(list_directors, df_directors.directors)
mlb, encoding_genres = _multi_hot(list_genres, df_genres.genres)
mlb, encoding_tags = _multi_hot(list_tags, df_tags.tags)
mlb, encoding_location1 = _multi_hot(list_location1, df_location1.location1)
mlb, encoding_location2 = _multi_hot(list_location2, df_location2.location2)
mlb, encoding_location3 = _multi_hot(list_location3, df_location3.location3)
mlb, encoding_countries = _multi_hot(list_countries, df_countries.countries)

In [None]:
# Print the shape of every encoding, one per line, grouped by feature family.
encodings_to_report = (
    encoding_tags,        # text features
    encoding_genres,      # genre features
    encoding_actors,      # cast features
    encoding_directors,
    encoding_countries,   # geographic features
    encoding_location1,
    encoding_location2,
    encoding_location3,
    encoding_locations,
)
for encoded in encodings_to_report:
    print(encoded.shape)

# + don't forget years, which should not be part of an embedding layer

In [None]:
# The same scaler object is re-fitted on each matrix in turn, so it ends up
# fitted on the years.
scaler = MinMaxScaler()
(Btags, Bgenres, Bactors, Bdirectors, Bcountries,
 Blocation1, Blocation2, Blocation3, Blocations, Byears) = [
    scaler.fit_transform(matrix)
    for matrix in (
        encoding_tags,
        encoding_genres,
        encoding_actors,
        encoding_directors,
        encoding_countries,
        encoding_location1,
        encoding_location2,
        encoding_location3,
        encoding_locations,
        list_years,
    )
]

In [None]:
def cosine_sim(encoding,shrinkage=0.1):
    """Shrunk cosine similarity between the rows of ``encoding``.

    Entry (i, j) is ``<e_i, e_j> / (||e_i|| * ||e_j|| + shrinkage)``.

    Parameters
    ----------
    encoding : array-like of shape (n_rows, n_features)
        Dense feature matrix, one row per item.
    shrinkage : float, default 0.1
        Constant added to every denominator.

    Returns
    -------
    numpy.ndarray of shape (n_rows, n_rows)
        Pairwise similarities. Entries whose denominator is exactly zero
        (zero-norm rows with ``shrinkage=0``) are 0 instead of NaN.
    """
    encoding = np.asarray(encoding)
    dot_products = encoding.dot(encoding.T)
    norms = np.linalg.norm(encoding, axis=1)
    denominator = np.outer(norms, norms) + shrinkage
    similarity = np.zeros(denominator.shape,
                          dtype=np.result_type(dot_products, denominator))
    # Divide only where it is defined; the remaining entries keep their 0.
    np.divide(dot_products, denominator, out=similarity, where=denominator != 0)
    return similarity

In [None]:
shrinkage = 20

# Item-item similarity per feature family. B1 (tags) is deliberately not computed.
B2 = cosine_sim(Bgenres, shrinkage = shrinkage)
B3 = cosine_sim(Bactors, shrinkage = shrinkage)
B4 = cosine_sim(Bdirectors, shrinkage = shrinkage)
B5 = cosine_sim(Bcountries, shrinkage = shrinkage)
B6 = cosine_sim(Blocation1, shrinkage = shrinkage)
B7 = cosine_sim(Blocation2, shrinkage = shrinkage)
B8 = cosine_sim(Blocation3, shrinkage = shrinkage)
B10 = cosine_sim(Blocations, shrinkage = shrinkage)

# Years: pairwise distance rescaled to [0, 1] and flipped into a similarity.
year_distance = euclidean_distances(Byears)
max_year_distance = year_distance.max()
if max_year_distance > 0:
    B9 = 1 - year_distance / max_year_distance
else:
    # All years identical: every pair is maximally similar (avoids 0/0 -> NaN).
    B9 = np.ones_like(year_distance)

# The original cell also computed `Bf = cosine_sim(Bconstant, ...)`, but
# `Bconstant` is not defined anywhere in this notebook and `Bf` is never read,
# so the statement only raised a NameError and has been removed.

# An item must not count as its own neighbour: zero every diagonal in place.
size = B2.shape[0]
for similarity in (B2, B3, B4, B5, B6, B7, B8, B9, B10):
    np.fill_diagonal(similarity, 0)

### 2- Cosine similarity weighting, linear weights

In [None]:
X = df_urm.toarray()

# Propagate the training interactions through each similarity matrix.
X2 = X.dot(B2)
X3 = X.dot(B3)
X4 = X.dot(B4)
X5 = X.dot(B5)
X6 = X.dot(B6)
X7 = X.dot(B7)
X8 = X.dot(B8)
X9 = X.dot(B9)
X10 = X.dot(B10)

# Regression problem: one sample per (user, item) cell, one column per similarity.
X = np.asarray(X).reshape(-1, 1)
Xtot = np.concatenate(
    [np.asarray(scores).reshape(-1, 1)
     for scores in (X2, X3, X4, X5, X6, X7, X8, X9, X10)],
    axis=1)

# Fail early with a clear message; the regression cannot be fitted on NaN values.
array_sum = np.sum(Xtot)
array_has_nan = np.isnan(array_sum)
if array_has_nan:
    raise ValueError("Similarity-propagated scores contain NaN values")

print(Xtot.shape)

# Sample weights: zero cells are down-weighted to 0.2, other cells keep their
# own value. np.where produces floats, so 0.2 is not truncated to 0 when the
# interaction matrix has an integer dtype.
my_array = np.where(X == 0, 0.2, X).astype(float).flatten()

reg = LinearRegression().fit(Xtot, X, sample_weight=my_array)

# The target is a single column, so coef_ has one row holding the nine weights.
weights = reg.coef_[0]
X2bis = weights[0]*X2
X3bis = weights[1]*X3
X4bis = weights[2]*X4
X5bis = weights[3]*X5
X6bis = weights[4]*X6
X7bis = weights[5]*X7
X8bis = weights[6]*X8
X9bis = weights[7]*X9
X10bis = weights[8]*X10

pred_val = X2bis+X3bis+X4bis+X5bis+X6bis+X7bis+X8bis+X9bis+X10bis
# Never recommend items the user already interacted with in training.
values = df_urm.nonzero()
pred_val[values] =  -np.inf

In [None]:
def NDCG_binary_at_k_batch(X_pred, heldout_batch, k=100):
    '''
    Normalized discounted cumulative gain@k for binary relevance.

    X_pred holds one row of predicted scores per user; heldout_batch is a
    sparse matrix of held-out interactions with the same layout.
    ASSUMPTIONS: all the 0's in heldout_data indicate 0 relevance.
    Returns one NDCG value per user; users without held-out items divide by
    zero and yield NaN.
    '''
    n_users = X_pred.shape[0]
    row_ids = np.arange(n_users)[:, np.newaxis]

    # Unordered top-k candidates first, then order those k by descending score.
    candidates = bn.argpartition(-X_pred, k, axis=1)[:, :k]
    order = np.argsort(-X_pred[row_ids, candidates], axis=1)
    ranked_items = candidates[row_ids, order]

    # Positional discount 1/log2(rank + 1) for ranks 1..k.
    discounts = 1. / np.log2(np.arange(2, k + 2))

    hits = heldout_batch[row_ids, ranked_items].toarray()
    dcg = (hits * discounts).sum(axis=1)

    # Ideal DCG: all held-out items of the user (at most k) ranked first.
    heldout_counts = heldout_batch.getnnz(axis=1)
    idcg = np.array([discounts[:min(count, k)].sum() for count in heldout_counts])

    return dcg / idcg


def Recall_at_k_batch(X_pred, heldout_batch, k=100):
    '''
    Recall@k per user: number of held-out items found among the k highest
    scored items, divided by min(k, number of held-out items).

    X_pred holds one row of predicted scores per user; heldout_batch is a
    sparse matrix whose positive entries mark the held-out items. Users
    without held-out items divide by zero and yield NaN.
    '''
    n_users = X_pred.shape[0]
    row_ids = np.arange(n_users)[:, np.newaxis]

    # Boolean mask of the k best-scored items of each user.
    top_k = bn.argpartition(-X_pred, k, axis=1)[:, :k]
    recommended = np.zeros_like(X_pred, dtype=bool)
    recommended[row_ids, top_k] = True

    relevant = (heldout_batch > 0).toarray()

    hits = np.logical_and(relevant, recommended).sum(axis=1).astype(np.float32)
    best_possible = np.minimum(k, relevant.sum(axis=1))
    return hits / best_possible

In [None]:
test_data = df_urm_test

# Each metric list holds a single entry: the per-user scores of the one
# prediction matrix evaluated here.
r10_list = [Recall_at_k_batch(pred_val, test_data, k=10)]
n10_list = [NDCG_binary_at_k_batch(pred_val, test_data, k=10)]
r25_list = [Recall_at_k_batch(pred_val, test_data, k=25)]
n25_list = [NDCG_binary_at_k_batch(pred_val, test_data, k=25)]
r50_list = [Recall_at_k_batch(pred_val, test_data, k=50)]
n50_list = [NDCG_binary_at_k_batch(pred_val, test_data, k=50)]
r100_list = [Recall_at_k_batch(pred_val, test_data, k=100)]
n100_list = [NDCG_binary_at_k_batch(pred_val, test_data, k=100)]

In [None]:
#final metrics: shrinkage =20
# Each *_list holds per-user score arrays. They are flattened before reporting
# so the value in parentheses is the standard error over users; on the raw
# list, len() is the number of arrays (1), not the number of users.
for template, per_batch_scores in (
    ("Test NDCG@10=%.5f (%.5f)", n10_list),
    ("Test NDCG@25=%.5f (%.5f)", n25_list),
    ("Test NDCG@50=%.5f (%.5f)", n50_list),
    ("Test NDCG@100=%.5f (%.5f)", n100_list),
    ("Test Recall@10=%.5f (%.5f)", r10_list),
    ("Test Recall@25=%.5f (%.5f)", r25_list),
    ("Test Recall@50=%.5f (%.5f)", r50_list),
    ("Test Recall@100=%.5f (%.5f)", r100_list),
):
    user_scores = np.concatenate([np.atleast_1d(batch) for batch in per_batch_scores])
    print(template % (np.nanmean(user_scores),
                      np.nanstd(user_scores) / np.sqrt(len(user_scores))))

## MARec Hybrid solution 

In [None]:
X = df_urm.toarray()
values = df_urm.nonzero()


#initialization
lambda1s = [0.1,1,10,100,1000]
lambda2s = [0.1,1,10,100,1000]
lambda3s = [0.1,1,10,100]
coefs = [0]

ks = [0]

b = np.average(X,0)
xo = np.mean(b)

# NOTE(review): the original read `df_urm_vad`, which is never loaded in this
# notebook (the split name carries a 0.00 validation share) and raised a
# NameError. The held-out test matrix is used instead, matching the "Test ..."
# labels printed below -- confirm this is the intended evaluation set.
vad_test_data = df_urm_test

perc_value = 10
rho = 1

# ---- quantities that do not depend on the hyper-parameters ----
gram = rho*X.T.dot(X)
Xtilde_base = X2bis+X3bis+X4bis+X5bis+X6bis+X7bis+X8bis+X9bis+X10bis
# Interactions per item, and the popularity threshold (never below 1).
vector = np.sum(X, axis=0)
percentile = max(np.percentile(vector,perc_value),1)

for coef in coefs:
    for k in ks:
        for lambda1 in lambda1s:
            # Per-item ridge term. `k` is the loop variable here: the original
            # overwrote it with the slope computed below, so every later
            # iteration silently used a stale value.
            y = lambda1+k*b-k*xo

            for lambda2 in lambda2s:
                # Diagonal of IR: items at or below the threshold get a weight
                # that grows linearly as their interaction count drops; all
                # other items get 0.
                slope = lambda2/percentile
                vector_tr = np.where(vector <= percentile,
                                     slope*(percentile-vector), 0.0)

                for lambda3 in lambda3s:

                    #dense computations
                    #Xtilde
                    Xtilde = lambda3*Xtilde_base
                    print("Xtilde computed")

                    # X^T Xtilde IR: multiplying on the right by a diagonal
                    # matrix is a column-wise scaling, so IR is never built.
                    B_temp = gram+X.T.dot(Xtilde)*vector_tr

                    #compute P
                    P = np.linalg.inv(B_temp+np.diag(y))
                    print("P computed")

                    #update of Bk
                    B_tilde = P.dot(B_temp)
                    # gamma is chosen so that the diagonal of B_k is zero.
                    gamma = np.diag(B_tilde) / np.diag(P)
                    B_k = B_tilde - P*gamma
                    print("B_k updated")

                    pred_val = X.dot(B_k)
                    # exclude examples from training and validation (if any)
                    pred_val[values] = -np.inf

                    n10_list = NDCG_binary_at_k_batch(pred_val, vad_test_data, k=10)
                    n25_list = NDCG_binary_at_k_batch(pred_val, vad_test_data, k=25)

                    r10_list = Recall_at_k_batch(pred_val, vad_test_data, k=10)
                    r25_list = Recall_at_k_batch(pred_val, vad_test_data, k=25)

                    #0cold-
                    print("lambda1={}".format(str(lambda1)))
                    print("lambda2={}".format(str(lambda2)))
                    print("lambda3={}".format(str(lambda3)))

                    print("Test NDCG@10=%.5f (%.5f)" % (np.nanmean(n10_list), np.nanstd(n10_list) / np.sqrt(len(n10_list))))
                    print("Test Recall@10=%.5f (%.5f)" % (np.nanmean(r10_list), np.nanstd(r10_list) / np.sqrt(len(r10_list))))

                    print("Test NDCG@25=%.5f (%.5f)" % (np.nanmean(n25_list), np.nanstd(n25_list) / np.sqrt(len(n25_list))))
                    print("Test Recall@25=%.5f (%.5f)" % (np.nanmean(r25_list), np.nanstd(r25_list) / np.sqrt(len(r25_list))))
