# Environment 3 - Implicit Feedback

This environment is made to be closer to true real life problems. It works with implicit feedback instead of explicit feedback. This means that instead of knowing the real rating of a product, we just know if the user bought it.

**Imports**

In [1]:
import requests

import pandas as pd
import numpy as np

**Getting the data**

In [147]:
USER_ID = '9G08LOYFU88BJ8GHNRU3'
env = 'http://35.180.178.243/'
r = requests.get(url=env+'reset', params= {'user_id':USER_ID})
data = r.json()
nb_users = data['nb_users']
nb_items = data['nb_items']
state_history = data['state_history']
rewards_history = data['rewards_history']
action_history = data['action_history']
next_state = data['next_state']

In [149]:
pos_data = pd.DataFrame(action_history, columns=['actions'])

pos_users = []
pos_items = []
pos_meta = []

for i in range(len(action_history)):
    state = state_history[i]
    action = action_history[i]
    
    pos_users.append(state[0][0])
    pos_items.append(state[action][1])
    pos_meta.append(state[action][2:])
    
    
pos_data['user_id'] = pos_users
pos_data['item_id'] = pos_items
pos_data[['price', '0', '1', '2', '3', '4']] = pd.DataFrame(pos_meta)


pos_data = pos_data.drop('actions', axis=1)

pos_data['action'] = pos_data.reset_index()['index']

pos_data


Unnamed: 0,user_id,item_id,price,0,1,2,3,4,action
0,77,28,28.698722,0.789939,2.507614,0.900374,1.040794,2.902107,0
1,33,26,248.907550,-1.330941,1.305624,1.427514,0.968023,0.186144,1
2,76,8,367.506014,1.043017,-0.384232,0.371040,1.340897,1.158571,2
3,31,15,61.868779,0.400431,0.665219,0.485358,2.523798,-0.459346,3
4,72,7,969.817274,-1.079973,1.119112,1.794775,2.638535,0.742932,4
5,68,28,28.698722,1.196186,0.559321,0.900374,1.040794,2.364322,5
6,90,28,28.698722,-1.912106,1.681800,0.900374,1.040794,1.542127,6
7,74,14,903.194734,1.562109,2.590898,0.679910,1.538540,0.280190,7
8,50,25,993.689301,0.832800,1.486144,1.065739,1.375797,0.177736,8
9,19,7,969.817274,1.732512,0.016619,1.794775,2.638535,1.700727,9


In [206]:
def find_other_random(row, rng):
    neg_id = row['item_id']
    action = row['action']
    ids_list = np.array(state_history[int(action)])[:, 1]
    while neg_id == row['item_id']:
        neg_id = rng.choice(ids_list)
    return int(neg_id)

def get_neg_meta(row):
    a = int(row['action'])
    i = int(row['neg_id'])
    ids_list = np.array(state_history[a])[:, 1]
    indice = np.where(ids_list == i)[0][0]
    return state_history[a][indice][2:]


def sample_train(pos_data, random_seed=0):
    """Sample negatives at random"""
    rng = np.random.RandomState(random_seed)
    
    train_data = pd.DataFrame(pos_data)
    train_data['neg_id'] = train_data.apply(lambda row: find_other_random(row, rng), axis=1)
    
    train_data[['neg_price', 'neg_0', 'neg_1', 'neg_2', 'neg_3', 'neg_4']] = train_data.apply(lambda row: get_neg_meta(row), result_type='expand', axis=1)
    
    
    return train_data

#sample_train(pos_data)

## Model

In [151]:
import tensorflow as tf
from keras.models import Model, Sequential
from keras.layers import Embedding, Flatten, Input, Dense, Dropout
from keras.layers import Concatenate, Lambda
from keras.regularizers import l2

In [152]:
def identity_loss(y_true, y_pred):
    """Ignore y_true and return the mean of y_pred
    
    This is a hack to work-around the design of the Keras API that is
    not really suited to train networks with a triplet loss by default.
    """
    return tf.reduce_mean(y_pred + 0 * y_true)

In [207]:
def margin_comparator_loss_improved(inputs, loss_type="improved2", A=0.01, B=0.):
    """Comparator loss for a pair of precomputed similarities
    
    If the inputs are cosine similarities, they each have range in
    (-1, 1), therefore their difference have range in (-2, 2). Using
    a margin of 1. can therefore make sense.

    If the input similarities are not normalized, it can be beneficial
    to use larger values for the margin of the comparator loss.
    """
    p_sim, n_sim, p_metadata, n_metadata = inputs
    p_price = p_metadata[0]
    n_price = n_metadata[0]
    
    # The basic version of the loss, whithout considering the price.
    if loss_type=='basic':
        return tf.maximum(n_sim - p_sim + 1, 0)
  
    if p_sim == n_sim:
        return tf.maximum((n_price - p_price + B), 0)
    
    # The first version of the improved loss
    if loss_type=="improved1":
        return tf.maximum((A / tf.abs(n_sim - p_sim + 1)) * (n_price - p_price + B) + (n_sim - p_sim), 0)
    
    # A second version of the improved loss
    return tf.maximum((-0.5*tf.abs(n_sim - p_sim)+2) * (5/100) * (n_price - p_price) + (n_sim - p_sim + 1 ), 0)

In [156]:
def make_interaction_mlp(input_dim, n_hidden=1, hidden_size=64,
                         dropout=0, l2_reg=None):
    """Build the shared multi layer perceptron"""
    mlp = Sequential()
    if n_hidden == 0:
        # Plug the output unit directly: this is a simple
        # linear regression model. Not dropout required.
        mlp.add(Dense(1, input_dim=input_dim,
                      activation='sigmoid', kernel_regularizer=l2_reg))
    else:
        mlp.add(Dense(hidden_size, input_dim=input_dim,
                      activation='sigmoid', kernel_regularizer=l2_reg))
        mlp.add(Dropout(dropout))
        for i in range(n_hidden - 1):
            mlp.add(Dense(hidden_size, activation='sigmoid',
                          W_regularizer=l2_reg))
            mlp.add(Dropout(dropout))
        mlp.add(Dense(1, activation='sigmoid', kernel_regularizer=l2_reg))
    return mlp

In [203]:
def build_models(n_users, n_items, n_meta=6, user_dim=5, item_dim=5,
                 n_hidden=1, hidden_size=64, dropout=0, loss_type="improved2", l2_reg=0):
    """Build models to train a deep triplet network"""
    
    user_input = Input((1,), name='user_input')
    positive_item_input = Input((1,), name='positive_item_input')
    positive_meta_data_input = Input((n_meta,), name='positive_meta_data_input')
    negative_item_input = Input((1,), name='negative_item_input')
    negative_meta_data_input = Input((n_meta,), name='negative_meta_data_input')
    
    l2_reg = None if l2_reg == 0 else l2(l2_reg)
    
    user_layer = Embedding(n_users, user_dim, input_length=1,
                           name='user_embedding', embeddings_regularizer=l2_reg)

    # The following embedding parameters will be shared to encode both
    # the positive and negative items.
    item_layer = Embedding(n_items, item_dim, input_length=1,
                           name="item_embedding", embeddings_regularizer=l2_reg)

    #user_embedding = Flatten()(user_layer(user_input))
    #positive_item_embedding = Flatten()(item_layer(positive_item_input)) 
    #negative_item_embedding = Flatten()(item_layer(negative_item_input))
   


    # Similarity computation between embeddings using a MLP similarity
    positive_embeddings_pair = Concatenate(name="positive_embeddings_pair")(
        [user_input, positive_item_input, positive_meta_data_input])
    positive_embeddings_pair = Dropout(dropout)(positive_embeddings_pair)
    negative_embeddings_pair = Concatenate(name="negative_embeddings_pair")(
        [user_input, negative_item_input, negative_meta_data_input])
    negative_embeddings_pair = Dropout(dropout)(negative_embeddings_pair)

    # Instanciate the shared similarity architecture
    interaction_layers = make_interaction_mlp(
        1 + 1 + n_meta, n_hidden=n_hidden, hidden_size=hidden_size,
        dropout=dropout)
    

    positive_similarity = interaction_layers(positive_embeddings_pair)
    negative_similarity = interaction_layers(negative_embeddings_pair)

    # The triplet network model, only used for training
    
    triplet_loss = Lambda(
        margin_comparator_loss_improved,
        arguments={'loss_type':loss_type},
        output_shape=(1,),
        name='comparator_loss')(
            [positive_similarity,
             negative_similarity,
             positive_meta_data_input,
             negative_meta_data_input]
        )

    deep_triplet_model = Model(inputs=[user_input,
                                       positive_item_input,
                                       positive_meta_data_input,
                                       negative_item_input,
                                       negative_meta_data_input],
                               outputs=[triplet_loss])

    
    # The match-score model, only used at inference
    deep_match_model = Model(inputs=[user_input, positive_item_input, positive_meta_data_input],
                             outputs=[positive_similarity])

    return deep_match_model, deep_triplet_model

In [119]:
deep_triplet_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user_input (InputLayer)         (None, 1)            0                                            
__________________________________________________________________________________________________
positive_item_input (InputLayer (None, 1)            0                                            
__________________________________________________________________________________________________
positive_meta_data_input (Input (None, 6)            0                                            
__________________________________________________________________________________________________
negative_item_input (InputLayer (None, 1)            0                                            
__________________________________________________________________________________________________
negative_m

## Running

In [214]:
hyper_parameters = dict(
    n_meta=6,
    user_dim=1,
    item_dim=1,
    n_hidden=2,
    hidden_size=128,
    dropout=0.1,
    loss_type="improved22",
    l2_reg=0
)
deep_match_model, deep_triplet_model = build_models(nb_users, nb_items,
                                                    **hyper_parameters)


deep_triplet_model.compile(loss=identity_loss, optimizer='adam')
fake_y = np.ones_like(pos_data['user_id'])

n_epochs = 15

for i in range(n_epochs):
    # Sample new negatives to build different triplets at each epoch
    train_inputs = sample_train(pos_data, random_seed=i)

    # Fit the model incrementally by doing a single pass over the
    # sampled triplets.
    deep_triplet_model.fit(
        [
            train_inputs['user_id'].values,
            train_inputs['item_id'].values,
            train_inputs[['price', '0', '1', '2', '3','4']].values,
            train_inputs['neg_id'].values,
            train_inputs[['neg_price', 'neg_0', 'neg_1', 'neg_2', 'neg_3','neg_4']].values
        ],
        fake_y,
        epochs=1)


  app.launch_new_instance()


Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1
Epoch 1/1


In [215]:
nb_episodes = 100
cum_rewards=[]

for j in range(3):
    cum_reward = 0
    for i in range(nb_episodes):
        next_state = np.array(next_state)
        predicted = int(np.argmax(deep_match_model.predict([next_state[:,0], next_state[:,1], next_state[:,2:]])))
        r = requests.get(url=env+'predict', params= {'user_id':USER_ID, 'recommended_item': predicted})
        next_state = r.json()['state']
        reward = r.json()['reward']
        cum_reward += reward 
        #print('Predicted: {:d}, reward: {:.3f}'.format(predicted, reward))
    
    print('Cumulative reward: {:.3f}'.format(cum_reward))
    cum_rewards.append(cum_reward)
    

print('\nAverage cumulative reward: {:.3f}'.format(np.mean(cum_rewards)))

Cumulative reward: 22110.680
Cumulative reward: 26536.714
Cumulative reward: 23250.867

Average cumulative reward: 23966.087
