# Environment 2 - Using covariates

In this second environment, we add covariates to the model inputs to obtain better predictions.

**Imports**

In [1]:
import requests
import pandas as pd
import numpy as np

from keras.layers import Input, Embedding, Flatten, Dot, Concatenate, Dense, Activation, BatchNormalization, Dropout
from keras.models import Model
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


**Getting the data**

In [2]:
# NOTE(review): the user id is hard-coded in the notebook; consider loading it
# from configuration if it is meant to stay private.
USER_ID = '9G08LOYFU88BJ8GHNRU3'
env = 'http://35.180.254.42/'

# Call the 'reset' endpoint and fail loudly on an HTTP error or an unresponsive
# server, instead of surfacing later as a JSON decoding error or a KeyError.
reset_response = requests.get(url=env + 'reset', params={'user_id': USER_ID}, timeout=30)
reset_response.raise_for_status()
data = reset_response.json()

# Historical interactions returned by the reset call.
nb_items = data['nb_items']
nb_users = data['nb_users']
item_history = data['item_history']
user_history = data['user_history']
rating_history = data['rating_history']
# Converted to an array so that individual covariate columns can be sliced.
variables_history = np.array(data['variables_history'])

# First (user, item, covariates) triple for which a prediction is requested.
next_item = data['next_item']
next_user = data['next_user']
next_variables = data['next_variables']

In [3]:
# Gather the interaction history into a single table: the ids and the observed
# rating first, followed by the five covariate columns variable0..variable4.
ratings_columns = {
    'user_id': user_history,
    'item_id': item_history,
    'rating': rating_history,
}
for variable_index in range(5):
    ratings_columns['variable{}'.format(variable_index)] = variables_history[:, variable_index]

ratings = pd.DataFrame(data=ratings_columns)
ratings.head()

Unnamed: 0,user_id,item_id,rating,variable0,variable1,variable2,variable3,variable4
0,71,136,4,1.824085,2.893115,1.219532,0.334681,0.528621
1,27,236,5,1.676702,1.494615,1.254594,2.080134,0.927485
2,11,215,5,1.119021,0.69064,1.462471,-1.023091,-0.033058
3,61,18,3,-0.672768,1.184261,0.403368,1.37202,1.038933
4,16,164,3,-0.136157,1.016314,0.90148,-0.008183,2.49079


## Building the model

In [4]:
def build_model(embedding_size = 5, nb_variables = 5, dropout_rate = 0):
    """Build the (uncompiled) rating-regression network.

    The model takes a user id, an item id and a vector of covariates. Both ids
    are embedded, the two embeddings are flattened and concatenated with the
    covariates, and the result goes through three 256-unit ReLU layers before
    a single linear output unit.

    The embedding table sizes are read from the module-level ``nb_users`` and
    ``nb_items``.

    Parameters
    ----------
    embedding_size : int
        Dimension of the user and item embeddings.
    nb_variables : int
        Number of covariates per sample (width of the 'variables' input).
        Defaults to 5, the value that was previously hard-coded.
    dropout_rate : float
        Rate of the dropout layers placed after the first and second hidden
        layers. The default of 0 disables dropout, as in the original network.

    Returns
    -------
    keras.models.Model
        Model with inputs ``[user, item, variables]`` and one scalar output.
        The caller is responsible for compiling it.
    """
    user_id_input = Input(shape=[1], name='user')
    item_id_input = Input(shape=[1], name='item')
    variables_input = Input(shape=[nb_variables], name='variables')

    # One extra embedding row is allocated per table — presumably so that an id
    # equal to nb_users / nb_items is still in range; confirm the id convention.
    user_embedding = Embedding(output_dim=embedding_size,
                               input_dim=nb_users + 1,
                               input_length=1,
                               name='user_embedding')(user_id_input)

    item_embedding = Embedding(output_dim=embedding_size,
                               input_dim=nb_items + 1,
                               input_length=1,
                               name='item_embedding')(item_id_input)

    # Drop the length-1 sequence axis: (batch, 1, embedding_size) -> (batch, embedding_size).
    user_vecs = Flatten()(user_embedding)
    item_vecs = Flatten()(item_embedding)

    concat = Concatenate()([user_vecs, item_vecs, variables_input])

    first_dense = Dense(256)(concat)
    first_activation = Activation('relu')(first_dense)
    first_dropout = Dropout(dropout_rate)(first_activation)

    second_dense = Dense(256)(first_dropout)
    second_activation = Activation('relu')(second_dense)
    second_dropout = Dropout(dropout_rate)(second_activation)

    # The last hidden layer is not followed by dropout.
    third_dense = Dense(256)(second_dropout)
    third_activation = Activation('relu')(third_dense)

    # Linear output: the rating is predicted as an unbounded real value.
    output = Dense(1)(third_activation)

    model = Model(inputs=[user_id_input, item_id_input, variables_input], outputs=output)

    return model

In [None]:
# Instantiate the network with its default hyper-parameters and print its layers.
model = build_model()
model.summary()

# Rating prediction is trained as a regression: mean squared error with Adam.
model.compile(loss='MSE', optimizer='adam')

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user (InputLayer)               (None, 1)            0                                            
__________________________________________________________________________________________________
item (InputLayer)               (None, 1)            0                                            
__________________________________________________________________________________________________
user_embedding (Embedding)      (None, 1, 5)         505         user[0][0]                       
__________________________________________________________________________________________________
item_embedding (Embedding)      (None, 1, 5)         1505        item[0][0]                       
__________________________________________________________________________________________________
flatten_1 

In [None]:
# Stop training once the validation loss has not improved for 2 epochs.
early_stopping = EarlyStopping(patience=2, monitor='val_loss')

# Inputs are passed in the order the model declares them: user, item, covariates.
variable_columns = ['variable0', 'variable1', 'variable2', 'variable3', 'variable4']
training_inputs = [
    ratings['user_id'],
    ratings['item_id'],
    ratings[variable_columns],
]
training_targets = ratings['rating']

# 20% of the history is held out for validation; at most 50 epochs are run.
model.fit(
    training_inputs,
    training_targets,
    epochs=50,
    batch_size=64,
    shuffle=True,
    validation_split=0.2,
    callbacks=[early_stopping],
)


Train on 1600 samples, validate on 400 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50


<keras.callbacks.History at 0x7f0329c6b748>

## Testing

In [None]:
nb_samples = 1000
mse = 0
users_list = []
ratings_list = []
items_list = []
variables_list = []

# `stopped_epoch` is 0 when early stopping never triggered during the initial
# training; passing it straight to `fit` would then train for zero epochs and
# silently skip every online update. Always train for at least one epoch.
retrain_epochs = max(1, early_stopping.stopped_epoch)

for i in range(nb_samples):
    # Predict the rating of the current (user, item, covariates) triple.
    # Explicit arrays avoid relying on Keras' handling of nested Python lists.
    predicted_score = float(
        model.predict([np.array([next_user]), np.array([next_item]), np.array([next_variables])])[0, 0]
    )

    # Submit the prediction; the reply holds the true rating and the next triple.
    r = requests.get(url=env + 'predict', params={'user_id': USER_ID, 'predicted_score': predicted_score}, timeout=30)
    r.raise_for_status()
    response = r.json()  # parse the payload once instead of once per field

    true_rating = response['rating']
    mse += (true_rating - predicted_score)**2

    # Buffer the newly observed sample for the next incremental update.
    users_list.append(next_user)
    ratings_list.append(true_rating)
    items_list.append(next_item)
    variables_list.append(next_variables)

    if (i+1)%100 == 0:
        print("Episode {:d}/{:d}".format(i+1, nb_samples))
        # Fine-tune on the last 100 observations only, then clear the buffers.
        model.fit(
            [np.array(users_list), np.array(items_list), np.array(variables_list)],
            np.array(ratings_list),
            verbose=0,
            epochs=retrain_epochs,
        )
        users_list = []
        ratings_list = []
        items_list = []
        variables_list = []

    next_item = response['next_item']
    next_user = response['next_user']
    next_variables = response['next_variables']

print('\nMSE: ', mse/nb_samples )

Episode 100/1000
Episode 200/1000
Episode 300/1000
Episode 400/1000
Episode 500/1000
Episode 600/1000
Episode 700/1000
Episode 800/1000
Episode 900/1000
