# Environment 2 - Using covariates

In this second environment, we add covariates as extra model inputs to improve the rating predictions.

**Imports**

In [1]:
import requests
import pandas as pd
import numpy as np

from keras.layers import Input, Embedding, Flatten, Dot, Concatenate, Dense, Activation, BatchNormalization, Dropout
from keras.models import Model
from keras.callbacks import EarlyStopping

Using TensorFlow backend.


**Getting the data**

In [2]:
# Identifier sent with every request, and base URL of the remote environment.
# NOTE(review): USER_ID looks like a personal credential committed in clear
# text -- consider loading it from an environment variable instead.
USER_ID = '9G08LOYFU88BJ8GHNRU3'
env = 'http://35.180.254.42/'

# Ask the environment to reset. The timeout keeps a stalled server from
# hanging the kernel forever.
reset_response = requests.get(url=env+'reset', params={'user_id': USER_ID}, timeout=30)
# Fail loudly on an HTTP error instead of trying to decode an error page as JSON.
reset_response.raise_for_status()
data = reset_response.json()

# Sizes used to dimension the embedding tables in build_model.
nb_items = data['nb_items']
nb_users = data['nb_users']

# Past interactions used for the initial training.
item_history = data['item_history']
user_history = data['user_history']
rating_history = data['rating_history']
# Converted to a NumPy array so individual covariate columns can be sliced out.
variables_history = np.array(data['variables_history'])

# First (item, user, covariates) triple whose rating has to be predicted.
next_item = data['next_item']
next_user = data['next_user']
next_variables = data['next_variables']

In [3]:
# Assemble the training table: ids and rating first, then one column per
# covariate (variable0 ... variable4), in that order.
ratings_columns = {'user_id': user_history, 'item_id': item_history, 'rating': rating_history}
for col_idx in range(5):
    ratings_columns['variable{:d}'.format(col_idx)] = variables_history[:, col_idx]

ratings = pd.DataFrame(data=ratings_columns)
ratings.head()

Unnamed: 0,user_id,item_id,rating,variable0,variable1,variable2,variable3,variable4
0,86,203,2,0.758556,0.69042,0.20652,1.642183,-0.343074
1,51,37,1,0.399668,0.66394,3.518021,1.89348,0.161181
2,87,274,5,0.975652,-0.383725,1.53627,1.13556,1.410745
3,26,82,2,-0.205529,1.058542,1.575781,-0.343903,0.094833
4,92,133,2,0.485205,-1.021502,2.082121,1.421829,1.296152


## Building the model

In [4]:
def build_model(embedding_size = 5, nb_variables = 5):
    """Build the (uncompiled) rating-prediction network.

    The user id and the item id are each mapped to a learned embedding; the two
    flattened embeddings are concatenated with the covariates vector and fed to
    a stack of three ReLU dense layers (100, 80, 60 units, with dropout after
    the first two) followed by a single linear output unit.

    Parameters
    ----------
    embedding_size : int, optional
        Dimension of both the user and the item embedding vectors.
    nb_variables : int, optional
        Number of covariates supplied per sample. Defaults to 5, the width
        that was previously hard-coded.

    Returns
    -------
    keras.models.Model
        Model taking ``[user, item, variables]`` as inputs and producing one
        value per sample. The caller is responsible for compiling it.

    Notes
    -----
    Reads the module-level ``nb_users`` and ``nb_items`` to size the embedding
    tables, so the data-loading cell must have been run first.
    """

    user_id_input = Input(shape=[1], name='user')
    item_id_input = Input(shape=[1], name='item')
    variables_input = Input(shape=[nb_variables], name='variables')

    # NOTE(review): the "+ 1" presumably leaves room for ids running up to
    # nb_users / nb_items inclusive -- confirm against the environment's id range.
    user_embedding = Embedding(output_dim=embedding_size,
                               input_dim=nb_users + 1,
                               input_length=1,
                               name='user_embedding')(user_id_input)

    item_embedding = Embedding(output_dim=embedding_size,
                               input_dim=nb_items + 1,
                               input_length=1,
                               name='item_embedding')(item_id_input)

    # Drop the length-1 sequence axis produced by the embedding layers.
    user_vecs = Flatten()(user_embedding)
    item_vecs = Flatten()(item_embedding)

    # Covariates are fed in raw, side by side with the learned embeddings.
    concat = Concatenate()([user_vecs, item_vecs, variables_input])

    first_dense = Dense(100)(concat)
    first_activation = Activation('relu')(first_dense)

    first_dropout = Dropout(0.4)(first_activation)

    second_dense = Dense(80)(first_dropout)
    second_activation = Activation('relu')(second_dense)

    second_dropout = Dropout(0.4)(second_activation)

    third_dense = Dense(60)(second_dropout)
    third_activation = Activation('relu')(third_dense)

    # Single linear unit: the predicted rating (no dropout on this last stage).
    rating_output = Dense(1)(third_activation)

    model = Model(inputs=[user_id_input, item_id_input, variables_input], outputs=rating_output)

    return model

In [5]:
# Instantiate the network with its default hyper-parameters and print the
# layer-by-layer summary table.
model = build_model()
model.summary()

# Configure training: mean squared error loss, minimised with Adam.
model.compile(loss='MSE', optimizer='adam')

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
user (InputLayer)               (None, 1)            0                                            
__________________________________________________________________________________________________
item (InputLayer)               (None, 1)            0                                            
__________________________________________________________________________________________________
user_embedding (Embedding)      (None, 1, 5)         505         user[0][0]                       
__________________________________________________________________________________________________
item_embedding (Embedding)      (None, 1, 5)         1505        item[0][0]                       
__________________________________________________________________________________________________
flatten_1 

In [6]:
# Inputs in the order the model declares them: user ids, item ids, covariates.
covariate_columns = ['variable{:d}'.format(k) for k in range(5)]
train_inputs = [
    ratings['user_id'],
    ratings['item_id'],
    ratings[covariate_columns],
]

# Monitor the validation loss with a patience of 2 epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=2)

# 10% of the history is held out for validation; at most 20 epochs are run.
fit_options = dict(
    batch_size=64,
    epochs=20,
    validation_split=0.1,
    shuffle=True,
    callbacks=[early_stopping],
)
model.fit(train_inputs, ratings['rating'], **fit_options)


Train on 1800 samples, validate on 200 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20


<keras.callbacks.History at 0x7fa8165e1160>

## Testing

In [7]:
# Online evaluation: predict one rating at a time, send it to the environment,
# read back the true rating, and refit the model on every completed batch of
# fresh observations.
nb_samples = 1000
retrain_every = 100  # number of new observations collected between two refits
mse = 0  # running sum of squared errors; divided by nb_samples at the end

# Observations gathered since the last refit.
users_list = []
ratings_list = []
items_list = []
variables_list = []

for i in range(nb_samples):
    # Explicit arrays (batch of one sample per input) instead of nested Python
    # lists, so the call does not rely on Keras' implicit list conversion.
    prediction = model.predict([
        np.array([next_user]),
        np.array([next_item]),
        np.array([next_variables]),
    ])
    # Single sample, single output unit: take the scalar as a built-in float
    # for both the request parameter and the error sum.
    predicted_score = float(prediction[0, 0])

    r = requests.get(url=env + 'predict',
                     params={'user_id': USER_ID, 'predicted_score': predicted_score},
                     timeout=30)
    # Surface HTTP failures here rather than as a confusing JSON/KeyError below.
    r.raise_for_status()
    # Decode the body once and reuse it for the rating and the next triple.
    feedback = r.json()

    true_rating = feedback['rating']
    mse += (true_rating - predicted_score)**2

    users_list.append(next_user)
    ratings_list.append(true_rating)
    items_list.append(next_item)
    variables_list.append(next_variables)

    if (i+1) % retrain_every == 0:
        print("Episode {:d}/{:d}".format(i+1, nb_samples))
        model.fit(
            [np.array(users_list), np.array(items_list), np.array(variables_list)],
            np.array(ratings_list),
            verbose=0,
            epochs=20)
        # Start a fresh batch; earlier observations are not replayed.
        users_list = []
        ratings_list = []
        items_list = []
        variables_list = []

    # The same response also announces the next triple to predict.
    next_item = feedback['next_item']
    next_user = feedback['next_user']
    next_variables = feedback['next_variables']

print('\nMSE: ', mse/nb_samples )

Episode 100/1000
Episode 200/1000
Episode 300/1000
Episode 400/1000
Episode 500/1000
Episode 600/1000
Episode 700/1000
Episode 800/1000
Episode 900/1000
Episode 1000/1000

MSE:  1.0433540274132733
