# Dataset

In [12]:
import pandas as pd

pd_train = pd.read_csv('training-ratings.csv', header = 0, names=['u', 'i', 'r'])
pd_train

Unnamed: 0,u,i,r
0,0,1032,5.0
1,0,1044,4.0
2,0,1045,3.0
3,0,1066,4.0
4,0,1069,5.0
...,...,...,...
911026,6039,3090,5.0
911027,6039,3121,1.0
911028,6039,3179,5.0
911029,6039,3182,3.0


In [13]:
pd_test = pd.read_csv('test-ratings.csv', header = 0, names=['u', 'i', 'r'])
pd_test

Unnamed: 0,u,i,r
0,0,0,4.0
1,0,1,3.0
2,0,2,5.0
3,0,3,4.0
4,0,4,3.0
...,...,...,...
89173,1757,541,3.0
89174,1757,603,3.0
89175,1757,688,3.0
89176,1757,690,2.0


## EVO DeepLearning

In [14]:
import tensorflow as tf
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
from tensorflow.keras.layers import Embedding, Input, Dense, Reshape, Flatten, Concatenate, Multiply
import numpy as np
import matplotlib.pyplot as plt

n_users = max(pd_train['u'].max(), pd_test['u'].max())
n_items = max(pd_train['i'].max(), pd_test['i'].max())

print(n_users, n_items)

K=8
EPOCH = 20
BATCH = 64

user_in = Input(shape=[1], name='ui')
user_e = Embedding(n_users+1, K, name="user_e", activation='relu')(user_in)

item_in = Input(shape=[1], name='ii')
item_e = Embedding(n_items+1, K, name="item_e", activation='relu')(item_in)


x_u = layers.Dropout(0.1)(user_e)
x_i = layers.Dropout(0.1)(item_e)

x_u = layers.Dense(44, activation='relu', kernel_initializer='he_uniform')(x_u)
x_i = layers.Dense(44, activation='relu', kernel_initializer='he_uniform')(x_i)

x_d = Multiply()([x_u, x_i])

x_d = layers.Dense(24, activation='relu', kernel_initializer='he_normal')(x_d)
x_d = layers.Dense(16, activation='elu', kernel_initializer='zero')(x_d)

outputs = Dense(1, activation='linear', kernel_initializer='lecun_uniform', name = 'prediction')(x_d)

model = keras.Model(inputs=[user_in, item_in], outputs=outputs, name='EvoDeep')

model.compile(
    loss=tf.keras.losses.MeanAbsoluteError(),
    optimizer=keras.optimizers.Adam(lr=0.001)
)

model.summary()


history = model.fit(
    [pd_train['u'], pd_train['i']],
    pd_train['r'],
    epochs=EPOCH,
    batch_size=BATCH,
    verbose=1,
)

6039 3705


TypeError: ('Keyword argument not understood:', 'activation')

In [4]:
results = model.evaluate(
    x=[pd_test['u'], pd_test['i']],
    y=pd_test['r']
)
results



0.6739329695701599

# MaxtrixFactorization

In [8]:
from surprise import NMF
from surprise import accuracy
from surprise import Dataset
from surprise import Reader
from surprise.model_selection import train_test_split

reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(pd_train[['u', 'i', 'r']], reader)
trainset = data.build_full_trainset()
#, testset = train_test_split(data, test_size=.0) # Lo tenemos en un fichero a parte

In [15]:
algo = NMF(biased=True)
algo.fit(trainset)

<surprise.prediction_algorithms.matrix_factorization.NMF at 0x7f9c00185be0>

In [11]:
predictions = algo.test(pd_test.to_records(index=False))
accuracy.mae(predictions)

MAE:  0.7202


0.7201687947795056