In [2]:
import numpy as np

# Data

MovieLens

In [3]:
# Directory holding the MovieLens data files. The trailing slash matters:
# file names are appended to this string by plain concatenation.
path = "ml-1m/"

In [4]:
# Peek at the first raw line to learn the field separator. The context manager
# closes the handle again (the original left the file open for the whole session).
with open(path + 'ratings.dat') as f:
    first_line = f.readline()
first_line

'1::1193::5::978300760\n'

In [5]:
# https://docs.scipy.org/doc/numpy/reference/generated/numpy.loadtxt.html
# The file separates fields with the two-character token "::", but NumPy >= 1.23
# only accepts single-character delimiters in loadtxt. Rewrite the separator to
# "," on the fly; every field is parsed as an integer, so no field can contain a comma.
with open(path + 'ratings.dat') as ratings_file:
    ratings = np.loadtxt((line.replace('::', ',') for line in ratings_file),
                         dtype=int, delimiter=',')

##### 1m ratings

All ratings are contained in the file `ratings.dat`. The file has no header row; each line represents one rating of one movie by one user, and has the following format:

    UserID::MovieID::Rating::Timestamp

The lines within this file appear to be grouped by user ID; within a user they are not sorted by movie ID (see the first rows printed below).

Ratings are made on a 5-star scale in whole-star increments, which is why the file can be loaded with `dtype=int`.

Timestamps represent seconds since midnight Coordinated Universal Time (UTC) of January 1, 1970.


In [5]:
# Sanity check: one row per rating, four columns (user id, movie id, rating, timestamp).
ratings.shape

(1000209, 4)

In [6]:
# Look at the first ten parsed rows to confirm the columns landed where expected.
ratings[:10]

array([[        1,      1193,         5, 978300760],
       [        1,       661,         3, 978302109],
       [        1,       914,         3, 978301968],
       [        1,      3408,         4, 978300275],
       [        1,      2355,         5, 978824291],
       [        1,      1197,         3, 978302268],
       [        1,      1287,         5, 978302039],
       [        1,      2804,         5, 978300719],
       [        1,       594,         4, 978302268],
       [        1,       919,         4, 978301368]])

In [6]:
# Seed NumPy's global RNG so the split below is reproducible.
# (`np.random.seed = 50` would merely rebind the attribute: nothing gets seeded
# and the seed function itself is lost for the rest of the session.)
np.random.seed(50)

# Random ~80/20 row-wise split into training and validation ratings.
msk = np.random.rand(len(ratings)) < 0.8
trn = ratings[msk]
val = ratings[~msk]


In [7]:
# Raw user ids are fed straight into the embedding lookup, so the table needs
# (largest id + 1) rows.
n_users = ratings[:, 0].max() + 1
n_users

6041

In [8]:
# Same sizing rule for movies: raw movie ids index the embedding table directly.
n_movies = ratings[:, 1].max() + 1
n_movies

3953

# Models

State-of-the-art: http://www.mymedialite.net/examples/datasets.html

In [89]:
from keras.models import Sequential, Model
from keras.layers import Embedding, Dense, Dropout, Dot, Add, Input, Flatten, Concatenate, BatchNormalization, Activation, Lambda
from keras import regularizers
from keras.optimizers import Adam

### MF model

input
$$(u,m)$$
embedding
$$\to (e_u,e_m)$$
dot product
$$\to  e_u\cdot e_m$$
to fit the rating

In [143]:
# Latent dimensionality used by the plain MF model's embeddings.
n_factors = 10

# User branch: a single integer id in, an n_factors-dimensional vector out.
u_input = Input(shape=(1,))
u_emb = Embedding(n_users, n_factors)(u_input)

In [144]:
# Movie branch, mirroring the user branch: one id in, one latent vector out.
m_input = Input(shape=(1,))
m_emb = Embedding(n_movies, n_factors)(m_input)

In [158]:
# Each embedding has shape (batch, 1, n_factors); contracting axis 2 gives the
# user-movie dot product, which Flatten then reduces to shape (batch, 1).
dot = Dot(axes=2)([u_emb,m_emb])
out = Flatten()(dot)

In [159]:
# Plain matrix factorisation: (user id, movie id) -> predicted rating.
mf_model = Model(inputs=[u_input,m_input], outputs=out)

In [160]:
# https://keras.io/optimizers/#adam
# The string 'adam' selects Adam with its default settings (learning rate = 0.001).
# Ratings are regressed directly, hence mean squared error.
mf_model.compile(loss='mse', optimizer='adam')

In [161]:
# Smoke test: predict for the first ten training pairs (column 0 = user, column 1 = movie).
mf_model.predict([trn[:10, col] for col in (0, 1)])

array([[ 4.27939034],
       [ 3.45203161],
       [ 4.18549109],
       [ 4.08537817],
       [ 4.29241276],
       [ 3.94918108],
       [ 4.1293025 ],
       [ 3.88715553],
       [ 4.09248304],
       [ 3.85607409]], dtype=float32)

In [141]:
# Train on the (user, movie) -> rating triples; no validation monitoring in this run.
mf_model.fit(
    x=[trn[:, 0], trn[:, 1]],
    y=trn[:, 2],
    batch_size=64,
    epochs=6,
)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x1f03cb710>

In [148]:
# Same training call, now reporting the loss on the held-out ratings after each epoch.
# NOTE: fit() resumes from the model's current weights, so if an earlier fit cell
# has already been run these are additional epochs, not a fresh start.
mf_model.fit(
    x=[trn[:, 0], trn[:, 1]],
    y=trn[:, 2],
    batch_size=64,
    epochs=6,
    validation_data=([val[:, 0], val[:, 1]], val[:, 2]),
)

Train on 799442 samples, validate on 200767 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x1f251f160>

### MF model with bias

$$(u,m)\to (e_u,e_m,b_u,b_m)\to e_u\cdot e_m+b_u+b_m$$
where
$$e_u,e_m\in R^{n_{factor}},b_u,b_m\in R$$

MF + bias + sigmoid output

$$(u,m)\to (e_u,e_m,b_u,b_m)\to 5\sigma(e_u\cdot e_m+b_u+b_m)\in [0,5]$$

##### Unlike other MF models, I don't treat non-existing ratings as zeros!

In [175]:
def get_mfb_model(n_factors = 10, reg = 1e-3):
    """Build a matrix-factorisation model with bias terms and a bounded output.

    The prediction for a (user, movie) pair is
    ``5 * sigmoid(e_u . e_m + b_u + b_m)``, i.e. always inside [0, 5].

    Args:
        n_factors: Size of the user and movie latent vectors.
        reg: L2 penalty applied to both latent-vector embedding tables.
            A falsy value (0 or None) disables the penalty. The bias tables
            are never regularised.

    Returns:
        An uncompiled Keras ``Model`` taking ``[user_ids, movie_ids]`` and
        producing one rating per pair.

    Relies on the notebook globals ``n_users`` and ``n_movies`` for the
    embedding table sizes.
    """

    def _l2():
        # A fresh regulariser per layer, or none at all when the penalty is off.
        return regularizers.l2(reg) if reg else None

    u_input = Input(shape = (1,))
    u_emb = Embedding(input_dim=n_users,
                      output_dim=n_factors,
                      embeddings_regularizer=_l2())(u_input)

    m_input = Input(shape = (1,))
    m_emb = Embedding(input_dim=n_movies,
                      output_dim=n_factors,
                      embeddings_regularizer=_l2())(m_input)

    # Scalar bias per user / per movie, flattened to (batch, 1) so that every
    # summand below has the same rank instead of relying on implicit broadcasting.
    u_b = Flatten()(Embedding(input_dim=n_users, output_dim=1)(u_input))
    m_b = Flatten()(Embedding(input_dim=n_movies, output_dim=1)(m_input))

    # (batch, 1, n_factors) x (batch, 1, n_factors) -> (batch, 1, 1) -> (batch, 1)
    dot = Flatten()(Dot(axes=2)([u_emb,m_emb]))

    out = Add()([dot,u_b,m_b])

    # Squash to (0, 1), then stretch to the 5-star scale.
    out = Activation('sigmoid')(out)
    out = Lambda(lambda x:5*x)(out)

    model = Model(inputs=[u_input,m_input], outputs=out)

    return model

In [176]:
# Instantiate the biased MF model with 50 latent factors.
mfb_model = get_mfb_model(n_factors=50)

In [177]:
# Explicit Adam instance so the learning rate is easy to tune; 0.001 is also
# what optimizer='adam' would use by default.
mfb_model.compile(loss='mse', optimizer=Adam(0.001))

In [178]:
# Smoke test on the first ten training pairs (column 0 = user, column 1 = movie).
mfb_model.predict([trn[:10, col] for col in (0, 1)])

array([[ 2.48019314],
       [ 2.47084451],
       [ 2.50200295],
       [ 2.51603889],
       [ 2.52446055],
       [ 2.5035429 ],
       [ 2.48992205],
       [ 2.46621227],
       [ 2.45354271],
       [ 2.4552393 ]], dtype=float32)

In [179]:
# Fit the biased MF model, tracking the held-out loss after every epoch.
mfb_model.fit(
    x=[trn[:, 0], trn[:, 1]],
    y=trn[:, 2],
    batch_size=64,
    epochs=6,
    validation_data=([val[:, 0], val[:, 1]], val[:, 2]),
)

Train on 800697 samples, validate on 199512 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x13d26f940>

Model 1: MF + bias

```
n_factor =10

Train on 799442 samples, validate on 200767 samples
Epoch 1/6
799442/799442 [==============================] - 39s 49us/step - loss: 1.1622 - val_loss: 0.8879
Epoch 2/6
799442/799442 [==============================] - 38s 48us/step - loss: 0.8572 - val_loss: 0.8529
Epoch 3/6
799442/799442 [==============================] - 37s 46us/step - loss: 0.8264 - val_loss: 0.8249
Epoch 4/6
799442/799442 [==============================] - 35s 44us/step - loss: 0.7968 - val_loss: 0.8055
Epoch 5/6
799442/799442 [==============================] - 36s 44us/step - loss: 0.7725 - val_loss: 0.7937
Epoch 6/6
799442/799442 [==============================] - 35s 44us/step - loss: 0.7494 - val_loss: 0.7823
```

Model 2: MF + bias, n_factor =50, overfitting
```
Train on 800697 samples, validate on 199512 samples
Epoch 1/6
800697/800697 [==============================] - 118s 148us/step - loss: 2.6856 - val_loss: 0.8870
Epoch 2/6
800697/800697 [==============================] - 118s 147us/step - loss: 0.8365 - val_loss: 0.8287
Epoch 3/6
800697/800697 [==============================] - 116s 145us/step - loss: 0.7556 - val_loss: 0.7949
Epoch 4/6
800697/800697 [==============================] - 117s 146us/step - loss: 0.6772 - val_loss: 0.7861
Epoch 5/6
800697/800697 [==============================] - 113s 141us/step - loss: 0.6015 - val_loss: 0.7958
Epoch 6/6
800697/800697 [==============================] - 113s 142us/step - loss: 0.5394 - val_loss: 0.8215
```

Model 3: MF + bias, n_factor =50, regularized (l2, 1e-5)
```
Train on 800697 samples, validate on 199512 samples
Epoch 1/6
800697/800697 [==============================] - 150s 187us/step - loss: 5.6244 - val_loss: 1.6054
Epoch 2/6
800697/800697 [==============================] - 145s 182us/step - loss: 1.1624 - val_loss: 0.9754
Epoch 3/6
800697/800697 [==============================] - 139s 174us/step - loss: 0.9061 - val_loss: 0.8804
Epoch 4/6
800697/800697 [==============================] - 146s 182us/step - loss: 0.8504 - val_loss: 0.8518
Epoch 5/6
800697/800697 [==============================] - 138s 173us/step - loss: 0.8315 - val_loss: 0.8408
Epoch 6/6
800697/800697 [==============================] - 146s 182us/step - loss: 0.8237 - val_loss: 0.8361
```

##### Model 4: MF + bias + sigmoid output
```
n_factor = 10

Train on 800697 samples, validate on 199512 samples
Epoch 1/6
800697/800697 [==============================] - 34s 43us/step - loss: 1.0465 - val_loss: 0.8017
Epoch 2/6
800697/800697 [==============================] - 35s 44us/step - loss: 0.7645 - val_loss: 0.7705
Epoch 3/6
800697/800697 [==============================] - 39s 49us/step - loss: 0.7237 - val_loss: 0.7541
Epoch 4/6
800697/800697 [==============================] - 44s 55us/step - loss: 0.6893 - val_loss: 0.7465
Epoch 5/6
800697/800697 [==============================] - 46s 58us/step - loss: 0.6629 - val_loss: 0.7468
Epoch 6/6
800697/800697 [==============================] - 45s 56us/step - loss: 0.6448 - val_loss: 0.7490

<keras.callbacks.History at 0x11b3a69e8>
```
##### Sigmoid output improves 4%!

Model 5: MF + bias + sigmoid output, n_factor = 50, overfitting
```
Train on 800697 samples, validate on 199512 samples
Epoch 1/6
800697/800697 [==============================] - 126s 157us/step - loss: 1.0027 - val_loss: 0.7844
Epoch 2/6
800697/800697 [==============================] - 125s 156us/step - loss: 0.6991 - val_loss: 0.7445
Epoch 3/6
800697/800697 [==============================] - 117s 146us/step - loss: 0.5832 - val_loss: 0.7610
Epoch 4/6
800697/800697 [==============================] - 114s 142us/step - loss: 0.4992 - val_loss: 0.7998
Epoch 5/6
800697/800697 [==============================] - 112s 140us/step - loss: 0.4464 - val_loss: 0.8421
Epoch 6/6
800697/800697 [==============================] - 116s 145us/step - loss: 0.4123 - val_loss: 0.8784

<keras.callbacks.History at 0x12417f978>
```


### NN model

linear model
$$(u,m)\to (e_u,e_m)\to We_u+W'e_m\in R$$
where the weights $W,W'$ are independent of $u$ and $m.$

DNN
$$(u,m)\to (e_u,e_m)\to DNN(e_u,e_m)\in R$$

In [24]:
def get_linear_model(n_factors=10):
    """Build the linear baseline: one affine map over the concatenated embeddings.

    The user and movie latent vectors (``n_factors`` entries each) are joined
    and passed through a single ``Dense(1)`` layer with no activation.
    Embedding table sizes come from the notebook globals ``n_users`` and
    ``n_movies``.

    Returns an uncompiled Keras ``Model`` taking ``[user_ids, movie_ids]``.
    """
    in_u = Input(shape=(1,))
    in_m = Input(shape=(1,))

    user_vec = Embedding(n_users, n_factors)(in_u)
    movie_vec = Embedding(n_movies, n_factors)(in_m)

    # (batch, 1, n_factors) twice -> (batch, 1, 2 * n_factors) -> (batch, 2 * n_factors)
    joined = Concatenate()([user_vec, movie_vec])
    features = Flatten()(joined)

    rating = Dense(units=1)(features)

    return Model(inputs=[in_u, in_m], outputs=rating)

In [25]:
# Linear baseline with 50 latent factors per embedding.
linear_model = get_linear_model(n_factors=50)

In [26]:
# Default Adam, mean-squared-error regression on the raw ratings.
linear_model.compile(loss='mse', optimizer='adam')

In [188]:
# Smoke test on the first ten training pairs (column 0 = user, column 1 = movie).
linear_model.predict([trn[:10, col] for col in (0, 1)])

array([[ 0.01178968],
       [ 0.00800661],
       [ 0.05860475],
       [ 0.04867204],
       [ 0.08399892],
       [-0.00301455],
       [ 0.06716891],
       [ 0.06247176],
       [ 0.00345044],
       [ 0.01526604]], dtype=float32)

In [27]:
# Fit the linear baseline, tracking the held-out loss after every epoch.
linear_model.fit(
    x=[trn[:, 0], trn[:, 1]],
    y=trn[:, 2],
    batch_size=64,
    epochs=6,
    validation_data=([val[:, 0], val[:, 1]], val[:, 2]),
)

Train on 799629 samples, validate on 200580 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x12c136860>

linear model
```
n_factor =10

Train on 799442 samples, validate on 200767 samples
Epoch 1/6
799442/799442 [==============================] - 37s 46us/step - loss: 1.2824 - val_loss: 0.8614
Epoch 2/6
799442/799442 [==============================] - 36s 45us/step - loss: 0.8466 - val_loss: 0.8438
Epoch 3/6
799442/799442 [==============================] - 39s 48us/step - loss: 0.8343 - val_loss: 0.8383
Epoch 4/6
799442/799442 [==============================] - 35s 44us/step - loss: 0.8285 - val_loss: 0.8345
Epoch 5/6
799442/799442 [==============================] - 36s 45us/step - loss: 0.8250 - val_loss: 0.8329
Epoch 6/6
799442/799442 [==============================] - 36s 45us/step - loss: 0.8225 - val_loss: 0.8317
```

```
n_factor=50

Train on 799629 samples, validate on 200580 samples
Epoch 1/6
799629/799629 [==============================] - 102s 128us/step - loss: 1.1326 - val_loss: 0.8708
Epoch 2/6
799629/799629 [==============================] - 101s 126us/step - loss: 0.8484 - val_loss: 0.8476
Epoch 3/6
799629/799629 [==============================] - 106s 132us/step - loss: 0.8340 - val_loss: 0.8454
Epoch 4/6
799629/799629 [==============================] - 102s 127us/step - loss: 0.8281 - val_loss: 0.8377
Epoch 5/6
799629/799629 [==============================] - 106s 132us/step - loss: 0.8246 - val_loss: 0.8351
Epoch 6/6
799629/799629 [==============================] - 123s 154us/step - loss: 0.8227 - val_loss: 0.8327
```

nn model
$$(u,m)\to (e_u,e_m)\xrightarrow{\text{MLP}} \text{output}$$

In [180]:
def get_nn_model(n_factors=10, hidden_fractions=(0.75,), dropout=0.4):
    """Build an MLP over the concatenated user and movie embeddings.

    Args:
        n_factors: Size of each of the two embeddings; the MLP input therefore
            has ``2 * n_factors`` features.
        hidden_fractions: One entry per hidden ReLU layer, giving its width as
            a fraction of ``2 * n_factors``. The default ``(0.75,)`` is a
            single hidden layer; ``(0.75, 0.2)`` adds a second, narrower one.
        dropout: Dropout rate applied after every hidden layer. A falsy value
            (0 or None) omits the dropout layers.

    Returns:
        An uncompiled Keras ``Model`` taking ``[user_ids, movie_ids]`` and
        producing one unbounded rating estimate per pair.

    Embedding table sizes come from the notebook globals ``n_users`` and
    ``n_movies``.
    """
    in_u = Input(shape=(1,))
    in_m = Input(shape=(1,))

    e_u = Embedding(input_dim=n_users, output_dim=n_factors)(in_u)
    e_m = Embedding(input_dim=n_movies, output_dim=n_factors)(in_m)

    # (batch, 1, 2 * n_factors) -> (batch, 2 * n_factors)
    x = Flatten()(Concatenate()([e_u,e_m]))

    for fraction in hidden_fractions:
        # Never ask for a zero-width layer when the fraction is tiny.
        units = max(1, int((2 * n_factors) * fraction))
        x = Dense(units=units, activation='relu')(x)
        if dropout:
            x = Dropout(dropout)(x)

    x = Dense(units=1)(x)

    return Model(inputs=[in_u,in_m], outputs=x)

In [181]:
# MLP model with 50 latent factors per embedding.
nn_model = get_nn_model(n_factors=50)

In [182]:
# Default Adam, mean-squared-error regression on the raw ratings.
nn_model.compile(loss='mse', optimizer='adam')

In [183]:
# Smoke test on the first ten training pairs (column 0 = user, column 1 = movie).
nn_model.predict([trn[:10, col] for col in (0, 1)])

array([[ 0.03734574],
       [ 0.04256944],
       [ 0.00699022],
       [ 0.04564754],
       [ 0.04943538],
       [ 0.04825059],
       [ 0.03127331],
       [ 0.02851982],
       [ 0.0427456 ],
       [ 0.00544166]], dtype=float32)

In [184]:
# Fit the MLP model, tracking the held-out loss after every epoch.
nn_model.fit(
    x=[trn[:, 0], trn[:, 1]],
    y=trn[:, 2],
    batch_size=64,
    epochs=6,
    validation_data=([val[:, 0], val[:, 1]], val[:, 2]),
)

Train on 800697 samples, validate on 199512 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x13e41b048>

Two-layer model

Model 1

```
n_factor =10

x = Flatten()(Concatenate()([e_u,e_m]))
x= Dense(units=int((2 * n_factors) *3 /4), activation='relu')(x)
x= Dropout(0.4)(x)
x = Dense(units=1)(x)

Train on 799442 samples, validate on 200767 samples
Epoch 1/6
799442/799442 [==============================] - 42s 53us/step - loss: 1.3456 - val_loss: 0.8327
Epoch 2/6
799442/799442 [==============================] - 48s 60us/step - loss: 0.8526 - val_loss: 0.8213
Epoch 3/6
799442/799442 [==============================] - 42s 53us/step - loss: 0.8411 - val_loss: 0.8149
Epoch 4/6
799442/799442 [==============================] - 39s 49us/step - loss: 0.8328 - val_loss: 0.8101
Epoch 5/6
799442/799442 [==============================] - 44s 56us/step - loss: 0.8285 - val_loss: 0.8083
Epoch 6/6
799442/799442 [==============================] - 42s 53us/step - loss: 0.8258 - val_loss: 0.8078
```

Model 2
```
n_factor=50

Same architecture as model 1

Train on 800697 samples, validate on 199512 samples
Epoch 1/6
800697/800697 [==============================] - 121s 152us/step - loss: 1.0590 - val_loss: 0.8328
Epoch 2/6
800697/800697 [==============================] - 118s 147us/step - loss: 0.8299 - val_loss: 0.8071
Epoch 3/6
800697/800697 [==============================] - 127s 159us/step - loss: 0.8003 - val_loss: 0.7891
Epoch 4/6
800697/800697 [==============================] - 121s 151us/step - loss: 0.7843 - val_loss: 0.7827
Epoch 5/6
800697/800697 [==============================] - 121s 151us/step - loss: 0.7749 - val_loss: 0.7780
Epoch 6/6
800697/800697 [==============================] - 121s 151us/step - loss: 0.7682 - val_loss: 0.7754
```

Model 3
```
n_factor =10

x = Flatten()(Concatenate()([e_u,e_m]))
x = Dropout(0.3)(x)
x= Dense(units=int((2 * n_factors) *7 /10), activation='relu')(x)
x = Dropout(0.75)(x)    
x = Dense(units=1)(x)

Train on 799442 samples, validate on 200767 samples
Epoch 1/3
799442/799442 [==============================] - 47s 59us/step - loss: 1.7085 - val_loss: 0.8877
Epoch 2/3
799442/799442 [==============================] - 45s 56us/step - loss: 0.9742 - val_loss: 0.8830
Epoch 3/3
799442/799442 [==============================] - 45s 56us/step - loss: 0.9638 - val_loss: 0.8638
```

Model 4
```
n_factor =50

Same architecture as model 3

Train on 799629 samples, validate on 200580 samples
Epoch 1/3
799629/799629 [==============================] - 114s 143us/step - loss: 1.2233 - val_loss: 0.8412
Epoch 2/3
799629/799629 [==============================] - 109s 136us/step - loss: 0.8714 - val_loss: 0.8244
Epoch 3/3
799629/799629 [==============================] - 113s 141us/step - loss: 0.8599 - val_loss: 0.8223
```


3-layer model

Model 1
```
n_factor=10

x = Flatten()(Concatenate()([e_u,e_m]))
x= Dense(units=int((2 * n_factors) *0.75), activation='relu')(x)
x= Dropout(0.7)(x)
x= Dense(int((2*n_factors) *0.3), activation='relu')(x)
x= Dropout(0.7)(x)    
x = Dense(units=1)(x)

Train on 799629 samples, validate on 200580 samples
Epoch 1/6
799629/799629 [==============================] - 39s 49us/step - loss: 2.0640 - val_loss: 1.0601
Epoch 2/6
799629/799629 [==============================] - 39s 49us/step - loss: 1.1302 - val_loss: 1.0580
Epoch 3/6
799629/799629 [==============================] - 39s 49us/step - loss: 1.1178 - val_loss: 0.9833
Epoch 4/6
799629/799629 [==============================] - 40s 50us/step - loss: 1.0784 - val_loss: 0.9851
Epoch 5/6
799629/799629 [==============================] - 41s 51us/step - loss: 1.0762 - val_loss: 0.9842
Epoch 6/6
799629/799629 [==============================] - 38s 48us/step - loss: 1.0753 - val_loss: 0.9833
```

Model 2

```
n_factor=50

Train on 799629 samples, validate on 200580 samples
Epoch 1/3
799629/799629 [==============================] - 116s 145us/step - loss: 1.3015 - val_loss: 0.8801
Epoch 2/3
799629/799629 [==============================] - 115s 144us/step - loss: 0.9299 - val_loss: 0.8609
Epoch 3/3
799629/799629 [==============================] - 113s 141us/step - loss: 0.9112 - val_loss: 0.8452
```

Model 3: as an extension of model 1 in two-layer models
```
n_factor=10

    x = Flatten()(Concatenate()([e_u,e_m]))
    x= Dense(units=int((2 * n_factors) *0.75), activation='relu')(x)
    x= Dropout(0.4)(x)
    x= Dense(int((2*n_factors) *0.2), activation='relu')(x)    
    x = Dense(units=1)(x)
    
Train on 799629 samples, validate on 200580 samples
Epoch 1/6
799629/799629 [==============================] - 39s 49us/step - loss: 1.0733 - val_loss: 0.8548
Epoch 2/6
799629/799629 [==============================] - 38s 47us/step - loss: 0.8327 - val_loss: 0.8428
Epoch 3/6
799629/799629 [==============================] - 39s 49us/step - loss: 0.8184 - val_loss: 0.8414
Epoch 4/6
799629/799629 [==============================] - 39s 49us/step - loss: 0.8100 - val_loss: 0.8302
Epoch 5/6
799629/799629 [==============================] - 39s 48us/step - loss: 0.8035 - val_loss: 0.8424
Epoch 6/6
799629/799629 [==============================] - 37s 46us/step - loss: 0.7974 - val_loss: 0.8293
```

Model 5: adding batch norm
```
n_factor =10

    x = Flatten()(Concatenate()([e_u,e_m]))    
    x= Dense(units=int((2 * n_factors) *0.75), activation='relu')(x)
    x= Dropout(0.4)(x)
    x= BatchNormalization()(x)
    x= Dense(int((2*n_factors) *0.2), activation='relu')(x)
    x= BatchNormalization()(x)    
    x = Dense(units=1)(x)
    
Train on 799629 samples, validate on 200580 samples
Epoch 1/6
799629/799629 [==============================] - 55s 69us/step - loss: 1.2542 - val_loss: 0.8618
Epoch 2/6
799629/799629 [==============================] - 52s 65us/step - loss: 0.8678 - val_loss: 0.8621
Epoch 3/6
799629/799629 [==============================] - 51s 64us/step - loss: 0.8469 - val_loss: 0.8409
Epoch 4/6
799629/799629 [==============================] - 53s 66us/step - loss: 0.8335 - val_loss: 0.8379
Epoch 5/6
799629/799629 [==============================] - 52s 65us/step - loss: 0.8278 - val_loss: 0.8402
Epoch 6/6
799629/799629 [==============================] - 52s 65us/step - loss: 0.8238 - val_loss: 0.8340
```

Model 6
```
n_factor =50

Train on 799629 samples, validate on 200580 samples
Epoch 1/6
799629/799629 [==============================] - 137s 172us/step - loss: 1.0966 - val_loss: 0.8391
Epoch 2/6
799629/799629 [==============================] - 149s 186us/step - loss: 0.8313 - val_loss: 0.8144
Epoch 3/6
799629/799629 [==============================] - 141s 177us/step - loss: 0.8030 - val_loss: 0.7940
Epoch 4/6
799629/799629 [==============================] - 151s 189us/step - loss: 0.7845 - val_loss: 0.7869
Epoch 5/6
799629/799629 [==============================] - 161s 201us/step - loss: 0.7722 - val_loss: 0.7855
Epoch 6/6
799629/799629 [==============================] - 154s 193us/step - loss: 0.7637 - val_loss: 0.7901
```

### todo: NN model with residual block

### Mixed model 1: MF+NN modeling of bias

linear
$$(u,m)\to (e_u,e_m,b_u,b_m)\to e_u\cdot e_m + Wb_u+W'b_m$$

DNN
$$(u,m)\to (e_u,e_m,b_u,b_m)\to e_u\cdot e_m + DNN(b_u,b_m)$$

In [193]:
def get_mixed_model(e_factors = 10, b_factors = 10):
    """Build the mixed model: MF interaction plus a learned bias network.

    The prediction is ``5 * sigmoid(e_u . e_m + bias_net(b_u, b_m))``, where
    ``e_u``/``e_m`` are ``e_factors``-dimensional interaction embeddings and
    ``b_u``/``b_m`` are separate ``b_factors``-dimensional embeddings fed to a
    small two-layer network that outputs one scalar.

    Embedding table sizes come from the notebook globals ``n_users`` and
    ``n_movies``. Returns an uncompiled Keras ``Model`` taking
    ``[user_ids, movie_ids]``.
    """
    u_input = Input(shape = (1,))
    m_input = Input(shape = (1,))

    # --- interaction term: dot product of the latent vectors -> (batch, 1)
    u_emb = Embedding(n_users, e_factors)(u_input)
    m_emb = Embedding(n_movies, e_factors)(m_input)
    interaction = Dot(axes=2)([u_emb, m_emb])
    dot = Flatten()(interaction)

    # --- bias term: a separate pair of embeddings pushed through two Dense layers
    bu = Embedding(n_users, b_factors)(u_input)
    bm = Embedding(n_movies, b_factors)(m_input)
    hidden_units = int((2*b_factors) *0.75)
    bias = Concatenate()([bu, bm])
    # NOTE(review): this hidden layer has no activation, so the whole bias path
    # is a linear map (plus dropout). An L2(0.01) kernel/bias-regularised
    # version of this layer was also experimented with.
    bias = Dense(hidden_units)(bias)
    bias = Dropout(0.4)(bias)
    bias = Dense(1)(bias)

    # `bias` still carries the length-1 sequence axis; Add broadcasts it
    # against `dot` and Flatten brings the sum back to (batch, 1).
    total = Add()([dot, bias])
    out = Flatten()(total)

    # Squash to (0, 1), then stretch to the 5-star scale.
    out = Activation('sigmoid')(out)
    out = Lambda(lambda x:5*x)(out)

    return Model(inputs=[u_input, m_input], outputs=out)

In [194]:
# Build and compile the mixed model (10 interaction factors, 50 bias factors),
# then smoke-test it on the first ten training pairs.
mixed_model = get_mixed_model(b_factors=50, e_factors=10)
mixed_model.compile(loss='mse', optimizer='adam')
mixed_model.predict([trn[:10, col] for col in (0, 1)])

array([[ 2.49259472],
       [ 2.44761515],
       [ 2.45221663],
       [ 2.50785089],
       [ 2.52457094],
       [ 2.44535422],
       [ 2.49942732],
       [ 2.510396  ],
       [ 2.44880247],
       [ 2.46320534]], dtype=float32)

In [195]:
# Fit the mixed model, tracking the held-out loss after every epoch.
mixed_model.fit(
    x=[trn[:, 0], trn[:, 1]],
    y=trn[:, 2],
    batch_size=64,
    epochs=6,
    validation_data=([val[:, 0], val[:, 1]], val[:, 2]),
)

Train on 800697 samples, validate on 199512 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x13f2864a8>

Model 1: linear
```
e_factor = b_factor =10

Train on 800697 samples, validate on 199512 samples
Epoch 1/6
800697/800697 [==============================] - 51s 64us/step - loss: 1.2682 - val_loss: 0.8255
Epoch 2/6
800697/800697 [==============================] - 49s 61us/step - loss: 0.7698 - val_loss: 0.7734
Epoch 3/6
800697/800697 [==============================] - 43s 54us/step - loss: 0.7064 - val_loss: 0.7596
Epoch 4/6
800697/800697 [==============================] - 43s 54us/step - loss: 0.6718 - val_loss: 0.7575
Epoch 5/6
800697/800697 [==============================] - 44s 56us/step - loss: 0.6516 - val_loss: 0.7581
Epoch 6/6
800697/800697 [==============================] - 44s 55us/step - loss: 0.6385 - val_loss: 0.7594
```

Model 2: linear + sigmoid output
```
e_factor = b_fac =10

Train on 800697 samples, validate on 199512 samples
Epoch 1/6
800697/800697 [==============================] - 52s 66us/step - loss: 0.8753 - val_loss: 0.8059
Epoch 2/6
800697/800697 [==============================] - 52s 64us/step - loss: 0.7571 - val_loss: 0.7720
Epoch 3/6
800697/800697 [==============================] - 53s 67us/step - loss: 0.7025 - val_loss: 0.7621
Epoch 4/6
800697/800697 [==============================] - 42s 53us/step - loss: 0.6698 - val_loss: 0.7616
Epoch 5/6
800697/800697 [==============================] - 46s 57us/step - loss: 0.6491 - val_loss: 0.7635
Epoch 6/6
800697/800697 [==============================] - 45s 56us/step - loss: 0.6353 - val_loss: 0.7645

<keras.callbacks.History at 0x11d916e10>
```

Model 3: DNN + sigmoid
```
e_factor= b_fac =10

Train on 800697 samples, validate on 199512 samples
Epoch 1/6
800697/800697 [==============================] - 61s 77us/step - loss: 0.8847 - val_loss: 0.8075
Epoch 2/6
800697/800697 [==============================] - 58s 72us/step - loss: 0.7677 - val_loss: 0.7670
Epoch 3/6
800697/800697 [==============================] - 60s 75us/step - loss: 0.7085 - val_loss: 0.7566
Epoch 4/6
800697/800697 [==============================] - 61s 76us/step - loss: 0.6764 - val_loss: 0.7578
Epoch 5/6
800697/800697 [==============================] - 61s 76us/step - loss: 0.6566 - val_loss: 0.7610
Epoch 6/6
800697/800697 [==============================] - 64s 80us/step - loss: 0.6430 - val_loss: 0.7617

<keras.callbacks.History at 0x11dcb6f28>
```

Model 4: DNN + sigmoid
```
e_factor=50,  b_fac =10

Greatly overfitting even with regularization!

Train on 800697 samples, validate on 199512 samples
Epoch 1/6
800697/800697 [==============================] - 138s 172us/step - loss: 0.8801 - val_loss: 0.7802
Epoch 2/6
800697/800697 [==============================] - 138s 172us/step - loss: 0.6779 - val_loss: 0.7570
Epoch 3/6
800697/800697 [==============================] - 140s 175us/step - loss: 0.5520 - val_loss: 0.7897
Epoch 4/6
800697/800697 [==============================] - 137s 171us/step - loss: 0.4784 - val_loss: 0.8364
Epoch 5/6
800697/800697 [==============================] - 138s 172us/step - loss: 0.4349 - val_loss: 0.8783
Epoch 6/6
800697/800697 [==============================] - 143s 178us/step - loss: 0.4059 - val_loss: 0.9133

<keras.callbacks.History at 0x12108fc88>
```

### Mixed model 2: MF + NN modeling of interaction

linear model
$$(u,m)\to (e_u,e_m,b_u,b_m)\to We_u\cdot W'e_m+b_u+b_m$$

DNN
$$(u,m)\to (e_u,e_m,b_u,b_m)\to DNN(e_u)\cdot DNN'(e_m)+b_u+b_m$$

In [39]:
def get_mixed_nn_interaction(n_factors = 10):
    """Build the mixed model whose interaction term uses projected embeddings.

    Each latent vector is first passed through its own Dense layer (no
    activation, i.e. a linear projection to ``int(n_factors * 0.75)``
    dimensions). The prediction is
    ``5 * sigmoid(proj_u(e_u) . proj_m(e_m) + b_u + b_m)``.

    Embedding table sizes come from the notebook globals ``n_users`` and
    ``n_movies``. Returns an uncompiled Keras ``Model`` taking
    ``[user_ids, movie_ids]``.
    """
    u_input = Input(shape = (1,))
    m_input = Input(shape = (1,))

    u_emb = Embedding(n_users, n_factors)(u_input)
    m_emb = Embedding(n_movies, n_factors)(m_input)

    # Separate projection per side; both map to the same reduced width so the
    # dot product below is well defined.
    projected_dim = int(n_factors *0.75)
    out_u = Dense(projected_dim)(u_emb)
    out_m = Dense(projected_dim)(m_emb)
    interaction = Dot(axes=2)([out_u, out_m])
    dot = Flatten()(interaction)

    # Scalar bias per user / per movie.
    u_b = Embedding(n_users, 1)(u_input)
    m_b = Embedding(n_movies, 1)(m_input)

    # The biases still carry the length-1 sequence axis; Add broadcasts them
    # against `dot` and Flatten brings the sum back to (batch, 1).
    total = Add()([dot, u_b, m_b])
    out = Flatten()(total)

    # Squash to (0, 1), then stretch to the 5-star scale.
    out = Activation('sigmoid')(out)
    out = Lambda(lambda x:5*x)(out)

    return Model(inputs=[u_input, m_input], outputs=out)

In [44]:
# Build and compile the projected-interaction model with 50 latent factors,
# then smoke-test it on the first ten training pairs.
nn_interaction_model = get_mixed_nn_interaction(50)
nn_interaction_model.compile(loss='mse', optimizer='adam')
nn_interaction_model.predict([trn[:10, col] for col in (0, 1)])

array([[ 2.40735555],
       [ 2.48720074],
       [ 2.4142499 ],
       [ 2.43169713],
       [ 2.45257306],
       [ 2.45953631],
       [ 2.45998096],
       [ 2.46519303],
       [ 2.41099477],
       [ 2.50233197]], dtype=float32)

In [45]:
# Fit the projected-interaction model, tracking the held-out loss after every epoch.
nn_interaction_model.fit(
    x=[trn[:, 0], trn[:, 1]],
    y=trn[:, 2],
    batch_size=64,
    epochs=6,
    validation_data=([val[:, 0], val[:, 1]], val[:, 2]),
)

Train on 800697 samples, validate on 199512 samples
Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


<keras.callbacks.History at 0x12206dba8>

Model 1: linear, n_factor =10
```
Train on 800697 samples, validate on 199512 samples
Epoch 1/6
800697/800697 [==============================] - 49s 62us/step - loss: 0.8666 - val_loss: 0.8097
Epoch 2/6
800697/800697 [==============================] - 42s 53us/step - loss: 0.7823 - val_loss: 0.7807
Epoch 3/6
800697/800697 [==============================] - 43s 54us/step - loss: 0.7541 - val_loss: 0.7710
Epoch 4/6
800697/800697 [==============================] - 47s 59us/step - loss: 0.7328 - val_loss: 0.7626
Epoch 5/6
800697/800697 [==============================] - 45s 56us/step - loss: 0.7113 - val_loss: 0.7565
Epoch 6/6
800697/800697 [==============================] - 46s 57us/step - loss: 0.6962 - val_loss: 0.7502

<keras.callbacks.History at 0x120b8e080>
```

Model 2: linear, n_factor =50, overfitting
```
Train on 800697 samples, validate on 199512 samples
Epoch 1/6
800697/800697 [==============================] - 130s 163us/step - loss: 0.8498 - val_loss: 0.7934
Epoch 2/6
800697/800697 [==============================] - 125s 156us/step - loss: 0.7452 - val_loss: 0.7635
Epoch 3/6
800697/800697 [==============================] - 126s 157us/step - loss: 0.6724 - val_loss: 0.7634
Epoch 4/6
800697/800697 [==============================] - 131s 164us/step - loss: 0.6152 - val_loss: 0.7709
Epoch 5/6
800697/800697 [==============================] - 123s 154us/step - loss: 0.5718 - val_loss: 0.7900
Epoch 6/6
800697/800697 [==============================] - 118s 147us/step - loss: 0.5391 - val_loss: 0.8056

<keras.callbacks.History at 0x12206dba8>
```

# Temporal information