In [1]:
import numpy as np
import pandas as pd
import keras
import gc
from keras.layers import Input, Embedding, Flatten, Dot, Add
from keras.models import Model
from sklearn.preprocessing import LabelEncoder

Using TensorFlow backend.


In [2]:
# Single shared encoder instance; LabelEncoding_Cat refits it for each column.
lb = LabelEncoder()

In [3]:
def LabelEncoding_Cat(df):
    """Integer-encode every column of ``df`` except ``'Rating'``.

    Each such column is cast to ``str`` and passed through the module-level
    ``lb`` encoder's ``fit_transform``; the codes replace the column in a
    copy of ``df``, so the caller's frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose non-``'Rating'`` columns should be encoded.

    Returns
    -------
    (pandas.DataFrame, dict)
        The encoded copy, and a nested dict
        ``{column name: {original value: integer code}}``.

    Notes
    -----
    ``lb`` is refit for every column, so after the call it only holds the
    fit of the last encoded column.
    """
    df = df.copy()
    mappingTable = {}
    Cat_Var = [i for i in df.columns.tolist() if i != 'Rating']
    for col in Cat_Var:
        original = df[col]
        encoded = np.asarray(lb.fit_transform(original.astype('str')))
        # Pair every distinct raw value with the code found on its own row.
        # Zipping two separate unique() lists (raw values vs. codes) shifts
        # the pairs whenever the str cast merges distinct raw values
        # (e.g. 1 and '1'), because the two lists then differ in length.
        first_seen = ~original.duplicated().values
        mappingTable[col] = dict(zip(original.values[first_seen], encoded[first_seen]))
        df[col] = encoded
        print('Transfrom ', col)
    return df, mappingTable

In [4]:
# Load the training table from the working directory.
train_df = pd.read_csv(filepath_or_buffer='train.csv')

In [5]:
# Drop the identifier column by rebinding rather than `del`, and tolerate its
# absence, so re-running this cell on an already-trimmed frame is a no-op
# instead of a KeyError.
train_df = train_df.drop('TrainDataID', axis=1, errors='ignore')
gc.collect()
train_df.head()

Unnamed: 0,UserID,MovieID,Rating
0,796,1193,5
1,796,661,3
2,796,914,3
3,796,3408,4
4,796,2355,5


In [6]:
# Replace the frame with its integer-encoded copy and keep the value -> code
# table. NOTE: this rebinds `train_df`, so running the cell a second time
# encodes the already-encoded codes.
train_df, mapping = LabelEncoding_Cat(df=train_df)

Transfrom  UserID
Transfrom  MovieID


In [7]:
# Preview the first rows of the encoded frame.
train_df.head(n=5)

Unnamed: 0,UserID,MovieID,Rating
0,5814,189,5
1,5814,3359,3
2,5814,3597,3
3,5814,2495,4
4,5814,1370,5


In [8]:
# Take an explicit copy: `.values` may hand back a view onto the frame's data,
# in which case an in-place shuffle would also reorder `train_df`.
train = train_df.values.copy()
# Shuffle rows with a seeded, local generator so the order is reproducible
# after a kernel restart and NumPy's global random state is left untouched.
np.random.RandomState(42).shuffle(train)

In [9]:
# Number of distinct users and movies; these size the embedding tables below.
(train_df['UserID'].nunique(), train_df['MovieID'].nunique())

(6040, 3688)

In [10]:
# Symbolic input tensors (TensorFlow backend): one ID per sample for each input.
user_input = Input(name='User', shape=(1,))
movie_input = Input(name='Movie', shape=(1,))

In [11]:
# 8-dimensional embedding per user ('Wu') and per movie ('Wm'); each table has
# one row per distinct ID. Flatten drops the length-1 sequence axis.
user_vec = Embedding(
    input_dim=train_df['UserID'].nunique(),
    output_dim=8,
    embeddings_initializer='random_normal',
    name='Wu',
)(user_input)
user_vec = Flatten()(user_vec)
movie_vec = Embedding(
    input_dim=train_df['MovieID'].nunique(),
    output_dim=8,
    embeddings_initializer='random_normal',
    name='Wm',
)(movie_input)
movie_vec = Flatten()(movie_vec)

In [12]:
# Scalar bias per movie ('Bm') and per user ('Bu'), stored as width-1
# embeddings and initialised to zero.
movie_bias = Embedding(
    input_dim=train_df['MovieID'].nunique(),
    output_dim=1,
    embeddings_initializer='zeros',
    name='Bm',
)(movie_input)
movie_bias = Flatten()(movie_bias)
user_bias = Embedding(
    input_dim=train_df['UserID'].nunique(),
    output_dim=1,
    embeddings_initializer='zeros',
    name='Bu',
)(user_input)
user_bias = Flatten()(user_bias)

In [13]:
# Prediction = dot(user_vec, movie_vec) + user_bias + movie_bias.
r_hat = Dot(axes=1)([user_vec, movie_vec])
r_hat = Add()([r_hat, user_bias])
r_hat = Add()([r_hat, movie_bias])

# Two-input model trained with mean squared error and plain SGD.
model = Model(inputs=[user_input, movie_input], outputs=r_hat)
model.compile(optimizer='sgd', loss='mse')

In [14]:
# Print the layer-by-layer summary of the compiled model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
User (InputLayer)               (None, 1)            0                                            
__________________________________________________________________________________________________
Movie (InputLayer)              (None, 1)            0                                            
__________________________________________________________________________________________________
Wu (Embedding)                  (None, 1, 8)         48320       User[0][0]                       
__________________________________________________________________________________________________
Wm (Embedding)                  (None, 1, 8)         29504       Movie[0][0]                      
__________________________________________________________________________________________________
flatten_1 

In [15]:
# Columns of `train`: 0 and 1 feed the model's two inputs (user, movie),
# column 2 is the target. `output` keeps the value returned by fit().
output = model.fit(
    x=[train[:, 0], train[:, 1]],
    y=train[:, 2],
    batch_size=64,
    epochs=10,
    verbose=2,
)

Epoch 1/10
 - 18s - loss: 10.5055
Epoch 2/10
 - 18s - loss: 6.2582
Epoch 3/10
 - 20s - loss: 4.2742
Epoch 4/10
 - 17s - loss: 3.2426
Epoch 5/10
 - 16s - loss: 2.6533
Epoch 6/10
 - 16s - loss: 2.2873
Epoch 7/10
 - 14s - loss: 2.0429
Epoch 8/10
 - 14s - loss: 1.8692
Epoch 9/10
 - 14s - loss: 1.7394
Epoch 10/10
 - 14s - loss: 1.6383


In [16]:
# Display the model's layer objects in the order Keras stores them.
model.layers

[<keras.engine.topology.InputLayer at 0x11defe9e8>,
 <keras.engine.topology.InputLayer at 0x11defe710>,
 <keras.layers.embeddings.Embedding at 0x11defe9b0>,
 <keras.layers.embeddings.Embedding at 0x11c291c50>,
 <keras.layers.core.Flatten at 0x1101504a8>,
 <keras.layers.core.Flatten at 0x11def8400>,
 <keras.layers.embeddings.Embedding at 0x11defee48>,
 <keras.layers.merge.Dot at 0x11ded2940>,
 <keras.layers.core.Flatten at 0x11dedc6a0>,
 <keras.layers.embeddings.Embedding at 0x11dee8400>,
 <keras.layers.merge.Add at 0x11ded2358>,
 <keras.layers.core.Flatten at 0x11dee8e80>,
 <keras.layers.merge.Add at 0x11ded9cf8>]

In [17]:
# Fetch the embedding layers by the names they were built with ('Wu', 'Wm')
# rather than by position in `model.layers`, which shifts whenever the graph
# changes. get_weights()[0] is the layer's first (embedding) weight array.
# The `uesr_embedding` spelling is kept because later cells reference it.
uesr_embedding = model.get_layer(name='Wu').get_weights()[0]
movie_embedding = model.get_layer(name='Wm').get_weights()[0]

In [18]:
# Shapes of the extracted user and movie embedding arrays.
(uesr_embedding.shape, movie_embedding.shape)

((6040, 8), (3688, 8))

In [19]:
# Display the weight array taken from the user embedding layer.
uesr_embedding

array([[-0.04936188,  0.00449077,  0.02021181, ...,  0.06525329,
        -0.02020228, -0.00696586],
       [-0.03776457,  0.03246533, -0.11356646, ..., -0.04326073,
         0.0508648 ,  0.11466761],
       [-0.01180263, -0.04633947, -0.03847113, ..., -0.07450321,
        -0.01714919, -0.06017509],
       ...,
       [ 0.04125899, -0.10075564,  0.03646569, ...,  0.04375195,
        -0.01770937,  0.01858334],
       [-0.04973083,  0.0032155 , -0.04741767, ..., -0.00476364,
         0.00203702,  0.05242538],
       [ 0.06710503, -0.05897513, -0.00228288, ...,  0.03370593,
        -0.00981954, -0.0022861 ]], dtype=float32)

In [20]:
# Display the weight array taken from the movie embedding layer.
movie_embedding

array([[-0.07829513,  0.00616422,  0.00962315, ...,  0.01688451,
         0.06251603,  0.00622967],
       [-0.02567048,  0.05743589, -0.03408841, ..., -0.06112809,
        -0.01131272, -0.09059291],
       [-0.05518647, -0.03821751, -0.09248856, ..., -0.03937665,
         0.01054341,  0.00682454],
       ...,
       [ 0.07430061,  0.07331047,  0.05453219, ...,  0.01108829,
        -0.00013804, -0.07382252],
       [-0.01174863, -0.040372  ,  0.02966973, ..., -0.04910225,
        -0.03804779, -0.02898551],
       [ 0.00516372, -0.09503576,  0.02831763, ...,  0.04336522,
         0.02344754, -0.03695222]], dtype=float32)

In [21]:
# Predict on the same (user, movie) pairs the model was fitted on.
y_hat = model.predict(
    x=[train[:, 0], train[:, 1]],
)

In [24]:
# Display the array of predictions returned by model.predict.
y_hat

array([[4.110096  ],
       [2.9542813 ],
       [0.82525826],
       ...,
       [3.782424  ],
       [0.93773264],
       [2.4136903 ]], dtype=float32)