In [1]:
import pandas as pd
import numpy as np

from keras import *
from keras.layers import Embedding, Input,concatenate, Flatten,dot, Dense
from keras.models import Model
from keras.optimizers import Adam

from numpy.random import seed
seed(51)
from tensorflow import set_random_seed
set_random_seed(22)


import os
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"] = ""

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  return f(*args, **kwds)


In [10]:
vocab_size=5  #we have 5 words in our vocabulary (0,1,2,3,4) -- generally think of this as 5 unqiue tokens
             #(e.g. words, symbols, user IDs, Movie IDs, Product IDs )
embedding_size=3 #there are three latent factors that describe our words


embedding_layer = Embedding(output_dim=embedding_size, \
                            input_dim=vocab_size, \
                            input_length=1,\
                            mask_zero=True)

x = Input(shape=[1])
embedding = embedding_layer(x)
model = Model(inputs=x, outputs=embedding)

print(model.summary())
print("")
print(" ")
print("-----------------------------------------------------------------")
print("-----------------------------------------------------------------")

print("Input shape: ", model.input_shape) #this "model" inputs a single number
print("Output shape: ", model.output_shape) #this "model" exports a length (embedding_size) vector

print("Weight Matrix shape: ", np.array(model.get_weights()).shape) #shape of the embedding matrix is
                                                                    #(1, vocab_size, embedding_size)
print(" ")
print(" ")
model.get_weights() #The embedding weights


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 1)                 0         
_________________________________________________________________
embedding_4 (Embedding)      (None, 1, 3)              15        
Total params: 15
Trainable params: 15
Non-trainable params: 0
_________________________________________________________________
None

 
-----------------------------------------------------------------
-----------------------------------------------------------------
Input shape:  (None, 1)
Output shape:  (None, 1, 3)
Weight Matrix shape:  (1, 5, 3)
 
 


[array([[-0.02434868,  0.02641512,  0.02833296],
        [-0.0264437 , -0.04659697,  0.00371159],
        [ 0.02535654, -0.01593039,  0.0152105 ],
        [-0.04532808, -0.006714  , -0.02591713],
        [-0.00216939,  0.04102891, -0.01888945]], dtype=float32)]

In [12]:
#simply a lookup

X = np.array(([2],[2],[1])) #3rd, 3rd, 2nd words
model.predict(X)

array([[[ 0.02535654, -0.01593039,  0.0152105 ]],

       [[ 0.02535654, -0.01593039,  0.0152105 ]],

       [[-0.0264437 , -0.04659697,  0.00371159]]], dtype=float32)

In [13]:
vocab_size_1=7  #User IDs?
vocab_size_2=5 #movie IDs?

embedding_size=3 #constant


embedding_layer_1 = Embedding(output_dim=embedding_size, input_dim=vocab_size_1,input_length=1, mask_zero=False)
embedding_layer_2 = Embedding(output_dim=embedding_size, input_dim=vocab_size_2,input_length=1, mask_zero=False)

userIDs = Input(shape=[1])
movieIDs = Input(shape=[1])


embedding_users = embedding_layer_1 (userIDs)
embedding_movies = embedding_layer_2(movieIDs)

x= concatenate([embedding_users,embedding_movies])
x=Flatten()(x)

model = Model(inputs=[userIDs,movieIDs], outputs=x)

print(model.summary())
print(" ")
print(" ")
print("-----------------------------------------------------------------")
print("-----------------------------------------------------------------")
print(" ")
print("Input shape: ", model.input_shape) #this "model" inputs a single number
print("Output shape: ", model.output_shape) #this "model" exports a length (embedding_size) vector

print(" ")
print("User weight Matrix shape: ", np.array(model.get_weights()[0]).shape) 
print("Movies weight Matrix shape: ", np.array(model.get_weights()[1]).shape) 

print(" ")
print("User embedding weights")
print("-----------------------------------------------------------------")
print(model.get_weights()[0])
print(" ")
print(" ")
print("Movie embedding weights")
print("-----------------------------------------------------------------")
print(model.get_weights()[1]) 


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
input_6 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_5 (Embedding)         (None, 1, 3)         21          input_5[0][0]                    
__________________________________________________________________________________________________
embedding_6 (Embedding)         (None, 1, 3)         15          input_6[0][0]                    
__________________________________________________________________________________________________
concatenat

In [14]:
X = [np.array(([1])),np.array(([2]))] #2nd UserID and 3rd Movie embeddings concatenated
model.predict(X)

array([[ 0.03142424, -0.04858527,  0.01714227, -0.01932679,  0.01362795,
         0.03613641]], dtype=float32)

In [15]:
vocab_size_1=7  #User IDs?
vocab_size_2=5 #movie IDs?

embedding_size=3 #constant


embedding_layer_1 = Embedding(output_dim=embedding_size, input_dim=vocab_size_1,input_length=1, mask_zero=False)
embedding_layer_2 = Embedding(output_dim=embedding_size, input_dim=vocab_size_2,input_length=1, mask_zero=False)

userIDs = Input(shape=[1])
movieIDs = Input(shape=[1])


embedding_users = embedding_layer_1 (userIDs)
embedding_movies = embedding_layer_2(movieIDs)

x= dot([embedding_users,embedding_movies],axes=-1)
x=Flatten()(x)

model = Model(inputs=[userIDs,movieIDs], outputs=x)

print(model.summary())
print(" ")
print(" ")
print("-----------------------------------------------------------------")
print("-----------------------------------------------------------------")
print(" ")
print("Input shape: ", model.input_shape) #this "model" inputs a single number
print("Output shape: ", model.output_shape) #this "model" exports a length (embedding_size) vector

print(" ")
print("User weight Matrix shape: ", np.array(model.get_weights()[0]).shape) 
print("Movies weight Matrix shape: ", np.array(model.get_weights()[1]).shape) 

print(" ")
print("User embedding weights")
print("-----------------------------------------------------------------")
print(model.get_weights()[0])
print(" ")
print(" ")
print("Movie embedding weights")
print("-----------------------------------------------------------------")
print(model.get_weights()[1]) 


__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_7 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
input_8 (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
embedding_7 (Embedding)         (None, 1, 3)         21          input_7[0][0]                    
__________________________________________________________________________________________________
embedding_8 (Embedding)         (None, 1, 3)         15          input_8[0][0]                    
__________________________________________________________________________________________________
dot_1 (Dot

In [16]:
X = [np.array(([1])),np.array(([2]))] #2nd UserID and 3rd Movie embeddings dot
model.predict(X)

array([[6.0641032e-05]], dtype=float32)

In [17]:
np.dot(np.array(model.get_weights()[0][1]),np.array(model.get_weights()[1][2]))

6.0641003e-05

In [None]:
from IPython.display import Image
Image("/home/jma/Desktop/march_embedding/Screenshot from 2018-01-30 10-41-23.png")