In [26]:
import keras
from keras.layers import *
from keras.models import Model
from keras.optimizers import Adam

import tensorflow as tf

import numpy as np
import pandas as pd 

In [3]:
# Synthetic data: one variable-length sequence of category ids per user.
num_users = 1000
max_len = 50
num_categories = 100

# Dedicated, seeded generator so that Restart & Run All reproduces the same
# sequences without touching NumPy's global random state.
rng = np.random.RandomState(0)

# randint's upper bound is exclusive, so lengths are drawn from [1, max_len]:
# the lower bound of 1 rules out empty sequences (which have no meaningful
# category distribution) and `max_len + 1` makes max_len itself reachable.
# Category ids are drawn from [0, num_categories).
category_sequences = [
    rng.randint(0, num_categories, size=rng.randint(1, max_len + 1))
    for _ in range(num_users)
]

In [8]:
# Peek at the first user's sequence to sanity-check the generated data.
print(category_sequences[0])

[72 87 81 90 81 20 52 21 20  2 87 61 90 71 84 25 76 41 78 98]


In [None]:
# Keras embedding layer mapping each of the num_categories ids to a vector of
# size embedding_dim.
# NOTE(review): embedding_dim is not defined in the visible cells - define it
# before running this cell.
keras.layers.Embedding(input_dim=num_categories, output_dim=embedding_dim)

In [None]:
# Lower-level TensorFlow sketch: create a [num_categories, embedding_dim]
# variable and look up its rows by category id.
# The variable name must use plain ASCII quotes; typographic quotes are a
# SyntaxError in Python.
# NOTE(review): tf.get_variable is the TF 1.x API (tf.compat.v1.get_variable
# under TF 2.x). embedding_dim and category_sequence_goes_here are not defined
# in the visible cells; the latter looks like a placeholder for a tensor of
# category ids - confirm before running.
embedding_matrix = tf.get_variable("embeddings", [num_categories, embedding_dim])
embeddings = tf.nn.embedding_lookup(embedding_matrix, category_sequence_goes_here)

In [None]:
def to_fractions(sequence, num_categories=None):
    """Return the fraction of `sequence` taken up by each distinct value.

    Parameters
    ----------
    sequence : sequence
        Items to count (e.g. an array of category ids).
    num_categories : int, optional
        When given, the result is re-indexed to ``0 .. num_categories - 1`` in
        order, with 0.0 for ids that do not occur, so every call yields a
        vector of the same width and layout. Ids outside that range are
        dropped by the re-indexing, in which case the fractions no longer sum
        to 1. When omitted, only the values that actually occur are returned
        (most frequent first), exactly as before.

    Returns
    -------
    pandas.Series
        Fractions (count / len(sequence)) indexed by value.
    """
    counts = pd.Series(sequence).value_counts()
    fractions = counts / len(sequence)
    if num_categories is not None:
        # Pin the layout so downstream consumers see a fixed-width vector
        # regardless of which categories appear in this particular sequence.
        fractions = fractions.reindex(range(num_categories), fill_value=0.0)
    return fractions
    
# Stack the per-user fraction vectors into one row per sequence.
# Re-indexing pins the columns to category ids 0 .. num_categories - 1 in
# order, so the table's width and column order do not depend on which
# categories happened to be sampled; categories a user never saw become 0.
fractions = (
    pd.DataFrame([to_fractions(seq) for seq in category_sequences])
    .reindex(columns=range(num_categories))
    .fillna(0)
)

In [27]:
def deep_user_multiple_sequences(input_sizes, output_sizes, embedding_sizes, depth=(100, 100)):
    """Build, summarize and compile a multi-input, multi-output dense classifier.

    The inputs are not raw sequences: each one is a fixed-width vector holding
    the distribution over the items of a sequence. Every input gets its own
    ReLU ``Dense`` projection, the projections are merged, passed through the
    shared hidden layers in `depth`, and fed to one softmax head per output.

    Parameters
    ----------
    input_sizes : iterable of int
        Width of each input vector; one ``Input`` is created per entry.
    output_sizes : iterable of int
        Number of units of each softmax output head.
    embedding_sizes : iterable of int
        Units of the per-input projection; must have one entry per input.
    depth : iterable of int
        Units of each shared hidden layer applied after the merge.

    Returns
    -------
    keras Model
        Compiled with categorical cross-entropy, Adam and an accuracy metric.
        The model summary is printed as a side effect.

    Raises
    ------
    ValueError
        If `input_sizes` or `output_sizes` is empty, or if `input_sizes` and
        `embedding_sizes` differ in length.
    """
    # Materialize once so generators are accepted and lengths can be checked.
    input_sizes = list(input_sizes)
    output_sizes = list(output_sizes)
    embedding_sizes = list(embedding_sizes)

    if not input_sizes:
        raise ValueError("input_sizes must contain at least one entry")
    if not output_sizes:
        raise ValueError("output_sizes must contain at least one entry")
    if len(input_sizes) != len(embedding_sizes):
        # zip() would otherwise silently drop the unmatched inputs.
        raise ValueError(
            "input_sizes and embedding_sizes must have the same length, got "
            "%d and %d" % (len(input_sizes), len(embedding_sizes))
        )

    # The inputs are not actually sequences! They are the distribution over sequence objects.
    inputs = [Input(shape=(size,)) for size in input_sizes]

    # Each input is projected into its own space. The ReLU matters: without a
    # non-linearity the projection would simply compose with the next Dense
    # layer's linear map.
    embeddings = [
        Dense(emb_size, activation='relu')(input_tensor)
        for emb_size, input_tensor in zip(embedding_sizes, inputs)
    ]

    # Merge all projections. A single projection is used as-is because
    # concatenate may reject a one-element list (older Keras releases require
    # at least two inputs).
    if len(embeddings) == 1:
        everything = embeddings[0]
    else:
        everything = concatenate(embeddings)

    # Shared hidden layers.
    for layer_size in depth:
        everything = Dense(layer_size, activation='relu')(everything)

    # One softmax head per requested output.
    outputs = [Dense(out_size, activation='softmax')(everything) for out_size in output_sizes]

    # Build, print, and return model
    model = Model(inputs=inputs, outputs=outputs)
    model.summary()
    model.compile(loss='categorical_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    return model

In [28]:
# Example model: two input distributions (widths 100 and 50), each projected
# to 100 units, with two softmax heads (4 and 3 classes).
my_model = deep_user_multiple_sequences(
    input_sizes=(100, 50),
    output_sizes=(4, 3),
    embedding_sizes=(100, 100),
)

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 100)          0                                            
__________________________________________________________________________________________________
input_2 (InputLayer)            (None, 50)           0                                            
__________________________________________________________________________________________________
dense_1 (Dense)                 (None, 100)          10100       input_1[0][0]                    
__________________________________________________________________________________________________
dense_2 (Dense)                 (None, 100)          5100        input_2[0][0]                    
__________________________________________________________________________________________________
concatenat