# Possible Experiments

1. try adding more dense layers
2. play around with learning rate and epochs
3. Using for loop when predicting
4. Don't change the loss and metrics it's gonna give error
5. Add embedding layer 
6. Add days information to the input
7. Add mask
8. Change Input and Output Sequence length

# Importing libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Reading data

In [2]:
data = pd.read_csv('customer_wise_aggregated_data.csv')

# Processing data for sequences

In [3]:
tokenized_movies_20 = np.array([seq[:20] for seq in data['movie_ids'] if len(seq) > 20])

In [4]:
movies = keras.preprocessing.text.Tokenizer(split=',')

In [5]:
movies.fit_on_texts(data['movie_ids'])

In [6]:
tokenized_movies = movies.texts_to_sequences(data['movie_ids'])

In [7]:
tokenized_movies_with_threshold = np.array([seq[:20] for seq in tokenized_movies if len(seq) > 20])

In [8]:
tokenized_movies_with_threshold.shape

(254529, 20)

In [9]:
seq_to_txt_movies = movies.sequences_to_texts(tokenized_movies_with_threshold)

In [10]:
len(seq_to_txt_movies)

254529

In [11]:
movies_tokenizer = keras.preprocessing.text.Tokenizer()

In [12]:
movies_tokenizer.fit_on_texts(seq_to_txt_movies[:50000])

In [13]:
movies_tokenized = np.array(movies_tokenizer.texts_to_sequences(seq_to_txt_movies[:50000]))

In [14]:
movies_tokenizer.sequences_to_texts([movies_tokenized[0]])

['705 1267 1561 2095 2456 3423 723 30 157 173 329 457 1314 1428 1615 1693 2452 2782 2851 3290']

In [15]:
seq_to_txt_movies[0]

'705  1267  1561  2095  2456  3423  723  30  157  173  329  457  1314  1428  1615  1693  2452  2782  2851  3290'

In [16]:
tokenizer_vocab_size = len(movies_tokenizer.word_counts) + 1

In [17]:
tokenizer_vocab_size

4384

# Training and Target Sequences

In [18]:
training_data_X = movies_tokenized[:, :-5]

In [19]:
training_data_X.shape

(50000, 15)

In [20]:
training_data_y = movies_tokenized[:, -15:]

In [21]:
training_data_y.shape

(50000, 15)

In [22]:
training_data_X[0]

array([ 132,   81,   87,   53,  359,  865,  267,   22, 1611,  804,   46,
         28,  215,   11,   79])

In [23]:
training_data_y[0]

array([ 865,  267,   22, 1611,  804,   46,   28,  215,   11,   79,  447,
          5,   15,  550,   84])

In [24]:
def create_train_tfdata(train_feat_dict, train_target_tensor, batch_size, buffer_size=None):
    """
    Create train tf dataset for model train input
    :param train_feat_dict: dict, containing the features tensors for train data
    :param train_target_tensor: np.array(), the training TARGET tensor
    :param batch_size: (int) size of the batch to work with
    :param buffer_size: (int) Optional. Default is None. Size of the buffer
    :return: (tuple) 1st element is the training dataset,
                     2nd is the number of steps per epoch (based on batch size)
    """
    if buffer_size is None:
        buffer_size = batch_size*50

    train_steps_per_epoch = len(train_target_tensor) // batch_size

    train_dataset = tf.data.Dataset.from_tensor_slices((train_feat_dict, train_target_tensor)).cache()
    train_dataset = train_dataset.shuffle(buffer_size).batch(batch_size)
    train_dataset = train_dataset.repeat().prefetch(tf.data.experimental.AUTOTUNE)
    
    return train_dataset, train_steps_per_epoch
  
train_feat_dict = {'item_id': training_data_X} #{'item_id': train_dict['item_id'],
                    # 'nb_days': train_dict['nb_days']}
train_target_tensor = training_data_y #train_dict['target']

train_dataset, train_steps_per_epoch = create_train_tfdata(train_feat_dict, train_target_tensor, batch_size=500)

# Loss function

In [25]:
def loss_function(real, pred):
    """
    We redefine our own loss function in order to get rid of the '0' value
    which is the one used for padding. This to avoid that the model optimize itself
    by predicting this value because it is the padding one.
    
    :param real: the truth
    :param pred: predictions
    :return: a masked loss where '0' in real (due to padding)
                are not taken into account for the evaluation
    """

    # to check that pred is numric and not nan
    # mask = tf.math.logical_not(tf.math.equal(real, 0))
    loss_object_ = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True, reduction='none')
    loss_ = loss_object_(real, pred)
    # mask = tf.cast(mask, dtype=loss_.dtype)
    # loss_ *= mask

    return tf.reduce_mean(loss_)

# Building model

In [26]:
def build_model(hp, max_len, item_vocab_size):
    """
    Build a model given the hyper-parameters with item and nb_days input features
    :param hp: (kt.HyperParameters) hyper-parameters to use when building this model
    :return: built and compiled tensorflow model 
    """
    # inputs = {}
    inputs = tf.keras.Input(batch_input_shape=[None, 15],
                                       name='item_id', dtype=tf.int32)
    # create encoding padding mask
    # encoding_padding_mask = tf.math.logical_not(tf.math.equal(inputs['item_id'], 0))

    # nb_days bucketized
    # inputs['nb_days'] = tf.keras.Input(batch_input_shape=[None, max_len],
    #                                    name='nb_days', dtype=tf.int32)

    # Pass categorical input through embedding layer
    # with size equals to tokenizer vocabulary size
    # Remember that vocab_size is len of item tokenizer + 1
    # (for the padding '0' value)
    
    embedding_item = tf.keras.layers.Embedding(input_dim=item_vocab_size,
                                               output_dim=hp.get('embedding_item'),
                                               name='embedding_item'
                                              )(inputs)
    # nbins=100, +1 for zero padding
    # embedding_nb_days = tf.keras.layers.Embedding(input_dim=100 + 1,
    #                                               output_dim=hp.get('embedding_nb_days'),
    #                                               name='embedding_nb_days'
    #                                              )(inputs['nb_days'])

    #  Concatenate embedding layers
    # concat_embedding_input = tf.keras.layers.Concatenate(
    #  name='concat_embedding_input')([embedding_item, embedding_nb_days])

    # concat_embedding_input
    batchnorm_inputs = tf.keras.layers.BatchNormalization(name='batchnorm_inputs')(embedding_item)
    
    # LSTM layer
    rnn = tf.keras.layers.LSTM(units=hp.get('rnn_units_cat'),
                                   return_sequences=True,
                                   stateful=False,
                                   recurrent_initializer='glorot_normal',
                                   name='LSTM_cat'
                                   )(batchnorm_inputs)

    rnn = tf.keras.layers.BatchNormalization(name='batchnorm_lstm')(rnn)

    # Self attention so key=value in inputs
    att = tf.keras.layers.Attention(use_scale=False, causal=True,
                                    name='attention')(inputs=[rnn, rnn],)
                                                    #   mask=[encoding_padding_mask,
                                                    #         encoding_padding_mask])

    # Last layer is a fully connected one
    output = tf.keras.layers.Dense(item_vocab_size, name='dense_1')(att)
    # output = tf.keras.layers.Dense(15 ,activation='softmax' ,name='output')(dense_1)
    # dense_2 = tf.keras.layers.Dense(15, name='dense_2')(dense_1)
    # output = tf.keras.layers.Dense(1, name='output')(dense_2)

    # output = tf.keras.layers.Dense(item_vocab_size, name='output')(att)

    model = tf.keras.Model(inputs, output)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(hp.get('learning_rate')),
        loss=loss_function,
        metrics=['sparse_categorical_accuracy'])
    
    return model

In [27]:
hp = {'embedding_item': 64, 'rnn_units_cat': 32, 'learning_rate': 0.01}

In [28]:
model = build_model(hp, max_len=15, item_vocab_size=tokenizer_vocab_size)



In [29]:
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 item_id (InputLayer)           [(None, 15)]         0           []                               
                                                                                                  
 embedding_item (Embedding)     (None, 15, 64)       280576      ['item_id[0][0]']                
                                                                                                  
 batchnorm_inputs (BatchNormali  (None, 15, 64)      256         ['embedding_item[0][0]']         
 zation)                                                                                          
                                                                                                  
 LSTM_cat (LSTM)                (None, 15, 32)       12416       ['batchnorm_inputs[0][0]']   

# Training Model

In [30]:
def fit_model(model, train_dataset, steps_per_epoch, epochs):
    """
    Fit the Keras model on the training dataset for a number of given epochs
    :param model: tf model to be trained
    :param train_dataset: (tf.data.Dataset object) the training dataset
                          used to fit the model
    :param steps_per_epoch: (int) Total number of steps (batches of samples) before 
                            declaring one epoch finished and starting the next epoch.
    :param epochs: (int) the number of epochs for the fitting phase
    :return: tuple (mirrored_model, history) with trained model and model history
    """
    
    # mirrored_strategy allows to use multi GPUs when available
    mirrored_strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy(
        tf.distribute.experimental.CollectiveCommunication.AUTO)
    
    with mirrored_strategy.scope():
        mirrored_model = model

    history = mirrored_model.fit(train_dataset,
                                 steps_per_epoch=steps_per_epoch,
                                 epochs=epochs, verbose=2)

    return mirrored_model, history

In [31]:
trained_model, training_history = fit_model(model, train_dataset, steps_per_epoch=100, epochs=5)

Instructions for updating:
use distribute.MultiWorkerMirroredStrategy instead


Instructions for updating:
use distribute.MultiWorkerMirroredStrategy instead






INFO:tensorflow:Single-worker MultiWorkerMirroredStrategy with local_devices = ('/device:CPU:0',), communication = CommunicationImplementation.AUTO


INFO:tensorflow:Single-worker MultiWorkerMirroredStrategy with local_devices = ('/device:CPU:0',), communication = CommunicationImplementation.AUTO


Epoch 1/5


2023-01-29 04:29:39.049629: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz
2023-01-29 04:29:39.133308: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" model: "0" num_cores: 10 environment { key: "cpu_instruction_set" value: "ARM NEON" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 16384 l2_cache_size: 524288 l3_cache_size: 524288 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }


100/100 - 8s - loss: 6.2404 - sparse_categorical_accuracy: 0.0263 - 8s/epoch - 82ms/step
Epoch 2/5
100/100 - 7s - loss: 5.6141 - sparse_categorical_accuracy: 0.0374 - 7s/epoch - 72ms/step
Epoch 3/5
100/100 - 7s - loss: 5.5137 - sparse_categorical_accuracy: 0.0403 - 7s/epoch - 72ms/step
Epoch 4/5
100/100 - 7s - loss: 5.4557 - sparse_categorical_accuracy: 0.0419 - 7s/epoch - 73ms/step
Epoch 5/5
100/100 - 7s - loss: 5.4140 - sparse_categorical_accuracy: 0.0432 - 7s/epoch - 72ms/step


In [32]:
test_data = {'item_id': [training_data_X[0]]}

In [33]:
test_feat_dict = {'item_id': training_data_X[:5]} #{'item_id': train_dict['item_id'],
                    # 'nb_days': train_dict['nb_days']}
test_target_tensor = training_data_y[:5] #train_dict['target']

test_dataset, _ = create_train_tfdata(train_feat_dict, train_target_tensor, batch_size=5)

In [34]:
predictions = trained_model.predict(test_dataset, steps=1)



2023-01-29 04:30:23.323028: W tensorflow/core/grappler/costs/op_level_cost_estimator.cc:690] Error in PredictCost() for the op: op: "Softmax" attr { key: "T" value { type: DT_FLOAT } } inputs { dtype: DT_FLOAT shape { unknown_rank: true } } device { type: "CPU" model: "0" num_cores: 10 environment { key: "cpu_instruction_set" value: "ARM NEON" } environment { key: "eigen" value: "3.4.90" } l1_cache_size: 16384 l2_cache_size: 524288 l3_cache_size: 524288 memory_size: 268435456 } outputs { dtype: DT_FLOAT shape { unknown_rank: true } }
2023-01-29 04:30:23.375279: W tensorflow/core/kernels/data/cache_dataset_ops.cc:856] The calling iterator did not fully read the dataset being cached. In order to avoid unexpected truncation of the dataset, the partially cached contents of the dataset  will be discarded. This can happen if you have an input pipeline similar to `dataset.cache().take(k).repeat()`. You should use `dataset.take(k).cache().repeat()` instead.


In [35]:
trained_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 item_id (InputLayer)           [(None, 15)]         0           []                               
                                                                                                  
 embedding_item (Embedding)     (None, 15, 64)       280576      ['item_id[0][0]']                
                                                                                                  
 batchnorm_inputs (BatchNormali  (None, 15, 64)      256         ['embedding_item[0][0]']         
 zation)                                                                                          
                                                                                                  
 LSTM_cat (LSTM)                (None, 15, 32)       12416       ['batchnorm_inputs[0][0]']   

In [40]:
predictions[0].shape

(15, 4384)

In [43]:
training_data_X[0]

array([ 132,   81,   87,   53,  359,  865,  267,   22, 1611,  804,   46,
         28,  215,   11,   79])

In [44]:
training_data_y[0]

array([ 865,  267,   22, 1611,  804,   46,   28,  215,   11,   79,  447,
          5,   15,  550,   84])

In [50]:
t = np.array(tf.random.categorical(predictions[0], 15)[0])

In [52]:
t.shape

(15,)

In [53]:
movies_tokenizer.sequences_to_texts([t])

['1394 187 75 758 3756 2475 1145 2430 1145 3282 758 2495 1571 2617 2612']

# - I - G - N - O - R - E -      Code after this point

In [None]:
predictions[0][0][:20]

array([-8.396628 ,  4.414433 ,  3.3817878,  4.147898 ,  3.6213093,
        3.5384274,  4.476161 ,  3.2653542,  3.4214551,  3.9680667,
        3.12084  ,  2.9775157,  2.6365614,  4.1158466,  3.3386211,
        3.1838474,  2.0544128,  1.5780928,  2.6273649,  4.2736964],
      dtype=float32)

In [None]:
predictions[0][1].max()

0.031356186

In [None]:
np.where(predictions[0][1] == 0.031356186)

(array([1]),)

In [None]:
movies_tokenizer.index_word[54]

'788'

1542 788

In [None]:
movies_tokenizer.sequences_to_texts([[1542]])

['2472']

2472 907

In [None]:
movies_tokenizer.word_index

{'571': 1,
 '1905': 2,
 '1145': 3,
 '2152': 4,
 '2452': 5,
 '1542': 6,
 '483': 7,
 '3106': 8,
 '1798': 9,
 '1307': 10,
 '1428': 11,
 '175': 12,
 '985': 13,
 '607': 14,
 '2782': 15,
 '313': 16,
 '191': 17,
 '3427': 18,
 '2122': 19,
 '1962': 20,
 '1220': 21,
 '30': 22,
 '1180': 23,
 '2862': 24,
 '4306': 25,
 '3860': 26,
 '4432': 27,
 '457': 28,
 '708': 29,
 '798': 30,
 '2612': 31,
 '1470': 32,
 '1110': 33,
 '4123': 34,
 '3624': 35,
 '3962': 36,
 '2200': 37,
 '1202': 38,
 '3825': 39,
 '1144': 40,
 '758': 41,
 '312': 42,
 '1102': 43,
 '299': 44,
 '3938': 45,
 '329': 46,
 '2372': 47,
 '4356': 48,
 '1744': 49,
 '1406': 50,
 '3756': 51,
 '197': 52,
 '2095': 53,
 '788': 54,
 '2112': 55,
 '3925': 56,
 '1865': 57,
 '3368': 58,
 '357': 59,
 '2391': 60,
 '482': 61,
 '3638': 62,
 '2580': 63,
 '1799': 64,
 '4345': 65,
 '4043': 66,
 '1975': 67,
 '3371': 68,
 '886': 69,
 '3320': 70,
 '2800': 71,
 '2660': 72,
 '1650': 73,
 '3151': 74,
 '2342': 75,
 '1810': 76,
 '241': 77,
 '4472': 78,
 '1615': 79,
 '46

# Trials

In [None]:
trial_model = build_model(hp, max_len=15, item_vocab_size=tokenizer_vocab_size)



In [None]:
trial_model.summary()

Model: "model_6"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 item_id (InputLayer)           [(None, 15)]         0           []                               
                                                                                                  
 embedding_item (Embedding)     (None, 15, 64)       280576      ['item_id[0][0]']                
                                                                                                  
 batchnorm_inputs (BatchNormali  (None, 15, 64)      256         ['embedding_item[0][0]']         
 zation)                                                                                          
                                                                                                  
 LSTM_cat (LSTM)                (None, 15, 32)       12416       ['batchnorm_inputs[0][0]'] 

In [None]:
trained_model, training_history = fit_model(trial_model, train_dataset, steps_per_epoch=100, epochs=10)





INFO:tensorflow:Single-worker MultiWorkerMirroredStrategy with local_devices = ('/device:CPU:0',), communication = CommunicationImplementation.AUTO


INFO:tensorflow:Single-worker MultiWorkerMirroredStrategy with local_devices = ('/device:CPU:0',), communication = CommunicationImplementation.AUTO


Epoch 1/10


ValueError: in user code:

    File "/Users/yuvraj/miniconda/lib/python3.10/site-packages/keras/engine/training.py", line 1160, in train_function  *
        return step_function(self, iterator)
    File "/Users/yuvraj/miniconda/lib/python3.10/site-packages/keras/engine/training.py", line 1146, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/Users/yuvraj/miniconda/lib/python3.10/site-packages/keras/engine/training.py", line 1135, in run_step  **
        outputs = model.train_step(data)
    File "/Users/yuvraj/miniconda/lib/python3.10/site-packages/keras/engine/training.py", line 998, in train_step
        return self.compute_metrics(x, y, y_pred, sample_weight)
    File "/Users/yuvraj/miniconda/lib/python3.10/site-packages/keras/engine/training.py", line 1092, in compute_metrics
        self.compiled_metrics.update_state(y, y_pred, sample_weight)
    File "/Users/yuvraj/miniconda/lib/python3.10/site-packages/keras/engine/compile_utils.py", line 605, in update_state
        metric_obj.update_state(y_t, y_p, sample_weight=mask)
    File "/Users/yuvraj/miniconda/lib/python3.10/site-packages/keras/utils/metrics_utils.py", line 77, in decorated
        update_op = update_state_fn(*args, **kwargs)
    File "/Users/yuvraj/miniconda/lib/python3.10/site-packages/keras/metrics/base_metric.py", line 143, in update_state_fn
        return ag_update_state(*args, **kwargs)
    File "/Users/yuvraj/miniconda/lib/python3.10/site-packages/keras/metrics/base_metric.py", line 700, in update_state  **
        matches = ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/Users/yuvraj/miniconda/lib/python3.10/site-packages/keras/metrics/metrics.py", line 3669, in sparse_categorical_accuracy
        matches = metrics_utils.sparse_categorical_matches(y_true, y_pred)
    File "/Users/yuvraj/miniconda/lib/python3.10/site-packages/keras/utils/metrics_utils.py", line 962, in sparse_categorical_matches
        y_true = tf.squeeze(y_true, [-1])

    ValueError: Can not squeeze dim[1], expected a dimension of 1, got 15 for '{{node Squeeze}} = Squeeze[T=DT_FLOAT, squeeze_dims=[-1]](Cast_1)' with input shapes: [?,15].


In [None]:
trial_model_2 = build_model(hp, max_len=15, item_vocab_size=tokenizer_vocab_size)



In [None]:
trial_model_2.summary()

Model: "model_10"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 item_id (InputLayer)           [(None, 15)]         0           []                               
                                                                                                  
 embedding_item (Embedding)     (None, 15, 64)       280576      ['item_id[0][0]']                
                                                                                                  
 batchnorm_inputs (BatchNormali  (None, 15, 64)      256         ['embedding_item[0][0]']         
 zation)                                                                                          
                                                                                                  
 LSTM_cat (LSTM)                (None, 15, 32)       12416       ['batchnorm_inputs[0][0]']

In [None]:
trained_model, training_history = fit_model(trial_model, train_dataset, steps_per_epoch=100, epochs=10)





INFO:tensorflow:Single-worker MultiWorkerMirroredStrategy with local_devices = ('/device:CPU:0',), communication = CommunicationImplementation.AUTO


INFO:tensorflow:Single-worker MultiWorkerMirroredStrategy with local_devices = ('/device:CPU:0',), communication = CommunicationImplementation.AUTO


Epoch 1/10


TypeError: 'NoneType' object is not callable

In [24]:
model_ED = keras.Sequential()
model_ED.add(keras.layers.LSTM(150, batch_input_shape=(50, 15, 1), stateful=True))
model_ED.add(keras.layers.RepeatVector(15))
model_ED.add(keras.layers.LSTM(150, return_sequences=True, stateful=True))
model_ED.add(keras.layers.TimeDistributed(keras.layers.Dense(tokenizer_vocab_size, activation='softmax')))
model_ED.compile(loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True), optimizer='Adam', metrics=['sparse_categorical_accuracy'])

In [25]:
_X = training_data_X.reshape(50000, 15, 1).copy()

In [26]:
_X.shape

(50000, 15, 1)

In [27]:
_X[0]

array([[ 132],
       [  81],
       [  87],
       [  53],
       [ 359],
       [ 865],
       [ 267],
       [  22],
       [1611],
       [ 804],
       [  46],
       [  28],
       [ 215],
       [  11],
       [  79]])

In [None]:
_y[0]

array([[ 865],
       [ 267],
       [  22],
       [1611],
       [ 804],
       [  46],
       [  28],
       [ 215],
       [  11],
       [  79],
       [ 447],
       [   5],
       [  15],
       [ 550],
       [  84]])

In [28]:
_y = training_data_y.reshape(50000, 15, 1)

In [29]:
_y.shape

(50000, 15, 1)

In [37]:
model_ED.fit(_X, _y, batch_size=50, epochs=1)

2023-01-29 04:02:07.315779: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz




<keras.callbacks.History at 0x2cd4d7400>

In [78]:
_X[0].reshape(1, 15,1)

array([[[ 132],
        [  81],
        [  87],
        [  53],
        [ 359],
        [ 865],
        [ 267],
        [  22],
        [1611],
        [ 804],
        [  46],
        [  28],
        [ 215],
        [  11],
        [  79]]])

In [39]:
predictions = model_ED.predict(_X, batch_size=50)[0]



In [40]:
predictions.shape

(15, 4384)

In [42]:
predictions[0, :5]

array([7.3849020e-09, 2.6411006e-05, 9.9977034e-01, 3.3581380e-06,
       6.4954620e-05], dtype=float32)