In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from scipy import sparse
import os
import random

In [2]:
tf.__version__

'2.0.0'

# Read Data
- all datasets datetime sorted

In [3]:
# path = 'C:/Users/robin.opdam/Google Drive/Thesis (Msc)/Thesis_shared_files/'
path = '/Users/Robin/Google Drive/Thesis (Msc)/Thesis_shared_files/'
# path = './'

## Amazon Fashion 

In [4]:
# data_path = 'data/Amazon/'
# file_name = 'Amazon_full' 
# file_name = 'Amazon_05_users' 
# file_name = 'Amazon_01_users'
# file_name = 'Amazon_005_users'
# file_name = 'Amazon_001_users'

## MovieLens 

In [5]:
data_path = 'data/ML/'
# file_name = 'ML_full' 
# file_name = 'ML_05_users'
file_name = 'ML_01_users'
# file_name = 'ML_005_users'
# file_name = 'ML_001_users'

In [6]:
df = pd.read_pickle(file_name)
df.head()

Unnamed: 0,user,item,rating,datetime,item_id,user_id
18590190,120461,2501,5.0,2000-04-25 02:29:35,2410,120460
18590032,120461,252,4.0,2000-04-25 02:29:35,249,120460
18590159,120461,2069,4.0,2000-04-25 02:29:35,1980,120460
18590048,120461,440,4.0,2000-04-25 02:29:35,435,120460
18590145,120461,1959,4.0,2000-04-25 02:29:35,1870,120460


# Data Prep
Create new ids for users and items that match the row and column indices of the user-item interaction matrix

In [9]:
df['item_id'] = df.item.astype('category').cat.codes
df['user_id'] = df.user.astype('category').cat.codes

## Train Test Split
### Leave last item out of subset of users

In [10]:
def leave_users_out(full_data, leave_out, seed=1234):
    np.random.seed(seed)
    full_data['index'] = full_data.index
    user_index_df = full_data.groupby('user')['index'].apply(list)
    users = np.random.choice(list(user_index_df.index), leave_out, replace=False)
    users_indices = []
    
    for user in users:
        users_indices.extend(user_index_df.loc[user])
    
    sub_set = full_data.loc[users_indices]
    remaining = full_data.drop(users_indices)
    
    return remaining.drop(columns=['index']), sub_set.drop(columns=['index'])

In [11]:
def leave_last_x_out(full_data, n_users, leave_out=1, seed=1234):
    """ 
    Input: data must contain user_id
    Output: full_data = without all last (time order) entries in leave one out set
            leave_one_out_set = data with one user and one item from full_data
    """
    np.random.seed(seed)
    
    full_data['index'] = full_data.index
    user_items_ind = full_data.groupby('user_id')['index'].apply(list)
    users = full_data.user_id.unique()
    leave_out_indices = []
    users_picked = []
    
    for i in range(len(full_data.user_id.unique())):
        random_user = np.random.choice(users)
        item_indices = user_items_ind[random_user] # random user's items indices
        if random_user in users_picked or len(item_indices) <= leave_out:
            random_user = np.random.choice(users)
            item_indices = user_items_ind[random_user] # random user's items indices
        else:
            users_picked.append(random_user)
            leave_out_indices.extend(item_indices[-leave_out:])
        
        if len(users_picked) == n_users:
            break
        
    if len(users_picked) < n_users:
        error = 'Cannot pick ' + str(n_users) + ' users with more than ' + str(leave_out) + ' items'
        solution = '\nTry a smaller test and/or validation percentage of the data'
        raise ValueError(error + solution) 
            
    leave_out_set = full_data.loc[leave_out_indices] # the last items of n_users users with n_item > leave_out
    full_data_leave_one_out = full_data.drop(leave_out_indices) # drops last items for n_users users
    
    return full_data_leave_one_out.drop(columns=['index']), leave_out_set.drop(columns=['index'])

In [12]:
def train_val_test_split(df, batch_size, val_perc, test_perc, n_items_val, n_items_test, stats=True):
    """
    Input: df with user and item id, batch size for CFRNN data, val and test perc of users
           number of last items to leave out for val and test set
    Output: full_data = total users and items of the original df, 
            Train, validation and test sets
    """
    
    total_users = len(df.user_id.unique()) # Need all users for BPR
    total_items = len(df.item_id.unique()) # Need all items for CFRNN
    
    users_to_remove = len(df.user_id.unique())%batch_size #Batch size compatible for CFRNN
    df_new, deleted_users = leave_users_out(df, users_to_remove)

    test_users = int(test_perc*total_users / 64 + 1) * 64 # Number of users to be used for testing
    test_last_items = n_items_test # Items to be removed from test users in train set and used in test set

    val_users = int(val_perc*total_users / 64 + 1) * 64
    val_last_items = n_items_val
    
    train_set, test_set = leave_last_x_out(df_new, test_users, test_last_items)
    train_set, val_set = leave_last_x_out(train_set, val_users, val_last_items)
    
    if stats:
        print('Total number of items:', total_items)
        print('Total users:', total_users)
        print('Number of train users:', len(train_set.user_id.unique()))
        print('Number of test users:', test_users)
        print('Number of validation users:', val_users, '\n')
        print('Users deleted:', len(deleted_users.user_id.unique()))
    
    return total_users, total_items, train_set, val_set, test_set

### Data Sizes

In [13]:
BATCH_SIZE = 32
val_perc = test_perc = 0.1
n_last_items_val = n_last_items_test = 1

### Create Split

In [14]:
total_users, total_items, train_set, val_set, test_set = train_val_test_split(df, BATCH_SIZE, val_perc, test_perc, n_last_items_val, n_last_items_test)

Total number of items: 27387
Total users: 16254
Number of train users: 16224
Number of test users: 1664
Number of validation users: 1664 

Users deleted: 30


---
# Data Prep
1. Each (relatively) ordered item sequence per user will be viewed as one time series
2. **Sequences will be batched based on their length such that padding is minimal**
3. Each batch consists out of BATCH_SIZE users sequences



## Train and Target sequences

In [29]:
def get_x_y_sequences_ordered(dataset, n_unknowns_in_y=1, stats=True):
    user_sequences_x = []
    user_sequences_y = []
    lengths = []
    users = dataset.user_id.unique()
    shortest_u_seq_order = list(df.groupby('user_id')['item_id'].count().sort_values().index)
    
    for u in shortest_u_seq_order:
        user_item_seq = np.array(dataset[dataset['user_id']==u]['item_id'])
        user_sequences_x.append(user_item_seq[:-n_unknowns_in_y])
        user_sequences_y.append(user_item_seq[n_unknowns_in_y:])
        lengths.append(len(user_item_seq))
    
    median = np.median(lengths)
    
    if stats:
        print('Number of sequences x:', len(user_sequences_x), 
              '\nAvg sequence length x:', np.average(lengths),
              '\nStd_dev sequence length x:', np.round(np.std(lengths),2),
              '\nMedian of sequence length x:', median)

    return user_sequences_x, user_sequences_y, shortest_u_seq_order

In [30]:
def min_padding(sequences, batch_size, max_len):
    padded_sequences = []
    batch = []
    max_batch_seq_len = 0
    for i, seq in enumerate(sequences):
        batch.append(seq)
        if max_batch_seq_len > max_len:
            max_batch_seq_len = max_len
            
        elif max_batch_seq_len < len(seq):
            max_batch_seq_len = len(seq)
            
        if (i+1)%batch_size == 0:
            padded_sequences.append(tf.keras.preprocessing.sequence.pad_sequences(batch, maxlen=int(max_batch_seq_len), padding='post', truncating='pre'))
            max_batch_seq_len = 0
            batch = [] 
            
    return padded_sequences


### Create minimal padded sequences

In [32]:
max_seq_len = 500
shift_targets_by = 1

In [34]:
user_sequences_x, user_sequences_y, user_order = get_x_y_sequences_ordered(train_set, shift_targets_by)

padded_sequences_x = min_padding(user_sequences_x, BATCH_SIZE, max_seq_len)
padded_sequences_y = min_padding(user_sequences_y, BATCH_SIZE, max_seq_len)

Number of sequences x: 16254 
Avg sequence length x: 150.0122431401501 
Std_dev sequence length x: 242.4 
Median of sequence length x: 70.0


---
# LSTM Model
Collaborative Filtering with Recurrent Neural Networks

- paper: https://arxiv.org/pdf/1608.07400.pdf
- code: https://github.com/rdevooght/sequence-based-recommendations (in Theano)

---
## Model Prep

### Architecture

In [35]:
def build_model(total_items, embedding_dim, rnn_units, batch_size):
    model = tf.keras.Sequential([
        tf.keras.layers.Embedding(total_items, embedding_dim,
                                  batch_input_shape=[batch_size, None]),
        
        tf.keras.layers.LSTM(units=rnn_units,
                             return_sequences=True,
                             stateful=False, #Reset cell states with each batch
                             recurrent_initializer='glorot_uniform'),
        
        tf.keras.layers.Dense(total_items)
    ])
    return model

### Build Model

In [36]:
embedding_dim = 100
rnn_units = 20

In [37]:
model = build_model(
total_items = total_items,
embedding_dim = embedding_dim,
rnn_units = rnn_units,
batch_size = BATCH_SIZE)

### Add Loss
- **Added one hot encoding of the labels to match logits output after dense layer**

In [73]:
# def recall(y_true, y_pred):
#     K = tf.keras.backend
#     y_true = K.one_hot(tf.dtypes.cast(y_true, tf.int32), total_items)
#     y_true = K.ones_like(y_true) 
#     true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
#     all_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    
#     recall = true_positives / (all_positives + K.epsilon())
#     return recall

In [38]:
def loss(labels, logits):
    oh_labels = tf.keras.backend.one_hot(tf.dtypes.cast(labels, tf.int32), total_items)
    return tf.keras.losses.categorical_crossentropy(oh_labels, logits, from_logits=True)

In [39]:
model.compile(optimizer='Adagrad', loss=loss)#, metrics=[recall])

## Summmary

In [40]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (32, None, 100)           2738700   
_________________________________________________________________
lstm_1 (LSTM)                (32, None, 20)            9680      
_________________________________________________________________
dense_1 (Dense)              (32, None, 27387)         575127    
Total params: 3,323,507
Trainable params: 3,323,507
Non-trainable params: 0
_________________________________________________________________


---
## Train Model

### Batch Generator
- Needed for the different size batches 

In [41]:
class Batch_Generator(tf.keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, X, y, batch_size=1, shuffle=True):
        'Initialization'
        self.X = X
        self.y = y
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return len(self.y)#int(np.floor(len(self.y)/self.batch_size))

    def __getitem__(self, index):
        return self.__data_generation(index)

    def on_epoch_end(self):
        'Shuffles indexes after each epoch'
        self.indexes = np.arange(len(self.y))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, index):
        return np.stack(self.X[index]), np.stack(self.y[index])

### Prep Validation Data

In [42]:
user_sequences_val_x, user_sequences_val_y, median = get_x_y_sequences_ordered(val_set, 1, stats=False)

padded_sequences_val_x = min_padding(user_sequences_val_x, 32, 500)
padded_sequences_val_y = min_padding(user_sequences_val_y, 32, 500)

In [80]:
# user_sequences_val_x, user_sequences_val_y, median = get_x_y_sequences(val_set, 1, stats=False)

# padded_sequences_val_x = pad_sequences(user_sequences_val_x, max_seq_length, stats=False)
# padded_sequences_val_y = pad_sequences(user_sequences_val_y, max_seq_length, stats=False)

# sequences_data_val_x = tf.data.Dataset.from_tensor_slices(padded_sequences_val_x) 
# sequences_data_val_y = tf.data.Dataset.from_tensor_slices(padded_sequences_val_y) 

# val_dataset = tf.data.Dataset.zip((sequences_data_val_x, sequences_data_val_y))
# val_dataset = val_dataset.batch(BATCH_SIZE, drop_remainder=False)

### Configure Checkpoints

In [43]:
# Directory where the checkpoints will be saved
checkpoint_dir = './rnn_train_checkpoints'

# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath = checkpoint_prefix,
    save_weights_only = True)

---
## Fit Model

In [44]:
epochs = 200
print('#Batches:', len(padded_sequences_x))
print('Batch size:', BATCH_SIZE)

#Batches: 507
Batch size: 32


In [27]:
# def train_generator():
#     for X_train, y_train in zip(padded_sequences_x, padded_sequences_y):
#         yield (X_train, y_train)

In [28]:
history = model.fit_generator(generator=Batch_Generator(
                              X=padded_sequences_x, 
                              y=padded_sequences_y, 
                              batch_size=BATCH_SIZE,
                              shuffle=True),
                              
#                               validation_data=Batch_Generator(
#                               X=padded_sequences_val_x,
#                               y=padded_sequences_val_y,
#                               batch_size=BATCH_SIZE,
#                               shuffle=True),
                              
                              epochs=epochs, 
                              callbacks=[checkpoint_callback],
                              use_multiprocessing=True)

Epoch 1/200
Epoch 2/200
Epoch 3/200

Process Keras_worker_ForkPoolWorker-3:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
KeyboardInterrupt


KeyboardInterrupt: 

Process Keras_worker_ForkPoolWorker-4:
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 407, in _recv_bytes
    buf = self._recv(4)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 379, in _recv
    chunk = read(handle, remaining)
KeyboardInterrupt


In [37]:
file_name

'ML_01_users'

In [38]:
results = pd.DataFrame(history.history)
results.to_pickle('his_vsl_01_ml_users_200_epochs')

---
## Continue training from checkpoint

In [None]:
model.summary()

In [None]:
tf.train.latest_checkpoint(checkpoint_dir)
model = build_model(n_items, embedding_dim, rnn_units, batch_size=100)
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.compile(optimizer='Adagrad', loss=loss)

In [None]:
aditional_epochs = 1

In [None]:
model.fit(dataset, epochs=aditional_epochs, callbacks=[checkpoint_callback])

---
# Predict Sequences

## Restore Latest Checkpoints

In [39]:
tf.train.latest_checkpoint(checkpoint_dir)

'./rnn_train_checkpoints/ckpt'

In [40]:
model = build_model(total_items, embedding_dim, rnn_units, batch_size=1)

model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
                   
model.build(tf.TensorShape([1, None]))

In [41]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (1, None, 100)            2738700   
_________________________________________________________________
lstm_1 (LSTM)                (1, None, 20)             9680      
_________________________________________________________________
dense_1 (Dense)              (1, None, 27387)          575127    
Total params: 3,323,507
Trainable params: 3,323,507
Non-trainable params: 0
_________________________________________________________________


---
## Create Predictions

**Using train_set sequences to predict test_set item(s)**

In [112]:
test_user_seq = np.array(train_set[train_set['user_id']==10278]['item_id']).reshape(-1,1).T
true_items = list(test_set[test_set['user_id']==10278]['item_id'])
predictions = tf.squeeze(model(test_user_seq),0)
predictions = tf.random.categorical(predictions, num_samples=1).numpy()
user_seq = np.array(df[df['user_id']==10278]['item_id']).reshape(-1,1).T

for item, pred_item, real in zip(list(test_user_seq[0]), predictions, user_seq[0]):
    print(item, pred_item, real)

145 [10474] 145
578 [0] 578
576 [0] 576
309 [0] 309
583 [1022] 583
331 [0] 331
9 [2726] 9
156 [0] 156
0 [175] 0
203 [9] 203
180 [9624] 180
448 [0] 448
401 [25633] 401
310 [219] 310
445 [6981] 445
423 [0] 423
356 [22495] 356
543 [9416] 543
329 [168] 329
348 [15398] 348
230 [0] 230
471 [0] 471
431 [0] 431
181 [2245] 181
16 [0] 16
137 [160] 137
163 [85] 163
259 [152] 259
342 [0] 342
369 [2372] 369
218 [0] 218
500 [1445] 500
266 [5458] 266
271 [5412] 271
61 [1189] 61
491 [699] 491
349 [189] 349
231 [1535] 231
340 [20186] 340
530 [4190] 530
575 [0] 575
585 [7748] 585
459 [1176] 459
506 [595] 506
499 [1800] 499
57 [321] 57
4 [20450] 4
488 [3623] 488
360 [0] 360
630 [1242] 630
482 [2664] 482
363 [7245] 363
190 [6220] 190
343 [5474] 343
582 [18036] 582
428 [3654] 428
375 [6990] 375
520 [1191] 520
139 [2823] 139
466 [1176] 466
622 [3350] 622
525 [15022] 525
584 [699] 584
353 [102] 353
524 [9537] 524
526 [4045] 526
528 [3876] 528
374 [2294] 374
403 [0] 403
529 [1881] 529
339 [1032] 339
273 [1336

In [83]:
def get_predictions(model, train_set, test_set, rank_at, temp=0.9):

    predictions_df = pd.DataFrame(columns=['user', 'pred_seq', 'true_seq'])
    for u in test_set.user_id.unique():
        test_user_seq = np.array(train_set[train_set['user_id']==u]['item_id'])
        true_items = list(test_set[test_set['user_id']==u]['item_id'])
        generated_predictions = []

        #Predict
        for item in range(rank_at): #could be any number of recommended items you want to predict
            predictions = model(test_user_seq.reshape(-1,1).T)
            predictions = tf.squeeze(predictions, 0)

            predictions = predictions / temp
            predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()
            test_user_seq = np.append(test_user_seq, predicted_id).reshape(-1,1).transpose()

    #         half_test_seq = tf.expand_dims([predicted_id], 0)
            generated_predictions.append(predicted_id)

        predictions_df = predictions_df.append({'user':u, 'pred_seq':generated_predictions, 'true_seq':true_items}, ignore_index=True)
        
    return predictions_df

In [84]:
rank_at = 20
predictions_df = get_predictions(model, train_set, val_set, rank_at)

In [85]:
predictions_df

Unnamed: 0,user,pred_seq,true_seq
0,10278,"[2437, 1165, 7804, 12191, 0, 0, 0, 0, 0, 0, 11...",[726]
1,13306,"[2361, 188, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...",[181]
2,8372,"[2597, 3691, 1149, 5796, 581, 0, 2590, 4847, 1...",[1141]
3,13798,"[0, 0, 247, 18025, 1499, 0, 359, 1148, 356, 57...",[13667]
4,348,"[16519, 2550, 15150, 760, 26216, 22268, 5115, ...",[17254]
...,...,...,...
1659,8315,"[1219, 16345, 1055, 8044, 14236, 11873, 3121, ...",[11018]
1660,4504,"[3111, 1050, 0, 10479, 12160, 1789, 0, 471, 0,...",[1351]
1661,2582,"[191, 12191, 10510, 822, 708, 16409, 10262, 18...",[13540]
1662,4847,"[708, 1161, 4569, 13358, 11852, 2780, 0, 11573...",[14757]


In [86]:
predictions_df.to_pickle('CFRNN_res_200_ML_01_users')

In [None]:
for i, row in predictions_df.iterrows():
    if len(row['true_seq']) > 1:
        print(row)

# Evaluate

In [87]:
# result_path = 'Results/CFRNN/'
# predictions = pd.read_pickle(path + result_path + 'CFRNN_res_200_ML_01_users')
predictions = predictions_df

In [88]:
def get_metrics(ranked_df, steps, max_rank):
    s = time.time()
    ranks_at = [1] + [i for i in range(steps, max_rank + steps, steps)]
    hitcounts = []
    recs_at = []
    precs_at = []
    metrics = pd.DataFrame(columns=['rank_at', 'hitcounts', 'recall', 'precision'])
    for rank in ranks_at:
        hitcount = 0
        for i, row in ranked_df.iterrows():
            hitcount +=  len(set(row['true_id']) & set(row['pred_items_ranked'][:rank]))

        prec_at = hitcount / rank / len(ranked_df)
        rec_at = hitcount / len(ranked_df.iloc[0]['true_id']) / len(ranked_df)

        hitcounts.append(hitcount)                     
        recs_at.append(rec_at)
        precs_at.append(prec_at)

    metrics['rank_at'] = ranks_at
    metrics['hitcounts'] = hitcounts
    metrics['recall'] = recs_at
    metrics['precision'] = precs_at
    print('Obtaining metrics time:', round(time.time() - s,2))
    return metrics

In [89]:
renames = {'pred_seq':'pred_items_ranked', 'true_seq':'true_id'}
predictions = predictions.rename(renames, axis=1)

In [90]:
import time

In [91]:
metrics_val_set = get_metrics(predictions, 5, 20)

Obtaining metrics time: 1.15


In [92]:
metrics_val_set

Unnamed: 0,rank_at,hitcounts,recall,precision
0,1,1,0.000601,0.000601
1,5,8,0.004808,0.000962
2,10,10,0.00601,0.000601
3,15,12,0.007212,0.000481
4,20,13,0.007812,0.000391


In [50]:
metrics

Unnamed: 0,rank_at,hitcounts,recall,precision
0,1,2,0.001202,0.001202
1,5,8,0.004808,0.000962
2,10,14,0.008413,0.000841
3,15,15,0.009014,0.000601
4,20,17,0.010216,0.000511


In [61]:
metrics_temp_06

Unnamed: 0,rank_at,hitcounts,recall,precision
0,1,2,0.001202,0.001202
1,5,3,0.001803,0.000361
2,10,3,0.001803,0.00018
3,15,3,0.001803,0.00012
4,20,3,0.001803,9e-05


In [71]:
metrics_temp_2

Unnamed: 0,rank_at,hitcounts,recall,precision
0,1,0,0.0,0.0
1,5,1,0.000601,0.00012
2,10,2,0.001202,0.00012
3,15,2,0.001202,8e-05
4,20,3,0.001803,9e-05


In [51]:
file_name

'ML_01_users'

In [None]:
metrics.to_pickle(path + 'Results/CFRNN/' + 'metrics_CFRNN_' + file_name)

# Appendix

In [None]:
# oh_input = tf.keras.backend.one_hot(padded, n_items)
# e = tf.keras.layers.Embedding(n_items, 100, input_length=max_l)

In [None]:
# One hot encoded input
# sequences_data_x = tf.data.Dataset.from_tensor_slices(tf.keras.backend.one_hot(padded_sequences_x, n_items)) 
# sequences_data_y = tf.data.Dataset.from_tensor_slices(tf.keras.backend.one_hot(padded_sequences_y, n_items)) 