Mengimport library yang dibutuhkan

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import os
from sklearn.model_selection import train_test_split
import random

Mengimport training data

In [None]:
# Training interactions; later cells read the "UserID" and "ProductID" columns.
# NOTE(review): presumably one row per user-product interaction — confirm against the data source.
df = pd.read_parquet("https://drive.google.com/uc?export=download&id=1A-EIdeFtptDcCsLF0hz8N8dl_iGBrmZe")

# Vocabularies of user ids and product ids, flattened to 1-D arrays of str.
dummy_users = pd.read_parquet("https://drive.google.com/uc?export=download&id=1qmm-B0YKXLYAX9vRbbrANGvw9EGp1YV3").values.flatten().astype(str)
products = pd.read_parquet("https://drive.google.com/uc?export=download&id=1k-D53GRj6kew-CcZXI1y2lXRR1oM0alI").values.flatten().astype(str)

In [None]:
# Hold out 40% of the rows for validation; the fixed seed keeps the split
# identical across kernel restarts.
train1, test = train_test_split(df, test_size=0.4, random_state=42)

In [None]:
# Shuffle the training rows (frac=1 keeps every row); seeded so the order is reproducible.
train = train1.sample(frac=1, random_state=42)

Mendefinisikan Model Rekomendasi: memberikan nomor random untuk setiap user dan produk

In [None]:
class SimpleRecommender(tf.keras.Model):
    """Dot-product recommender over learned user and product embeddings.

    String ids are mapped to embedding rows through static hash tables built
    from the vocabularies given at construction time. Ids that are not in a
    vocabulary map to -1 (the table default), which is not a valid embedding
    row.

    Args:
        dummy_users: sequence of user-id strings; position i becomes row i of
            the user embedding.
        products: sequence of product-id strings; position i becomes row i of
            the product embedding.
        length_of_embedding: dimensionality of both embedding spaces.
    """

    def __init__(self, dummy_users, products, length_of_embedding):
        super().__init__()
        # Vocabularies are kept as string tensors so ids can be looked up and
        # gathered inside TensorFlow ops.
        self.products = tf.constant(products, dtype=tf.string)
        self.dummy_users = tf.constant(dummy_users, dtype=tf.string)

        # id string -> row index; unknown ids resolve to -1.
        self.dummy_user_table = tf.lookup.StaticHashTable(
            tf.lookup.KeyValueTensorInitializer(self.dummy_users, range(len(dummy_users))), -1)
        self.product_table = tf.lookup.StaticHashTable(
            tf.lookup.KeyValueTensorInitializer(self.products, range(len(products))), -1)

        self.user_embedding = tf.keras.layers.Embedding(len(dummy_users), length_of_embedding)
        self.product_embedding = tf.keras.layers.Embedding(len(products), length_of_embedding)

        # Scores are dot products along the embedding axis.
        self.dot = tf.keras.layers.Dot(axes=-1)

    def call(self, inputs):
        """Score candidate products for each user.

        Args:
            inputs: pair ``(user_ids, product_ids)`` of string tensors. In this
                notebook they are shaped (batch, 1) and (batch, n_candidates).

        Returns:
            Float tensor of dot-product scores with axis 1 of the Dot output
            squeezed away; (batch, n_candidates) for the shapes above.
        """
        user = inputs[0]
        products = inputs[1]

        user_embedding_index = self.dummy_user_table.lookup(user)
        product_embedding_index = self.product_table.lookup(products)

        user_embedding_values = self.user_embedding(user_embedding_index)
        product_embedding_values = self.product_embedding(product_embedding_index)

        return tf.squeeze(self.dot([user_embedding_values, product_embedding_values]), 1)

    @tf.function
    def call_item_item(self, product, k=100):
        """Return the products whose embeddings score highest against ``product``.

        Args:
            product: scalar string tensor holding one product id.
            k: Python int, maximum number of results (default 100). It is
                clamped to the catalogue size so small catalogues do not make
                ``top_k`` fail.

        Returns:
            ``(top_ids, top_scores)``: product-id strings and their dot-product
            scores, ordered by descending score.
        """
        product_x = self.product_table.lookup(product)
        pe = tf.expand_dims(self.product_embedding(product_x), 0)

        # Score the query embedding against every row of the product embedding matrix.
        all_pe = tf.expand_dims(self.product_embedding.embeddings, 0)
        scores = tf.reshape(self.dot([pe, all_pe]), [-1])

        # The vocabulary size is static, so the clamp is resolved at trace time.
        num_results = min(k, int(self.products.shape[0]))
        top_scores, top_indices = tf.math.top_k(scores, k=num_results)
        top_ids = tf.gather(self.products, top_indices)
        return top_ids, top_scores

In [None]:
# Smoke test on an untrained model: two users, three candidate products each.
sr1 = SimpleRecommender(dummy_users, products, length_of_embedding=25)
sr1(
    [
        tf.constant([['2022U0'], ['2022U1']]),
        tf.constant([['2022P0', '2022P1', '2022P2'], ['2022P3', '2022P4', '2022P5']]),
    ]
)

<tf.Tensor: shape=(2, 3), dtype=float32, numpy=
array([[ 0.00127274, -0.00120148,  0.00674033],
       [-0.00536375, -0.00236683, -0.00208339]], dtype=float32)>

In [None]:
# Double brackets keep a trailing axis of size 1, so each tensor is (n_rows, 1).
dummy_user_tensor = tf.constant(train[["UserID"]].to_numpy(), dtype=tf.string)
product_tensor = tf.constant(train[["ProductID"]].to_numpy(), dtype=tf.string)

# One dataset element per (user, product) row.
dataset = tf.data.Dataset.from_tensor_slices((dummy_user_tensor, product_tensor))

# Peek at the first element only.
for x, y in dataset.take(1):
    print(x)
    print(y)

tf.Tensor([b'2022U4675'], shape=(1,), dtype=string)
tf.Tensor([b'2022P765'], shape=(1,), dtype=string)


In [None]:
# Draw 7 product row indices uniformly at random to serve as "not viewed" negatives.
# Sampling is over the whole catalogue, so a draw can coincide with a viewed product.
random_negatives_indexes = tf.random.uniform(
    shape=(7,),
    minval=0,
    maxval=len(products),
    dtype=tf.int32,
)
random_negatives_indexes

<tf.Tensor: shape=(7,), dtype=int32, numpy=array([1335,  123,  753,  848,  681, 1379,  198], dtype=int32)>

In [None]:
# Translate the sampled row indices back into product-id strings.
tf.gather(params=products, indices=random_negatives_indexes)

<tf.Tensor: shape=(7,), dtype=string, numpy=
array([b'2022P397', b'2022P1108', b'2022P1676', b'2022P1761',
       b'2022P1610', b'2022P436', b'2022P1176'], dtype=object)>

In [None]:
# Label layout for 1 positive + 10 negatives: the positive sits at position 0.
tf.one_hot(indices=0, depth=11)

<tf.Tensor: shape=(11,), dtype=float32, numpy=array([1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)>

In [None]:
class Mapper:
    """Dataset map function that appends random negative products to each row.

    For every (user, product) element it produces ``((user, candidates), y)``
    where ``candidates`` is the positive product followed by
    ``num_negative_products`` uniformly sampled product ids, and ``y`` is a
    one-hot label marking position 0 (the positive).
    """

    def __init__(self, possible_products, num_negative_products):
        self.num_negative_products = num_negative_products

        # Catalogue to sample negatives from, plus its size as the sampling bound.
        self.num_possible_products = len(possible_products)
        self.possible_products_tensor = tf.constant(possible_products, dtype=tf.string)

        # The same label is shared by every element: the positive always comes first.
        self.y = tf.one_hot(0, num_negative_products + 1)

    def __call__(self, user, product):
        # Uniform draws over the whole catalogue; a draw may repeat or hit the positive.
        sampled_rows = tf.random.uniform(
            shape=(self.num_negative_products,),
            minval=0,
            maxval=self.num_possible_products,
            dtype=tf.int32,
        )
        sampled_products = tf.gather(self.possible_products_tensor, sampled_rows)

        # Positive first, negatives after, concatenated along axis 0.
        return (user, tf.concat([product, sampled_products], axis=0)), self.y

In [None]:
# Attach 10 random negatives to every (user, product) pair.
dataset = tf.data.Dataset.from_tensor_slices((dummy_user_tensor, product_tensor))
dataset = dataset.map(Mapper(products, 10))

# Inspect a single mapped element: user, candidates (positive first) and label.
for (u, c), y in dataset.take(1):
  print(u)
  print(c)
  print(y)

tf.Tensor([b'2022U4675'], shape=(1,), dtype=string)
tf.Tensor(
[b'2022P765' b'2022P558' b'2022P1972' b'2022P255' b'2022P1011' b'2022P565'
 b'2022P610' b'2022P607' b'2022P750' b'2022P695' b'2022P631'], shape=(11,), dtype=string)
tf.Tensor([1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.], shape=(11,), dtype=float32)


In [None]:
def get_dataset(df, products, num_negative_products, batch_size=1024):
    """Build a batched dataset of (user, candidates) inputs with one-hot labels.

    Args:
        df: DataFrame with "UserID" and "ProductID" columns.
        products: sequence of all product ids that negatives are sampled from.
        num_negative_products: number of random negatives appended to each
            positive product.
        batch_size: number of rows per batch (default 1024).

    Returns:
        A ``tf.data.Dataset`` yielding ``((users, candidates), labels)``. With
        the column selection used here, users are (batch, 1) and candidates and
        labels are (batch, num_negative_products + 1), positive in column 0.
    """
    # Double brackets keep a trailing axis of size 1 on each tensor.
    dummy_user_tensor = tf.constant(df[["UserID"]].values, dtype=tf.string)
    product_tensor = tf.constant(df[["ProductID"]].values, dtype=tf.string)

    dataset = tf.data.Dataset.from_tensor_slices((dummy_user_tensor, product_tensor))
    # Negatives are sampled per element, before batching.
    dataset = dataset.map(Mapper(products, num_negative_products))
    return dataset.batch(batch_size)

In [None]:
# Show the first batch produced with 4 negatives per positive.
for (u, c), y in get_dataset(train, products, 4).take(1):
  print(u)
  print(c)
  print(y)

tf.Tensor(
[[b'2022U4675']
 [b'2022U1286']
 [b'2022U2621']
 ...
 [b'2022U4174']
 [b'2022U2860']
 [b'2022U4048']], shape=(1024, 1), dtype=string)
tf.Tensor(
[[b'2022P765' b'2022P964' b'2022P1231' b'2022P743' b'2022P1752']
 [b'2022P1807' b'2022P653' b'2022P1739' b'2022P1517' b'2022P1799']
 [b'2022P166' b'2022P431' b'2022P1095' b'2022P1520' b'2022P504']
 ...
 [b'2022P1770' b'2022P820' b'2022P1852' b'2022P1958' b'2022P389']
 [b'2022P1029' b'2022P1403' b'2022P390' b'2022P141' b'2022P1789']
 [b'2022P1794' b'2022P1127' b'2022P1786' b'2022P1478' b'2022P350']], shape=(1024, 5), dtype=string)
tf.Tensor(
[[1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 ...
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]
 [1. 0. 0. 0. 0.]], shape=(1024, 5), dtype=float32)


In [None]:
# 100-dimensional embeddings for users and products.
model = SimpleRecommender(dummy_users, products, 100)

# The model emits raw dot-product scores, hence from_logits=True; the label
# marks which of the candidates is the positive.
model.compile(
    optimizer=tf.keras.optimizers.Adagrad(learning_rate=10),
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)

# Train and validate with 5 random negatives per positive.
model.fit(
    get_dataset(train, products, 5),
    validation_data=get_dataset(test, products, 5),
    epochs=15,
)

Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x7f036eea3590>

In [None]:
# Save the trained model under the "test" path.
# NOTE(review): the recorded output of this cell is a NotImplementedError — confirm
# the save succeeds on a fresh Restart & Run All.
model.save(filepath="test")

NotImplementedError: ignored

In [None]:
# Print the layer list and parameter counts of the trained model.
model.summary()

Model: "simple_recommender_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_16 (Embedding)    multiple                  569600    
                                                                 
 embedding_17 (Embedding)    multiple                  200400    
                                                                 
 dot_8 (Dot)                 multiple                  0         
                                                                 
Total params: 770,000
Trainable params: 770,000
Non-trainable params: 0
_________________________________________________________________


In [None]:
import pickle

# Serialise the trained model object to "model.pickle".
# NOTE(review): pickled Keras models are tied to the library versions that wrote
# them — confirm this artifact is actually needed alongside model.save().
with open("model.pickle","wb") as pickle_file:
  pickle.dump(model, pickle_file)



INFO:tensorflow:Assets written to: ram://0bc64935-91ad-4334-a27b-6a79113f849f/assets


INFO:tensorflow:Assets written to: ram://0bc64935-91ad-4334-a27b-6a79113f849f/assets


In [None]:
!zip test.zip test

  adding: test/ (stored 0%)


In [None]:
# Restore the model previously written to the "test" path.
model_load = tf.keras.models.load_model(filepath="test")

In [None]:
# First weight of the restored model; the recorded output shows a (5696, 100) embedding matrix.
model_load.weights[0]

<tf.Variable 'simple_recommender_8/embedding_16/embeddings:0' shape=(5696, 100) dtype=float32, numpy=
array([[ 0.09156609, -0.00174855,  0.98805785, ..., -0.3030135 ,
         0.18610561, -0.06433531],
       [ 0.29778105, -0.73424447,  0.5805011 , ...,  0.17244406,
         0.25087658, -0.23656133],
       [-0.17968935,  0.0440236 , -0.3118358 , ...,  0.4487192 ,
         0.11854497, -0.5511509 ],
       ...,
       [ 0.5261049 ,  0.20003417,  0.19530766, ..., -0.17415707,
         0.25661653,  0.7815298 ],
       [ 0.4868169 ,  0.5511425 , -0.26012117, ...,  0.09079498,
         0.006056  , -0.4371972 ],
       [-0.30596715,  0.06270116,  0.31799683, ..., -0.00922189,
        -0.69781923, -0.45874676]], dtype=float32)>

In [None]:
# Fresh, untrained recommender with the same vocabularies and embedding size as `model`.
model_loaded = SimpleRecommender(dummy_users, products, length_of_embedding=100)

In [None]:
# NOTE(review): this displays `model_load` (the restored model) again, not the freshly
# constructed `model_loaded` — confirm which one was meant to be inspected.
model_load.weights[0]

<tf.Variable 'simple_recommender_8/embedding_16/embeddings:0' shape=(5696, 100) dtype=float32, numpy=
array([[ 0.09156609, -0.00174855,  0.98805785, ..., -0.3030135 ,
         0.18610561, -0.06433531],
       [ 0.29778105, -0.73424447,  0.5805011 , ...,  0.17244406,
         0.25087658, -0.23656133],
       [-0.17968935,  0.0440236 , -0.3118358 , ...,  0.4487192 ,
         0.11854497, -0.5511509 ],
       ...,
       [ 0.5261049 ,  0.20003417,  0.19530766, ..., -0.17415707,
         0.25661653,  0.7815298 ],
       [ 0.4868169 ,  0.5511425 , -0.26012117, ...,  0.09079498,
         0.006056  , -0.4371972 ],
       [-0.30596715,  0.06270116,  0.31799683, ..., -0.00922189,
        -0.69781923, -0.45874676]], dtype=float32)>

In [None]:
# Embedding layers create their variable lazily, so make sure it exists before
# assigning to it.
if not model_loaded.user_embedding.built:
    model_loaded.user_embedding.build(None)

# set_weights expects a list of NumPy arrays; tf.Variable has no `.values`
# attribute, so convert with .numpy(). weights[0] is the (5696, 100) user
# embedding matrix of the restored model.
model_loaded.user_embedding.set_weights([model_load.weights[0].numpy()])

AttributeError: ignored

In [None]:
# Product id to query; defined here so the cell runs on a fresh kernel. The value
# matches the id shown in this cell's recorded output.
test_product = "2022P0"
print("Recs for item {}: {}".format(test_product, model.call_item_item(tf.constant(test_product, dtype=tf.string))))

Recs for item 2022P0: (<tf.Tensor: shape=(100,), dtype=string, numpy=
array([b'2022P0', b'2022P1868', b'2022P261', b'2022P1131', b'2022P875',
       b'2022P1687', b'2022P126', b'2022P539', b'2022P441', b'2022P1462',
       b'2022P511', b'2022P1066', b'2022P562', b'2022P1096', b'2022P1658',
       b'2022P439', b'2022P1352', b'2022P1191', b'2022P737', b'2022P337',
       b'2022P1883', b'2022P618', b'2022P1057', b'2022P328', b'2022P1390',
       b'2022P1659', b'2022P760', b'2022P1044', b'2022P526', b'2022P507',
       b'2022P401', b'2022P366', b'2022P1441', b'2022P818', b'2022P435',
       b'2022P1177', b'2022P522', b'2022P1284', b'2022P961', b'2022P1394',
       b'2022P1678', b'2022P1222', b'2022P1058', b'2022P508',
       b'2022P1486', b'2022P1761', b'2022P691', b'2022P1235', b'2022P297',
       b'2022P1392', b'2022P1147', b'2022P1723', b'2022P254',
       b'2022P1254', b'2022P641', b'2022P1146', b'2022P1095', b'2022P911',
       b'2022P709', b'2022P169', b'2022P1282', b'2022P1661', b'2

In [None]:
from tensorflow import keras

# Export the trained recommender under the 'Artjuna' path.
model.save(filepath='Artjuna')

INFO:tensorflow:Assets written to: Artjuna/assets
