In [1]:
import os
import tensorflow as tf

In [2]:
# Label names are taken from the entries of ./dataset. os.listdir() returns
# entries in arbitrary, filesystem-dependent order, so sort them to get a
# stable index -> label mapping on every machine.
# NOTE(review): assumes the model was trained with labels in alphabetical
# order of these entries -- confirm against the training notebook.
class_name = sorted(os.listdir('./dataset'))
class_name

['instagram', 'owner', 'sticker', 'tiktok', 'twitter', 'youtube']

In [7]:
# Location of the word2vec model artifacts, one directory above this notebook.
MODEL_PATH = os.path.join(os.pardir, 'word2vec', 'model-128-64k-100k')
MODEL_PATH

'..\\word2vec\\model-128-64k-100k'

In [8]:
# Stream the lines of metadata.tsv as the vocabulary, dropping any line that is
# exactly "[UNK]" (the TextVectorization layer built from this dataset shows its
# own '[UNK]' entry in get_vocabulary()).
# tf.math.logical_not is used instead of Python `not`: the predicate runs on a
# symbolic tensor, where `not` only works if AutoGraph manages to rewrite the
# lambda. The pattern is a raw string so `\[` is not an invalid string escape.
vocab_ds = tf.data.TextLineDataset(os.path.join(MODEL_PATH, 'metadata.tsv')).filter(
    lambda text: tf.math.logical_not(
        tf.strings.regex_full_match(text, r'\[UNK\]')))


In [9]:
# Vocabulary capacity and the fixed number of tokens per vectorized sequence.
vocab_size = 64000
sequence_length = 128

# Build the text -> token-id layer from the filtered vocabulary dataset.
# as_numpy_iterator() yields each vocabulary line as a bytes value, which is
# what `.numpy()` on the individual string tensors would give.
vectorize_layer = tf.keras.layers.TextVectorization(
    vocabulary=tf.constant(list(vocab_ds.as_numpy_iterator())),
    max_tokens=vocab_size,
    output_sequence_length=sequence_length,
)

In [10]:
# Pull the layer's vocabulary and show the first and last ten entries as a
# quick sanity check.
vocab = vectorize_layer.get_vocabulary()
print(vocab[:10], vocab[-10:], sep='\n')

['', '[UNK]', 'tahun', 'kategori', 'tidak', 'juga', 'atau', 'ia', 'itu', 'indonesia']
['mendivestasi', 'menavigasi', 'menandatangi', 'menagerie', 'memperijazah', 'memperberbagai', 'memoles', 'memento', 'membunuhi', 'mematok']


In [11]:
# Restore the saved Keras model from the 'pretrained' path (relative to the
# notebook's working directory).
model = tf.keras.models.load_model(filepath='pretrained')

In [12]:
# Sample utterances used to smoke-test the classifier.
example_texts = [
    "ubah foto tersebut menjadi stiker",
    "download sebuah video dari youtube",
    "bisakah kamu mengubah foto di atas menjadi sticker?",
    "dapatkah saya berbicara dengan owner anda untuk meminta bantuan?",
    "download video dari instagram",
    "unduhkan saya sebuah video dari tiktok",
    "saya mempunyai link video twitter, download video twitter tersebut",
    "buat stiker",
    "halo bagaimana kabar kamu?",
    "makan ayam goreng"
]
# Vectorize each utterance into a row of `sequence_length` token ids and stack
# the rows into a single batch tensor. `example` keeps holding the tensor, as
# before; the raw strings now live under their own name.
example = tf.constant([vectorize_layer(text).numpy() for text in example_texts])

predicted = model.predict(example)

# enumerate() supplies the row position directly. Recovering it by searching
# `predicted` for a matching row is quadratic and returns the first match, so
# two inputs with identical score vectors would echo the wrong input text.
for index, pred in enumerate(predicted):
    # Position of the largest score (first one on ties) and its value.
    higest_index = int(pred.argmax())
    score = pred[higest_index]
    # Map token ids back to vocabulary entries; vocab[0] is '' so padded
    # positions show up as trailing spaces in the echoed input.
    print("Input:", " ".join([vocab[each] for each in example[index].numpy()]))
    print("Prediction:", pred)
    # NOTE(review): assumes class_name is ordered like the model's output
    # units -- confirm against the training setup.
    print("Predicted label:", class_name[higest_index], ":", score)
    print("")

Input: ubah foto tersebut [UNK] stiker                                                                                                                           
Prediction: [5.2987394e-04 6.1361090e-04 9.8994058e-01 3.1207844e-03 5.0364668e-03
 7.5860741e-04]
Predicted label: sticker : 0.9899406

Input: download sebuah video [UNK] youtube                                                                                                                           
Prediction: [0.00227016 0.02256915 0.01509738 0.0119985  0.02716564 0.9208991 ]
Predicted label: youtube : 0.9208991

Input: bisakah kamu mengubah foto [UNK] atas [UNK] [UNK]                                                                                                                        
Prediction: [5.8259477e-04 5.5261641e-03 9.6953493e-01 4.6514980e-03 1.3389646e-02
 6.3152043e-03]
Predicted label: sticker : 0.96953493

Input: dapatkah saya berbicara [UNK] owner anda [UNK] meminta bantuan                                 