In [13]:
import tensorflow as tf
import tensorflow_text as tf_text

In [2]:
!pip install tensorflow-text

Collecting tensorflow-text
  Downloading tensorflow_text-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.2/5.2 MB[0m [31m34.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting tensorflow<2.17,>=2.16.1 (from tensorflow-text)
  Downloading tensorflow-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (589.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m589.8/589.8 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Collecting h5py>=3.10.0 (from tensorflow<2.17,>=2.16.1->tensorflow-text)
  Downloading h5py-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m76.6 MB/s[0m eta [36m0:00:00[0m
Collecting ml-dtypes~=0.3.1 (from tensorflow<2.17,>=2.16.1->tensorflow-text)
  Downloading ml_dtypes-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.2 MB)

In [22]:
class TextPreprocessor(tf.keras.layers.Layer):
    def __init__(self, max_tokens=10000, **kwargs):
        super().__init__(**kwargs)
        self.tokenizer = tf.keras.layers.TextVectorization(max_tokens=max_tokens)

    def adapt(self, texts):
        self.tokenizer.adapt(texts)

    def preprocess(self, text):
        return self.tokenizer(text)

    def call(self, text):
        return self.preprocess(text)

In [23]:
class CosineSimilarityLayer(tf.keras.layers.Layer):
    def call(self, inputs):
        tfidf_manual, tfidf_keyword = inputs
        similarity = tf.keras.losses.cosine_similarity(tfidf_manual, tfidf_keyword)
        return (1 + similarity) * 50  # Convert to percentage

In [24]:
def create_similarity_model():
    manual_abstract_input = tf.keras.Input(shape=(), dtype=tf.string, name='manual_abstract')
    keyword_query_input = tf.keras.Input(shape=(), dtype=tf.string, name='keyword_query')

    text_preprocessor = TextPreprocessor()
    tfidf_manual = text_preprocessor(manual_abstract_input)
    tfidf_keyword = text_preprocessor(keyword_query_input)

    similarity = CosineSimilarityLayer()([tfidf_manual, tfidf_keyword])

    model = tf.keras.Model(inputs=[manual_abstract_input, keyword_query_input], outputs=similarity)
    return model

In [25]:
# Create the similarity model
model = create_similarity_model()

In [36]:
# Example usage
manual_abstract = ["In the modern technology industry, the utilization of machine learning and natural language processing is increasingly dominant, with applications like facial recognition leveraging machine learning algorithms to understand and interpret users' natural language."]
keyword_queries = ["machine learning OR natural language processing"]  # Ensure it's a list of lists

In [37]:
# Adapt the vectorizer on the manual abstract
preprocessor_layer = model.get_layer(index=2)
preprocessor_layer.adapt([manual_abstract])

In [40]:
# Example usage
manual_abstract = ["In the modern technology industry, the utilization of machine learning and natural language processing is increasingly dominant, with applications like facial recognition leveraging machine learning algorithms to understand and interpret users' natural language."]
keyword_queries = ["machine learning OR natural language processing"]

# Calculate similarity
similarities = model.predict([manual_abstract[0], keyword_queries[0]])  # Ambil elemen pertama dari masing-masing daftar
print("Similarities:", similarities)

ValueError: Unrecognized data type: x=["In the modern technology industry, the utilization of machine learning and natural language processing is increasingly dominant, with applications like facial recognition leveraging machine learning algorithms to understand and interpret users' natural language.", 'machine learning OR natural language processing'] (of type <class 'list'>)

In [None]:
# Export the model
model_save_path = 'similarity_model'
model.save(model_save_path)
print(f"Model saved to {model_save_path}")

In [None]:
# Loading the model for later use
loaded_model = tf.keras.models.load_model(model_save_path, custom_objects={'TextPreprocessor': TextPreprocessor, 'CosineSimilarityLayer': CosineSimilarityLayer})

In [None]:
# Calculate similarity with loaded model
similarities_loaded = loaded_model.predict([manual_abstract, keyword_queries])
print("Similarities with loaded model:", similarities_loaded)