# LEALLA
### Learning Lightweight Language-agnostic Sentence Embeddings with Knowledge Distillation
#### Paper: https://arxiv.org/pdf/2302.08387.pdf

In [1]:
import tensorflow_hub as hub
import tensorflow as tf
import tensorflow_text as text
import numpy as np

In [2]:
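# Prerequisite: if the `tensorflow_text` import above fails, install it first with
#   pip install tensorflow_text
# and then restart the kernel and re-run the import cell.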

In [3]:
# Load the LEALLA sentence encoder from TF Hub.
# Alternative checkpoints (swap the URL below to try them):
#   https://tfhub.dev/google/LEALLA/LEALLA-base/1
#   https://tfhub.dev/google/LEALLA/LEALLA-large/1
encoder = hub.KerasLayer("https://tfhub.dev/google/LEALLA/LEALLA-small/1")

# Three sentences per language, listed in the same order in each language so
# that row i / column i of every matrix printed below pairs up translations.
english_sentences = tf.constant([
    "dog",
    "Puppies are nice.",
    "I enjoy taking long walks along the beach with my dog.",
])
italian_sentences = tf.constant([
    "cane",
    "I cuccioli sono carini.",
    "Mi piace fare lunghe passeggiate lungo la spiaggia con il mio cane.",
])
japanese_sentences = tf.constant([
    "犬",
    "子犬はいいです",
    "私は犬と一緒にビーチを散歩するのが好きです",
])

# Embed each batch of sentences with the encoder.
english_embeds = encoder(english_sentences)
japanese_embeds = encoder(japanese_sentences)
italian_embeds = encoder(italian_sentences)

# Print one matrix per language pair: entry [i, j] is the dot product of
# sentence i from the first language with sentence j from the second.
# NOTE(review): a dot product is a cosine similarity only when the embeddings
# are unit-length -- confirm against the model card whether the encoder
# already L2-normalizes its output.
for _left_embeds, _right_embeds in (
    (english_embeds, italian_embeds),    # English-Italian similarity
    (english_embeds, japanese_embeds),   # English-Japanese similarity
    (italian_embeds, japanese_embeds),   # Italian-Japanese similarity
):
    print(np.asarray(_left_embeds) @ np.asarray(_right_embeds).T)


[[0.6628791  0.18278457 0.2560031 ]
 [0.14280055 0.7678191  0.19196478]
 [0.15308793 0.10031503 0.8378704 ]]
[[0.91263676 0.45516834 0.24380797]
 [0.2345259  0.65163374 0.21599735]
 [0.26216242 0.22121471 0.6608679 ]]
[[0.6203863  0.26901698 0.14951538]
 [0.27798057 0.5323863  0.15065356]
 [0.24729793 0.24758407 0.8123442 ]]
