In [1]:
from fasttext import load_model
from train import find_similar_words, evaluate_similarity
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def word_analogy(model, word1, word2, word3, top_k=5, exclude_inputs=False):
    """
    Rank vocabulary words by cosine similarity to ``word1 - word2 + word3``.

    Example: king - man + woman = queen

    Args:
        model: Object exposing ``get_word_vector(word)`` and ``get_words()``
            (the notebook passes loaded fastText models).
        word1: Word whose vector is the starting point of the analogy.
        word2: Word whose vector is subtracted.
        word3: Word whose vector is added.
        top_k: Maximum number of results to return. ``None`` returns every
            scored word; a value <= 0 returns an empty list.
        exclude_inputs: When True, ``word1``, ``word2`` and ``word3`` are not
            considered as candidates, so the query words cannot rank as their
            own answer. Defaults to False, which keeps the original behaviour.

    Returns:
        A list of ``(similarity, word)`` tuples sorted by descending cosine
        similarity. Words whose vector has zero norm are skipped because
        their cosine similarity is undefined; if the combined query vector
        itself has zero norm the result is an empty list.
    """
    if top_k is not None and top_k <= 0:
        return []

    # Build the query vector: word1 - word2 + word3
    target = (
        model.get_word_vector(word1)
        - model.get_word_vector(word2)
        + model.get_word_vector(word3)
    )

    # The query norm does not depend on the candidate, so compute it once.
    target_norm = np.linalg.norm(target)
    if target_norm == 0:
        # Cosine similarity is undefined for a zero query vector.
        return []

    skipped = {word1, word2, word3} if exclude_inputs else set()

    scored = []
    for word in model.get_words():
        if word in skipped:
            continue
        candidate = model.get_word_vector(word)
        candidate_norm = np.linalg.norm(candidate)
        if candidate_norm == 0:
            # Avoid division by zero / NaN scores that would corrupt the sort.
            continue
        score = np.dot(target, candidate) / (target_norm * candidate_norm)
        scored.append((score, word))

    # Highest similarity first; the sort is stable, so ties keep vocabulary order.
    scored.sort(reverse=True, key=lambda pair: pair[0])
    return scored[:top_k]

# Skipgram model
Trained from scratch

In [23]:
# Load the skip-gram model from 'fasttext_skipgram_v0.bin' and bind it to the
# shared name `model`, which the cells below read.
# NOTE(review): this cell's execution count (23) is higher than that of the
# cells that follow it, so the saved outputs were not produced in top-to-bottom
# order — confirm with Restart Kernel & Run All.
model = load_model('fasttext_skipgram_v0.bin')

In [4]:
# Word pairs whose similarity is reported for the model currently bound to `model`.
# The first four pairs cross two spellings of the first word with two spellings
# of the second (presumably dialectal vs. standard forms of "man"/"woman" —
# TODO confirm); the last pair is an unrelated extra probe.
word_pairs = [
    ('راجل', 'مرا'),
    ('رجل', 'مرا'),
    ('راجل', 'مرأة'),
    ('رجل', 'مرأة'),
    ('باك', 'مك'),
]

# evaluate_similarity comes from the project's `train` module; the saved output
# shows one "Similarity between ..." line per pair.
evaluate_similarity(model, word_pairs)

Similarity between 'راجل' and 'مرا': 0.6732
Similarity between 'رجل' and 'مرا': 0.4378
Similarity between 'راجل' and 'مرأة': 0.6482
Similarity between 'رجل' and 'مرأة': 0.5752
Similarity between 'باك' and 'مك': 0.6148


In [5]:
# List the 10 nearest neighbours of each probe word for the model currently
# bound to `model`. One loop replaces five copy-pasted calls, so the probe set
# and `k` are each defined in a single place.
for probe_word in ('راجل', 'رجل', 'مرا', 'مرأة', 'مك'):
    find_similar_words(model, word=probe_word, k=10)


Top 10 words similar to 'راجل':
=راجل (Score: 0.9065)
/راجل (Score: 0.8977)
[راجل (Score: 0.8842)
#راجل (Score: 0.8780)
هراجل (Score: 0.8565)
،راجل (Score: 0.8541)
مراجل (Score: 0.8513)
رراجل (Score: 0.8493)
'راجل (Score: 0.8487)
؟راجل (Score: 0.8480)

Top 10 words similar to 'رجل':
ورجل (Score: 0.7989)
ك”رجل (Score: 0.7922)
:رجل (Score: 0.7742)
=رجل (Score: 0.7657)
،رجل (Score: 0.7444)
جلال{رجل (Score: 0.7400)
برجل (Score: 0.7332)
رجل/ (Score: 0.7188)
«رجل (Score: 0.7185)
رجل.وي (Score: 0.7053)

Top 10 words similar to 'مرا':
![مرا (Score: 0.8394)
عومرا (Score: 0.8021)
ومرا (Score: 0.7950)
…مرا (Score: 0.7681)
"ومرا (Score: 0.7559)
راجل/مرا (Score: 0.7505)
)لمرا (Score: 0.7491)
المرا (Score: 0.7435)
ىمرا (Score: 0.7408)
=مرا (Score: 0.7371)

Top 10 words similar to 'مرأة':
امرأة (Score: 0.8832)
إمرأة (Score: 0.8740)
مراة (Score: 0.8645)
أمرأة (Score: 0.8583)
امراة (Score: 0.8473)
(إمرأة (Score: 0.8336)
وكأمرأة (Score: 0.8285)
كمرأة (Score: 0.8272)
فامرأة (Score: 0.8267)
تلمرأة (Score

# CBOW model
Trained from scratch

In [6]:
# Load the CBOW model from 'fasttext_cbow_v0.bin'.
# This rebinds the shared name `model`, so any model previously held under that
# name is no longer reachable; later cells that read `model` see this one
# unless another load cell is run afterwards.
model = load_model('fasttext_cbow_v0.bin')

In [7]:
# Same probe pairs as in the skip-gram section, re-declared here so this cell
# can be run on its own; the scores are for the model currently bound to `model`.
# The first four pairs cross two spellings of each word (presumably dialectal
# vs. standard forms — TODO confirm); the last pair is an unrelated extra probe.
word_pairs = [
    ('راجل', 'مرا'),
    ('رجل', 'مرا'),
    ('راجل', 'مرأة'),
    ('رجل', 'مرأة'),
    ('باك', 'مك'),
]

# evaluate_similarity comes from the project's `train` module; the saved output
# shows one "Similarity between ..." line per pair.
evaluate_similarity(model, word_pairs)

Similarity between 'راجل' and 'مرا': 0.5745
Similarity between 'رجل' and 'مرا': 0.2633
Similarity between 'راجل' and 'مرأة': 0.5035
Similarity between 'رجل' and 'مرأة': 0.4069
Similarity between 'باك' and 'مك': 0.4034


In [8]:
# List the 10 nearest neighbours of each probe word for the model currently
# bound to `model`. One loop replaces five copy-pasted calls, so the probe set
# and `k` are each defined in a single place.
for probe_word in ('راجل', 'رجل', 'مرا', 'مرأة', 'مك'):
    find_similar_words(model, word=probe_word, k=10)


Top 10 words similar to 'راجل':
[راجل (Score: 0.9146)
/راجل (Score: 0.9123)
=راجل (Score: 0.9121)
#راجل (Score: 0.9114)
)راجل (Score: 0.9043)
؟راجل (Score: 0.9003)
-راجل (Score: 0.8959)
'راجل (Score: 0.8904)
راجل, (Score: 0.8882)
،راجل (Score: 0.8871)

Top 10 words similar to 'رجل':
ك”رجل (Score: 0.8934)
:رجل (Score: 0.8912)
=رجل (Score: 0.8887)
،رجل (Score: 0.8875)
ررجل (Score: 0.8717)
«رجل (Score: 0.8583)
و"رجل (Score: 0.8554)
.رجل (Score: 0.8454)
كرجل (Score: 0.8341)
“رجل (Score: 0.8100)

Top 10 words similar to 'مرا':
…مرا (Score: 0.8889)
،مرا (Score: 0.8808)
=مرا (Score: 0.8777)
'مرا (Score: 0.8754)
![مرا (Score: 0.8737)
مرامرا (Score: 0.8441)
ىمرا (Score: 0.8440)
)لمرا (Score: 0.8319)
راجل/مرا (Score: 0.8194)
-مرا (Score: 0.8119)

Top 10 words similar to 'مرأة':
كإمرأة (Score: 0.8819)
(إمرأة (Score: 0.8792)
مرأة2: (Score: 0.8746)
امرأة (Score: 0.8712)
-مرأة (Score: 0.8703)
تلمرأة (Score: 0.8661)
إمرأة (Score: 0.8623)
فامرأة (Score: 0.8484)
وكأمرأة (Score: 0.8453)
ومرأة (Score: 0

In [18]:
# Analogy query word1 - word2 + word3, following the king - man + woman = queen
# pattern; the cell displays the (similarity, word) list returned by word_analogy.
# NOTE(review): `model` is whichever model was loaded last; execution counts in
# this notebook are out of order, so confirm which model produced the saved output.
word_analogy(model, word1='ملك', word2='راجل', word3='مرأة')

[(0.7938787, 'ملك'),
 (0.7415985, 'ملك؛'),
 (0.74077106, 'ملك…'),
 (0.71872395, 'ملكى'),
 (0.7130859, 'ملكاً')]

In [19]:
# Same analogy pattern (king - man + woman = queen) with a different spelling
# for word2 than the previous cell; word1 and word3 are unchanged.
# Uses the model currently bound to `model`.
word_analogy(model, word1='ملك', word2='رجل', word3='مرأة')

[(0.6445792, 'ملك'),
 (0.5912996, 'ب"ملك'),
 (0.5876373, "'ملك"),
 (0.5839689, '"*ملك'),
 (0.5831766, '،ملك')]

In [20]:
# Same analogy pattern (king - man + woman = queen), now with another spelling
# variant for word3; word1 and word2 match the previous cell.
# Uses the model currently bound to `model`.
word_analogy(model, word1='ملك', word2='رجل', word3='مراة')

[(0.6107284, 'ملك'),
 (0.56964546, '،ملك'),
 (0.5693874, "'ملك"),
 (0.5685666, 'ب"ملك'),
 (0.5629976, '"*ملك')]

In [21]:
# Same analogy pattern (king - man + woman = queen) with a third spelling
# variant for word3. In the saved output word3 itself ranks first, i.e. the
# query words are not excluded from the candidates.
# Uses the model currently bound to `model`.
word_analogy(model, word1='ملك', word2='رجل', word3='مرا')

[(0.59764856, 'مرا'),
 (0.54374105, 'ملك'),
 (0.50879276, '…مرا'),
 (0.50743216, '،مرا'),
 (0.5070816, 'مضمرا')]

In [22]:
# Same analogy pattern (king - man + woman = queen) using the word2 spelling
# from the first analogy cell together with the short word3 variant.
# Uses the model currently bound to `model`.
word_analogy(model, word1='ملك', word2='راجل', word3='مرا')

[(0.7233809, 'ملك'),
 (0.66944826, 'ملك؛'),
 (0.66250694, 'ملك…'),
 (0.6550784, 'ملكا'),
 (0.6550542, 'ملكبة')]

In [24]:
# Same analogy pattern (king - man + woman = queen).
# NOTE(review): this call is textually identical to the cell before it, yet the
# saved output differs. Presumably `model` was rebound in between (the skip-gram
# load cell carries execution count 23, this cell 24) — confirm, and prefer
# distinct variable names per model so the result does not depend on run order.
word_analogy(model, word1='ملك', word2='راجل', word3='مرا')

[(0.71703374, 'ملك'),
 (0.6302972, 'ملكى'),
 (0.6290631, 'ملكة^.'),
 (0.6266298, 'مراكيز'),
 (0.6172255, 'مذمرة')]

# Fine-tuned skipgram model

In [3]:
# Load the fine-tuned skip-gram model from 'fasttext_skipgram_vft_0.bin' under
# its own name `model_ft`, so it does not overwrite the shared `model` variable.
model_ft = load_model('fasttext_skipgram_vft_0.bin')

In [6]:
# Analogy query word1 - word2 + word3 (king - man + woman = queen pattern) on
# the fine-tuned skip-gram model `model_ft`; the cell displays the
# (similarity, word) list returned by word_analogy.
word_analogy(model_ft, word1='ملك', word2='راجل', word3='مرا')

[(0.72983986, 'ملك'),
 (0.64226836, '\xadالملك'),
 (0.6392498, 'ملكة^.'),
 (0.63371533, 'ملك…'),
 (0.6259298, '؟الملك')]

In [10]:
# Same analogy pattern (king - man + woman = queen) on `model_ft`, using
# different spelling variants for word2 and word3 than the previous cell.
word_analogy(model_ft, word1='ملك', word2='رجل', word3='مراة')

[(0.5926169, 'ملك'),
 (0.5788265, '،ملك'),
 (0.5725579, "'ملك"),
 (0.56963116, 'ب"ملك'),
 (0.5662609, '"*ملك')]

# Fine-tuned CBOW model

In [7]:
# Load the CBOW model from 'fasttext_cbow_v1.bin' under its own name
# `model_cbow_v1`, so it does not overwrite the shared `model` variable.
model_cbow_v1 = load_model('fasttext_cbow_v1.bin')

In [8]:
# Analogy query word1 - word2 + word3 (king - man + woman = queen pattern) on
# `model_cbow_v1`; the cell displays the (similarity, word) list returned by
# word_analogy.
word_analogy(model_cbow_v1, word1='ملك', word2='راجل', word3='مرا')

[(0.6718948, 'ملك'),
 (0.6669533, 'ملكة^.'),
 (0.66431314, 'مراثا'),
 (0.6603274, 'مرأى'),
 (0.6575509, 'ملكك')]

In [9]:
# Same analogy pattern (king - man + woman = queen) on `model_cbow_v1`, using
# different spelling variants for word2 and word3 than the previous cell.
word_analogy(model_cbow_v1, word1='ملك', word2='رجل', word3='مراة')

[(0.56534886, 'ملك'),
 (0.53648967, 'ملكش'),
 (0.5363179, 'محملك'),
 (0.5303863, 'وفحملك'),
 (0.52712154, 'دحملك')]