In [2]:
import os
import nltk
import re
import numpy as np
from hazm import word_tokenize, Lemmatizer, Stemmer, Normalizer
from pprint import pprint
import pandas as pd
from tensorflow import keras
from keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.model_selection import train_test_split

In [3]:
np.set_printoptions(suppress=True)

In [4]:
raw_text = pd.read_csv('Shams_Corpus_Paper3.txt')
raw_text.iloc[5]

دیوان شمس تبریزی (غزلیات)    صورت دل صورت مخلوق نیست\tکز رخ دل حسن خدا رو نمود
Name: 5, dtype: object

In [5]:
file = open(os.getcwd() + "/Shams_Corpus_Paper3.txt", "rt")
raw_text = file.read()
file.close()


In [6]:
def remove_persian_stopword(tokens):
    file = open(os.getcwd() + "/persian_stopw.txt", "rt")
    raw_stop_words = file.read()
    file.close()

    stop_words = word_tokenize(raw_stop_words)

    return [word for word in tokens if not word in stop_words]

In [7]:
def normalize_text(text):
    normalizer = Normalizer()
    
    return normalizer.normalize(text)

In [8]:
def lemma_tokenizer(tokens):
    lemmatizer = Lemmatizer()    
    return [lemmatizer.lemmatize(token).split("#")[-1] for token in tokens]

In [9]:
def stem_tokenizer(tokens):
    stemmer = Stemmer()
    return [stemmer.stem(token) for token in tokens]

In [10]:
raw_text = "".join([i for i in raw_text if not i.isdigit()])
raw_text = re.sub(r"-+|\d+", "", raw_text)

raw_text = normalize_text(raw_text)

raw_tokens = word_tokenize(raw_text)
len(raw_tokens)

78389

In [11]:
type(raw_tokens)

list

In [12]:
tokens = remove_persian_stopword(raw_tokens)
len(tokens)

47335

In [13]:
t = Tokenizer()
t.fit_on_texts(tokens)

In [14]:
count_list = sorted(t.word_counts.items(), key=lambda x: x[1], reverse=True)
count_list[:20]

[('دل', 621),
 ('جان', 587),
 ('عشق', 455),
 ('سر', 390),
 ('شمس', 210),
 ('خوش', 187),
 ('آب', 179),
 ('دست', 175),
 ('سوی', 174),
 ('چشم', 171),
 ('خویش', 171),
 ('جهان', 167),
 ('صد', 165),
 ('یار', 162),
 ('دم', 154),
 ('روی', 145),
 ('شب', 143),
 ('کار', 141),
 ('رو', 140),
 ('مست', 138)]

In [15]:
lammatized_tokens = remove_persian_stopword(lemma_tokenizer(tokens))
print(len(lammatized_tokens))

stemmed_tokens = remove_persian_stopword(stem_tokenizer(tokens))
print(len(stemmed_tokens))

45614
45349


In [16]:
lammatized_tokens[:20]

['دیوان',
 'شمس',
 'تبریزی',
 'غزلیات',
 'آه',
 'شمع',
 'منور',
 'کآتش',
 'زن',
 'دل',
 'دل',
 'ربود',
 'زده',
 'دل',
 'آتش',
 'سوز',
 'دوست',
 'زود',
 'زود',
 'دل']

In [17]:
def concat(*iterables):
    for iterable in iterables:
        yield from iterable

In [18]:
def one_hot_encode(id, vocab_size):
    res = [0] * vocab_size
    res[id] = 1
    return res

In [19]:
def generate_training_data(tokens, word_to_id, window):
    X = []
    y = []
    n_tokens = len(tokens)
    #     unique_tokens = len(word_to_id)
    for i in range(n_tokens):
        idx = concat(
            range(max(0, i - window), i), range(i, min(n_tokens, i + window + 1))
        )
        for j in idx:
            if i == j:
                continue
            X.append(one_hot_encode(word_to_id[tokens[i]] - 1, len(word_to_id)))
            y.append(one_hot_encode(word_to_id[tokens[j]] - 1, len(word_to_id)))

    return np.asarray(X), np.asarray(y)

In [20]:
def generate_training_data_imp(tokens, word_to_id, window):
    X = []
    y = []
    n_tokens = len(tokens)
    unique_tokens = len(word_to_id)
    for i in range(n_tokens):
        idx = concat(
            range(max(0, i - window), i), range(i, min(n_tokens, i + window + 1))
        )
        for j in idx:
            if i == j:
                continue
            X.append(word_to_id[tokens[i]] - 1)
            y.append(word_to_id[tokens[j]] - 1)

    return np.asarray(X), np.asarray(y)

In [21]:
def generate_training_data(tokens, word_to_id, window):
    X = []
    y = []
    unique_token = len(word_to_id)
    n_tokens = len(tokens)
    #     unique_tokens = len(word_to_id)
    for i in range(n_tokens):
        idx = concat(
            range(max(0, i - window), i), 
            range(i, min(n_tokens, i + window + 1))
        )

        X.append(one_hot_encode(word_to_id[tokens[i]] - 1, unique_token))
        y.append(one_hot_encode_agg(tokens, unique_token, idx, i))

    return np.asarray(X), np.asarray(y)

In [22]:
def one_hot_encode_agg(tokens, vocab_size, idx, i):
    res = [0] * vocab_size
    for id in idx:
        if i == id:
            continue
        res[word_to_id[tokens[id]] - 1] = 1
    return res

In [23]:
tokens_ = lammatized_tokens
t = Tokenizer(filters="")
t.fit_on_texts(tokens_)
sorted_count_list = sorted(t.word_counts.items(), key=lambda x: x[1], reverse=True)
word_to_id, id_to_word = t.word_index, t.index_word
X, y = generate_training_data_imp(tokens_, word_to_id, 2)

In [24]:
word_to_id

{'دل': 1,
 'جان': 2,
 'عشق': 3,
 'سر': 4,
 'دان': 5,
 'رو': 6,
 'گو': 7,
 'گرد': 8,
 'بین': 9,
 'مست': 10,
 'سو': 11,
 'شمس': 12,
 'چشم': 13,
 'خوش': 14,
 'آب': 15,
 'دست': 16,
 'عاشق': 17,
 'زن': 18,
 'یار': 19,
 'جهان': 20,
 'خویش': 21,
 'کار': 22,
 'شب': 23,
 'پر': 24,
 'صد': 25,
 'کش': 26,
 'دم': 27,
 'روز': 28,
 'روی': 29,
 'آ': 30,
 'شکر': 31,
 'خور': 32,
 'آتش': 33,
 'گل': 34,
 'خون': 35,
 'غم': 36,
 'خاک': 37,
 'نی': 38,
 'نگر': 39,
 'بس': 40,
 'ماه': 41,
 'باده': 42,
 'ره': 43,
 'کو': 44,
 'عقل': 45,
 'رس': 46,
 'مه': 47,
 'نور': 48,
 'بند': 49,
 'باز': 50,
 'تن': 51,
 'لب': 52,
 'لطف': 53,
 'خورشید': 54,
 'ترش': 55,
 'شیر': 56,
 'خانه': 57,
 'رخ': 58,
 'جو': 59,
 'چرخ': 60,
 'جمله': 61,
 'نظر': 62,
 'آخر': 63,
 'عالم': 64,
 'گوش': 65,
 'جام': 66,
 'زین': 67,
 'آفتاب': 68,
 'پرده': 69,
 'تبریز': 70,
 'دین': 71,
 'باغ': 72,
 'باد': 73,
 'دور': 74,
 'شاه': 75,
 'زر': 76,
 'شه': 77,
 'سیر': 78,
 'دریا': 79,
 'چونک': 80,
 'ساقی': 81,
 'سنگ': 82,
 'تبریزی': 83,
 'خواب': 84,
 'شکن':

In [25]:
len(X)

182450

In [26]:
y

array([  11,   82, 1373, ..., 2598, 1895,  459])

In [27]:
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(X)
onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
one_hotter = onehot_encoder.fit(integer_encoded)
X_onehot_encoded = one_hotter.transform(integer_encoded)

integer_encoded = label_encoder.fit_transform(y)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
y_onehot_encoded = one_hotter.transform(integer_encoded)

In [28]:
import pickle
with open("one_hot_encoder.pkl", "wb") as f: 
    pickle.dump(one_hotter, f)

In [29]:
y_onehot_encoded.shape

(182450, 8368)

In [30]:
X_onehot_encoded.shape

(182450, 8368)

In [31]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=50
)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(145960,)
(145960,)
(36490,)
(36490,)


In [32]:
X


array([1373, 1373,   11, ...,  459, 2598, 2598])

In [33]:
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Activation, Dense, Reshape, Embedding

embed_size = 50
vocab_size = X_onehot_encoded.shape[1]

model = Sequential()
model.add(
    Embedding(
        input_dim=vocab_size,
        output_dim=embed_size,
        embeddings_initializer="glorot_uniform",
        input_length=1,
    )
)
model.add(Reshape((embed_size,)))
model.add(Dense(vocab_size, kernel_initializer="glorot_uniform", activation="softmax"))
model.compile(loss="categorical_crossentropy", optimizer="rmsprop", metrics=['mae', 'acc'])


print(model.summary())

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 1, 50)             418400    
                                                                 
 reshape (Reshape)           (None, 50)                0         
                                                                 
 dense (Dense)               (None, 8368)              426768    
                                                                 
Total params: 845,168
Trainable params: 845,168
Non-trainable params: 0
_________________________________________________________________
None


2022-05-15 01:08:45.964470: E tensorflow/stream_executor/cuda/cuda_driver.cc:271] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-05-15 01:08:45.964497: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (Emoji-U): /proc/driver/nvidia/version does not exist
2022-05-15 01:08:45.964714: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [34]:
X.shape

(182450,)

In [35]:
models = model.fit(
    X, y_onehot_encoded, epochs=20, validation_split=0.2, batch_size=128, verbose=1
)

2022-05-15 01:08:51.371346: W tensorflow/core/framework/cpu_allocator_impl.cc:82] Allocation of 4885573120 exceeds 10% of free system memory.


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [36]:
def n_similar(word, model_, n=10):
    word_id = word_to_id[word]
    result = model_.predict([word_id]).squeeze()
    for word in (id_to_word[id + 1] for id in np.argsort(result)[::-1][0:n]):
        print(word)


In [40]:
n_similar('باغ', model_ = model, n=10)

باد
سر
جان
خاک
دل
کش
دم
عشق
لطف
خوش


In [54]:
model.predict([5]).squeeze()

array([0.02277675, 0.02288023, 0.01977304, ..., 0.00000015, 0.00000015,
       0.00000015], dtype=float32)

In [35]:
# tokens_ = lammatized_tokens[:2000]
# t = Tokenizer(filters='')
# t.fit_on_texts(tokens_)
# sorted_count_list = sorted(t.word_counts.items(), key=lambda x: x[1], reverse=True)
# word_to_id, id_to_word = t.word_index, t.index_word
# len(word_to_id)
# X, y = generate_training_data(tokens_, word_to_id, 3)


In [36]:
# X

In [37]:
# y

In [38]:
# X.shape

In [39]:
# y.shape

In [40]:
def init_network(vocab_size, n_embedding):
    model = {
        "w1": np.random.uniform(-1, 1, (vocab_size, n_embedding)),
        "w2": np.random.uniform(-1, 1, (n_embedding, vocab_size))
    }
    return model

In [41]:
model = init_network(len(word_to_id), 10)
model["w1"].shape

(8368, 10)

In [52]:
model["w2"].shape

(10, 1171)

In [53]:
def softmax(X):
#     e_x = np.exp(x - np.max(x))
#     return e_x / e_x.sum(axis=0)

    res = []
    for x in X:
        exp = np.exp(x)
        res.append(exp / exp.sum())
    return res

In [54]:
def stable_sigmoid(x):
    sig = np.where(x < 0, np.exp(x) / (1 + np.exp(x)), 1 / (1 + np.exp(-x)))
    return sig

In [55]:
X.shape

(11988, 1171)

In [56]:
(X @ model["w1"]).shape

(11988, 10)

In [57]:
(X @ model["w1"] @ model["w2"]).shape


(11988, 1171)

In [58]:
def forward(model, X, return_cache=True):
    cache = {}
    
    cache["a1"] = X @ model["w1"]
    cache["a2"] = cache["a1"] @ model["w2"]
    print(f"a2 = {cache['a2']}")
    cache["z"] = softmax(cache["a2"])
#     cache["z"] = stable_sigmoid(cache["a2"])
    
    if not return_cache:
        return cache["z"]
    return cache

In [59]:
def cross_entropy(z, y):
    return - np.sum(np.log(z) * y)

In [66]:
def backward(model, X, y, alpha):
    cache  = forward(model, X)
#     dl_weight_inp_hidden = np.outer(target_word_vector, np.dot(weight_hidden_output, total_error.T))
#     dl_weight_hidden_output = np.outer(hidden_layer, total_error)
    da2 = cache["z"] - y
    dw2 = cache["a1"].T @ da2
    da1 = da2 @ model["w2"].T
    dw1 = X.T @ da1
    assert(dw2.shape == model["w2"].shape)
    assert(dw1.shape == model["w1"].shape)
    model["w1"] -= alpha * dw1
    model["w2"] -= alpha * dw2
    
    return cross_entropy(cache["z"], y)

In [65]:
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'svg'
plt.style.use("seaborn")

model = init_network(len(word_to_id), 10)

n_iter = 100

learning_rate = 0.01

history = [backward(model, X, y, learning_rate) for _ in range(n_iter)]

plt.clf()
plt.plot(range(len(history)), history, color="skyblue")
plt.show()

a2 = [[-2.74132539 -1.27900225 -1.80771619 ...  0.27653819  2.18316733
   0.356036  ]
 [-2.74132539 -1.27900225 -1.80771619 ...  0.27653819  2.18316733
   0.356036  ]
 [-2.74132539 -1.27900225 -1.80771619 ...  0.27653819  2.18316733
   0.356036  ]
 ...
 [-0.86869516 -1.01409124 -1.90894888 ... -0.25463134  1.92658359
   0.3085458 ]
 [-0.86869516 -1.01409124 -1.90894888 ... -0.25463134  1.92658359
   0.3085458 ]
 [-0.86869516 -1.01409124 -1.90894888 ... -0.25463134  1.92658359
   0.3085458 ]]
a2 = [[-2.30536967 -1.2409838  -1.34889173 ...  0.30970255  2.12515847
   0.32588619]
 [-2.30536967 -1.2409838  -1.34889173 ...  0.30970255  2.12515847
   0.32588619]
 [-2.30536967 -1.2409838  -1.34889173 ...  0.30970255  2.12515847
   0.32588619]
 ...
 [-0.67576492 -0.99458788 -1.69838724 ... -0.19292213  1.91841591
   0.33170993]
 [-0.67576492 -0.99458788 -1.69838724 ... -0.19292213  1.91841591
   0.33170993]
 [-0.67576492 -0.99458788 -1.69838724 ... -0.19292213  1.91841591
   0.33170993]]
a2 = [

a2 = [[-5.9980348  -5.95889827  1.36587994 ...  0.87812623  1.92214075
   0.24219688]
 [-5.9980348  -5.95889827  1.36587994 ...  0.87812623  1.92214075
   0.24219688]
 [-5.9980348  -5.95889827  1.36587994 ...  0.87812623  1.92214075
   0.24219688]
 ...
 [-4.7517591  -4.49144355  0.38128546 ...  0.76334177  2.37185902
   0.92954448]
 [-4.7517591  -4.49144355  0.38128546 ...  0.76334177  2.37185902
   0.92954448]
 [-4.7517591  -4.49144355  0.38128546 ...  0.76334177  2.37185902
   0.92954448]]
a2 = [[-1.66831947 -4.46820011  1.49254524 ...  0.92144401  1.93494083
   0.25116315]
 [-1.66831947 -4.46820011  1.49254524 ...  0.92144401  1.93494083
   0.25116315]
 [-1.66831947 -4.46820011  1.49254524 ...  0.92144401  1.93494083
   0.25116315]
 ...
 [-1.09994208 -3.26007664  0.62125857 ...  0.83668957  2.4304383
   0.97857969]
 [-1.09994208 -3.26007664  0.62125857 ...  0.83668957  2.4304383
   0.97857969]
 [-1.09994208 -3.26007664  0.62125857 ...  0.83668957  2.4304383
   0.97857969]]
a2 = [[-2

a2 = [[-3.67260638 -1.53700187  3.4852808  ...  1.72973698  2.18606308
   0.41579631]
 [-3.67260638 -1.53700187  3.4852808  ...  1.72973698  2.18606308
   0.41579631]
 [-3.67260638 -1.53700187  3.4852808  ...  1.72973698  2.18606308
   0.41579631]
 ...
 [-4.40268915 -0.23729693  5.79760099 ...  2.60515046  3.78679666
   1.94762952]
 [-4.40268915 -0.23729693  5.79760099 ...  2.60515046  3.78679666
   1.94762952]
 [-4.40268915 -0.23729693  5.79760099 ...  2.60515046  3.78679666
   1.94762952]]
a2 = [[-2.3767334  -3.04214528  3.54412939 ...  1.78290792  2.20560842
   0.42966066]
 [-2.3767334  -3.04214528  3.54412939 ...  1.78290792  2.20560842
   0.42966066]
 [-2.3767334  -3.04214528  3.54412939 ...  1.78290792  2.20560842
   0.42966066]
 ...
 [-3.27130698 -1.42870191  6.01174377 ...  2.75661738  3.89666166
   2.02379091]
 [-3.27130698 -1.42870191  6.01174377 ...  2.75661738  3.89666166
   2.02379091]
 [-3.27130698 -1.42870191  6.01174377 ...  2.75661738  3.89666166
   2.02379091]]
a2 = [

  exp = np.exp(x)
  res.append(exp / exp.sum())
  return - np.sum(np.log(z) * y)
  return - np.sum(np.log(z) * y)


a2 = [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]
a2 = [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]
a2 = [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]
a2 = [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]
a2 = [[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]
a2 = [[nan nan 

KeyboardInterrupt: 

In [375]:
history

[41174.36920437467,
 34001.97985689214,
 36940.93215854208,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan,
 nan]

In [341]:
model

{'w1': array([[ 2.13567115,  1.56644306, -3.31168808, ..., -1.27881225,
         -3.665346  , -0.71643332],
        [ 2.94476094,  6.58798465, -1.4649149 , ..., -2.23153483,
         -0.37211619, -2.20278355],
        [ 0.55475392,  1.25378585,  0.35800644, ..., -1.14515458,
         -1.38277165,  0.17231827],
        ...,
        [-0.3725349 , -0.38839347, -0.72622149, ..., -0.41574326,
          0.22873693,  0.41594231],
        [ 0.65206881,  0.13455332, -0.77005649, ..., -0.54984952,
          0.67393012,  0.74957822],
        [-0.06613055,  1.20413236,  0.14463711, ..., -0.24066138,
          1.16531767,  0.13553259]]),
 'w2': array([[-0.38738713, -0.20646114, -1.97325222, ..., -1.9887021 ,
          0.37277566, -0.68325111],
        [ 1.07978176,  1.52915627,  1.04595509, ...,  1.44282791,
          0.73902303, -0.02384995],
        [ 1.66933701, -1.79943934,  0.94013394, ..., -0.23462402,
          1.0196433 ,  1.28843454],
        ...,
        [-1.11376898,  0.75841651,  1.0733

In [342]:
learning = one_hot_encode(word_to_id["گرم"], len(word_to_id))
result = forward(model, [learning], return_cache=False)[0]
result

array([5.05350373e-05, 2.07845523e-03, 1.52483075e-07, ...,
       1.66334004e-04, 9.25094728e-07, 4.76827251e-06])

In [343]:
np.argsort(result)[::-1][0:5]

array([  18,   90,  533, 1048, 1127])

In [344]:
for word in (id_to_word[id] for id in np.argsort(result)[::-1][0:10]):
    print(word)


آتش
ربود
فشرد
نگار
برق
درده
وان
فرید
خبر
عدم


In [345]:
def get_word_similarities(word, model, n_similars=10):
    try:
        learning = one_hot_encode(word_to_id[word]-1, len(word_to_id))
    except KeyError:
        print(f"Word = {word} is not in corpus")
        exit()
    result = forward(model, [learning], return_cache=False)[0]
    for word in (id_to_word[id+1] for id in np.argsort(result)[::-1][0:n_similars]):
        print(word)

    

In [346]:
def get_embedding(model, word):
    try:
        idx = word_to_id[word] -1
    except KeyError:
        print("`word` not in corpus")
    one_hot = one_hot_encode(idx, len(word_to_id))
    return forward(model, one_hot)["a1"]

In [347]:
get_embedding(model, "دیو")


array([ 0.003464  , -0.59311171,  0.6253052 , -1.32282204, -0.69900121,
        2.49275488, -1.19139365, -1.16855214,  0.6965306 , -0.49444951])

In [348]:
get_word_similarities('عیش', model, 10)

خور
غلام
عشق
رقاد
قوم
عاشق
باللقاء
منت
پارس
جوع


In [349]:
get_word_similarities('میخانه', model, 10)

معاد
نسیم
مستفید
الکتب
شیفته
بلا
کرده
حمرا
خواهی_چو
اجل


In [350]:
get_word_similarities('بشر', model, 10)

جان
قوم
خور
باده
چشم
زود
خواب
دل
الفناء
نظر


In [351]:
get_word_similarities('ویرانه', model, 10)

رو
عاطفه
عاشق
سقف
لست
قافله
درآ
گنبد
انفک
شبت


In [352]:
get_word_similarities('حلال', model, 10)

گنبد
گرد
آتش
بموتی
جمیلا
نسیم
دین
الکتب
رو
اوباش


In [61]:
np.random.randn(2, 4)

array([[-0.87561612, -1.14905166,  0.10131987, -1.86758747],
       [-1.55412121,  0.31428543,  1.61102762, -1.25036518]])

In [63]:
np.random.uniform(-1, 1, (vocab_size, word_embedding_dimension))

array([[ 0.6661623 ,  0.70206294, -0.97759554,  0.75347279],
       [-0.7818824 , -0.63595427, -0.05453096,  0.15557554]])

In [33]:
from tensorflow import keras

from keras.models import Sequential 
from keras.layers import Activation, Dense, Reshape 

 
model = Sequential() 
layer_1 = Dense(16, input_shape = (8,), use_bias=False) 
model.add(layer_1) 
# layer_2 = Reshape((16,8)) 
# model.add(layer_2) 
layer_3 = Dense(4, use_bias=False) 
model.add(layer_3) 

# print(layer_2.input_shape) 
#(None, 8, 16) 
# print(layer_2.ouput_shape) 
#(None, 16, 8)

print(model.summary())
visualizer(model, format='png', view=True)


Model: "sequential_26"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_44 (Dense)            (None, 16)                128       
                                                                 
 dense_45 (Dense)            (None, 4)                 64        
                                                                 
Total params: 192
Trainable params: 192
Non-trainable params: 0
_________________________________________________________________
None


In [None]:
from tensorflow import keras
from keras.models import Sequential 
from keras.layers import Activation, Dense, Reshape 


