# Word2Vec Vektör Görselleştirme

In [24]:
from gensim.models import Word2Vec

In [25]:
model = Word2Vec.load("data/word2vec.model")

In [26]:
words = list(model.wv.index_to_key)
words[0:6]

['dil', 'doğal', 'işle', 'yapay', 'zeka', 'il']

In [27]:
import pandas as pd

In [28]:
df_name = []
df_count = []

for i in words:
    df_name.append(i)
    df_count.append(model.wv.get_vecattr(i, "count"))
    
df = pd.DataFrame({"name":df_name, "count":df_count})
df.head()

Unnamed: 0,name,count
0,dil,962
1,doğal,822
2,işle,733
3,yapay,304
4,zeka,269


In [29]:
df.sort_values("count", ascending=False).head()

Unnamed: 0,name,count
0,dil,962
1,doğal,822
2,işle,733
6,iç,514
89,ben,421


In [30]:
df

Unnamed: 0,name,count
0,dil,962
1,doğal,822
2,işle,733
3,yapay,304
4,zeka,269
...,...,...
10692,hatırlamak,1
10693,istemedikçe,1
10694,fiyasko,1
10695,çıkarıp,1


In [31]:
model.wv.most_similar(["doğal"])

[('dil', 0.9989959001541138),
 ('işle', 0.9987336993217468),
 ('iç', 0.9987024068832397),
 ('yapay', 0.9986445903778076),
 ('olarak', 0.9983895421028137),
 ('il', 0.998386800289154),
 ('zeka', 0.9983227252960205),
 ('dah', 0.998312771320343),
 ('gip', 0.9980441927909851),
 ('iş', 0.9980183839797974)]

In [32]:
model.wv.most_similar(["dil"])

[('iç', 0.999093770980835),
 ('doğal', 0.9989960789680481),
 ('dah', 0.99885094165802),
 ('yapay', 0.9987738728523254),
 ('işle', 0.9987685084342957),
 ('olarak', 0.998701274394989),
 ('il', 0.9986147880554199),
 ('zeka', 0.9984176754951477),
 ('türkçe', 0.9983814358711243),
 ('var', 0.9982227683067322)]

In [33]:
model.wv.most_similar(["işle"])

[('iç', 0.9988319277763367),
 ('dil', 0.9987685680389404),
 ('doğal', 0.9987338781356812),
 ('yapay', 0.9985160827636719),
 ('olarak', 0.9985044598579407),
 ('zeka', 0.998446524143219),
 ('il', 0.9984024167060852),
 ('dah', 0.9982218742370605),
 ('alan', 0.9980417490005493),
 ('ola', 0.9978750944137573)]

In [34]:
model.wv.similarity("doğal", "dil")

0.998996

In [35]:
import numpy as np

In [36]:
arr = np.empty((0,100), dtype='f')
arr

array([], shape=(0, 100), dtype=float32)

In [37]:
word = "işle"
word_labels = [word]
word_labels

['işle']

In [38]:
close_words = model.wv.most_similar([word])
close_words

[('iç', 0.9988319277763367),
 ('dil', 0.9987685680389404),
 ('doğal', 0.9987338781356812),
 ('yapay', 0.9985160827636719),
 ('olarak', 0.9985044598579407),
 ('zeka', 0.998446524143219),
 ('il', 0.9984024167060852),
 ('dah', 0.9982218742370605),
 ('alan', 0.9980417490005493),
 ('ola', 0.9978750944137573)]

In [39]:
arr = np.append(arr, np.array([model.wv.get_vector(word)]),axis=0)
arr

array([[-7.69467056e-02,  1.82619497e-01,  5.95802367e-02,
        -6.93961680e-02,  7.51917483e-03, -2.98412532e-01,
         6.67512566e-02,  4.29895937e-01, -2.08192408e-01,
        -9.58838612e-02, -5.41213043e-02, -3.77751470e-01,
         1.59371812e-02,  1.57314554e-01,  3.87071603e-04,
        -1.65612891e-01,  4.79662828e-02, -2.46980652e-01,
        -5.09812087e-02, -4.69783098e-01,  2.71651316e-02,
        -1.42113958e-02,  1.74123466e-01, -1.02280967e-01,
        -3.83242667e-02, -1.81046762e-02, -1.07267536e-01,
        -5.83755597e-02, -1.91316783e-01,  8.51792097e-02,
         2.55325437e-01, -1.49619021e-02,  6.11409433e-02,
        -2.02835903e-01, -5.45914620e-02,  2.24078491e-01,
         9.74430069e-02, -2.23751947e-01, -1.20208301e-01,
        -3.44310999e-01,  3.42043079e-02, -2.10584551e-01,
        -7.22560212e-02, -1.14270132e-02,  1.65857643e-01,
        -1.19816154e-01, -2.28528604e-01, -2.70629767e-02,
         8.83955657e-02,  1.25442863e-01,  5.18791676e-0

In [40]:
for word_score in close_words:
    wrd_vector = model.wv.get_vector(word_score[0])
    word_labels.append(word_score[0])
    arr = np.append(arr, np.array([wrd_vector]),axis=0)

In [41]:
arr

array([[-0.07694671,  0.1826195 ,  0.05958024, ..., -0.19176115,
         0.08052529, -0.02790773],
       [-0.06683029,  0.17434488,  0.06577127, ..., -0.19955452,
         0.07069445, -0.01611632],
       [-0.10654556,  0.25341523,  0.10013319, ..., -0.2813356 ,
         0.10476237, -0.02459876],
       ...,
       [-0.05669197,  0.15023863,  0.06453616, ..., -0.16859646,
         0.06482721, -0.02634447],
       [-0.05582891,  0.11655576,  0.04441177, ..., -0.10806572,
         0.0495034 , -0.01780752],
       [-0.05490715,  0.10954124,  0.0335113 , ..., -0.11841559,
         0.05140685, -0.01121555]], dtype=float32)

In [42]:
len(arr)

11

In [43]:
from sklearn.manifold import TSNE 

In [48]:
tsne = TSNE(n_components=2, random_state=42)
np.printoptions(suppress=True)
Y = tsne.fit_transform(arr)

ValueError: perplexity must be less than n_samples

In [45]:
Y

NameError: name 'Y' is not defined

In [49]:
x_cords = Y[:0]
x_cords

NameError: name 'Y' is not defined

In [50]:
y_cords = Y[:1]
y_cords

NameError: name 'Y' is not defined

In [51]:
word_labels

['işle',
 'iç',
 'dil',
 'doğal',
 'yapay',
 'olarak',
 'zeka',
 'il',
 'dah',
 'alan',
 'ola']

In [52]:
!pip install plotly



In [53]:
import plotly.express as px 

In [54]:
fig=px.scatter(x=x_cords.tolist(), y=y_cords.tolist(), text=word_labels, size_max=100)
fig_update_layout(height=800, title_text="Word2Vec Most Similarity")
fig.show()

NameError: name 'x_cords' is not defined

In [55]:
!pip install wordcloud 

Collecting wordcloud
  Downloading wordcloud-1.9.2-cp310-cp310-win_amd64.whl (152 kB)
     ------------------------------------ 152.1/152.1 kB 647.7 kB/s eta 0:00:00
Installing collected packages: wordcloud
Successfully installed wordcloud-1.9.2


In [58]:
import matplotlip.pyplot as plt
%matplotlip inline 
from wordcloud import WordCloud

ModuleNotFoundError: No module named 'matplotlip'

In [57]:
len(words)

10697

In [59]:
sentence = []

for i in words:
    sentence.append(i)
    
text = " ".join(map(str,sentence))

wordcloud = WordCloud(width=3000, height=3000, max_font_size=300, background_color="white", colormap="twilight_shifted").generate(text)

plt.figure(figsize=(20,17))
pl.imshow(wordcloud,interpolation=None)
plt.axis("off")
plt.show()

NameError: name 'WordCloud' is not defined