### Import libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.metrics.pairwise import cosine_similarity
from mpl_toolkits.mplot3d import Axes3D

### Plot

In [None]:

# Visualise the nearest neighbours (by cosine similarity) of a target word
# in a 3D t-SNE projection of their embedding vectors.

# Step 1: Loading Embeddings and Vocabulary
# Row i of `embeddings` is paired with line i of the vocabulary file below.
embeddings = np.load('right_path')
# Explicit encoding so non-ASCII words decode the same way on every platform
# (assumes the vocabulary file is UTF-8 -- adjust if it was saved otherwise).
with open('right_path', 'r', encoding='utf-8') as f:
    words = f.read().splitlines()

# zip() below would silently truncate on a mismatch and mislabel the plot,
# so fail early instead.
if len(words) != len(embeddings):
    raise ValueError(
        f'Vocabulary has {len(words)} entries but the embedding matrix has '
        f'{len(embeddings)} rows'
    )

# Step 2: Selecting a Target Word
target_word = 'yellow'
top_k = 10  # number of neighbours to display

# Step 3: Cosine Similarity
# Mapping words to their embeddings in a new dictionary
word_to_embedding = {word: embedding for word, embedding in zip(words, embeddings)}
# Getting the embedding of the target word
try:
    target_embedding = word_to_embedding[target_word]
except KeyError:
    raise KeyError(f'Target word {target_word!r} is not in the vocabulary') from None
# Cosine similarity between the target word and every vocabulary entry
similarities = cosine_similarity([target_embedding], embeddings).flatten()

# Step 4: Filtering Most Similar Words
# Rank all entries from most to least similar, then drop the target word by
# name. Relying on it being at rank 0 breaks when another vector ties with it
# or when the word appears more than once in the vocabulary.
ranked_indices = np.argsort(similarities)[::-1]
is_target = np.fromiter(
    (word == target_word for word in words), dtype=bool, count=len(words)
)
most_similar_indices = ranked_indices[~is_target[ranked_indices]][:top_k]
most_similar_words = [words[i] for i in most_similar_indices]
most_similar_embeddings = embeddings[most_similar_indices]

# Step 5: Applying t-SNE for 3D Visualization
# scikit-learn requires perplexity < number of samples, so clamp it when the
# vocabulary yields fewer neighbours than expected.
n_neighbours = len(most_similar_words)
if n_neighbours < 2:
    raise ValueError(
        f'Need at least 2 neighbouring words for t-SNE, found {n_neighbours}'
    )
tsne = TSNE(n_components=3, perplexity=min(5, n_neighbours - 1), random_state=42)
embeddings_3d = tsne.fit_transform(most_similar_embeddings)

# Step 6: Plotting the Results in 3D
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, projection='3d')
sc = ax.scatter(embeddings_3d[:, 0], embeddings_3d[:, 1], embeddings_3d[:, 2], s=50)
# Label every point with the word it represents
for (x, y, z), word in zip(embeddings_3d, most_similar_words):
    ax.text(x, y, z, word, fontsize=10)

ax.set_title(f'Most Similar Words to "{target_word}" in 3D Space')
ax.set_xlabel('Component 1')
ax.set_ylabel('Component 2')
ax.set_zlabel('Component 3')

plt.show()