In [None]:
from gensim.models import Word2Vec
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# Punctuation stripped from the edges of each token. Hyphens are deliberately
# absent so compounds such as "anti-inflammatory" stay intact.
EDGE_PUNCTUATION = ".,;:!?\"'()"


def tokenize_sentence(sentence: str) -> list:
    """Return the lowercase tokens of *sentence* without edge punctuation.

    The sentence is lowercased and split on whitespace; each piece then has
    leading/trailing punctuation removed. Pieces that become empty (i.e. were
    pure punctuation) are dropped.
    """
    pieces = (raw.strip(EDGE_PUNCTUATION) for raw in sentence.lower().split())
    return [piece for piece in pieces if piece]


# Small medical-domain corpus: one sentence per string.
corpus = [
    "The patient was diagnosed with diabetes and hypertension.",
    "MRI scans reveal abnormalities in the brain tissue.",
    "The treatment involves antibiotics and regular monitoring.",
    "Symptoms include fever, fatigue and muscle pain.",
    "The vaccine is effective against several viral infections.",
    "Doctors recommend physical therapy for recovery.",
    "The clinical trial results were published in the journal.",
    "The surgeon performed a minimally invasive procedure.",
    "The prescription includes pain relievers and anti-inflammatory drugs.",
    "The diagnosis confirmed a rare genetic disorder.",
]

# Plain whitespace splitting would keep "pain." and "pain" (or "fever," and
# "fever") as distinct vocabulary entries, so punctuation is stripped first.
data = [tokenize_sentence(sentence) for sentence in corpus]

# min_count=1 keeps every token: the corpus is tiny and most words occur once.
# seed + workers=1 make training repeatable; with several worker threads the
# vectors change from run to run. (gensim's documentation notes that
# reproducibility across interpreter launches additionally depends on
# PYTHONHASHSEED.)
model = Word2Vec(
    data,
    vector_size=100,
    window=5,
    min_count=1,
    epochs=50,
    seed=42,
    workers=1,
)

# Every word in the trained vocabulary; model.wv[words] looks up their vectors
# in the same order, so row i of `coords` belongs to words[i].
words = model.wv.index_to_key

# Project the word vectors down to 2-D for plotting.
tsne_options = {"n_components": 2, "random_state": 42, "perplexity": 5}
try:
    # scikit-learn >= 1.5 names the iteration budget `max_iter`; the older
    # `n_iter` spelling was deprecated there and removed in 1.7.
    tsne = TSNE(max_iter=300, **tsne_options)
except TypeError:
    # Older scikit-learn releases only accept `n_iter`.
    tsne = TSNE(n_iter=300, **tsne_options)
coords = tsne.fit_transform(model.wv[words])

# Scatter plot of the 2-D projection, with each point labelled by its word.
xs = coords[:, 0]
ys = coords[:, 1]

plt.figure(figsize=(10, 8))
plt.scatter(xs, ys, color="blue")

# Each row of `coords` is one (x, y) pair; the label is drawn slightly above
# the point it belongs to.
for (px, py), label in zip(coords, words):
    plt.text(px, py + 0.02, label, fontsize=12)

plt.title("Word Embeddings Visualization (Medical Domain)")
plt.xlabel("Dimension 1")
plt.ylabel("Dimension 2")
plt.grid(True)
plt.show()

# For each query word, print its five nearest neighbours in the embedding
# space together with the similarity score, rounded to two decimals.
query_words = ("treatment", "vaccine")
for query in query_words:
    print(f"Words similar to '{query}':")
    neighbours = model.wv.most_similar(query, topn=5)
    for neighbour, similarity in neighbours:
        print(f"{neighbour} ({similarity:.2f})")
