# Text Embeddings with PCA + K-means

In [None]:
!pip install sentence-transformers -q

In [None]:
from sentence_transformers import SentenceTransformer
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Instantiate the sentence-embedding model; later cells call model.encode().
model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(model_name)
print('Model loaded!')

In [None]:
# Sample sentences, two per theme, kept in a fixed order so that row i of
# `embeddings` corresponds to texts[i].
sentence_pairs = (
    # machine learning / AI
    ("Machine learning predicts outcomes", "AI learns patterns from data"),
    # weather
    ("The weather is sunny today", "Rain expected tomorrow"),
    # sports
    ("Football team won the game", "Basketball players train hard"),
    # food
    ("Pizza is delicious Italian food", "Sushi is Japanese cuisine"),
    # cities
    ("Paris has the Eiffel Tower", "Tokyo is a vibrant city"),
)
texts = [sentence for pair in sentence_pairs for sentence in pair]

# Encode every sentence with the model loaded earlier and report the shape
# of the result.
embeddings = model.encode(texts)
print(f'Embeddings: {embeddings.shape}')

In [None]:
# PCA + K-means + Plot
#
# K-means is fitted on the original embeddings; PCA is used only to obtain
# 2-D coordinates for the scatter plot.

# Single source of truth for the cluster count (used by KMeans and the title).
n_clusters = 5

# Project the embeddings to two components for plotting.
pca = PCA(n_components=2)
X = pca.fit_transform(embeddings)

# n_init is pinned explicitly: scikit-learn 1.2-1.3 emit a FutureWarning when
# it is omitted, and 1.4 changed the default from 10 to 'auto', so leaving it
# out makes the clustering depend on the installed version.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
clusters = kmeans.fit_predict(embeddings)

plt.figure(figsize=(10, 6))
# Colour each projected point by its cluster label.
plt.scatter(X[:, 0], X[:, 1], c=clusters, cmap='tab10', s=150)
# Label every point with the first 20 characters of its sentence.
for i, txt in enumerate(texts):
    plt.annotate(txt[:20], (X[i, 0], X[i, 1]), fontsize=8)
plt.title(f'Text Embeddings ({n_clusters} clusters)')
plt.xlabel('PCA 1')
plt.ylabel('PCA 2')
plt.tight_layout()
plt.show()