# 🧠 Embeddings and Similarity Scores
This notebook demonstrates how to:
- Embed short text snippets
- Compute pairwise similarity scores using cosine similarity
- Identify the most semantically aligned statements

In [None]:
# 🟩 Install dependencies
!pip install -q sentence-transformers scikit-learn

In [None]:
# 🔤 Define the texts to embed
# Order matters: position i in this list becomes label T{i+1} in the
# similarity table built later in the notebook.
texts = [
    "Silk black asymmetric gown with structured shoulders",  # T1
    "Minimalist white cotton dress with pleated skirt",      # T2
    "True winter red blazer with peak lapels",               # T3
    "Cobalt blue tailored pants with high waist",            # T4
    "Black leather ankle boots with silver zipper",          # T5
    "Winter red turtleneck sweater with clean lines",        # T6
]

In [None]:
# ✨ Load the SentenceTransformer embedding model and embed every text
from sentence_transformers import SentenceTransformer

MODEL_NAME = "all-MiniLM-L6-v2"
model = SentenceTransformer(MODEL_NAME)

# convert_to_tensor=True asks encode() for a single torch tensor instead of
# its default NumPy output.
embeddings = model.encode(texts, convert_to_tensor=True)

# Last expression of the cell, so the notebook displays the tensor's shape
# (expected: one row per entry in `texts`).
embeddings.shape

In [None]:
# 📊 Compute similarity scores
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

# `embeddings` was requested as a torch tensor and may live on a GPU.
# scikit-learn works on NumPy-convertible arrays, and a CUDA tensor cannot be
# converted implicitly, so detach it and move it to the CPU first. If
# `embeddings` is already a NumPy array (no `.detach`), use it unchanged.
if hasattr(embeddings, "detach"):
    embedding_array = embeddings.detach().cpu().numpy()
else:
    embedding_array = embeddings

# Square matrix: entry [i, j] is the cosine similarity of texts[i] and texts[j].
similarity_matrix = cosine_similarity(embedding_array)

# T1..Tn follow the order of `texts`; the same labels serve rows and columns.
labels = [f"T{i+1}" for i in range(len(texts))]
df = pd.DataFrame(similarity_matrix, columns=labels, index=labels)

# axis=None shades the whole table on one common colour scale. The default
# (axis=0) rescales every column independently, so equal colours in different
# columns would not mean equal similarity.
df.style.background_gradient(cmap='Blues', axis=None)

In [None]:
# 🧭 Print ranked most similar text pairs
import numpy as np

# Visit each unordered pair exactly once (row < col): this skips the diagonal
# (a text compared with itself) and the mirrored lower triangle.
n_texts = len(texts)
pairs = [
    ((texts[row], texts[col]), similarity_matrix[row, col])
    for row in range(n_texts)
    for col in range(row + 1, n_texts)
]

# Highest similarity first; only the top three pairs are printed.
sorted_pairs = sorted(pairs, key=lambda pair: pair[1], reverse=True)
for (a, b), score in sorted_pairs[:3]:
    print(f"Score: {score:.4f}\n↳ '{a}'\n↳ '{b}'\n")