# Cosine Similarity

In [2]:
import numpy as np

# One three-component vector per word. The dict's insertion order is the
# order in which the (word, components) pairs are listed below.
word_vectors = {
    word: np.array(components)
    for word, components in (
        ('murah',       [0.9, 0.3, 0.5]),
        ('murahan',     [1.0, 0.2, 0.6]),
        ('berkualitas', [0.7, 0.6, 0.9]),
        ('mahal',       [1.2, 0.5, 0.8]),
        ('biasa',       [0.6, 0.4, 0.5]),
        ('premium',     [1.3, 0.6, 0.9]),
    )
}

def cosine_similarity(vec1, vec2):
    """Return the cosine of the angle between two vectors.

    Computed as ``dot(vec1, vec2) / (||vec1|| * ||vec2||)``.

    Args:
        vec1: First vector, passed to ``np.dot`` and ``np.linalg.norm``.
        vec2: Second vector, passed to ``np.dot`` and ``np.linalg.norm``.

    Returns:
        The cosine similarity of the two vectors. If either vector has zero
        norm the angle is undefined; 0.0 is returned in that case instead of
        letting the division produce ``nan`` with a RuntimeWarning.
    """
    norm_product = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    # Guard the division: a zero-length vector has no direction to compare.
    if norm_product == 0:
        return 0.0
    return np.dot(vec1, vec2) / norm_product

# Row and column labels, in the dict's insertion order.
words = list(word_vectors.keys())
n = len(words)

# Header line: an empty 12-wide corner cell, then one 12-wide cell per word.
print(f"{'':12}", end='')
for w in words:
    print(f"{w:12}", end='')
print()

# One line per word: its label, then its similarity to every word.
for row_word in words:
    print(f"{row_word:12}", end='')
    for col_word in words:
        sim = cosine_similarity(word_vectors[row_word], word_vectors[col_word])
        # Left-align in a fixed 12-character field so a leading minus sign
        # on a negative similarity cannot push later columns out of line.
        print(f"{sim:<12.4f}", end='')
    print()


            murah       murahan     berkualitas mahal       biasa       premium     
murah       1.0000      0.9930      0.9119      0.9958      0.9670      0.9925      
murahan     0.9930      1.0000      0.8921      0.9855      0.9439      0.9795      
berkualitas 0.9119      0.8921      1.0000      0.9458      0.9818      0.9546      
mahal       0.9958      0.9855      0.9458      1.0000      0.9855      0.9994      
biasa       0.9670      0.9439      0.9818      0.9855      1.0000      0.9906      
premium     0.9925      0.9795      0.9546      0.9994      0.9906      1.0000      


# Euclidean Distance

In [4]:
import numpy as np

# One three-component vector per word, listed in the order the matrix
# rows and columns are printed.
word_vectors = {
    word: np.array(components)
    for word, components in (
        ("murah",       [0.9, 0.3, 0.5]),
        ("murahan",     [1.0, 0.2, 0.6]),
        ("berkualitas", [0.7, 0.6, 0.9]),
        ("mahal",       [1.2, 0.5, 0.8]),
        ("biasa",       [0.6, 0.4, 0.5]),
        ("premium",     [1.3, 0.6, 0.9]),
    )
}

words = list(word_vectors)

# Distance for each (row word, column word) pair where the column word sits
# at or after the row word in `words`: the diagonal plus the upper triangle.
distances = {
    (row_word, col_word): np.linalg.norm(word_vectors[row_word] - word_vectors[col_word])
    for row_idx, row_word in enumerate(words)
    for col_word in words[row_idx:]
}

print("Euclidean Distance Matrix (upper triangle):\n")

# Header line: an empty 12-wide corner cell followed by one cell per word.
blank_cell = f"{'':12}"
print(blank_cell + "".join(f"{word:12}" for word in words))

# Each row: the label, blank cells below the diagonal, distances from the
# diagonal onward.
for row_idx, row_word in enumerate(words):
    cells = [f"{row_word:12}"]
    for col_idx, col_word in enumerate(words):
        if col_idx >= row_idx:
            cells.append(f"{distances[(row_word, col_word)]:<12.3f}")
        else:
            cells.append(blank_cell)
    print("".join(cells))


Euclidean Distance Matrix (upper triangle):

            murah       murahan     berkualitas mahal       biasa       premium     
murah       0.000       0.173       0.539       0.469       0.316       0.640       
murahan                 0.000       0.583       0.412       0.458       0.583       
berkualitas                         0.000       0.520       0.458       0.600       
mahal                                           0.000       0.678       0.173       
biasa                                                       0.000       0.831       
premium                                                                 0.000       
