https://www.snowflake.com/en/product/features/postgres/

In [74]:
!pip install psycopg2-binary



In [75]:
import os
from getpass import getpass

import psycopg2

# Connection settings are read from the standard libpq environment variables
# (PGHOST, PGPORT, PGDATABASE, PGUSER, PGPASSWORD) so that no credentials are
# stored in the notebook. A missing PGHOST/PGDATABASE/PGUSER raises KeyError.
conn = psycopg2.connect(
    host=os.environ["PGHOST"],   # e.g. db.example.com
    port=int(os.environ.get("PGPORT", "5432")),  # 5432 is the PostgreSQL default port
    database=os.environ["PGDATABASE"],
    user=os.environ["PGUSER"],
    # Prompt interactively only when the password is not supplied via the environment.
    password=os.environ.get("PGPASSWORD") or getpass("Postgres password: "),
    sslmode="require"
)

# Smoke test: print the server version string to confirm the connection works.
cur = conn.cursor()
cur.execute("SELECT version();")
print(cur.fetchone())



('PostgreSQL 18.1 on aarch64-unknown-linux-gnu, compiled by gcc (GCC) 11.5.0 20240719 (Red Hat 11.5.0-11), 64-bit',)


In [76]:
# IF NOT EXISTS keeps this cell re-runnable: without it a second run raises and
# leaves the psycopg2 transaction in an aborted state.
cur.execute("CREATE EXTENSION IF NOT EXISTS vector;")

In [77]:
# Look up the `vector` type in the pg_type catalog; a returned row means the type exists.
vector_type_sql = "SELECT typname FROM pg_type WHERE typname = 'vector';"
cur.execute(vector_type_sql)
cur.fetchone()

('vector',)

In [78]:
# Recreate the demo table from scratch so this cell can be re-run: a bare
# CREATE TABLE fails on the second run, and a repeated INSERT would duplicate rows.
# NOTE: this drops any existing table named test_embeddings.
cur.execute("DROP TABLE IF EXISTS test_embeddings;")
cur.execute("CREATE TABLE test_embeddings(product_id bigint, embeddings vector(3) );")
# Two 3-dimensional sample vectors used by every query below.
cur.execute("INSERT INTO test_embeddings VALUES (1, '[1, 2, 3]'), (2, '[2,-3,-4]');")


In [79]:
# Rank the stored vectors by their pgvector `<=>` distance to the query vector [3,1,2].
cosine_ranking_sql = """SELECT product_id, embeddings, embeddings <=> '[3,1,2]' AS distance
FROM test_embeddings
ORDER BY embeddings <=> '[3,1,2]';"""
cur.execute(cosine_ranking_sql)
cur.fetchall()

[(1, '[1,2,3]', 0.2142857142857143), (2, '[2,-3,-4]', 1.2481458334927327)]

In [80]:
def cosine_distance(a,b):
    """Return the cosine distance ``1 - (a . b) / (|a| * |b|)`` between two vectors.

    Works for vectors of any (equal) length, not only 3 components.

    Args:
        a: Sequence of numbers.
        b: Sequence of numbers with the same length as ``a``.

    Returns:
        The cosine distance as a float (0 for same direction, 2 for opposite).

    Raises:
        ValueError: If ``a`` and ``b`` have different lengths.
        ZeroDivisionError: If either vector has zero magnitude (including
            empty vectors), since the distance is undefined there.
    """
    if len(a) != len(b):
        raise ValueError(
            f"vectors must have the same length, got {len(a)} and {len(b)}"
        )
    dot = sum(x * y for x, y in zip(a, b))
    norm_a = sum(x ** 2 for x in a) ** 0.5
    norm_b = sum(y ** 2 for y in b) ** 0.5
    return 1 - dot / (norm_a * norm_b)

In [81]:
# Cross-check in pure Python: should equal the `<=>` distance the database returned for product_id 1.
cosine_distance([1, 2, 3],[3,1,2])

0.2142857142857143

In [82]:
# Cross-check in pure Python: should equal the `<=>` distance the database returned for product_id 2.
cosine_distance([2,-3,-4],[3,1,2])

1.2481458334927327

# Find the relationship between Euclidean and cosine distance

In [83]:
# Rank the stored vectors by their pgvector `<->` distance to the query vector [3,1,2].
euclidean_ranking_sql = """SELECT product_id, embeddings, embeddings <-> '[3,1,2]' AS distance
FROM test_embeddings
ORDER BY embeddings <-> '[3,1,2]';"""
cur.execute(euclidean_ranking_sql)
cur.fetchall()

[(1, '[1,2,3]', 2.449489742783178), (2, '[2,-3,-4]', 7.280109889280518)]

In [84]:
def euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between two vectors.

    Works for vectors of any (equal) length, not only 3 components.

    Args:
        a: Sequence of numbers.
        b: Sequence of numbers with the same length as ``a``.

    Returns:
        The square root of the sum of squared component differences.

    Raises:
        ValueError: If ``a`` and ``b`` have different lengths.
    """
    if len(a) != len(b):
        raise ValueError(
            f"vectors must have the same length, got {len(a)} and {len(b)}"
        )
    return sum((x - y) ** 2 for x, y in zip(a, b)) ** 0.5


In [85]:
# Cross-check in pure Python: should equal the `<->` distance the database returned for product_id 1.
euclidean_distance([1, 2, 3],[3,1,2])

2.449489742783178

In [86]:
# Cross-check in pure Python: should equal the `<->` distance the database returned for product_id 2.
euclidean_distance([2,-3,-4],[3,1,2])

7.280109889280518

In [88]:

# Compare cosine (<=>) and Euclidean (<->) distance on raw vs. L2-normalized vectors.
# For unit vectors |a - b|^2 = 2 - 2*cos(a, b), so (Euclidean distance)^2 / 2
# should reproduce the cosine distance up to floating-point error.

# The query vector is bound as a parameter so it is defined in exactly one place.
QUERY_VECTOR = "[3,1,2]"


def _print_ranking(title, sql):
    """Print ``title``, run ``sql`` with QUERY_VECTOR bound to %(q)s, then print all rows."""
    print(title)
    cur.execute(sql, {"q": QUERY_VECTOR})
    print(cur.fetchall())


_print_ranking("Cosine Distance with denormalization", """SELECT
    product_id,
    embeddings,
    (embeddings) <=> (%(q)s::vector) AS distance
FROM test_embeddings
ORDER BY distance;""")
print()
_print_ranking("Cosine Distance with normalization", """SELECT
    product_id,
    embeddings,
    l2_normalize(embeddings) as normalized_embedding,
    l2_normalize(embeddings) <=> l2_normalize(%(q)s::vector) AS distance
FROM test_embeddings
ORDER BY distance;""")


print('*'*100)


print()

_print_ranking("Euclidian Distance with denormalization", """SELECT
    product_id,
    embeddings,
    (embeddings) <-> (%(q)s::vector) AS distance
FROM test_embeddings
ORDER BY distance;""")
print()
# The last column squares the normalized Euclidean distance and halves it,
# which should match the cosine distances printed above.
_print_ranking("Euclidian Distance with normalization", """SELECT
    product_id,
    embeddings,
    l2_normalize(embeddings) as normalized_embedding,
    l2_normalize(embeddings) <-> l2_normalize(%(q)s::vector) AS distance,
    POWER(
        l2_normalize(embeddings) <-> l2_normalize(%(q)s::vector),
        2
    ) / 2 AS approx_cosine_distance_value
FROM test_embeddings
ORDER BY distance;""")
print('*'*100)

Cosine Distance with denormalization
[(1, '[1,2,3]', 0.2142857142857143), (2, '[2,-3,-4]', 1.2481458334927327)]

Cosine Distance with normalization
[(1, '[1,2,3]', '[0.26726124,0.5345225,0.80178374]', 0.21428577814782845), (2, '[2,-3,-4]', '[0.37139067,-0.557086,-0.74278134]', 1.248145796413846)]
****************************************************************************************************

Euclidian Distance with denormalization
[(1, '[1,2,3]', 2.449489742783178), (2, '[2,-3,-4]', 7.280109889280518)]

Euclidian Distance with normalization
[(1, '[1,2,3]', '[0.26726124,0.5345225,0.80178374]', 0.6546537194834747, 0.21428574621677401), (2, '[2,-3,-4]', '[0.37139067,-0.557086,-0.74278134]', 1.5799657076723703, 1.248145818710327)]
****************************************************************************************************
