In [None]:
# https://docs.snowflake.com/en/user-guide/snowflake-cortex/vector-embeddings

In [1]:
# Reference SQL (kept as a string literal; this cell does not execute it).
# Creates a two-column table of 3-dimensional FLOAT vectors, inserts two rows,
# and computes the row-wise inner product of columns a and b.
'''
CREATE TABLE vectors (
    a VECTOR(FLOAT, 3),
    b VECTOR(FLOAT, 3)
);

-- Both vector literals are columns of a single SELECT (one row per INSERT).
INSERT INTO vectors SELECT [1.1,2.2,3]::VECTOR(FLOAT,3), [1,1,1]::VECTOR(FLOAT,3);
INSERT INTO vectors SELECT [1,2.2,3]::VECTOR(FLOAT,3), [4,6,8]::VECTOR(FLOAT,3);

-- Compute the pairwise inner product between columns a and b
SELECT VECTOR_INNER_PRODUCT(a, b) FROM vectors;
'''


# Reference SQL: return the row of `vectors` whose column `a` has the highest
# cosine similarity to the query vector [1,2,3].
# The literal is the last expression in the cell, so its repr is the cell output.
'''
SELECT
    a, VECTOR_COSINE_SIMILARITY(a, [1,2,3]::VECTOR(FLOAT,3)) AS similarity
FROM vectors
ORDER BY similarity DESC
LIMIT 1;
'''

'\nSELECT\n    a, VECTOR_COSINE_SIMILARITY(a, [1,2,3]::VECTOR(FLOAT,3)) AS similarity\nFROM vectors\nORDER BY similarity DESC\nLIMIT 1;\n'

In [None]:
# Snowpark in Python: find the stored vector closest (by L2 distance) to [1, 2, 2].

from snowflake.snowpark import Session, Row
from snowflake.snowpark.functions import col, lit, vector_l2_distance
from snowflake.snowpark.types import VectorType, StructType, StructField

# Reuse the most recently created session, or create one if none exists.
# Creating one requires connection settings to be available; if they are not
# configured by default, chain `.configs(connection_parameters)` before
# `.getOrCreate()` and read credentials from the environment, never hardcode them.
session = Session.builder.getOrCreate()

# Single column `vec` holding 3-dimensional integer vectors.
schema = StructType([StructField("vec", VectorType(int, 3))])

data = [Row([1, 2, 3]), Row([4, 5, 6]), Row([7, 8, 9])]

df = session.create_dataframe(data, schema)

# The list literal must be cast to a VECTOR before it can be compared with `vec`.
# Sort by the computed distance and keep the first row.
df.select(
    "vec",
    vector_l2_distance(df.vec, lit([1, 2, 2]).cast(VectorType(int, 3))).as_("dist"),
).sort("dist").limit(1).show()

### **2. Create vector embeddings from text**

In [None]:
# Reference SQL (kept as a string literal; this cell does not execute it).
# Flow: embed the wiki articles once, embed the incoming query, pick the article
# with the highest cosine similarity, then pass it to an LLM as context.
'''
-- Create embedding vectors for wiki articles (only do once)
ALTER TABLE wiki ADD COLUMN vec VECTOR(FLOAT, 768);
UPDATE wiki SET vec = SNOWFLAKE.CORTEX.EMBED_TEXT_768('snowflake-arctic-embed-m', content);

-- Embed incoming query
SET query = 'in which year was Snowflake Computing founded?';
CREATE OR REPLACE TABLE query_table (query_vec VECTOR(FLOAT, 768));
INSERT INTO query_table SELECT SNOWFLAKE.CORTEX.EMBED_TEXT_768('snowflake-arctic-embed-m', $query);

-- Do a semantic search to find the relevant wiki for the query
WITH result AS (
    SELECT
        w.content,
        $query AS query_text,
        VECTOR_COSINE_SIMILARITY(w.vec, q.query_vec) AS similarity
    FROM wiki w, query_table q
    ORDER BY similarity DESC
    LIMIT 1
)

-- Pass to large language model as context
SELECT SNOWFLAKE.CORTEX.COMPLETE('mistral-7b',
    CONCAT('Answer this question: ', query_text, ' using this text: ', content)) FROM result;
'''