In [None]:
-- Rebuild the proof-of-concept database from scratch.
-- Any existing database with this name is dropped first, together with its contents.
DROP DATABASE IF EXISTS snowflake_llm_poc;
CREATE DATABASE snowflake_llm_poc;

-- Make the freshly created database the current one for this session.
USE DATABASE snowflake_llm_poc;

In [None]:
-- External stage over the S3 prefix whose files are embedded later in this file.
-- The directory table is enabled so the files can be enumerated with DIRECTORY(@stage).
-- The URL is written as a single-quoted string literal: in Snowflake, double quotes
-- delimit identifiers, not strings.
-- NOTE(review): the AWS key pair is blank here; supply real values at run time and
-- keep them out of version control (a storage integration would avoid inline keys).
CREATE OR REPLACE STAGE snowflake_llm_poc.PUBLIC.Snow_stage_directory_table_stock_data
    URL = 's3://landinglayertest/mmragtest/'
    CREDENTIALS = (
        AWS_KEY_ID = ''
        AWS_SECRET_KEY = ''
    )
    DIRECTORY = (ENABLE = TRUE);

In [None]:
-- Enumerate the files currently visible in the stage (LS is shorthand for LIST).
LIST @snowflake_llm_poc.PUBLIC.Snow_stage_directory_table_stock_data;

In [None]:
-- Build the embedding table: one row per file listed in the stage's directory table.
-- Each file is turned into a FILE value from its FILE_URL and embedded with the
-- 'voyage-multimodal-3' model; the resulting vector is stored as image_embedding.
-- The statement is terminated with a semicolon so it can run as part of a script.
CREATE OR REPLACE TABLE snowflake_llm_poc.PUBLIC.existing_data AS
SELECT
    relative_path,
    file_url,
    AI_EMBED('voyage-multimodal-3', TO_FILE(file_url)) AS image_embedding
FROM DIRECTORY(@snowflake_llm_poc.PUBLIC.Snow_stage_directory_table_stock_data);

In [None]:
-- Inspect the stored rows. Columns are named explicitly (instead of SELECT *) so the
-- result shape stays stable if the table definition ever gains columns.
SELECT
    relative_path,
    file_url,
    image_embedding
FROM snowflake_llm_poc.PUBLIC.existing_data;

In [None]:
-- Retrieval only: list the three stored files whose embeddings are closest
-- (smallest L2 distance) to the embedding of the question text.
SET user_question = 'What is the estimated market size of GenAI for 2032?';

SELECT
    relative_path,
    file_url
FROM snowflake_llm_poc.PUBLIC.existing_data
ORDER BY
    VECTOR_L2_DISTANCE(
        AI_EMBED('voyage-multimodal-3', $user_question),
        image_embedding
    )
LIMIT 3;

In [None]:
-- Question the pipeline should answer.
-- The answer must be produced ONLY from the retrieved document images.
SET user_question = 'What is the estimated market size of GenAI for 2032?';

-- Step 1: RETRIEVAL
-- Score every stored image by the L2 (Euclidean) distance between
--   a) the embedding of the user question, and
--   b) the precomputed embedding of the image.
-- Smaller distance = higher similarity; keep the three closest images.
WITH ranked_images AS (

    SELECT
        -- Relative path of the image inside the external stage
        relative_path,

        VECTOR_L2_DISTANCE(
            AI_EMBED(
                'voyage-multimodal-3',  -- Multimodal embedding model
                $user_question          -- Natural language query
            ),
            image_embedding             -- Precomputed image embedding
        ) AS distance

    FROM snowflake_llm_poc.PUBLIC.existing_data
    ORDER BY distance
    LIMIT 3
),

-- Step 2: AUGMENTATION
-- Turn the retrieved paths into FILE values and collect them into one array.
-- WITHIN GROUP (ORDER BY distance) is required: without it the element order of
-- ARRAY_AGG is not guaranteed (the ORDER BY inside ranked_images does not carry
-- over), so files[0] would not reliably be the closest image.
image_files AS (

    SELECT
        ARRAY_AGG(
            TO_FILE(
                '@snowflake_llm_poc.PUBLIC.Snow_stage_directory_table_stock_data',
                relative_path
            )
        ) WITHIN GROUP (ORDER BY distance) AS files
    FROM ranked_images
)

-- Step 3: GENERATION
-- Ask the multimodal model to answer, instructing it to rely ONLY on the
-- retrieved images.
-- NOTE(review): if fewer than three images are retrieved, files[1] / files[2]
-- are NULL; confirm how PROMPT handles NULL arguments.
SELECT AI_COMPLETE(
    'pixtral-large',
    PROMPT(
        'INSTRUCTIONS:
        Answer the QUERY using ONLY the CONTEXT provided below.
        Keep the answer strictly grounded in the given context.
        If the context does not contain enough information to answer the query,
        respond with: "I do not have enough context to respond to this query."

        CONTEXT:
        Document Image 1: {0}
        Document Image 2: {1}
        Document Image 3: {2}

        QUERY:
        {3}',

        -- Retrieved images, closest first
        files[0],
        files[1],
        files[2],

        -- Original user question
        $user_question
    )
) AS answer
FROM image_files;


In [None]:
-- Question the pipeline should answer.
-- The answer must be produced ONLY from the retrieved document images.
SET user_question = 'Why customers choose AWS?';

-- Step 1: RETRIEVAL
-- Score every stored image by the L2 (Euclidean) distance between
--   a) the embedding of the user question, and
--   b) the precomputed embedding of the image.
-- Smaller distance = higher similarity; keep the three closest images.
WITH ranked_images AS (

    SELECT
        -- Relative path of the image inside the external stage
        relative_path,

        VECTOR_L2_DISTANCE(
            AI_EMBED(
                'voyage-multimodal-3',  -- Multimodal embedding model
                $user_question          -- Natural language query
            ),
            image_embedding             -- Precomputed image embedding
        ) AS distance

    FROM snowflake_llm_poc.PUBLIC.existing_data
    ORDER BY distance
    LIMIT 3
),

-- Step 2: AUGMENTATION
-- Turn the retrieved paths into FILE values and collect them into one array.
-- WITHIN GROUP (ORDER BY distance) is required: without it the element order of
-- ARRAY_AGG is not guaranteed (the ORDER BY inside ranked_images does not carry
-- over), so files[0] would not reliably be the closest image.
image_files AS (

    SELECT
        ARRAY_AGG(
            TO_FILE(
                '@snowflake_llm_poc.PUBLIC.Snow_stage_directory_table_stock_data',
                relative_path
            )
        ) WITHIN GROUP (ORDER BY distance) AS files
    FROM ranked_images
)

-- Step 3: GENERATION
-- Ask the multimodal model to answer, instructing it to rely ONLY on the
-- retrieved images.
-- NOTE(review): if fewer than three images are retrieved, files[1] / files[2]
-- are NULL; confirm how PROMPT handles NULL arguments.
SELECT AI_COMPLETE(
    'pixtral-large',
    PROMPT(
        'INSTRUCTIONS:
        Answer the QUERY using ONLY the CONTEXT provided below.
        Keep the answer strictly grounded in the given context.
        If the context does not contain enough information to answer the query,
        respond with: "I do not have enough context to respond to this query."

        CONTEXT:
        Document Image 1: {0}
        Document Image 2: {1}
        Document Image 3: {2}

        QUERY:
        {3}',

        -- Retrieved images, closest first
        files[0],
        files[1],
        files[2],

        -- Original user question
        $user_question
    )
) AS answer
FROM image_files;


" Customers choose AWS for several reasons:\n\n1. **Experience**: AWS has 18 years of experience helping millions of customers.\n2. **Global Reach**: AWS spans 36 regions and 114 Availability Zones.\n3. **Security**: AWS offers over 300 security features.\n4. **Innovation**: AWS provides over 250 service offerings.\n5. **AWS Infrastructure**: AWS infrastructure is 3.6 times more energy efficient than the median of surveyed U.S. enterprise data centers.\n6. **Total Cost of Ownership (TCO)**: AWS has achieved 151 price reductions since 2006.\n7. **Ecosystem**: AWS ecosystem includes 12,000 software listings from 2,000 Independent Software Vendors (ISVs)."

In [None]:
-- Question the pipeline should answer.
-- The answer must be produced ONLY from the retrieved document images.
SET user_question = 'How did Standard Chartered reduce its risk grid compute costs by using AWS?';

-- Step 1: RETRIEVAL
-- Score every stored image by the L2 (Euclidean) distance between
--   a) the embedding of the user question, and
--   b) the precomputed embedding of the image.
-- Smaller distance = higher similarity; keep the three closest images.
WITH ranked_images AS (

    SELECT
        -- Relative path of the image inside the external stage
        relative_path,

        VECTOR_L2_DISTANCE(
            AI_EMBED(
                'voyage-multimodal-3',  -- Multimodal embedding model
                $user_question          -- Natural language query
            ),
            image_embedding             -- Precomputed image embedding
        ) AS distance

    FROM snowflake_llm_poc.PUBLIC.existing_data
    ORDER BY distance
    LIMIT 3
),

-- Step 2: AUGMENTATION
-- Turn the retrieved paths into FILE values and collect them into one array.
-- WITHIN GROUP (ORDER BY distance) is required: without it the element order of
-- ARRAY_AGG is not guaranteed (the ORDER BY inside ranked_images does not carry
-- over), so files[0] would not reliably be the closest image.
image_files AS (

    SELECT
        ARRAY_AGG(
            TO_FILE(
                '@snowflake_llm_poc.PUBLIC.Snow_stage_directory_table_stock_data',
                relative_path
            )
        ) WITHIN GROUP (ORDER BY distance) AS files
    FROM ranked_images
)

-- Step 3: GENERATION
-- Ask the multimodal model to answer, instructing it to rely ONLY on the
-- retrieved images.
-- NOTE(review): if fewer than three images are retrieved, files[1] / files[2]
-- are NULL; confirm how PROMPT handles NULL arguments.
SELECT AI_COMPLETE(
    'pixtral-large',
    PROMPT(
        'INSTRUCTIONS:
        Answer the QUERY using ONLY the CONTEXT provided below.
        Keep the answer strictly grounded in the given context.
        If the context does not contain enough information to answer the query,
        respond with: "I do not have enough context to respond to this query."

        CONTEXT:
        Document Image 1: {0}
        Document Image 2: {1}
        Document Image 3: {2}

        QUERY:
        {3}',

        -- Retrieved images, closest first
        files[0],
        files[1],
        files[2],

        -- Original user question
        $user_question
    )
) AS answer
FROM image_files;


" Standard Chartered reduced its risk grid compute costs by moving to AWS, specifically by utilizing Amazon EC2 Spot Instances. This migration allowed the bank to triple its compute capacity and reduce its compute costs by 60%."