# Module 2: Build a RAG with Cortex Search

## Create the database, tables and warehouse

In [None]:
-- Database that holds the objects created in this module (stage, tables, search service).
CREATE DATABASE IF NOT EXISTS cortex_search_tutorial_db;

-- Dedicated X-Small warehouse for the tutorial. OR REPLACE recreates it if it
-- already exists; it starts suspended and resumes automatically when needed.
CREATE OR REPLACE WAREHOUSE cortex_search_tutorial_wh WITH
     WAREHOUSE_SIZE='X-SMALL'
     AUTO_SUSPEND = 120
     AUTO_RESUME = TRUE
     INITIALLY_SUSPENDED=TRUE;

-- Run the following cells on the tutorial warehouse.
 USE WAREHOUSE cortex_search_tutorial_wh;

Note:

The CREATE DATABASE statement creates a database. The database automatically includes a schema named PUBLIC.

The CREATE WAREHOUSE statement creates an initially suspended warehouse.

## Get PDF data

You will use a sample dataset of the Federal Open Market Committee (FOMC) meeting minutes for this example. This is a sample of twelve 10-page documents with meeting notes from FOMC meetings from 2023 and 2024. Download the files directly from your browser by following this link:

[FOMC minutes sample](https://drive.google.com/file/d/1C6TdVjy6d-GnasGO6ZrIEVJQRcedDQxG/view)

The complete set of FOMC minutes can be found at the [US Federal Reserve’s website](https://www.federalreserve.gov/monetarypolicy/fomccalendars.htm).

Note: In a non-classroom setting, you would bring your own data, possibly already in a Snowflake stage.

## Load data into Snowflake stage

In [None]:
-- Stage that receives the FOMC PDFs, with its directory table enabled and
-- SNOWFLAKE_SSE (server-side) encryption.
-- NOTE(review): OR REPLACE recreates the stage, so re-running this cell after
-- the upload presumably means the PDFs have to be uploaded again — confirm.
CREATE OR REPLACE STAGE cortex_search_tutorial_db.public.fomc
    DIRECTORY = (ENABLE = TRUE)
    ENCRYPTION = (TYPE = 'SNOWFLAKE_SSE');

Now upload the dataset. You can upload the dataset in Snowsight or using SQL. To upload in Snowsight:

1. Sign in to Snowsight.

2. Select Data in the left-side navigation menu.

3. Select your database cortex_search_tutorial_db.

4. Select your schema public.

5. Select Stages and select fomc.

6. On the top right, select the + Files button.

7. Drag and drop files into the UI or select Browse to choose a file from the dialog window.

8. Select Upload to upload your file.

## Verify the PDF Files are uploaded to stage

In [None]:
from snowflake.snowpark.context import get_active_session
session = get_active_session()

# List every object in the fomc stage whose path matches the PDF pattern.
list_pdfs_sql = "LIST @cortex_search_tutorial_db.public.fomc PATTERN='.*\\.pdf'"
pdf_rows = session.sql(list_pdfs_sql).collect()

# Keep only the base file name of each listed path (the text after the final
# '/'), wrapped in a 1-tuple so that each entry forms one single-column row.
pdf_files = [(row.name.rsplit('/', 1)[-1],) for row in pdf_rows]

# Turn the rows into a Snowpark DataFrame with a single "file_name" column ...
pdf_df = session.createDataFrame(pdf_files, schema=["file_name"])

# ... and register it as the temporary view PDF_FILES so SQL cells can query it.
pdf_df.create_or_replace_temp_view("PDF_FILES")

In [None]:
-- Show the file names held in the PDF_FILES temp view, exposing the column
-- under the alias "name".
SELECT file_name AS name
FROM PDF_FILES;

## Parse PDF Files

In [None]:
-- Table with columns for a relative path, a scoped file URL and a VARIANT document.
-- NOTE(review): none of the other cells visible in this notebook reads or
-- writes this table — confirm it is still needed.
CREATE TABLE IF NOT EXISTS CORTEX_SEARCH_TUTORIAL_DB.PUBLIC.CHUNKS_PARSE_DOC (
   relative_path VARCHAR
    , scoped_file_url VARCHAR
    , document VARIANT
    );

In [None]:
-- Temporary view that calls Cortex PARSE_DOCUMENT (mode 'ocr') for each file
-- listed in PDF_FILES, reading it from the fomc stage; the result is exposed
-- in the "content" column.
-- NOTE(review): being a view, this presumably re-runs PARSE_DOCUMENT on every
-- query against it — consider materializing the result if that gets costly.
CREATE OR REPLACE TEMP VIEW PARSED_FOMC_DOCS AS 
SELECT file_name, 
    SNOWFLAKE.CORTEX.PARSE_DOCUMENT(
                '@cortex_search_tutorial_db.public.fomc', 
                file_name, 
                OBJECT_CONSTRUCT('mode', 'ocr')
        ) AS content
    FROM PDF_FILES

In [None]:
-- Preview two rows of the view to sanity-check the parsed output.
SELECT * FROM PARSED_FOMC_DOCS LIMIT 2

## Chunk text

First, we need to transform the data to extract the content from the parsed JSON.

In [None]:
import json

# Pull the parsed documents out of the temp view into a pandas DataFrame.
df = session.table("PARSED_FOMC_DOCS").to_pandas()


def _extract_content(raw_json):
    """Decode one JSON string and return the value stored under "content"."""
    return json.loads(raw_json)["content"]


# Replace each JSON payload in CONTENT with just its "content" value.
df['CONTENT'] = df['CONTENT'].apply(_extract_content)

In [None]:
# Persist the extracted content as CORTEX_SEARCH_TUTORIAL_DB.PUBLIC.PARSED_FOMC_CONTENT.
# overwrite=True keeps this cell idempotent: write_pandas appends by default,
# so re-running the notebook would otherwise duplicate every document (and,
# downstream, every chunk derived from this table).
session.write_pandas(
    df,
    "PARSED_FOMC_CONTENT",
    database="CORTEX_SEARCH_TUTORIAL_DB",
    schema="PUBLIC",
    auto_create_table=True,
    overwrite=True,
)

In [None]:
-- Inspect the table that was just written from the pandas DataFrame.
SELECT * FROM CORTEX_SEARCH_TUTORIAL_DB.PUBLIC.PARSED_FOMC_CONTENT

In [None]:
-- Recreate the chunk table from scratch each time this cell runs.
CREATE OR REPLACE TABLE CORTEX_SEARCH_TUTORIAL_DB.PUBLIC.CHUNKED_FOMC_DOCS (
    file_name VARCHAR,
    CHUNK VARCHAR
);

-- Split each document's content with SPLIT_TEXT_RECURSIVE_CHARACTER and
-- flatten the result so that every chunk becomes its own row, keyed by file_name.
-- NOTE(review): the positional arguments are presumably format ('none'),
-- chunk size (1800) and overlap (250) — confirm against the function reference.
INSERT INTO CORTEX_SEARCH_TUTORIAL_DB.PUBLIC.CHUNKED_FOMC_DOCS (file_name, CHUNK)
SELECT
    file_name,
    c.value AS CHUNK
FROM
    CORTEX_SEARCH_TUTORIAL_DB.PUBLIC.PARSED_FOMC_CONTENT,
    LATERAL FLATTEN( input => SNOWFLAKE.CORTEX.SPLIT_TEXT_RECURSIVE_CHARACTER (
        content,
        'none',
        1800,
        250
    )) c;

In [None]:
-- Inspect the chunks produced by the INSERT in the previous cell.
SELECT * FROM CORTEX_SEARCH_TUTORIAL_DB.PUBLIC.CHUNKED_FOMC_DOCS

## Create Search Service

In [None]:
-- Cortex Search service over CHUNKED_FOMC_DOCS: searches ON the "chunk" column,
-- carries file_name along, and uses the tutorial warehouse with a target lag
-- of 1 hour and the snowflake-arctic-embed-l-v2.0 embedding model.
-- NOTE: the service name is spelled FOMC_SEACH_SERVICE (sic). The Python
-- retriever looks the service up under that exact name, so rename both
-- together or neither.
CREATE OR REPLACE CORTEX SEARCH SERVICE CORTEX_SEARCH_TUTORIAL_DB.PUBLIC.FOMC_SEACH_SERVICE
    ON chunk
    WAREHOUSE = cortex_search_tutorial_wh
    TARGET_LAG = '1 hour'
    EMBEDDING_MODEL = 'snowflake-arctic-embed-l-v2.0'
    AS (
    SELECT
        file_name,
        chunk
    FROM CORTEX_SEARCH_TUTORIAL_DB.PUBLIC.CHUNKED_FOMC_DOCS
    );

## Use the Search Service

In [None]:
import os
from snowflake.core import Root
from typing import List
from snowflake.snowpark.session import Session

class CortexSearchRetriever:
    """Fetch the best-matching text chunks from the FOMC Cortex Search service.

    Args:
        snowpark_session: Snowpark session used to reach the search service.
        limit_to_retrieve: Maximum number of chunks requested per query.
    """

    def __init__(self, snowpark_session: Session, limit_to_retrieve: int = 4):
        self._snowpark_session = snowpark_session
        self._limit_to_retrieve = limit_to_retrieve

    def retrieve(self, query: str) -> List[str]:
        """Return the "chunk" text of the top search results for ``query``.

        Args:
            query: Search text sent to the Cortex Search service.

        Returns:
            The "chunk" value of each result (at most ``limit_to_retrieve``
            are requested), or an empty list when there are no results.
        """
        # Use the session this retriever was constructed with, not whatever
        # global ``session`` happens to exist in the notebook.
        root = Root(self._snowpark_session)

        # "SEACH" (sic) matches the name under which the service was created.
        search_service = (
            root
            .databases["CORTEX_SEARCH_TUTORIAL_DB"]
            .schemas["PUBLIC"]
            .cortex_search_services["FOMC_SEACH_SERVICE"]
        )
        resp = search_service.search(
            query=query,  # search for the caller's question
            columns=["chunk"],
            limit=self._limit_to_retrieve,
        )

        if not resp.results:
            return []
        return [hit["chunk"] for hit in resp.results]

In [None]:
# Build a retriever on the notebook session, asking for four chunks per query.
retriever = CortexSearchRetriever(snowpark_session=session, limit_to_retrieve=4)

# Try the retriever on its own before wiring it into the RAG.
retrieved_context = retriever.retrieve(query="how was inflation expected to evolve in 2024?")

# Bare expression: the notebook renders the returned list as the cell output.
retrieved_context

## Create a RAG

Now that we have set up Cortex Search to be our retriever, we can add Cortex Complete for generation to build our RAG.

We'll also add TruLens instrumentation with the @instrument decorator to our app.

The first thing we need to do, however, is set up the database connection to Snowflake, where we'll log the traces and evaluation results from our application. This way, we have a stored record that we can use to understand the app's performance.

In [None]:
from trulens.core import TruSession
from trulens.connectors.snowflake import SnowflakeConnector

# TruLens connector built on the existing Snowpark session.
# NOTE(review): init_server_side=True presumably provisions TruLens'
# server-side objects in the account — confirm against the TruLens docs.
tru_snowflake_connector = SnowflakeConnector(snowpark_session=session, init_server_side=True)

# TruLens session bound to that connector; a later cell reads the leaderboard from it.
tru_session = TruSession(connector=tru_snowflake_connector)

In [None]:
from snowflake.cortex import Complete
from trulens.apps.custom import instrument

class RAG:
    """Minimal retrieval-augmented generation app over the FOMC search service.

    Retrieval is delegated to ``CortexSearchRetriever`` and generation to
    Cortex ``Complete``; both steps carry the TruLens ``@instrument`` decorator.
    """

    def __init__(self):
        # Built on the notebook-level ``session``; four chunks per query.
        self.retriever = CortexSearchRetriever(snowpark_session=session, limit_to_retrieve=4)

    @instrument
    def retrieve_context(self, query: str) -> list:
        """Return whatever the retriever finds for ``query``."""
        chunks = self.retriever.retrieve(query)
        return chunks

    @instrument
    def generate_completion(self, query: str, context_str: list) -> str:
        """Ask the model to answer ``query`` using ``context_str``.

        ``context_str`` is interpolated into the prompt as-is.
        """
        llm_input = f"""
          You are an expert assistant extracting information from context provided.
          Answer the question based on the context. Be concise and do not hallucinate.
          If you don´t have the information just say so.
          Context: {context_str}
          Question:
          {query}
          Answer:
        """
        return Complete("mistral-large2", llm_input)

    def query(self, query: str) -> str:
        """Retrieve context for ``query``, then generate an answer from it."""
        return self.generate_completion(query, self.retrieve_context(query))


# Single notebook-level instance, reused by the query and evaluation cells.
rag = RAG()

## Query the RAG

In [None]:
# Ask one sample question; the notebook shows the returned answer as the cell output.
rag.query("how was inflation expected to evolve in 2024?")

## Create Evaluations

In [None]:
from trulens.providers.cortex.provider import Cortex
from trulens.core.feedback.feedback import SnowflakeFeedback
from trulens.core import Select
import numpy as np

# Cortex provider using the "mistral-large2" model; its methods back the
# feedback functions defined below.
provider = Cortex(session, "mistral-large2")

# Groundedness: fed the collected return values of retrieve_context and the
# app's output.
f_groundedness = (
    SnowflakeFeedback(provider.groundedness_measure_with_cot_reasons, name="Groundedness")
    .on(Select.RecordCalls.retrieve_context.rets[:].collect())
    .on_output()
)

# Context relevance: fed the app input and the retrieve_context return values
# selected with rets[:]; the scores are aggregated with np.mean.
f_context_relevance = (
    SnowflakeFeedback(provider.context_relevance, name="Context Relevance")
    .on_input()
    .on(Select.RecordCalls.retrieve_context.rets[:])
    .aggregate(np.mean)
)

# Answer relevance: fed the app input and the app output, aggregated with np.mean.
f_answer_relevance = (
    SnowflakeFeedback(provider.relevance, name="Answer Relevance")
    .on_input()
    .on_output()
    .aggregate(np.mean)
)

In [None]:
from trulens.apps.custom import TruCustomApp
from trulens.core.schema.app import RecordIngestMode

# Wrap the RAG instance as TruLens app "FOMC RAG", version "simple", with the
# three feedback functions attached.
# NOTE(review): RecordIngestMode.BUFFERED presumably batches record writes
# rather than writing each one immediately — confirm against the TruLens docs.
tru_rag = TruCustomApp(
    rag,
    app_name="FOMC RAG",
    app_version="simple",
    feedbacks=[f_groundedness, f_answer_relevance, f_context_relevance],
    record_ingest_mode=RecordIngestMode.BUFFERED,
)

In [None]:
# Run two sample questions inside the TruLens recording context and print each answer.
with tru_rag as recording:
    response = rag.query("how was inflation expected to evolve in 2024?")
    print(response)
    response = rag.query("how do housing services contribute to inflation?")
    print(response)

In [None]:
# Display the leaderboard from the TruLens session as the cell output.
tru_session.get_leaderboard()