In [None]:
from src.rds import db
# Fetch every (question, pdf_document_name) row from the "QUESTION_BANK" table.
# `questions` is read by later cells, so it is deliberately left at notebook scope.
with db.get_cursor() as cur:
    cur.execute("""SELECT question, pdf_document_name FROM "QUESTION_BANK" """)
    questions = cur.fetchall()

In [None]:
# Print the first column (the question text) of every fetched row.
# NOTE(review): after the loop `q` stays bound to the LAST row, and later cells
# (`swr_pipeline(q[0])`, `pipeline(q[0])`) read it -- hidden kernel state.
for q in questions:
    print(q[0])
            

In [None]:
from rag_swr import swr_pipeline
import os


In [None]:
# Run `swr_pipeline` on the question text of `q`, i.e. whichever row the earlier
# `for q in questions` loop left bound (the last one).
swr_pipeline(q[0])

In [None]:
# Show which environment variables are set. `os.env` does not exist (it raises
# AttributeError); the process environment mapping is `os.environ`. Only the variable
# names are listed so that secret values (API keys, credential paths) do not end up
# in the saved notebook output.
sorted(os.environ)

In [None]:
from litellm import completion
import os

## set ENV variables

# Send one user message to the "vertex_ai/gemini-pro" model through litellm and keep
# the returned completion object in `response`.
response = completion(
    model="vertex_ai/gemini-pro",
    messages=[
        {"role": "user", "content": "Hello, how are you?"},
    ],
)

In [None]:
# Display the completion object returned by the previous cell.
response

In [None]:
import os, litellm 

# Option 1: set via env var (no environment-variable code is present in this cell).


### OR ###

# Option 2: set directly on the litellm module.
litellm.vertex_project = "arched-forest-409209" # Your Project ID
litellm.vertex_location = "us-central1"

In [None]:
# NOTE: "<your-endpoint-id>" is a template placeholder; substitute a real model or
# endpoint id before running this cell.
response = completion(
    model="vertex_ai/<your-endpoint-id>",
    messages=[
        {"role": "user", "content": "Hello, how are you?"},
    ],
)

In [None]:
# %load src/generator.py
from vertexai.preview.generative_models import GenerativeModel, GenerationConfig
import google.generativeai as genai
import os
import vertexai

# Prompt templates. Each is filled with positional `str.format` arguments, so the
# number and order of `{}` placeholders is part of the contract.

# Answer-synthesis prompt: first `{}` is the retrieved context, second `{}` is the query.
# The line continuation after "answer the query." keeps an explicit space so the two
# sentences are not fused together in the rendered prompt.
RAG_PROMPT = """
You are an expert in the field of finance and legal reasoning. 
Context information from multiple sources is below.
---------------------
{}
---------------------
Given the information from multiple sources and not prior knowledge, answer the query. \
If you are unsure, just say there is not enough information to answer the question.
Query: {}
"""

# Query-expansion prompt: the single `{}` is the original query.
QUERY_EXPANSION_PROMPT = """
You are an expert in the field of finance and legal reasoning for listed companies in Singapore \
(which may include multinational companies). Based on the following query, please provide \
a more comprehensive query that can be used for semantic search.
Original Query: {}
"""

# Entity-extraction prompt: the single `{}` is the query.
QUERY_ENTITY_PROMPT = """
You are an expert in the field of finance and legal reasoning for listed companies in Singapore \
(which may include multinational companies). Based on the following query, please provide a \
list of entities that can be used for filtering documents. Look out for entities such as \
company names, people, locations, dates, events, and financial keywords.
Query: {}
"""

# Query-decomposition prompt: the single `{}` is the query to split into sub-queries.
QUERY_DECOMPOSITION_PROMPT = """
You are an expert in the field of finance and legal reasoning for listed companies in Singapore \
(which may include multinational companies). Based on the following query, please break it down to 3 to 5 \
smaller and more specific queries.

---------------------
For example, "What was the financial performance of company X in 2020?" can be broken down into:
1. What was the revenue of Company X in 2020?
2. What was the net income of Company X in 2020?
3. What was the debt of Company X in 2020?
---------------------

Query: {}

"""


class Generator:
    """Answer synthesiser that feeds retrieved text and a query to a generative model.

    Two model handles are created in ``__init__`` (``self.gemini`` and
    ``self.palm_bison``); ``response_synthesis`` selects one of them, builds the RAG
    prompt from the retrieved chunks and streams the model's answer.
    """

    def __init__(self, use_google_api=True):
        """Initialise Vertex AI and build the model handles.

        Args:
            use_google_api: When true, additionally configure ``google.generativeai``
                with the key read from the ``GOOGLE_API_KEY`` environment variable.
        """
        vertexai.init(project='1043867876565')

        if use_google_api:
            genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))
        self.generation_config = GenerationConfig(
            temperature=0.2, max_output_tokens=2048, top_p=0.8, top_k=40
        )

        self.gemini = GenerativeModel(
            "gemini-pro", generation_config=self.generation_config
        )
        # NOTE(review): confirm that "chat-bison" can be served through GenerativeModel.
        self.palm_bison = GenerativeModel(
            "chat-bison", generation_config=self.generation_config
        )

    def response_synthesis(
        self, retrieved_text: list, query: str, use_gemini=True
    ) -> str:
        """Generate an answer to ``query`` grounded in ``retrieved_text``.

        Args:
            retrieved_text: Retrieved text chunks, in ranking order.
            query: The user question.
            use_gemini: Use ``self.gemini`` when true, otherwise ``self.palm_bison``.

        Returns:
            The complete generated answer. (The previous ``out[0]`` indexed into the
            answer string and therefore yielded only its first character.)
        """
        if use_gemini:
            return self._gemini_generation(retrieved_text, query)
        return self._palm_generation(retrieved_text, query)

    def _gemini_generation(self, retrieved_text: list, query) -> str:
        """Answer ``query`` with the Gemini model handle."""
        return self._stream_generation(self.gemini, retrieved_text, query)

    def _palm_generation(self, retrieved_text: list, query) -> str:
        """Answer ``query`` with the PaLM model handle."""
        return self._stream_generation(self.palm_bison, retrieved_text, query)

    @staticmethod
    def _build_prompt(retrieved_text: list, query) -> str:
        """Number the retrieved chunks (1-based, one per line) and fill RAG_PROMPT."""
        context_str = "\n".join(
            f"{position} {chunk}"
            for position, chunk in enumerate(retrieved_text, start=1)
        )
        # The numbered context -- not the raw list -- is what goes into the prompt;
        # formatting the list itself would embed its Python repr.
        return RAG_PROMPT.format(context_str, query)

    def _stream_generation(self, model, retrieved_text: list, query) -> str:
        """Stream a completion from ``model``, echoing it to stdout as it arrives.

        Returns:
            The concatenation of the ``.text`` of every streamed part.
        """
        prompt = self._build_prompt(retrieved_text, query)

        pieces = []
        for part in model.generate_content(prompt, stream=True):
            # Collect the text, not the response object: str.join rejects non-strings.
            pieces.append(part.text)
            print(part.text, end="")
        # Streamed parts are contiguous fragments of one answer, so no separator is added.
        return "".join(pieces)

    # def query_expansion(self, query: str) -> str:
    #     prompt = QUERY_EXPANSION_PROMPT.format(query)
    #     response = self.llm.generate_content(prompt)
    #     return response[0].text


In [None]:
import os
# Set GOOGLE_APPLICATION_CREDENTIALS before constructing Generator, whose __init__
# calls vertexai.init().
# NOTE(review): the path is relative, so it resolves against the kernel's current
# working directory -- presumably the repository root; confirm.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'arched-forest-409209-854f83e41ed5.json'
g = Generator()

In [None]:
class PipelineOutput:



    def __init__(self

In [3]:
# Display the object currently bound to `pipeline`.
# NOTE(review): `pipeline` is assigned in the pipeline_eval cell further down, so this
# cell only works after that cell has been executed.
pipeline

<function rag_swr.swr_pipeline(query: str)>

In [2]:
# %load pipeline_eval.py
from rag_swr import swr_pipeline
from rag_amr import amr_pipeline
from src.eval.rag_eval import RAGeval
from src.rds import db
import argparse 
import os
# NOTE(review): relative credentials path; it resolves against the current working directory.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = 'arched-forest-409209-854f83e41ed5.json'

def parse_args():
    """Parse the command line and return the resulting namespace.

    A single option is defined: the mandatory string option ``--retrieval``.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--retrieval", required=True, type=str)
    namespace = cli.parse_args()
    return namespace




# Evaluate the retrieval pipeline on every question in QUESTION_BANK and print the
# running mean of the three scores after each question.
evaluation = RAGeval()

# The cursor is only needed to load the questions, so it is released before the
# evaluation loop instead of being held open for the whole run.
with db.get_cursor() as cur:
    cur.execute("""SELECT question, pdf_document_name FROM "QUESTION_BANK" """)
    questions = cur.fetchall()

pipeline = swr_pipeline
eval_table = "chunked_128_sentence_window"
# Running sums of each score; divided by the number of questions seen when printed.
eval_dict = {
    "groundness_score": 0,
    "answer_relevance_score": 0,
    "context_relevance_score": 0,
}

for seen, (query, _pdf_name) in enumerate(questions, start=1):
    print("Question: "+ query)
    print("-" * 100)
    response, retrieved = pipeline(query)
    # Each retrieved item is indexed as item[0][2] for its text; join them all into a
    # single context string.
    retrieved_text = ''.join(item[0][2] for item in retrieved)

    print("Evaluation: ")
    # Score the question that was actually sent to the pipeline. The previous code
    # used `q[0]`, a variable left over from another cell, so every answer was
    # evaluated against the same unrelated question (a NameError on a fresh kernel).
    eval_object = {
        'question': query,
        'context': retrieved_text,
        'generated': response,
    }

    out = evaluation.assess_single_retrieval(**eval_object)
    eval_dict['groundness_score'] += out['groundness_score']
    eval_dict['answer_relevance_score'] += out['answer_relevance_score']
    eval_dict['context_relevance_score'] += out['context_relevance_score']

    print('context_relevance_score', eval_dict['context_relevance_score'] / seen)
    print('answer_relevance_score', eval_dict['answer_relevance_score'] / seen)
    print('groundness_score', eval_dict['groundness_score'] / seen)
        
        

                


        



Package litellm is installed but has a version conflict:
	(litellm 1.28.4 (/home/capstoners/DuRAG/.venv/lib/python3.11/site-packages), Requirement.parse('litellm<=1.24.0,>=1.11.1'))

This package is optional for trulens_eval so this may not be a problem but if
you need to use the related optional features and find there are errors, you
will need to resolve the conflict:

    ```bash
    pip install 'litellm<=1.24.0,>=1.11.1'
    ```

If you are running trulens_eval in a notebook, you may need to restart the
kernel after resolving the conflict. If your distribution is in a bad place
beyond this package, you may need to reinstall trulens_eval so that all of the
dependencies get installed and hopefully corrected:
    
    ```bash
    pip uninstall -y trulens_eval
    pip install trulens_eval
    ```

/home/capstoners/DuRAG/.venv/lib/python3.11/site-packages/pydantic/_internal/_config.py:272: PydanticDeprecatedSince20: Support for class-based `config` is deprecated, use ConfigDict instead.

✅ In Groundedness, input source will be set to __record__.main_input or `Select.RecordInput` .
✅ In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Context Relevance, input question will be set to __record__.main_input or `Select.RecordInput` .
✅ In Context Relevance, input statement will be set to __record__.main_output or `Select.RecordOutput` .
✅ In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
✅ In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
Question: How many people are there on the LREITs board of directors 2023? 
----------------------------------------------------------------------------------------------------
Sentence Window response: 


----------------------------------------------------------------------------------------------------
((_WeaviateUUIDInt('51d532a1-81ec-5476-9b88-ca29131d87d5'), 'Represent this sentence for 

  return compile(source, filename, mode, flags,

KeyboardInterrupt



In [None]:
# Zero-initialised accumulators, one per evaluation score.
eval_dict = dict.fromkeys(
    ("groundness_score", "answer_relevance_score", "context_relevance_score"),
    0,
)

In [None]:
# Run a hybrid search for `query` and expand the hits into sentence windows.
# NOTE(review): `weaviate`, `Reranker`, `SentenceWindowRetriever` and `query` are not
# defined in any visible cell; they must already exist in the kernel.
BGE_QUERY_PREFIX = "Represent this sentence for searching relevant passages: "
client = weaviate.connect_to_local()
reranker = Reranker()
swr_engine = SentenceWindowRetriever(client)
# Prefix the query only once: the unconditional `query = BGE_QUERY_PREFIX + query`
# stacked another copy of the prefix on every re-run of this cell.
if not query.startswith(BGE_QUERY_PREFIX):
    query = BGE_QUERY_PREFIX + query
retrieval_response = swr_engine.hybrid_search(query, filters=None, limit=10)
# print("*" * 100)
# print("Retrieval response: \n\n")
# print(swr_engine.chunk_text_joiner_response(retrieval_response.objects))
# print("*" * 100)
# retrieval_response = swr_engine.full_text_search(query, filters=None, limit=10)
# retrieval_response = swr_engine.semantic_search(query, filters=None, limit=10)
sentence_windows = swr_engine.get_sentence_windows(retrieval_response.objects)

In [13]:
# Run the selected pipeline on the question text of `q` and unpack its two results.
# NOTE(review): `q` is whatever row an earlier `for q in questions` loop left bound.
response, retrieved = pipeline(q[0])

Sentence Window response: 


----------------------------------------------------------------------------------------------------
((_WeaviateUUIDInt('51d532a1-81ec-5476-9b88-ca29131d87d5'), 'Represent this sentence for searching relevant passages: How many people are there on the LREITs board of directors 2023? ', 'Non-Executive   Director; and   Ms Sharon Lim, Executive   Director and CEO.\n•\n•\n•\n•\n•\n•\n•\n•\n•\n•\n•\n•\nPost-Merger, the Board was reconstituted to comprise certain members from the Manager (formerly known as Mapletree Commercial Trust Management Ltd.) and Mapletree North Asia Commercial Trust Management Ltd. to provide assurance of continuity to Unitholders and achieve an optimal balance of experience, skills and knowledge relevant to the merged entity’s business. As at 31 March 2023, the Board comprises fourteen directors (the “Directors”), of whom thirteen are Non-Executive Directors and ten are Independent Directors. The following sets out the composition of th