## Install Required Packages

In [3]:
!pip install groq yfinance agno
!pip install groq duckduckgo-search newspaper4k lxml_html_clean agno
!pip install -U sqlalchemy 'psycopg[binary]' pgvector pypdf agno
!pip install udocker
!pip install sentence-transformers

Collecting groq
  Downloading groq-0.20.0-py3-none-any.whl.metadata (15 kB)
Collecting agno
  Downloading agno-1.2.5-py3-none-any.whl.metadata (40 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/40.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.7/40.7 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
Collecting pydantic-settings (from agno)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting python-dotenv (from agno)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting python-multipart (from agno)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting tomli (from agno)
  Downloading tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Downloading groq-0.20.0-py3-none-any.whl (124 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m124.9/124.9 kB[0m [31m8.2 MB/s[0m eta

In [None]:
# Show the version reported by the `python` executable on the shell PATH.
!python --version

Python 3.11.11


## Set Environment Variables

In [4]:
import os
from google.colab import userdata

# Copy the Groq key from the Colab secret store into the process environment.
os.environ.update(GROQ_API_KEY=userdata.get('GROQ_API_KEY'))
print("API keys have been set!")


API keys have been set!


## Agent 1:
## Functional Tool Calling Capability: Web Search

In [5]:
from textwrap import dedent
from agno.agent import Agent
from agno.models.groq import Groq
from agno.tools.duckduckgo import DuckDuckGoTools
from agno.tools.newspaper4k import Newspaper4kTools

In [7]:
import yfinance as yf

# Stock ticker to analyse ("INFY"; the research prompt further down refers to Infosys)
ticker = "INFY"

# Handle used to fetch the company's financial statements
company = yf.Ticker(ticker)

# Balance Sheet
balance_sheet = company.balance_sheet

# Income Statement
income_statement = company.financials

# Cash Flow Statement
cash_flow = company.cashflow

# Print only the balance sheet; the other two statements are just saved below
print("Balance Sheet:\n", balance_sheet)

# Save all three statements as CSV. Each file keeps the row labels as its first
# column and every reported period as a further column (not only 2024-03-31).
balance_sheet.to_csv('balance_sheet.csv')
income_statement.to_csv('income_statement.csv')
cash_flow.to_csv('cash_flow.csv')



Balance Sheet:
                                                     2024-03-31    2023-03-31  \
Treasury Shares Number                              10916829.0    12172119.0   
Ordinary Shares Number                            4139950635.0  4136387925.0   
Share Issued                                      4150867464.0  4148560044.0   
Total Debt                                        1002000000.0  1010000000.0   
Tangible Book Value                               9517000000.0  8077000000.0   
...                                                        ...           ...   
Cash Cash Equivalents And Short Term Investments  3433000000.0  2359000000.0   
Other Short Term Investments                      1660000000.0   878000000.0   
Cash And Cash Equivalents                         1773000000.0  1481000000.0   
Cash Equivalents                                           0.0   261000000.0   
Cash Financial                                    1773000000.0  1220000000.0   

                       

In [8]:
import pandas as pd
from textwrap import dedent

# Initialize the research agent with advanced journalistic capabilities.
# The three text blocks below are plain (non-f) strings passed through dedent,
# so placeholders such as {current_date} reach the agent literally as written.
research_agent = Agent(
    model=Groq(id="llama3-70b-8192"),
    tools=[DuckDuckGoTools()],  # Using DuckDuckGo for web research
    # Persona / mission statement for the agent.
    description=dedent("""\
        You are an elite financial research analyst with deep expertise in investigative analysis,
        data-driven reporting, and synthesizing CSV-based financial data with up-to-date web research.
        Your mission is to uncover the true financial narrative of a company by comparing its FY24 balance
        sheet data with its public narrative.
    """),
    # Step-by-step working instructions; the metric list here matches `desired_metrics` below.
    instructions=dedent("""\
        1. Data Integration:
           - Use the provided CSV data which represents FY24 values.
           - The metrics, in order, are:
             Treasury Shares, Ordinary Shares, Total Debt, Tangible Book Value, Working Capital,
             Retained Earnings, Goodwill, Net PPE, Accounts Receivable, Cash and Cash Equivalents.

        2. Web Research:
           - Search for the latest authoritative sources discussing these metrics and the company’s overall financial performance.
           - Cross-reference the FY24 data with the company’s public narrative as reported in recent publications.

        3. Analysis & Storytelling:
           - Craft a comprehensive financial report that includes:
             * A compelling headline.
             * An executive summary outlining key insights.
             * Detailed technical analysis of each metric using the FY24 data.
             * A narrative comparing the technical data with the company’s public communications.
             * Identification of any red flags or discrepancies.
             * A future outlook with recommendations.

        4. Quality Control:
           - Ensure clarity, objectivity, and balance.
           - Present the analysis in a story format that is accessible to both technical and lay audiences.
    """),
    # Markdown skeleton the final report is expected to follow.
    expected_output=dedent("""\
        # {Compelling Headline}

        ## Executive Summary
        {A concise overview of the key financial insights and overall narrative.}

        ## Detailed Analysis
        - **Treasury Shares:** [FY24 data analysis and narrative insights]
        - **Ordinary Shares:** [FY24 data analysis and narrative insights]
        - **Total Debt:** [FY24 data analysis and narrative insights]
        - **Tangible Book Value:** [FY24 data analysis and narrative insights]
        - **Working Capital:** [FY24 data analysis and narrative insights]
        - **Retained Earnings:** [FY24 data analysis and narrative insights]
        - **Goodwill:** [FY24 data analysis and narrative insights]
        - **Net PPE:** [FY24 data analysis and narrative insights]
        - **Accounts Receivable:** [FY24 data analysis and narrative insights]
        - **Cash and Cash Equivalents:** [FY24 data analysis and narrative insights]

        ## Narrative Comparison
        {A detailed comparison between the company’s public narrative and its FY24 financial data. Highlight any congruencies or discrepancies.}

        ## Red Flags & Recommendations
        {Identification of any financial red flags and recommendations for further action or caution.}

        ## Future Outlook
        {Expert predictions and future trends based on current data and market insights.}

        ---
        Research conducted by Financial Agent
        Credit Rating Style Report
        Published: {current_date}
        Last Updated: {current_time}
    """),
    markdown=True,
    show_tool_calls=True,
    # NOTE(review): presumably supplies the date/time the footer placeholders refer to — confirm.
    add_datetime_to_instructions=True,
)

# Read the balance sheet CSV written by the yfinance cell. `DataFrame.to_csv`
# stored the row labels (metric names) as the first column and one column per
# reporting period, so load it with the metric names as the index. Reading it
# with `header=None` and taking column 0 would return the labels, not the values.
balance_sheet_df = pd.read_csv('balance_sheet.csv', index_col=0)
if balance_sheet_df.shape[1] == 0:
    raise ValueError("balance_sheet.csv contains no period columns; re-run the yfinance cell.")

# The first data column is the FY24 period (2024-03-31 in the printed balance sheet).
fy24_column = balance_sheet_df.iloc[:, 0]

# Report metric name -> row label used in the yfinance balance sheet.
# NOTE(review): only some labels are visible in the truncated printout above;
# the others are assumed to match and fall back to 'N/A' when absent — confirm.
metric_row_labels = {
    'Treasury Shares': 'Treasury Shares Number',
    'Ordinary Shares': 'Ordinary Shares Number',
    'Total Debt': 'Total Debt',
    'Tangible Book Value': 'Tangible Book Value',
    'Working Capital': 'Working Capital',
    'Retained Earnings': 'Retained Earnings',
    'Goodwill': 'Goodwill',
    'Net PPE': 'Net PPE',
    'Accounts Receivable': 'Accounts Receivable',
    'Cash and Cash Equivalents': 'Cash And Cash Equivalents',
}

# The metrics in the order the agent instructions list them.
desired_metrics = list(metric_row_labels)

# Look each metric up by its row label instead of relying on row position.
fy24_values = [
    fy24_column.get(row_label, 'N/A') for row_label in metric_row_labels.values()
]

# Map each metric to its corresponding FY24 value from the CSV.
metrics_context = "FY24 Data: " + ", ".join(
    f"{metric}: {value}" for metric, value in zip(desired_metrics, fy24_values)
)

# Combine the research topic with the CSV-derived context.
# This is an f-string: {metrics_context} is the only interpolated field, and
# dedent runs after interpolation on the fully built text.
prompt = dedent(f"""
    Conduct a forensic financial investigation of the infosys company using FY24 balance sheet data as the foundation,
    augmented by cutting-edge web intelligence. Approach this as a financial detective building a
    evidence-backed narrative.

    **Core Directive:**
    Uncover the hidden financial truth by executing:

    1. **Metric Deep Dives** (For Each):
    - Contextualize using 3-5 industry benchmarks from authoritative sources (S&P, Bloomberg, Fed Economic Data)
    - Calculate key ratios: Debt/Equity, Current Ratio, ROE, Cash Conversion Cycle
    - Compare against management's earnings call statements from past 12 months
    - Highlight percentage changes vs FY23 (estimate if unavailable)

    2. **Narrative Cross-Examination:**
    - Extract 5 key claims from the CEO's latest shareholder letter
    - Validate each claim against FY24 data points using SEC filing cross-references
    - Identify semantic gaps using linguistic analysis (e.g., "strong liquidity position" vs actual Quick Ratio)

    3. **Advanced Signal Detection:**
    - Compute Altman Z-Score for bankruptcy risk
    - Calculate Beneish M-Score for earnings manipulation probability
    - Analyze working capital trends through the lens of the cash conversion cycle

    4. **Investigative Angles:**
    - Compare accounts receivable days vs industry payment norm standards
    - Assess goodwill impairment risks using SIC code sector performance data
    - Reverse-engineer probable stock buyback timing from treasury share movements

    5. **Story Development:**
    Craft a Pulitzer-level investigative piece structured as:
    - Hook: Start with paradoxical finding (e.g., "Record Cash Reserves Mask 200% Debt Surge")
    - Act 1: The Public Narrative (extract key themes from press releases)
    - Act 2: The Data Reality (visualize discrepancies using FRED-style charts)
    - Act 3: The Why - Motive Analysis (strategic blunder vs intentional obfuscation)
    - Closing: Future Scenarios (model 3 outcomes using Monte Carlo simulations)

    **Required Output Enhancements:**
    - Insert dynamic data visualization proposals (e.g., "Interactive waterfall chart showing...")
    - Include investigative questions for follow-up (e.g., "Why did PPE increase 40% while...")
    - Add regulatory watchlist alerts where applicable (SEC, FASB, IRS red flags)
    - Propose 3 FOIA request targets for deeper investigation

    **Data Context:**
    {metrics_context}

    **Journalistic Mandate:**
    Your final piece should be publication-ready for the Financial Times' deep dive section,
    combining the numerical precision of a 10-K with the readability of Michael Lewis.
""")

# Initiate the research agent with the new prompt that includes the CSV FY24 context.
# The response is printed by the agent (streamed) rather than returned to a variable.
research_agent.print_response(prompt, stream=True)


Output()

In [None]:
# 1. First install udocker
!udocker --allow-root install

# 2. Kill existing processes and clean up so the cell can be re-run from scratch.
#    On a first run there is no "pgvector" container yet, so the `rm` below prints
#    "Error: invalid container id" (visible in the recorded output); the following steps still run.
!pkill -9 -f postgres
!rm -rf /content/pgdata
!udocker --allow-root rm pgvector
!rm -f postgres.log

# 3. Create fresh directory that is mounted as the Postgres data volume in step 5
!mkdir -p /content/pgdata
!chmod -R 777 /content/pgdata

# 4. Pull and create container with correct image path
!udocker --allow-root pull ankane/pgvector
!udocker --allow-root create --name=pgvector ankane/pgvector

# 5. Run the container in the background; its output goes to postgres.log.
#    Database/user/password are all "ai" and container port 5432 is published as 5532,
#    matching the connection URL used by the cells below.
!nohup udocker --allow-root run \
    --env="POSTGRES_DB=ai" \
    --env="POSTGRES_USER=ai" \
    --env="POSTGRES_PASSWORD=ai" \
    --env="PGDATA=/var/lib/postgresql/data/pgdata" \
    --volume="/content/pgdata:/var/lib/postgresql/data" \
    --publish="5532:5432" \
    pgvector > postgres.log 2>&1 &

# 6. Connection testing
import time
from sqlalchemy import create_engine, text
from sqlalchemy.exc import OperationalError

def test_db_connection(max_retries=5, wait_time=10):
    db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"

    for attempt in range(max_retries):
        try:
            print(f"\nConnection attempt {attempt + 1}/{max_retries}")
            engine = create_engine(db_url)
            with engine.connect() as connection:
                result = connection.execute(text("SELECT version();"))
                version = result.fetchone()[0]
                print("✅ Successfully connected to PostgreSQL!")
                print(f"Server Version: {version}")

                # Test vector extension
                connection.execute(text("CREATE EXTENSION IF NOT EXISTS vector;"))
                print("✅ Vector extension ready!")
                return engine
        except OperationalError as e:
            print(f"Attempt {attempt + 1} failed, waiting {wait_time} seconds...")
            print("\nChecking postgres status:")
            !ps aux | grep postgres
            print("\nLatest logs:")
            !tail -n 20 postgres.log
            time.sleep(wait_time)

    return None

# 7. Give the freshly started database 30 seconds to finish its set-up
#    before the first connection attempt.
print("Waiting for database to initialize...")
time.sleep(30)

engine = test_db_connection()

# Report the outcome; `engine` is None when every connection attempt failed.
status_message = (
    "\n✅ Database is ready for RAG Agent initialization!"
    if engine
    else "\n❌ Database connection failed. Please check the logs above."
)
print(status_message)

Error: invalid container id 
Info: downloading layer sha256:f2c967e41f72b294e2b96f25154dda38dbde3603b3be33888fb437147972f24b
Info: downloading layer sha256:c5f09b50002256f9e40253d9f3f34381edbe3ca083eb5ce77ecffc874c087995
Info: downloading layer sha256:2e3723549f1143b2c0381181709301932d6a592d8969d0827c1f0133772dfbe0
Info: downloading layer sha256:7077e54346e0cc4692391042abd0479bb02443892be7c6b1085fe7184caff826
Info: downloading layer sha256:bb153abf380255875eda2f78bb3c853520a77f3175574a91d909b5d6912c75a4
Info: downloading layer sha256:f1a157d7d7b01f004e4e758a97a38a5d10c8ce79348e5b674187a99d4f0cabda
Info: downloading layer sha256:6e662fa63f18991e2026f333e95c9670506a0c891ec82e5593bb613a627c6a96
Info: downloading layer sha256:2c35234636c95a2fed252512bb033c920753cffdd75c796da556a594845c121d
Info: downloading layer sha256:04efcdd3a2a4cbfcbdd1542bb9af0b2ff422f4e7b2cde58bfe8c61521df96056
Info: downloading layer sha256:786562b3be85b223d9577821b409d7147981e9b2c9611e0c5ec8725b1255df43
Info: downl

Define a class **DocumentQA**:
- Initialize an open-source sentence-transformers embedding model
- Access the vector DB to load the embeddings of the PDF at the URL passed by the user
- Initialize the database, ready to run QA

In [None]:
from typing import Union, List, Tuple

from sentence_transformers import SentenceTransformer

from agno.agent import Agent
from agno.knowledge.pdf_url import PDFUrlKnowledgeBase
from agno.models.groq import Groq
from agno.vectordb.pgvector import PgVector

In [None]:
class DocumentQA:
    """Question answering over a PDF that is loaded from a URL.

    The PDF is loaded into a ``PDFUrlKnowledgeBase`` backed by a PgVector table,
    using a local sentence-transformers model for embeddings. Questions are
    answered by a Groq chat model that receives the retrieved passages as context.
    """

    def __init__(self):
        # Initialize embedder
        self.embedder = self._create_embedder()
        # Initialize Groq model
        self.chat_model = Groq(id="llama3-8b-8192")
        # Database URL (same credentials/port as the Postgres container started earlier)
        self.db_url = "postgresql+psycopg://ai:ai@localhost:5532/ai"
        # Both stay None until load_pdf_url() has been called.
        self.current_knowledge_base = None
        self.agent = None

    def _create_embedder(self):
        """Create the embedding model.

        Returns:
            An object exposing ``dimensions``, ``get_embedding`` and
            ``get_embedding_and_usage``, which is handed to PgVector as embedder.
        """
        class EmbeddingModel:
            def __init__(self):
                self.model = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2')
                self.dimensions = 384

            def get_embedding_and_usage(self, text: Union[str, List[str]]) -> Tuple[Union[List[List[float]], List[float]], dict]:
                """Return the embedding(s) plus a usage dict.

                The "token" counts are whitespace-separated word counts, not
                real tokenizer counts.
                """
                if isinstance(text, str):
                    total_tokens = len(text.split())
                else:
                    total_tokens = sum(len(t.split()) for t in text)
                usage = {"prompt_tokens": total_tokens, "total_tokens": total_tokens}
                return self.model.encode(text).tolist(), usage

            def get_embedding(self, text: Union[str, List[str]]) -> Union[List[float], List[List[float]]]:
                """Return the embedding for one string, or one embedding per string."""
                # encode() is called the same way for a single string and a list.
                return self.model.encode(text).tolist()

        # Build the model first so the success message is only printed once it exists.
        embedder = EmbeddingModel()
        print("✅ Embedding model(sentence-transformers/paraphrase-MiniLM-L6-v2) initialized successfully!")
        return embedder

    @staticmethod
    def _doc_text(doc):
        """Return a document's text from ``content`` or ``text``, else ``None``."""
        if hasattr(doc, 'content'):
            return doc.content
        if hasattr(doc, 'text'):
            return doc.text
        return None

    def load_pdf_url(self, url: str, table_name: str = "documents"):
        """Load a PDF from a URL into the vector DB and create the QA agent.

        Args:
            url: Address of the PDF to load.
            table_name: PgVector table that stores the embeddings. The table is
                recreated on every call (``recreate=True``).

        Errors are printed together with their traceback instead of being raised.
        """
        try:
            # Create PDF URL knowledge base
            self.current_knowledge_base = PDFUrlKnowledgeBase(
                urls=[url],
                vector_db=PgVector(
                    table_name=table_name,
                    db_url=self.db_url,
                    embedder=self.embedder
                ),
            )

            # Initialize the Agent
            self.agent = Agent(
                knowledge=self.current_knowledge_base,
                search_knowledge=True,
                model=self.chat_model
            )

            # Load knowledge base
            print("Loading knowledge base...")
            self.current_knowledge_base.load(recreate=True)
            print("✅ Knowledge base loaded successfully!")

            # Show sample content
            self.show_sample_content()

        except Exception as e:
            print(f"❌ Error loading PDF: {e}")
            import traceback
            print(traceback.format_exc())

    def show_sample_content(self, num_samples: int = 5):
        """Print the first 200 characters of up to ``num_samples`` documents."""
        try:
            if not self.current_knowledge_base:
                print("No knowledge base loaded!")
                return

            # Treat a None search result like an empty one.
            docs = self.current_knowledge_base.search("") or []
            print("\nSample documents in knowledge base:")
            print("-" * 50)
            for i, doc in enumerate(docs[:num_samples], 1):
                print(f"\nDocument {i}:")
                body = self._doc_text(doc)
                if body is not None:
                    print(body[:200] + "..." if len(body) > 200 else body)
        except Exception as e:
            print(f"Error showing samples: {e}")

    def ask(self, question: str):
        """Ask a question about the loaded document and print the answer.

        Args:
            question: The question to answer from the loaded PDF.

        Prints a hint and returns if no document has been loaded. Errors are
        printed together with their traceback instead of being raised.
        """
        if not self.current_knowledge_base or not self.agent:
            print("Please load a document first!")
            return

        print(f"\nQ: {question}")
        try:
            # Get relevant documents; normalise a None result to an empty list so
            # both the count and the context building below can rely on it.
            relevant_docs = self.current_knowledge_base.search(question) or []
            print("\nRelevant documents found:", len(relevant_docs))

            # Build context from relevant documents, skipping any without text
            passages = (self._doc_text(doc) for doc in relevant_docs)
            context = "\n".join(passage for passage in passages if passage is not None)

            # Create a prompt that includes the context
            full_prompt = f"""Based on the following content:{context}
            Question: {question}
            Please provide a detailed answer based ONLY on the information provided above."""

            # Get response with context
            response = self.agent.run(full_prompt)
            print(f"\nA: {response.content}")

        except Exception as e:
            print(f"Error: {e}")
            import traceback
            print(traceback.format_exc())

In [None]:
RAG_qa = DocumentQA()

✅ Embedding model(sentence-transformers/paraphrase-MiniLM-L6-v2) initialized successfully!


In [None]:
RAG_qa.load_pdf_url("https://www.apple.com/environment/pdf/Apple_Environmental_Progress_Report_2024.pdf")

Loading knowledge base...


✅ Knowledge base loaded successfully!

Sample documents in knowledge base:
--------------------------------------------------

Document 1:
Environmental progress can and should be good for business. We underpin our climate strategy with strong business principles and innovation while harnessing the power of markets to replicate our solut...

Document 2:
Supporting communities worldwide Through our engagement efforts, we work directly with groups and individuals who are addressing environmental injustice in their communities. We evaluate each opportun...

Document 3:
 Environmental Progress Report 104Engagement and AdvocacyEnvironmental Initiatives DataIntroduction Contents Appendix

Document 4:
      2024 Environmental Progress Report 102Engagement and AdvocacyEnvironmental Initiatives DataIntroduction Contents Appendix

Document 5:
An ambitious goal for 2030: We committed to be carbon neutral for our entire carbon footprint by the end of the decade. Our journey to 2030 centers on

In [None]:
RAG_qa.ask("Key points in this report? Give in 5 bullets")


Q: Key points in this report? Give in 5 bullets

Relevant documents found: 5

A: Here are 5 key points from the report:

• Apple has established a rigorous chemical safety program, including the Regulated Substances Specification, to ensure the use of chemicals and materials in their products, accessories, and manufacturing processes.

• The company regularly updates and expands its chemical restrictions to surpass current regulatory restrictions, with recent additions including perfluorohexanesulfonic acid (PFHxA), phenol, isopropylated, phosphate (3:1) (PIP 3:1), and skin-sensitizing substances.

• Apple requires suppliers to analyze materials that come into prolonged skin contact with skin according to Apple's requirements and reviews compliance with these requirements, mandating clear restrictions on potentially harmful chemicals in materials.

• The company has conducted toxicological assessments on over 1,600 new materials to proactively evaluate and eliminate potentially harmfu

In [None]:
RAG_qa.ask("Executive Summary in 100 words")


Q: Executive Summary in 100 words

Relevant documents found: 5

A: Here is a 100-word summary of the 2024 Environmental Progress Report:

Apple's Environmental Progress Report covers their fiscal year 2023. The report highlights Apple's efforts to reduce their carbon footprint and improve sustainability. Apple accounts for their carbon footprint by following international standards, and they have made significant progress towards reducing their direct and indirect greenhouse gas emissions. They have also implemented carbon removals and offsets to maintain carbon neutrality. The report provides detailed breakdowns of Apple's emissions by sector, including energy consumption, manufacturing, transportation, and product use. Apple aims to continue reducing their environmental impact and promote sustainability throughout their supply chain.


## Agent 3:
## Stock Market analysis

1. Utilize Yahoo Finance to run comparative analysis across multiple stocks
2. Generate a small summary report

In [None]:
from textwrap import dedent

from agno.agent import Agent
from agno.models.openai import OpenAIChat
from agno.tools.yfinance import YFinanceTools

# Stock analysis agent. It runs on the Groq model class imported in an earlier
# cell; this cell's own imports do not include Groq.
stock_agent = Agent(
    model=Groq(id="llama3-70b-8192"),
    tools=[
        # Yahoo Finance toolkit with each data category below switched on.
        YFinanceTools(
            stock_price=True,
            analyst_recommendations=True,
            stock_fundamentals=True,
            historical_prices=True,
            company_info=True,
            company_news=True,
        )
    ],
    # Plain (non-f) string: sent to the agent exactly as written.
    instructions=dedent("""\
        You are a seasoned credit rating analyst with deep expertise in market analysis! 📊

        Follow these steps for comprehensive financial analysis:
        1. Market Overview
           - Latest stock price
           - 52-week high and low
        2. Financial Deep Dive
           - Key metrics (P/E, Market Cap, EPS)
        3. Market Context
           - Industry trends and positioning
           - Competitive analysis
           - Market sentiment indicators

        Your reporting style:
        - Begin with an executive summary
        - Use tables for data presentation
        - Include clear section headers
        - Highlight key insights with bullet points
        - Compare metrics to industry averages
        - Include technical term explanations
        - End with a forward-looking analysis

        Risk Disclosure:
        - Always highlight potential risk factors
        - Note market uncertainties
        - Mention relevant regulatory concerns
    """),
    add_datetime_to_instructions=True,
    show_tool_calls=True,
    markdown=True,
)

print("Stock Agent created. Ready to take user queries..")

Stock Agent created. Ready to take user queries..


In [None]:

# User Query 1
stock_agent.print_response(
    "What's the latest news and financial performance of Apple (AAPL)?", stream=True)

Output()

In [None]:
# User Query 2: Semiconductor market analysis
# Uses `stock_agent`, the agent created above; the notebook never defines `finance_agent`.
stock_agent.print_response(
    dedent("""\
    Analyze the semiconductor market performance focusing on:
    - NVIDIA (NVDA)
    - AMD (AMD)
    - Intel (INTC)
    - Taiwan Semiconductor (TSM)
    Compare their market positions, growth metrics, and future outlook in terms of AI growth."""),
    stream=True,
)



Output()

In [None]:
# User Query 3: Competitive analysis
# Uses `stock_agent`, the agent created above; the notebook never defines `finance_agent`.
stock_agent.print_response("How is Microsoft performing in the age of AI?", stream=True)

Output()

## Agent 4

## Evaluation: LLM-as-a-judge

In [None]:
from textwrap import dedent
from agno.agent import Agent
from agno.models.groq import Groq

class RAGEvaluator:
    """LLM-as-a-judge wrapper that scores a RAG answer on five 1-5 metrics."""

    def __init__(self):
        self.evaluator = self._initialize_evaluator()

    def _initialize_evaluator(self):
        """Build the judge agent with its rubric and report template.

        The template placeholders such as {query} or {score} are plain text
        (the strings are not f-strings), so they reach the agent as written.
        """
        return Agent(
            model=Groq(id="llama-3.1-8b-instant"),  # Using different Llama model
            description=dedent("""\
                You are an expert RAG system evaluator with deep expertise in:
                - Information retrieval quality assessment
                - Response accuracy evaluation
                - Source attribution verification
                - Context relevance analysis
                - Natural language generation evaluation
            """),
            instructions=dedent("""\
                Evaluate the RAG system output based on these key metrics:

                1. Faithfulness (1-5):
                   - How accurately does the response reflect the source documents?
                   - Are there any hallucinations or incorrect statements?
                   - Does it maintain factual consistency?

                2. Context Relevance (1-5):
                   - Are the retrieved passages relevant to the query?
                   - Is important context missing?
                   - Is irrelevant information included?

                3. Answer Completeness (1-5):
                   - Does the response fully address the query?
                   - Are all key aspects covered?
                   - Is the level of detail appropriate?

                4. Source Attribution (1-5):
                   - Are sources properly cited?
                   - Is it clear which information comes from where?
                   - Can claims be traced back to sources?

                5. Response Coherence (1-5):
                   - Is the response well-structured?
                   - Does it flow logically?
                   - Is it easy to understand?

                Provide specific examples and explanations for each score.
            """),
            expected_output=dedent("""\
                # RAG Evaluation Report

                ## Overview
                Query: {query}
                Response Length: {n_chars} characters

                ## Metric Scores

                ### Faithfulness: {score}/5
                - Justification:
                - Examples:
                - Areas for Improvement:

                ### Context Relevance: {score}/5
                - Justification:
                - Examples:
                - Areas for Improvement:

                ### Answer Completeness: {score}/5
                - Justification:
                - Examples:
                - Areas for Improvement:

                ### Source Attribution: {score}/5
                - Justification:
                - Examples:
                - Areas for Improvement:

                ### Response Coherence: {score}/5
                - Justification:
                - Examples:
                - Areas for Improvement:

                ## Overall Score: {total}/25

                ## Key Recommendations
                1. {rec1}
                2. {rec2}
                3. {rec3}

                ## Summary
                {final_assessment}
            """),
            markdown=True,
        )

    @staticmethod
    def _format_context(context):
        """Turn the retrieved context into one space-separated string.

        A single string is used as-is (joining it would put a space between
        every character), ``None`` becomes an empty string, and each item of
        any other iterable is converted with ``str`` before joining.
        """
        if context is None:
            return ""
        if isinstance(context, str):
            return context
        return ' '.join(str(passage) for passage in context)

    def evaluate(self, query: str, response: str, context: list, stream: bool = True):
        """
        Evaluate a RAG system's response

        Args:
            query (str): Original user query
            response (str): RAG system's response
            context (list): Retrieved passages used for the response; a single
                string or None is also accepted
            stream (bool): Whether to stream the evaluation output

        Returns:
            Whatever the judge agent's ``print_response`` returns; the report
            itself is printed by that call.
        """
        context_text = self._format_context(context)
        evaluation_prompt = f"""
        Please evaluate this RAG system output:

        QUERY:
        {query}

        RETRIEVED CONTEXT:
        {context_text}

        RESPONSE:
        {response}

        Provide a detailed evaluation following the metrics and format specified.
        """

        return self.evaluator.print_response(evaluation_prompt, stream=stream)


# Initialize evaluator
evaluator = RAGEvaluator()
print("LLM-as-a Judge Evaluator initialized successfully!")

LLM-as-a Judge Evaluator initialized successfully!


In [None]:
# Example evaluation. Rerun this to use actual financial RAG outputs

query = "What are the key features of transformer models?"
context = [
    "Transformer models use self-attention mechanisms to process input sequences.",
    "Key features include parallel processing and handling of long-range dependencies."
]
response = "Transformer models are characterized by their self-attention mechanism..."

# Run evaluation
evaluator.evaluate(query, response, context)

Output()