In [1]:
print("ok")

ok


In [3]:
import os
import warnings
warnings.filterwarnings('ignore')

from dotenv import load_dotenv
load_dotenv()

True

In [4]:
import numpy as np
import pandas as pd
import asyncio
import json

from langchain_openai import AzureChatOpenAI
from langchain_openai import AzureOpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

from ragas import SingleTurnSample, EvaluationDataset, evaluate
from ragas.metrics import (
    Faithfulness,
    ResponseRelevancy,
    LLMContextPrecisionWithReference,
    LLMContextRecall,
    ContextEntityRecall,
    NoiseSensitivity
)

from ragas.llms import LangchainLLMWrapper
from ragas.embeddings import LangchainEmbeddingsWrapper

print("‚úÖ All imports successful")

‚úÖ All imports successful


In [5]:
llm = AzureChatOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    deployment_name=os.getenv("AZURE_OPENAI_DEPLOYMENT_NAME"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
)

embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    model=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
)

ragas_llm = LangchainLLMWrapper(llm)
ragas_embeddings = LangchainEmbeddingsWrapper(embeddings)

print("‚úÖ LLM initialized: gpt-4o")
print("‚úÖ Embeddings initialized: text-embedding-ada-002")
print("‚úÖ RAGAS wrappers ready")

‚úÖ LLM initialized: gpt-4o
‚úÖ Embeddings initialized: text-embedding-ada-002
‚úÖ RAGAS wrappers ready


In [19]:
def run_async(coro):
    try:
        loop = asyncio.get_event_loop()
        if loop.is_running():
            import nest_asyncio
            nest_asyncio.apply()
            return loop.run_until_complete(coro)
        else:
            return asyncio.run(coro)
    except RuntimeError:
        return asyncio.run(coro)


In [7]:
test_response = "The first Super Bowl was held on January 15, 1967 in Los Angeles. It was a sunny day with clear skies."
test_context = [
    "The First AFL-NFL World Championship Game was played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles, California."
]
print("üìù Response to evaluate:")
print(f"   '{test_response}'")
print("\nüìö Retrieved context:")
print(f"   '{test_context[0]}'")

üìù Response to evaluate:
   'The first Super Bowl was held on January 15, 1967 in Los Angeles. It was a sunny day with clear skies.'

üìö Retrieved context:
   'The First AFL-NFL World Championship Game was played on January 15, 1967, at the Los Angeles Memorial Coliseum in Los Angeles, California.'


In [8]:
claim_extraction_prompt = ChatPromptTemplate.from_template("""
Given the following response, extract ALL factual claims as a numbered list.
Each claim should be a single, verifiable statement.

Response: {response}

Extract each factual claim:
""")

claim_chain = claim_extraction_prompt | llm | StrOutputParser()
extracted_claim_raw = claim_chain.invoke({"response":test_context})

print("üîç STEP 1: Extracted Claims from Response")
print("=" * 50)
print(extracted_claim_raw)

üîç STEP 1: Extracted Claims from Response
1. The First AFL-NFL World Championship Game was played on January 15, 1967.  
2. The game took place at the Los Angeles Memorial Coliseum.  
3. The game was held in Los Angeles, California.


In [9]:
claims = [
    "The first Super Bowl was held on January 15, 1967",
    "The first Super Bowl was held in Los Angeles",
    "It was a sunny day",
    "There were clear skies"
]
print("üìã Claims to verify:")
for i, claim in enumerate(claims, 1):
    print(f"   {i}. {claim}")

üìã Claims to verify:
   1. The first Super Bowl was held on January 15, 1967
   2. The first Super Bowl was held in Los Angeles
   3. It was a sunny day
   4. There were clear skies


In [12]:
verification_prompt = ChatPromptTemplate.from_template("""
Given the following context and claim, determine if the claim is SUPPORTED by the context.

Context: {context}

Claim: {claim}

Answer with:
- "SUPPORTED" if the claim can be verified from the context
- "NOT SUPPORTED" if the claim cannot be verified or contradicts the context

Also provide a brief explanation.

Verdict:
""")

verify_claims = verification_prompt | llm | StrOutputParser()

print("üîç STEP 2: Verifying Each Claim Against Context")
print("=" * 60)

verification_results = []
for claim in claims:
    result = verify_claims.invoke({
        "context":test_context[0],
        "claim":claim
    })
    is_supported = "SUPPORTED" in result.upper() and "NOT SUPPORTED" not in result.upper()
    verification_results.append({
        "claim":claim,
        "supported": is_supported,
        "explanation":result,
    })
    status = "‚úÖ" if is_supported else "‚ùå"
    print(f"\n{status} Claims: {claim}")
    print(f"    Result: {result[:100]}..." if len(result)>100 else f"    Result: {result[:100]}")

üîç STEP 2: Verifying Each Claim Against Context

‚úÖ Claims: The first Super Bowl was held on January 15, 1967
    Result: **SUPPORTED**

**Explanation:** The context states that the First AFL-NFL World Championship Game wa...

‚úÖ Claims: The first Super Bowl was held in Los Angeles
    Result: SUPPORTED

Explanation: The context states that the First AFL-NFL World Championship Game (the event...

‚ùå Claims: It was a sunny day
    Result: **NOT SUPPORTED**

**Explanation:** The context provides information about the date, location, and e...

‚ùå Claims: There were clear skies
    Result: NOT SUPPORTED

Explanation: The context does not provide any information about the weather condition...


In [13]:
print("\nüìä Claim Verification Summary")
print("=" * 80)

df_verification = pd.DataFrame([
    {
        "Clain": r['claim'],
        "Supported?":"‚úÖ Yes" if r["supported"] else "‚ùå No",
        "Reason":"Found in context" if r['supported'] else "HALLUCINATION - Not in context!"

    }
    for r in verification_results
])
df_verification.head()


üìä Claim Verification Summary


Unnamed: 0,Clain,Supported?,Reason
0,"The first Super Bowl was held on January 15, 1967",‚úÖ Yes,Found in context
1,The first Super Bowl was held in Los Angeles,‚úÖ Yes,Found in context
2,It was a sunny day,‚ùå No,HALLUCINATION - Not in context!
3,There were clear skies,‚ùå No,HALLUCINATION - Not in context!


In [17]:
supported_count = sum(1 for r in verification_results if r['supported'])
total_claims = len(verification_results)

manual_faithfulness = supported_count/total_claims

print("üî¢ STEP 3: Calculate Faithfulness Score")
print("=" * 50)
print(f"\n   Supported claims: {supported_count}")
print(f"   Total claims: {total_claims}")
print(f"\n   Formula: Faithfulness = {supported_count} / {total_claims}")
print(f"\n   üìä Manual Faithfulness Score: {manual_faithfulness:.2f}")

üî¢ STEP 3: Calculate Faithfulness Score

   Supported claims: 2
   Total claims: 4

   Formula: Faithfulness = 2 / 4

   üìä Manual Faithfulness Score: 0.50


In [21]:
faithfulness_sample = SingleTurnSample(
    user_input="When was the first Super Bowl?",
    response=test_response,
    retrieved_contexts=test_context
)
faithfulness_metric = Faithfulness(llm=ragas_llm)
ragas_faithfulness = run_async(faithfulness_metric.single_turn_ascore(faithfulness_sample))

print("üî¨ RAGAS Faithfulness Result")
print("=" * 50)
print(f"\n   Manual calculation:  {manual_faithfulness:.2f}")
print(f"   RAGAS metric score:  {ragas_faithfulness:.2f}")
print(f"\n   Difference: {abs(manual_faithfulness - ragas_faithfulness):.2f}")

üî¨ RAGAS Faithfulness Result

   Manual calculation:  0.50
   RAGAS metric score:  0.50

   Difference: 0.00


In [None]:
faithfulness_examples = [
    {
        "name": "Perfect Faithfulness (No hallucinations)",
        "response": "The first Super Bowl was played on January 15, 1967 at the Los Angeles Memorial Coliseum.",
        "context": ["The First AFL-NFL World Championship Game was played on January 15, 1967, at the Los Angeles Memorial Coliseum."]
    },
    {
        "name": "Partial Faithfulness (Some hallucinations)",
        "response": "The first Super Bowl was on January 15, 1967. The Green Bay Packers won 35-10 with Bart Starr as MVP.",
        "context": ["The First AFL-NFL World Championship Game was played on January 15, 1967."]
    },
    {
        "name": "Zero Faithfulness (Complete hallucination)",
        "response": "The first Super Bowl was held in Miami in 1970 and attracted over 100,000 spectators.",
        "context": ["The First AFL-NFL World Championship Game was played on January 15, 1967, at the Los Angeles Memorial Coliseum."]
    }
]
