In [1]:
# %pip install google-genai openai langfuse openinference-instrumentation-google-genai

In [2]:
from langfuse import Langfuse
from langfuse import get_client
from langfuse import observe, propagate_attributes, Langfuse
from dotenv import load_dotenv
from google import genai
import json
from google.genai import types
import os

# Load credentials from the local .env file into the process environment
# before any client below reads them.
load_dotenv(".env")

# Module-level Langfuse client used by the functions in this file for
# trace/generation updates, scoring and the final flush.
langfuse = Langfuse(
    host=os.environ.get("LANGFUSE_HOST"),
    public_key=os.environ.get("LANGFUSE_PUBLIC_KEY"),
    secret_key=os.environ.get("LANGFUSE_SECRET_KEY"),
)

@observe(name="call-gemini-common-fn", as_type="generation", capture_input=True, capture_output=True)
def call_gemini(input, ground_truth, model_id="gemini-2.0-flash", file_paths=None, generation_config=None):
    """
    Send a prompt (plus optional files) to Gemini and trace the call with Langfuse.

    Files are uploaded before the request and always deleted afterwards,
    whether the request succeeds or fails.

    Args:
        input: Text prompt/instruction; sent as the first content item.
        ground_truth: Reference answer. It is not sent to the model; it is
            attached to the trace metadata and returned unchanged.
        model_id: Gemini model to use.
        file_paths: List of file paths, a single file path (string), or None
            for a text-only request.
        generation_config: Optional generation config forwarded as ``config``
            to ``generate_content``. None leaves the API defaults in place.

    Returns:
        A tuple ``(response.text, ground_truth)``.

    Raises:
        Any exception raised while uploading or generating is printed and
        re-raised unchanged.
    """
    # Per-token rates derived from per-million-token prices.
    # NOTE(review): the same rates are applied whatever model_id is passed —
    # confirm they match the models actually being called.
    INPUT_PRICE_PER_TOKEN = 0.3 / 1000000
    OUTPUT_PRICE_PER_TOKEN = 2.5 / 1000000
    CACHING_PRICE_PER_TOKEN = 0.03 / 1000000

    with propagate_attributes(
        user_id="eshanj",
        session_id="session_x",
        tags=["gemini", "eshan's-trace", "multi-file"],
        metadata={"email": "eshan@fonixedu.com"},
        version="1.0.0",
    ):
        client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))

        # Handle both single file and multiple files
        if file_paths is None:
            file_paths = []
        elif isinstance(file_paths, str):
            file_paths = [file_paths]

        uploaded_files = []

        try:
            # Upload all files
            for file_path in file_paths:
                print(f"  Uploading file: {file_path}...")
                uploaded_file = client.files.upload(file=file_path)
                uploaded_files.append(uploaded_file)
                print(f"  ✓ File uploaded: {uploaded_file.name} (URI: {uploaded_file.uri})")

            # Build content array: [prompt, file1, file2, file3, ...]
            contents = [input] + uploaded_files

            print(f"  Processing {len(uploaded_files)} file(s) with prompt...")

            # Always forward the caller's config. The previous branching was
            # inverted and dropped the config exactly when one was supplied.
            response = client.models.generate_content(
                model=model_id,
                contents=contents,
                config=generation_config,
            )

            usage_meta = response.usage_metadata

            def token_count(field):
                # Missing usage metadata or an unset counter both count as 0.
                return getattr(usage_meta, field, None) or 0

            prompt_tokens = token_count("prompt_token_count")
            candidate_tokens = token_count("candidates_token_count")
            thought_tokens = token_count("thoughts_token_count")
            cached_tokens = token_count("cached_content_token_count")

            # The prompt token count already includes tokens served from the
            # cache, so split them out to avoid billing cached tokens twice
            # (once at the input rate and once at the cache rate).
            uncached_prompt_tokens = max(prompt_tokens - cached_tokens, 0)

            # Thinking tokens are counted as output alongside the candidates.
            effective_output_tokens = candidate_tokens + thought_tokens

            langfuse.update_current_trace(
                input={
                    "prompt": input,
                    "files": [f.name for f in uploaded_files],
                    "file_count": len(uploaded_files)
                },
                output=response.text,
                metadata={
                    "ground_truth": ground_truth,
                }
            )

            input_cost = uncached_prompt_tokens * INPUT_PRICE_PER_TOKEN
            output_cost = effective_output_tokens * OUTPUT_PRICE_PER_TOKEN
            cache_read_input_cost = cached_tokens * CACHING_PRICE_PER_TOKEN
            total_cost = input_cost + output_cost + cache_read_input_cost

            langfuse.update_current_generation(
                cost_details={
                    "input": input_cost,
                    "cache_read_input_tokens": cache_read_input_cost,
                    "output": output_cost,
                    "total": total_cost,
                },
                usage_details={
                    "input": uncached_prompt_tokens,
                    "output": effective_output_tokens,
                    "cache_read_input_tokens": cached_tokens
                },
            )

            return response.text, ground_truth

        except FileNotFoundError as e:
            print(f"  ERROR: File not found: {e}")
            raise
        except Exception as e:
            print(f"  An error occurred: {e}")
            raise
        finally:
            # Best-effort cleanup: a failed delete is reported but must not
            # mask the result or the original exception.
            for uploaded_file in uploaded_files:
                try:
                    print(f"  Deleting uploaded file: {uploaded_file.name}...")
                    client.files.delete(name=uploaded_file.name)
                    print(f"  ✓ File deleted: {uploaded_file.name}")
                except Exception as e:
                    print(f"  Failed to delete {uploaded_file.name}: {e}")

@observe(as_type="evaluator")
def evaluate_with_gemini(prediction, ground_truth):
    """
    Ask Gemini to grade a prediction against the ground truth and record the score.

    The model is prompted to return a JSON object with a "score" and a
    "reason". The parsed score is attached to the current Langfuse trace.

    Args:
        prediction: Model output to be graded; interpolated into the prompt.
        ground_truth: Reference answer; interpolated into the prompt.

    Returns:
        A tuple ``(score, reason)`` where ``score`` is a float.

    Raises:
        ValueError: If Gemini returns no text, text that is not valid JSON,
            or JSON without a usable "score"/"reason".
    """
    # NOTE(review): thinking_budget (1024) is larger than max_output_tokens
    # (256). If thinking tokens count against the output limit, the visible
    # answer may be truncated or empty — confirm against the Gemini docs.
    eval_generation_config = types.GenerateContentConfig(
        temperature=0.0,
        top_p=0.9,  # Nucleus sampling threshold (0.0 to 1.0)
        top_k=40,  # Number of top tokens to sample from (e.g., 40)
        max_output_tokens=256,  # Maximum tokens to generate
        system_instruction="You are an evaluator. Compare the ground truth and the prediction.",
        response_mime_type="application/json",  # Forces output format (e.g., "application/json" for structured data)
        thinking_config=types.ThinkingConfig(
            # Set to a number of tokens to budget for internal thought process (0 disables)
            thinking_budget=1024,
            # Include the model's internal thoughts in the response (useful for debugging)
            include_thoughts=True,
        ),
    )

    eval_prompt = f"""
    Return a JSON object with exactly two fields:

    - "score": a float between 0 and 1 inclusive
    - "reason": a short explanation of why the score was given

    STRICT RULES:
    - Output ONLY valid JSON.
    - Do NOT include backticks, markdown, or any text outside the JSON.
    - "score" MUST be a float.
    - "reason" MUST be a string.

    ground_truth:
    {ground_truth}

    prediction:
    {prediction}
    """

    raw_output = call_gemini(
        eval_prompt,
        ground_truth=None,
        model_id="gemini-2.5-flash",
        file_paths=None,
        generation_config=eval_generation_config
    )

    # If call_gemini returns a tuple → extract the text
    if isinstance(raw_output, tuple):
        raw_output = raw_output[0]

    print("Gemini Raw Output:", raw_output)

    # The response text can be None/empty (e.g. no text part was produced);
    # fail with a clear message instead of an AttributeError on .replace().
    if not isinstance(raw_output, str) or not raw_output.strip():
        raise ValueError("Gemini returned no text output to evaluate")

    # Tolerate a fenced ```json block even though the prompt forbids it.
    clean_json = raw_output.replace("```json", "").replace("```", "").strip()

    try:
        result = json.loads(clean_json)
    except json.JSONDecodeError as e:
        raise ValueError(f"Gemini did not return valid JSON: {clean_json}") from e

    try:
        score = float(result["score"])
        reason = result["reason"]
    except (KeyError, TypeError, ValueError) as e:
        raise ValueError(
            f"Gemini JSON is missing a usable 'score'/'reason': {clean_json}"
        ) from e

    print(" Score:", score)
    print(" Reason:", reason)

    langfuse.score_current_trace(
        name="score",
        value=score,
        comment=reason,
    )

    return score, reason

@observe(name="gemini-qa-pipeline", as_type="chain")
def main() -> None:
  """
  Run the layout-extraction pipeline once and evaluate its output.

  Builds the extraction prompt and a hard-coded ground-truth JSON document,
  calls ``call_gemini`` on a single image file with a JSON-only generation
  config, then passes the returned prediction and ground truth to
  ``evaluate_with_gemini``. Returns nothing; progress is printed.
  """
  # Instruction sent to the model as the text part of the request.
  INSTRUCTION_PROMPT = """
  Task: Identify all meaningful blocks of content and extract the structural relationships between them.

  JSON Schema: Output the prediction using the 'document_elements' array, where each object contains:
  - id (string): A unique identifier (e.g., B1, N_Start).
  - text (string): The transcribed content.
  - type (enum): The element's function. Use only: TITLE, PARAGRAPH, LIST, TABLE_CELL, DIAGRAM_NODE, DIAGRAM_ARROW, KEY_VALUE_PAIR.
  - bbox (array of 4 integers): Normalized coordinates [xmin, ymin, xmax, ymax]. All values MUST be integers between 0 and 100.
  - relations (array of objects): A list of semantic connections.

  Relations Schema (Inside relations):
  - target_id (string): The id of the element it connects to.
  - relation_type (enum): The connection type. Use: FLOWS_TO, IS_LABEL_FOR, VALUE_FOR.

  Specific Instructions:
  1. For diagrams, use DIAGRAM_NODE for shapes and DIAGRAM_ARROW for lines. Use FLOWS_TO to link the source node to the target node.
  2. For forms/tables, use KEY_VALUE_PAIR. If a value is separated from its label, link them using VALUE_FOR.
  """

  # Expected output for the image below, in the schema the prompt asks for.
  # It is handed to call_gemini (which returns it unchanged) and then to the
  # evaluator; note the literal includes the surrounding ```json fence.
  GROUND_TRUTH = """```json
  {
    "document_elements": [
      {
        "id": "T1",
        "text": "උදාහරණ 2",
        "type": "TITLE",
        "bbox": [
          12,
          9,
          30,
          12
        ],
        "relations": [
          {
            "target_id": "H1",
            "relation_type": "FLOWS_TO"
          }
        ]
      },
      {
        "id": "H1",
        "text": "චුම්බක අනුනාද මූර්ණ යන්ත්‍රය (MRI - Magnetic Resonance Imaging Machine)",
        "type": "PARAGRAPH",
        "bbox": [
          12,
          14,
          92,
          20
        ],
        "relations": [
          {
            "target_id": "P1",
            "relation_type": "FLOWS_TO"
          }
        ]
      },
      {
        "id": "P1",
        "text": "රේඩියෝ තරංග සහ ප්‍රබල චුම්බක අනුනාද (දෙශික) මගින් ශරීරයේ අභ්‍යන්තර කොටස්වල සවිස්තරාත්මක රූප සටහන් ලබා ගැනීම මෙම යන්ත්‍රය මගින් සිදු වේ. රෝග හඳුනා ගැනීමේ දී මෙන් ම ප්‍රතිකාර නිර්ණය කිරීමේ දී ද මෙම රූප උපකාරී වේ.",
        "type": "PARAGRAPH",
        "bbox": [
          12,
          23,
          99,
          36
        ],
        "relations": [
          {
            "target_id": "T2",
            "relation_type": "FLOWS_TO"
          }
        ]
      },
      {
        "id": "T2",
        "text": "උදාහරණ 3",
        "type": "TITLE",
        "bbox": [
          12,
          39,
          29,
          42
        ],
        "relations": [
          {
            "target_id": "H2",
            "relation_type": "FLOWS_TO"
          }
        ]
      },
      {
        "id": "H2",
        "text": "විද්‍යුත් තන්තු රේඛිය යන්ත්‍රය (ECG - Electrocardiogram Machine)",
        "type": "PARAGRAPH",
        "bbox": [
          12,
          44,
          99,
          50
        ],
        "relations": [
          {
            "target_id": "P2",
            "relation_type": "FLOWS_TO"
          }
        ]
      },
      {
        "id": "P2",
        "text": "හෘද ස්පන්දනය නිරීක්ෂණය කිරීම සඳහා මෙම යන්ත්‍රය යොදා ගැනේ. හෘදයේ සිට ශරීරයේ අනෙකුත් ඉන්ද්‍රියයන් වෙත රුධිරය සැපයීමේ දී හෘදයේ ඇති වන විද්‍යුත් ස්පන්දනයට අනුව නිපදවෙන තරංග ප්‍රස්තාරික කඩදාසියක සටහන් වීම මෙහි දී සිදු වේ.",
        "type": "PARAGRAPH",
        "bbox": [
          12,
          53,
          99,
          67
        ],
        "relations": [
          {
            "target_id": "T3",
            "relation_type": "FLOWS_TO"
          }
        ]
      },
      {
        "id": "T3",
        "text": "උදාහරණ 4",
        "type": "TITLE",
        "bbox": [
          12,
          69,
          31,
          72
        ],
        "relations": [
          {
            "target_id": "H3",
            "relation_type": "FLOWS_TO"
          }
        ]
      },
      {
        "id": "H3",
        "text": "හෘද රෝග නිර් ගන්වීමේ යන්ත්‍රය (Cardiac Screening Machine)",
        "type": "PARAGRAPH",
        "bbox": [
          12,
          74,
          97,
          80
        ],
        "relations": [
          {
            "target_id": "P3",
            "relation_type": "FLOWS_TO"
          }
        ]
      },
      {
        "id": "P3",
        "text": "හෘදයේ ක්‍රියාකාරීත්වය පරිගණක තිරයක දැක්වීම මෙම යන්ත්‍රය මගින් සිදු වේ. හෘදයේ රුධිර නාල සිහින් වීම වැනි විවිධ ආසාදන තත්ත්වයන් හඳුනා ගැනීමට හැකි වීමෙන් අවශ්‍ය ප්‍රතිකාර සඳහා යොමු කිරීමට මේ නිසා පහසු වේ.",
        "type": "PARAGRAPH",
        "bbox": [
          12,
          83,
          98,
          96
        ],
        "relations": []
      }
    ]
  }
  ```
  """

  # Generation settings for the extraction call: zero temperature and a
  # JSON response MIME type, with thinking enabled.
  generation_config = types.GenerateContentConfig(
        temperature=0.0,
        top_p=0.9, # Nucleus sampling threshold (0.0 to 1.0)
        top_k=40, # Number of top tokens to sample from (e.g., 40)
        max_output_tokens=8192, # Maximum tokens to generate
        # frequency_penalty=0.1, # Penalizes tokens based on how often they have appeared (0.0 to 1.0)
        # presence_penalty=0.1, # Penalizes tokens based on whether they have appeared at least once (0.0 to 1.0)
        system_instruction="You are an expert Document Layout and Diagram Analyzer. Your task is to process the provided handwritten document image, including any diagrams, tables, or complex layouts. Your entire response MUST be a single JSON object.",
        # tools=tools_list,  # List of functions the model can call
        response_mime_type="application/json", # Forces output format (e.g., "application/json" for structured data)
        thinking_config=types.ThinkingConfig(
            # Set to a number of tokens to budget for internal thought process (0 disables)
            # NOTE(review): -1 presumably requests a dynamic budget — confirm against the API docs
            thinking_budget=-1, 
            # Include the model's internal thoughts in the response (useful for debugging)
            include_thoughts=True, 
        ),
        # Note: candidate_count is currently fixed at 1 for most models/use cases
    )

  print("Layout analysis and transcription process started...")
  # call_gemini returns (response text, ground truth passed in).
  prediction, ground_truth = call_gemini(
      INSTRUCTION_PROMPT, 
      GROUND_TRUTH, model_id="gemini-flash-latest", 
      file_paths="Generated Image December 09, 2025 - 4_41PM.jpeg",
      generation_config=generation_config
      )
  print("Evaluation started...")
  # Grades the prediction against the ground truth; the (score, reason)
  # return value is not used here.
  evaluate_with_gemini(prediction, ground_truth)

if __name__ == "__main__":
    try:
        main()
    finally:
        # Flush even when main() raises (e.g. an API quota error) so the
        # events recorded for the failed run are still sent to Langfuse.
        langfuse.flush()

Layout analysis and transcription process started...
  Uploading file: Generated Image December 09, 2025 - 4_41PM.jpeg...
  ✓ File uploaded: files/3cj7whcdp4eo (URI: https://generativelanguage.googleapis.com/v1beta/files/3cj7whcdp4eo)
  Processing 1 file(s) with prompt...
  Deleting uploaded file: files/3cj7whcdp4eo...
  ✓ File deleted: files/3cj7whcdp4eo
Evaluation started...
  Processing 0 file(s) with prompt...
  An error occurred: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 20, model: gemini-2.5-flash\nPlease retry in 45.408672434s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/googl

ClientError: 429 RESOURCE_EXHAUSTED. {'error': {'code': 429, 'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, head to: https://ai.google.dev/gemini-api/docs/rate-limits. To monitor your current usage, head to: https://ai.dev/usage?tab=rate-limit. \n* Quota exceeded for metric: generativelanguage.googleapis.com/generate_content_free_tier_requests, limit: 20, model: gemini-2.5-flash\nPlease retry in 45.408672434s.', 'status': 'RESOURCE_EXHAUSTED', 'details': [{'@type': 'type.googleapis.com/google.rpc.Help', 'links': [{'description': 'Learn more about Gemini API quotas', 'url': 'https://ai.google.dev/gemini-api/docs/rate-limits'}]}, {'@type': 'type.googleapis.com/google.rpc.QuotaFailure', 'violations': [{'quotaMetric': 'generativelanguage.googleapis.com/generate_content_free_tier_requests', 'quotaId': 'GenerateRequestsPerDayPerProjectPerModel-FreeTier', 'quotaDimensions': {'location': 'global', 'model': 'gemini-2.5-flash'}, 'quotaValue': '20'}]}, {'@type': 'type.googleapis.com/google.rpc.RetryInfo', 'retryDelay': '45s'}]}}