# Solar Panel Datasheet Comparison Workflow

### Provide API keys manually

In [1]:
import os
from getpass import getpass

# Prompt for the Llama Cloud key only when the environment does not already provide it.
if "LLAMA_CLOUD_API_KEY" not in os.environ:
    os.environ["LLAMA_CLOUD_API_KEY"] = getpass("Enter your Llama Cloud API Key: ")

# Same policy for OpenAI: reuse OPENAI_API_KEY from the environment when it is set,
# and only fall back to an interactive prompt when it is missing or empty.
# The key is kept in a variable (never hard-coded) and passed explicitly to the LLM client.
OPENAI_KEY = os.environ.get("OPENAI_API_KEY", "")
if OPENAI_KEY == "":
    OPENAI_KEY = getpass("Enter your OpenAI API Key: ")

## Initialize Extraction Agent

Here we initialize the LlamaExtract client. We will use it below to create the extraction agent that extracts structured data, following our schema, from the solar panel datasheet.

In [2]:
from llama_cloud_services import (
    LlamaExtract,
    EU_BASE_URL,
)
from llama_cloud.core.api_error import ApiError
from llama_cloud import ExtractConfig

# Initialize the LlamaExtract client against the endpoint given by EU_BASE_URL.
# To use the library's default endpoint instead, construct it as `LlamaExtract()`.
llama_extract = LlamaExtract(base_url=EU_BASE_URL)

### Define the Structured Extraction Schema

In [None]:
from pydantic import BaseModel, Field
from typing import List


# Power output range of a module: lower and upper bound plus the unit they are expressed in.
# (Documented with comments rather than a docstring so the generated JSON schema is unchanged.)
class PowerRange(BaseModel):
    # Both bounds are required.
    min_power: float = Field(description="Minimum power output in Watts")
    max_power: float = Field(description="Maximum power output in Watts")
    # Optional; falls back to "W" when not supplied.
    unit: str = Field(default="W", description="Power unit")

# One solar panel specification record as it should be extracted from a datasheet.
# (Documented with comments rather than a docstring so the generated JSON schema is unchanged.)
class SolarPanelSpec(BaseModel):
    # --- Identification and electrical characteristics (all required) ---
    module_name: str = Field(description="Name or model of the solar panel module")
    power_output: PowerRange = Field(description="Power output range")
    maximum_efficiency: float = Field(
        description="Maximum module efficiency in percentage"
    )
    temperature_coefficient: float = Field(
        description="Temperature coefficient in %/K"
    )

    # --- Mechanical limits (required) ---
    # NOTE(review): both are integers, so a fractional value (e.g. 21.5 kg) cannot be
    # represented as-is — confirm that whole-number precision is intended.
    max_length: int = Field(description="Maximum length of product in mm")
    max_weight: int = Field(description="Maximum weight of product in kg")

    # --- Warranty and compliance ---
    warranty: int = Field(
        description="Minimum number of years for product to be in warranty"
    )
    # Optional; defaults to an empty list.
    certifications: List[str] = Field(default=[], description="List of certifications")
    # Required mapping; its key/value types are left unconstrained (plain `dict`).
    page_citations: dict = Field(
        description="Mapping of each extracted field to its page numbers"
    )

# Top-level extraction schema handed to the extraction agent: a wrapper holding
# the list of per-module specification records.
class SolarPanelSchema(BaseModel):
    specs: List[SolarPanelSpec] = Field(
        description="List of extracted solar panel specifications"
    )

In [4]:
# Start from a clean slate: if an agent with this name is already registered, delete it
# so the agent created below always reflects the current schema and config.
try:
    existing_agent = llama_extract.get_agent(name="solar-panel-datasheet")
    if existing_agent:
        llama_extract.delete_agent(existing_agent.id)
except ApiError as err:
    # A 404 is tolerated (presumably: no agent by that name exists yet);
    # every other API error is propagated.
    if err.status_code != 404:
        raise

extract_config = ExtractConfig(extraction_mode="BALANCED")

# (Re)create the agent bound to the SolarPanelSchema defined above.
agent = llama_extract.create_agent(
    name="solar-panel-datasheet",
    data_schema=SolarPanelSchema,
    config=extract_config,
)

## Workflow Overview

In [11]:
from llama_index.core.workflow import (
    Event,
    StartEvent,
    StopEvent,
    Context,
    Workflow,
    step,
)
from llama_index.llms.openai import OpenAI
from llama_index.core.prompts import ChatPromptTemplate
from llama_cloud_services import LlamaExtract
from llama_cloud.core.api_error import ApiError
from pydantic import BaseModel, Field
from typing import List, Literal, Optional

# Verdict for one compared parameter: a PASS/FAIL status plus the reasoning behind it.
class DetailItem(BaseModel):
    status: Literal["PASS", "FAIL"] = Field(description="PASS or FAIL")
    explanation: str = Field(description="Why it passed or failed")
    # Disabled optional field, kept for reference:
    # recommendation: Optional[str] = Field(None, description="What to do if FAIL")

# Per-parameter comparison results: one required DetailItem (PASS/FAIL + explanation)
# for each design parameter checked against the requirements.
class ComparisonDetails(BaseModel):
    # Power output bounds
    maximum_power: DetailItem
    minimum_power: DetailItem
    # Mechanical limits
    max_length: DetailItem
    max_weight: DetailItem
    # Compliance and performance
    certification: DetailItem
    efficiency: DetailItem
    temperature_coefficient: DetailItem
    warranty: DetailItem

# Structured comparison report; this is the model the LLM is asked to fill in.
class ComparisonReportOutput(BaseModel):
    component_name: str  # name of the component the report is about
    meets_requirements: bool  # overall verdict
    summary: str  # free-text summary of the comparison
    details: ComparisonDetails  # per-parameter PASS/FAIL breakdown

# Define custom events
# Emitted by the parsing step once the datasheet has been extracted.
class DatasheetParseEvent(Event):
    datasheet_content: dict  # extraction result payload

# Carries the design requirements text on to the report-generation step.
class RequirementsLoadEvent(Event):
    requirements_text: str  # raw contents of the requirements file

# Event wrapping a finished comparison report.
# NOTE(review): no workflow step in the cells shown here emits or consumes this event — confirm it is still needed.
class ComparisonReportEvent(Event):
    report: ComparisonReportOutput

# Progress message that workflow steps write to the event stream.
class LogEvent(Event):
    msg: str  # human-readable status text
    delta: bool = False  # presumably marks `msg` as an incremental chunk — TODO confirm; never set in the cells shown here

# Shared LLM client; the workflow's report-generation step reads this module-level `llm`.
llm = OpenAI(model="gpt-4o", api_key=OPENAI_KEY)  # or your preferred model
# Smoke test: one tiny completion to check that the key and model work before running the workflow.
resp = llm.complete("Say 'ok'")
print(resp)

OK


In [12]:
class SolarPanelComparisonWorkflow(Workflow):
    """
    Workflow to extract data from a solar panel datasheet and generate a comparison report
    against provided design requirements.

    The steps are chained through their events:
    ``parse_datasheet`` -> ``load_requirements`` -> ``generate_comparison_report``.
    Each step also writes a ``LogEvent`` to the context's event stream.
    """

    def __init__(self, agent: LlamaExtract, requirements_path: str, **kwargs):
        """
        Store the extraction agent and pre-load the design requirements.

        Args:
            agent: Extraction agent; only its ``aextract`` coroutine is used.
                NOTE(review): annotated as ``LlamaExtract``, but the notebook passes the
                object returned by ``llama_extract.create_agent(...)`` — confirm the annotation.
            requirements_path: Path to a text file containing the design requirements.
            **kwargs: Forwarded unchanged to ``Workflow.__init__``.

        Raises:
            OSError: If the requirements file cannot be opened.
            UnicodeDecodeError: If the requirements file is not valid UTF-8.
        """
        super().__init__(**kwargs)
        self.agent = agent
        # Load design requirements from file as a text blob. The encoding is explicit so
        # the result does not depend on the platform's default locale.
        with open(requirements_path, "r", encoding="utf-8") as f:
            self.requirements_text = f.read()

    @step
    async def parse_datasheet(
        self, ctx: Context, ev: StartEvent
    ) -> DatasheetParseEvent:
        """Run the extraction agent on ``ev.datasheet_path`` and store the result in the context."""
        # datasheet_path is provided in the StartEvent
        datasheet_path = (
            ev.datasheet_path
        )  # e.g., "./data/solar_panel_comparison/datasheet.pdf"
        extraction_result = await self.agent.aextract(datasheet_path)
        # Extracted payload; DatasheetParseEvent types it as a dict.
        datasheet_dict = extraction_result.data
        # Kept in the context store so the final step can read it back.
        await ctx.store.set("datasheet_content", datasheet_dict)
        ctx.write_event_to_stream(LogEvent(msg="Datasheet parsed successfully."))
        return DatasheetParseEvent(datasheet_content=datasheet_dict)

    @step
    async def load_requirements(
        self, ctx: Context, ev: DatasheetParseEvent
    ) -> RequirementsLoadEvent:
        """Pass the requirements text that was read in ``__init__`` on to the next step."""
        # Use the pre-loaded requirements text from __init__
        req_text = self.requirements_text
        ctx.write_event_to_stream(LogEvent(msg="Design requirements loaded."))
        return RequirementsLoadEvent(requirements_text=req_text)

    @step
    async def generate_comparison_report(
        self, ctx: Context, ev: RequirementsLoadEvent
    ) -> StopEvent:
        """
        Ask the LLM for a structured comparison of the datasheet against the requirements.

        Returns:
            A ``StopEvent`` whose result is a dict with the keys ``"report"`` (the
            structured LLM output) and ``"datasheet_content"`` (the extraction payload).
        """
        # Build a prompt that injects both the extracted datasheet content and the design requirements
        datasheet_content = await ctx.store.get("datasheet_content")
        # NOTE(review): the prompt asks for recommendations, but DetailItem currently has no
        # recommendation field — confirm which of the two should change.
        prompt_str = """
You are an expert renewable energy engineer.

Compare the following solar panel datasheet information with the design requirements.

Design Requirements:
{requirements_text}

Extracted Datasheet Information:
{datasheet_content}

Generate a detailed comparison report in JSON format with the following schema:
  - component_name: string
  - meets_requirements: boolean
  - summary: string
  - details: dictionary of comparisons for each parameter

For each parameter (Maximum Power, Minimum Power, Max Length, Max Weight, Certification, Efficiency, Temperature Coefficient, Warranty),
indicate PASS or FAIL and provide brief explanations and recommendations.
"""

        # Wrap the template as a single user message
        prompt = ChatPromptTemplate.from_messages([("user", prompt_str)])

        # Call the module-level LLM to generate the report, constrained to ComparisonReportOutput
        report_output = await llm.astructured_predict(
            ComparisonReportOutput,
            prompt,
            requirements_text=ev.requirements_text,
            datasheet_content=str(datasheet_content),
        )
        ctx.write_event_to_stream(LogEvent(msg="Comparison report generated."))
        return StopEvent(
            result={"report": report_output, "datasheet_content": datasheet_content}
        )

## Running the Workflow

In [19]:
import nest_asyncio
nest_asyncio.apply()

# Path to design requirements file (e.g., a text file with design criteria for solar panels)
project_path = "/home/daghbeji/ragragi/genAI_3D_CAD/llamaindex/"
requirements_path = project_path + "full-pipeline-templates/solar-panels/design_requirements.txt"


# Instantiate the workflow
workflow = SolarPanelComparisonWorkflow(
    agent=agent, requirements_path=requirements_path, verbose=True, timeout=180
)

# Run the workflow; pass the datasheet path in the StartEvent
result = await workflow.run(
    datasheet_path=project_path + "data/solar-panels/EU_Datasheet_HoneyM_DE08M.08(II)_2021_A.pdf"
)

Uploading files: 100%|██████████| 1/1 [00:04<00:00,  4.41s/it]
Creating extraction jobs: 100%|██████████| 1/1 [00:00<00:00,  1.48it/s]
Extracting files: 100%|██████████| 1/1 [00:07<00:00,  7.25s/it]


<workflows.context.context.Context object at 0x70d1aaa74c80>


In [None]:
# Print the report returned by the workflow's final step as indented JSON.
print("\n********Final Comparison Report:********\n")
print(result["report"].model_dump_json(indent=4))
# The raw datasheet content is printed in a separate cell:
# print("\n********Datasheet Content:********\n", result["datasheet_content"])


********Final Comparison Report:********

{
    "component_name": "TSM-DE08M.08(II)",
    "meets_requirements": false,
    "summary": "The solar panel TSM-DE08M.08(II) does not fully meet the design requirements due to insufficient power output and efficiency, despite meeting other criteria.",
    "details": {
        "maximum_power": {
            "status": "FAIL",
            "explanation": "The maximum power output is 385 W, which is within the required range of 370 W to 420 W. However, the minimum power output is 360 W, which is below the required minimum of 370 W."
        },
        "minimum_power": {
            "status": "FAIL",
            "explanation": "The minimum power output is 360 W, which is below the required minimum of 370 W."
        },
        "max_length": {
            "status": "PASS",
            "explanation": "The maximum length is 1763 mm, which is within the required limit of 1900 mm."
        },
        "max_weight": {
            "status": "PASS",
       

In [21]:
# Show the extraction payload that was injected into the comparison prompt.
print("\n********Datasheet Content:********\n", result["datasheet_content"])


********Datasheet Content:********
 {'specs': [{'module_name': 'TSM-DE08M.08(II)', 'power_output': {'min_power': 360.0, 'max_power': 385.0, 'unit': 'W'}, 'maximum_efficiency': 21.0, 'temperature_coefficient': -0.36, 'max_length': 1763, 'max_weight': 20, 'warranty': 15, 'certifications': ['IEC61215', 'IEC61730', 'UL1703', 'IEC61701: Salt Mist Corrosion', 'IEC62716: Ammonia Corrosion', 'IEC60068: Blowing Sand', 'ISO9001', 'ISO14001', 'ISO45001', 'ISO14064'], 'page_citations': {}}]}
