# PRACTISE: TEST THE CODE

## CONFIG

In [2]:
import os
from dataclasses import dataclass
from typing import List, Optional
from dotenv import load_dotenv

In [3]:
# Load environment variables from a .env file (python-dotenv) so that later
# os.getenv lookups can see them.
load_dotenv()

True

In [4]:
# config/config.py
import os
from dataclasses import dataclass
from typing import Optional
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()


@dataclass
class Config:
    """Configuration for the LLM Smoke Test Framework.

    Every field has a default and may be passed to the constructor. After
    construction, ``__post_init__`` overrides a subset of the fields from
    environment variables of the same name (``OPENAI_API_KEY``,
    ``CHROME_DRIVER_PATH``, ``OUTPUT_DIR``, ``BASE_URL``, ``LLM_MODEL``,
    ``LLM_TEMPERATURE``, ``LLM_MAX_TOKENS``); a set environment variable wins
    over both the default and the constructor argument.

    A successfully constructed instance additionally carries four path
    attributes that are attached dynamically (they are not dataclass fields):
    ``page_data_path``, ``analysis_path``, ``test_scripts_path`` and
    ``screenshots_path``, each a sub-directory of ``OUTPUT_DIR`` that has been
    created on disk.

    Raises:
        ValueError: If no OpenAI API key is configured, or if
            ``LLM_TEMPERATURE`` / ``LLM_MAX_TOKENS`` cannot be parsed as a
            number.
    """

    # Crawler settings
    HEADLESS: bool = True
    PAGE_LOAD_TIMEOUT: int = 30
    CAPTURE_SCREENSHOTS: bool = True
    ANALYZE_LAYOUT: bool = True
    CHROME_DRIVER_PATH: Optional[str] = None

    # Output settings
    OUTPUT_DIR: str = "output"
    BASE_URL: str = ""  # Base URL for the application under test

    # LLM settings
    OPENAI_API_KEY: Optional[str] = None
    LLM_MODEL: str = "gpt-4o-mini"  # Using non-vision model
    LLM_TEMPERATURE: float = 0.0
    LLM_MAX_TOKENS: int = 500  # Further reduced for split analysis
    LLM_MAX_CONTEXT: int = 8000  # Maximum context size for mini model
    VISUAL_ANALYSIS_TOKENS: int = 300  # Specific limit for visual analysis

    # Screenshot optimization settings
    SCREENSHOT_MAX_DIMENSION: int = 1280  # Maximum dimension in pixels
    SCREENSHOT_QUALITY: int = 75  # JPEG quality (1-100)

    def __post_init__(self):
        """Apply environment overrides, validate, then create output directories."""
        # Environment variables take precedence; the current field value is
        # only used as the fallback when the variable is not set.
        self.OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", self.OPENAI_API_KEY)
        self.CHROME_DRIVER_PATH = os.getenv(
            "CHROME_DRIVER_PATH", self.CHROME_DRIVER_PATH
        )
        self.OUTPUT_DIR = os.getenv("OUTPUT_DIR", self.OUTPUT_DIR)
        self.BASE_URL = os.getenv("BASE_URL", self.BASE_URL)
        self.LLM_MODEL = os.getenv("LLM_MODEL", self.LLM_MODEL)
        # Environment values are strings, so numeric settings are converted.
        self.LLM_TEMPERATURE = float(
            os.getenv("LLM_TEMPERATURE", str(self.LLM_TEMPERATURE))
        )
        self.LLM_MAX_TOKENS = int(os.getenv("LLM_MAX_TOKENS", str(self.LLM_MAX_TOKENS)))

        # Validate before touching the filesystem so that an invalid
        # configuration does not leave an output directory tree behind.
        self.validate()

        self._create_output_directories()

    def validate(self):
        """Validate configuration.

        Raises:
            ValueError: If ``OPENAI_API_KEY`` is missing or empty.
        """
        if not self.OPENAI_API_KEY:
            raise ValueError("OPENAI_API_KEY must be set")

    def _create_output_directories(self):
        """Create the output sub-directories and expose their paths as attributes."""
        directories = {
            "page_data_path": os.path.join(self.OUTPUT_DIR, "page_data"),
            "analysis_path": os.path.join(self.OUTPUT_DIR, "analysis"),
            "test_scripts_path": os.path.join(self.OUTPUT_DIR, "test_scripts"),
            "screenshots_path": os.path.join(self.OUTPUT_DIR, "screenshots"),
        }

        for name, path in directories.items():
            # exist_ok makes repeated construction against the same OUTPUT_DIR safe.
            os.makedirs(path, exist_ok=True)
            # Attach e.g. ``self.screenshots_path`` for callers to use.
            setattr(self, name, path)

In [5]:
# Instantiate the configuration. Construction applies environment-variable
# overrides, creates the output directories under OUTPUT_DIR and raises
# ValueError when OPENAI_API_KEY is not set.
config = Config()

## CORE

### CRAWLERS

### LLM ANALYZER

In [6]:
import sys
import os

# Hard-coded absolute path to the project checkout; adjust when running elsewhere.
project_root = "/Users/vamsi_mbmax/Library/CloudStorage/OneDrive-Personal/01_vam_PROJECTS/PROFESSIONAL/proj_llm_smoke_test_framework"

# Make the project importable: when the root is not yet on sys.path it is
# appended; when it already is, the working directory is switched to it instead.
# NOTE(review): the printed messages read as inverted relative to the condition
# ("found" is printed when the root is NOT on sys.path yet) - confirm intent.
if project_root in sys.path:
    print(
        f"did not find the project root. changing the working directory to project root :{project_root}"
    )
    os.chdir(project_root)
else:
    print("found the project_root. appending to the path")
    sys.path.append(project_root)

found the project_root. appending to the path


In [7]:
import os
import json
import logging
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatMessagePromptTemplate
from langchain.output_parsers import StructuredOutputParser, ResponseSchema
from config.config import Config

from openai import OpenAI
import re
from core.screenshot_utils import optimize_screenshot

In [8]:
class LLMAnalyzer:
    """
    LLM-based analyzer that processes page data and generates test information.

    ``analyze_page`` reduces crawled page data to a compact summary, renders it
    into a text prompt, sends the prompt to the configured chat model and hands
    the model's answer to ``_process_analysis_response``.
    """

    # Caps applied while simplifying page data, to stay within token limits.
    _MAX_ELEMENTS = 20
    _MAX_FORMS = 3
    _MAX_INPUTS_PER_FORM = 5
    _MAX_HEADINGS = 5
    _MAX_TEXT_LENGTH = 100

    # Tags ranked first when choosing which elements to keep.
    _INTERACTIVE_TAGS = ("button", "a", "input", "select")
    # Attributes copied from each element / form input (only when truthy).
    _ELEMENT_KEYS = ("id", "tag", "type", "name", "text", "class")
    _INPUT_KEYS = ("id", "name", "type", "required")

    _logger = logging.getLogger(__name__)

    def __init__(self, config: "Config"):
        """
        Initialize the LLM analyzer.

        Args:
            config (Config): Configuration object

        Raises:
            ValueError: Propagated from ``config.validate()`` when required
                settings are missing.
        """
        self.config = config
        self.config.validate()  # Ensure required settings are present

        # Chat model used by analyze_page
        self.llm = ChatOpenAI(
            api_key=self.config.OPENAI_API_KEY,
            model=self.config.LLM_MODEL,
            temperature=self.config.LLM_TEMPERATURE,
            max_tokens=self.config.LLM_MAX_TOKENS,
        )

        # Direct OpenAI client for vision capabilities
        # (not used by any method visible in this class yet)
        self.openai_client = OpenAI(api_key=self.config.OPENAI_API_KEY)

    def analyze_page(self, page_data):
        """Analyze page data to identify key elements for testing.

        Args:
            page_data: Dict of crawled page data. The keys read are ``url``,
                ``title`` and, when present, ``elements``, ``forms`` and
                ``headings`` (lists of dicts).

        Returns:
            On success, whatever ``_process_analysis_response`` returns. On any
            failure (including a non-dict ``page_data``), a fallback dict with
            the keys ``url``, ``title``, ``error``, ``page_title_validation``,
            ``unique_identifiers``, ``key_elements``, ``smoke_test_steps`` and
            ``locator_strategies``.
        """
        try:
            simplified_data = self._simplify_page_data(page_data)
            formatted_prompt = self._build_prompt(simplified_data)

            self._logger.debug(
                "LLM prompt length: %d characters", len(formatted_prompt)
            )
            response = self.llm.invoke(formatted_prompt)
            return self._process_analysis_response(response.content, simplified_data)
        except Exception as e:
            # Deliberate best-effort boundary: any failure degrades to a
            # minimal result instead of propagating to the caller.
            self._logger.error("Error analyzing page with LLM: %s", e)
            return self._fallback_analysis(page_data, e)

    def _truncate(self, value):
        """Cut strings longer than ``_MAX_TEXT_LENGTH`` and mark the cut with '...'."""
        if isinstance(value, str) and len(value) > self._MAX_TEXT_LENGTH:
            return value[: self._MAX_TEXT_LENGTH] + "..."
        return value

    def _element_importance(self, elem):
        """Return a sort key: interactive tags first, then id, text and name presence."""
        # Truthiness (not `!= ""`) so that None-valued attributes count as
        # absent, matching the filter used when the attributes are copied.
        return (
            elem.get("tag", "") in self._INTERACTIVE_TAGS,
            bool(elem.get("id")),
            bool(elem.get("text")),
            bool(elem.get("name")),
        )

    def _simplify_page_data(self, page_data):
        """Build the reduced page summary that is rendered into the prompt."""
        simplified = {
            "url": page_data.get("url", ""),
            "title": page_data.get("title", ""),
        }

        # Elements: keep the highest-ranked ones with only their key attributes.
        if "elements" in page_data:
            ranked = sorted(
                page_data.get("elements") or [],
                key=self._element_importance,
                reverse=True,
            )
            simplified["elements"] = [
                {
                    key: self._truncate(elem[key])
                    for key in self._ELEMENT_KEYS
                    if elem.get(key)
                }
                for elem in ranked[: self._MAX_ELEMENTS]
            ]

        # Forms: keep the first few forms and the first few inputs of each.
        if "forms" in page_data:
            simplified_forms = []
            for form in (page_data.get("forms") or [])[: self._MAX_FORMS]:
                inputs = (form.get("inputs") or [])[: self._MAX_INPUTS_PER_FORM]
                simplified_forms.append(
                    {
                        "id": form.get("id", ""),
                        "action": form.get("action", ""),
                        "method": form.get("method", ""),
                        "inputs": [
                            {
                                key: input_field[key]
                                for key in self._INPUT_KEYS
                                if input_field.get(key)
                            }
                            for input_field in inputs
                        ],
                    }
                )
            simplified["forms"] = simplified_forms

        # Headings: helpful for understanding page structure.
        if "headings" in page_data:
            simplified["headings"] = [
                {
                    "level": heading.get("level", ""),
                    "text": self._truncate(heading.get("text", "")),
                }
                for heading in (page_data.get("headings") or [])[: self._MAX_HEADINGS]
            ]

        return simplified

    def _build_prompt(self, simplified_data):
        """Render the simplified page summary into the prompt text sent to the LLM."""
        prompt_template = """
            I'm an expert web tester analyzing a webpage to generate smoke test information.

            PAGE URL: {url}
            PAGE TITLE: {title}

            ELEMENTS:
            {elements_info}

            FORMS:
            {forms_info}

            HEADINGS:
            {headings_info}

            Based on this data, I need to provide:

            1. KEY ELEMENTS:
            List the most important elements that should be tested.

            2. UNIQUE IDENTIFIERS:
            List unique ways to identify this page in tests (title, URL patterns, unique elements).

            3. RECOMMENDED SMOKE TEST STEPS:
            List 5-10 concise steps for smoke testing this page.

            4. SUGGESTED LOCATOR STRATEGIES:
            List element: locator pairs for important elements (use best practice selectors).
            """

        # The template already prints each section header, so the rendered
        # sections below contain only the item lines.
        element_lines = []
        for elem in simplified_data.get("elements", []):
            attrs = "".join(
                f" {attr}='{elem[attr]}'"
                for attr in ("id", "type", "text")
                if elem.get(attr)
            )
            element_lines.append(f"- {elem.get('tag', '')}{attrs}")

        form_lines = []
        for form in simplified_data.get("forms", []):
            form_id = f" id='{form.get('id')}'" if form.get("id") else ""
            form_lines.append(f"- Form{form_id} method='{form.get('method', '')}'")
            for input_field in form.get("inputs", []):
                attrs = "".join(
                    f" {attr}='{input_field[attr]}'"
                    for attr in ("id", "type", "name")
                    if input_field.get(attr)
                )
                form_lines.append(f"  - Input{attrs}")

        heading_lines = [
            f"- H{heading.get('level', '')}: {heading.get('text', '')}"
            for heading in simplified_data.get("headings", [])
        ]

        def render(lines):
            # An explicit marker is clearer to the model than an empty section.
            return "\n".join(lines) if lines else "(none)"

        return prompt_template.format(
            url=simplified_data.get("url", ""),
            title=simplified_data.get("title", ""),
            elements_info=render(element_lines),
            forms_info=render(form_lines),
            headings_info=render(heading_lines),
        )

    def _fallback_analysis(self, page_data, error):
        """Build the minimal result returned when the analysis fails."""
        # page_data itself may be the cause of the failure (e.g. None), so it
        # must not be dereferenced blindly here.
        data = page_data if hasattr(page_data, "get") else {}
        url = data.get("url", "")
        title = data.get("title", "")
        return {
            "url": url,
            "title": title,
            "error": str(error),
            "page_title_validation": title,
            "unique_identifiers": [f"URL: {url}"],
            "key_elements": [],
            "smoke_test_steps": [
                "Visit the page and verify it loads",
                f"Check page title is '{title}'",
            ],
            "locator_strategies": {},
        }

    def _process_analysis_response(self, response_content, page_data):
        """Turn the raw LLM answer into a result structure (not implemented yet).

        Args:
            response_content: The ``content`` of the LLM response.
            page_data: The simplified page data the prompt was built from.

        Returns:
            None. This is currently a placeholder with no implementation.
        """
        # TODO: parse response_content into the analysis result.
        pass

## OUTPUT

## TESTS