# PRACTICE - INTEGRATION TESTING CODE

## SETUP

In [101]:
import os
import sys
import json
import tempfile
import logging
from typing import Dict, List, Any, Optional, Tuple
from dataclasses import dataclass
from pathlib import Path
import subprocess
from dotenv import load_dotenv
import requests
from openai import OpenAI

In [102]:
# Logging: INFO-level records formatted as "time - logger - level - message",
# emitted through a logger named after this notebook.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger("llm_integration_test")

In [103]:
# Run dotenv's load_dotenv() before the framework is instantiated;
# LLMIntegrationTestFramework.__init__ falls back to os.getenv("OPENAI_API_KEY").
# NOTE(review): presumably the key is kept in a local .env file — confirm.
load_dotenv()

True

## DATA STRUCTURES

In [104]:
@dataclass
class RepoInfo:
    """Repository information, as built by ``scan_repository``."""

    # Remote origin URL, or "Unknown" when it cannot be read from git config.
    url: str
    # Path of the local checkout that was scanned.
    local_path: str
    # One dict per scanned file with the keys "path", "language", "content"
    # and "size".
    files: List[Dict[str, Any]]
    # Distinct language names detected from the scanned files' extensions.
    languages: List[str]

In [105]:
@dataclass
class IntegrationPoint:
    """Store information about one integration point.

    NOTE(review): nothing in the visible code instantiates this class. The
    fields appear to mirror the "integration_points" entries requested in the
    analysis prompt (without "testing_approach") — confirm before relying on it.
    """

    # presumably the component the interaction originates from — TODO confirm
    source: str
    # presumably the component being called/used — TODO confirm
    target: str
    # The prompt gives "API, database, service-to-service, etc." as examples.
    type: str
    # The prompt asks for a 1-5 scale, where 5 is most complex.
    complexity: int
    description: str

In [106]:
@dataclass
class Component:
    """Store information about one component of an analyzed repository.

    NOTE(review): nothing in the visible code instantiates this class. Most
    fields appear to mirror the "components" entries requested in the analysis
    prompt — confirm before relying on it.
    """

    name: str
    path: str
    language: str
    # The prompt asks the model for an importance of 1-5.
    importance: int
    # presumably names of other components this one depends on — TODO confirm
    dependencies: List[str]
    # Integration points associated with this component.
    integration_points: List[IntegrationPoint]

## INTEGRATION FRAMEWORK CLASS

In [None]:
class LLMIntegrationTestFramework:
    """
    main class for the framework.

    this framework analyzes github repos to identify critical integration points and generates a comprehensive testing strategy report.
    """

    def __init__(self, openai_api_key: Optional[str] = None):
        self.openai_api_key = openai_api_key or os.getenv("OPENAI_API_KEY")
        if not self.openai_api_key:  # FALSE IS TRUE,
            raise ValueError(
                "OpenAI API key is required. Set OPENAI_API_KEY environment variable."
            )

        self.client = OpenAI(api_key=self.openai_api_key)

    def clone_repository(self, repo_url: str) -> str:
        logger.info(f"cloning repository: {repo_url}")

        temp_dir = tempfile.mkdtemp()

        try:
            result = subprocess.run(
                ["git", "clone", repo_url, temp_dir],
                check=True,
                capture_output=True,
                text=True,
            )
            logger.info(f"repository cloned to {temp_dir}")
            return temp_dir
        except subprocess.CalledProcessError as e:
            logger.error(f"failed to clone repository: {e.stderr}")
            raise RuntimeError(f"Failed to clone repository: {e.stderr}")

    def scan_repository(self, repo_path: str) -> RepoInfo:
        """
        Scan a repository to extract file information.

        Args:
            repo_path: Path to the repository.

        Returns:
            RepoInfo object containing repository information.
        """
        logger.info(f"Scanning repository: {repo_path}")

        repo_url = self._get_repo_url(repo_path)
        files = []
        languages = set()

        # Walk through the repository
        for root, _, filenames in os.walk(repo_path):
            for filename in filenames:
                # Skip hidden files and directories
                if filename.startswith(".") or "/.git/" in root:
                    continue

                file_path = Path(root) / filename
                relative_path = file_path.relative_to(repo_path)

                # Try to detect language based on file extension
                ext = file_path.suffix.lower()
                language = self._detect_language(ext)
                if language:
                    languages.add(language)

                # Only include relevant files
                if language or ext in [".json", ".yaml", ".yml", ".xml", ".md"]:
                    try:
                        # Read file content (limit to first 1000 lines to avoid memory issues)
                        with open(
                            file_path, "r", encoding="utf-8", errors="ignore"
                        ) as f:
                            content = "".join(f.readlines()[:1000])

                        files.append(
                            {
                                "path": str(relative_path),
                                "language": language,
                                "content": (
                                    content
                                    if len(content) < 50000
                                    else f"{content[:25000]}... [content truncated] ...{content[-25000:]}"
                                ),
                                "size": file_path.stat().st_size,
                            }
                        )
                    except Exception as e:
                        logger.warning(f"Error reading file {file_path}: {str(e)}")

        return RepoInfo(
            url=repo_url, local_path=repo_path, files=files, languages=list(languages)
        )

    def _get_repo_url(self, repo_path: str) -> str:
        try:
            result = subprocess.run(
                ["git", "-C", repo_path, "config", "--get", "remote.origin.url"],
                check=True,
                capture_output=True,
                text=True,
            )
            return result.stdout.strip()
        except subprocess.CalledProcessError:
            return "Unknown"

    def _detect_language(self, extension: str) -> Optional[str]:
        language_map = {
            ".py": "Python",
            ".js": "JavaScript",
            ".ts": "TypeScript",
            ".jsx": "JavaScript",
            ".tsx": "TypeScript",
            ".java": "Java",
            ".c": "C",
            ".cpp": "C++",
            ".h": "C/C++",
            ".cs": "C#",
            ".go": "Go",
            ".rb": "Ruby",
            ".php": "PHP",
            ".swift": "Swift",
            ".kt": "Kotlin",
            ".rs": "Rust",
            ".scala": "Scala",
            ".html": "HTML",
            ".css": "CSS",
            ".sql": "SQL",
        }
        return language_map.get(extension)

    def analyze_repository(self, repo_info: RepoInfo) -> Dict[str, Any]:
        """
        Analyze a repository to identify components and integration points.

        Args:
            repo_info: Repository information.

        Returns:
            Dictionary containing analysis results.
        """
        logger.info(f"Analyzing repository: {repo_info.url}")

        # Prepare the prompt for OpenAI
        prompt = self._create_analysis_prompt(repo_info)

        # Call OpenAI API
        try:
            response = self.client.chat.completions.create(
                model="gpt-4o-mini",
                messages=[
                    {
                        "role": "system",
                        "content": "You are an expert software architect specializing in integration testing. You analyze code repositories to identify critical components, integration points, and recommend testing strategies.",
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=0.2,
            )

            analysis_text = response.choices[0].message.content

            # Try to extract JSON from the response
            try:
                # Look for JSON block in the response
                json_start = analysis_text.find("```json")
                json_end = analysis_text.rfind("```")

                if json_start != -1 and json_end != -1:
                    json_text = analysis_text[json_start + 7 : json_end].strip()
                    analysis_result = json.loads(json_text)
                else:
                    # Try to parse the entire response as JSON
                    analysis_result = json.loads(analysis_text)

                return analysis_result
            except json.JSONDecodeError:
                logger.warning(
                    "Failed to parse JSON from OpenAI response, returning raw text"
                )
                return {"raw_analysis": analysis_text}

        except Exception as e:
            logger.error(f"Error calling OpenAI API: {str(e)}")
            raise

    def _create_analysis_prompt(self, repo_info: RepoInfo) -> str:
        """Create a prompt for the OpenAI API to analyze the repository."""
        # Create a summary of the repository
        file_count = len(repo_info.files)
        language_summary = ", ".join(repo_info.languages)

        # Build a list of files to include in the prompt
        included_files = []
        total_content_length = 0
        max_content_length = 100000  # Limit to avoid exceeding OpenAI's token limit

        for file in repo_info.files:
            # Skip very large files
            if file["size"] > 100000:
                continue

            # Add file content until we reach the maximum
            content_length = len(file["content"])
            if total_content_length + content_length <= max_content_length:
                included_files.append(file)
                total_content_length += content_length
            else:
                # Just add the file path without content
                included_files.append(
                    {
                        "path": file["path"],
                        "language": file["language"],
                        "content": "[Content omitted due to size constraints]",
                        "size": file["size"],
                    }
                )

        # Create the prompt
        prompt = f"""
        # Repository Analysis Request

        Analyze the following GitHub repository to identify integration testing needs:

        - Repository URL: {repo_info.url}
        - Languages: {language_summary}
        - File count: {file_count}

        ## Repository Structure

        I'll provide a selection of file contents below. Please analyze these to identify:

        1. Critical components and their dependencies
        2. Integration points between components
        3. Recommended integration testing approaches
        4. Test prioritization based on component criticality
        5. Specific test strategy recommendations

        For each integration point, assess:
        - Type (API, database, service-to-service, etc.)
        - Complexity (1-5 scale, where 5 is most complex)
        - Testing approach recommendations

        ## Files

        """

        for file in included_files:
            prompt += f"""
        ### {file['path']} ({file['language'] or 'Unknown'})

        ```
        {file['content']}
        ```

        """

        prompt += """
        ## Response Format

        Please provide your analysis in JSON format with the following structure:

        ```json
        {
          "components": [
            {
              "name": "string",
              "path": "string",
              "language": "string",
              "description": "string",
              "dependencies": ["string"],
              "importance": 1-5
            }
          ],
          "integration_points": [
            {
              "source": "string",
              "target": "string",
              "type": "string",
              "complexity": 1-5,
              "description": "string",
              "testing_approach": "string"
            }
          ],
          "testing_strategy": {
            "recommended_approach": "string",
            "justification": "string",
            "test_order": ["string"],
            "critical_areas": ["string"]
          },
          "recommendations": [
            {
              "description": "string",
              "priority": "string",
              "effort": "string"
            }
          ]
        }
        ```

        Focus on providing actionable insights for integration testing.
        """

        return prompt

    # start from here
    def generate_report():
        pass

## TEST FRAMEWORK

In [108]:
# Build the framework (the API key is resolved inside __init__) and clone the
# sample repository; the returned temp-dir path is shown as the cell output.
framework = LLMIntegrationTestFramework()
framework.clone_repository(
    repo_url="https://github.com/Friend09/llm-smoke-test-framework"
)

2025-04-28 11:24:52,755 - llm_integration_test - INFO - cloning repository: https://github.com/Friend09/llm-smoke-test-framework
2025-04-28 11:24:54,222 - llm_integration_test - INFO - repository cloned to /var/folders/43/572h075x24q9rq1slmdfw9yw0000gn/T/tmp_l1f8lel


'/var/folders/43/572h075x24q9rq1slmdfw9yw0000gn/T/tmp_l1f8lel'

In [113]:
# Scan the clone and display the detected languages.
# NOTE(review): repo_path is a literal copied from the clone cell's output.
# That temp directory is specific to one machine and one run, so this value
# has to be updated by hand after every re-clone.
repo_info = framework.scan_repository(
    repo_path="/var/folders/43/572h075x24q9rq1slmdfw9yw0000gn/T/tmp_l1f8lel"
)

repo_info.languages

2025-04-28 11:26:59,366 - llm_integration_test - INFO - Scanning repository: /var/folders/43/572h075x24q9rq1slmdfw9yw0000gn/T/tmp_l1f8lel


['Python']

In [114]:
# Send the scanned repository to the model and display the returned analysis
# (a parsed dict, or {"raw_analysis": ...} when no JSON could be parsed).
repo_analysis = framework.analyze_repository(repo_info=repo_info)
repo_analysis

2025-04-28 11:27:56,009 - llm_integration_test - INFO - Analyzing repository: https://github.com/Friend09/llm-smoke-test-framework
2025-04-28 11:28:16,307 - httpx - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


{'components': [{'name': 'Config',
   'path': 'config/config.py',
   'language': 'Python',
   'description': 'Configuration class for the LLM Smoke Test Framework, managing environment variables and settings.',
   'dependencies': [],
   'importance': 5},
  {'name': 'WebCrawler',
   'path': 'core/crawler.py',
   'language': 'Python',
   'description': 'Responsible for crawling web pages and extracting data, including screenshots.',
   'dependencies': ['Config'],
   'importance': 5},
  {'name': 'LLMAnalyzer',
   'path': 'core/llm_analyzer.py',
   'language': 'Python',
   'description': 'Analyzes web pages using OpenAI models, including both text and vision capabilities.',
   'dependencies': ['Config'],
   'importance': 5},
  {'name': 'TestGenerator',
   'path': 'core/test_generator.py',
   'language': 'Python',
   'description': 'Generates test scripts based on the analysis of web pages.',
   'dependencies': ['Config', 'LLMAnalyzer'],
   'importance': 5},
  {'name': 'SitemapCrawler',
   