In [6]:
from langchain_core.tools import tool
from langchain_ollama import ChatOllama
from langchain_community.tools import DuckDuckGoSearchResults
from langgraph.prebuilt import ToolNode
from langchain_community.utilities import DuckDuckGoSearchAPIWrapper

wrapper = DuckDuckGoSearchAPIWrapper(max_results=3)

search = DuckDuckGoSearchResults(api_wrapper=wrapper, output_format='list')



model = ChatOllama(model="jacob-ebey/phi4-tools:latest")


# @tool
# def magic_function(input: int) -> int:
#     """Applies a magic function to an input."""
#     return input + 2


tools = [search]


# query = "what should be the required materials and experiment procedure of the following lab Experiment (To verify the relationship between voltage (V), current (I), and resistance (R) in an electrical circuit, as expressed by Ohm's Law: V=IRV = IRV=IR)"

In [7]:
from pydantic import BaseModel,Field

class JsonnedOutput(BaseModel):
    title:str =Field(description="Title of the experiment")
    content:str = Field(description="the complete output of the agent plus ensuring that the response contains materials reuired, Experiments Steps and safety precautions")

In [4]:
from langchain_core.messages import SystemMessage
from langgraph.prebuilt import create_react_agent

system_message = '''You are a Lab Assistant designed to assist with scientific experiments. Your task is to provide:

Experiment Materials: A list of required materials and equipment.
Experiment Steps: A detailed step-by-step guide for conducting the experiment.
Safety Procedures: Essential precautions to ensure a safe experiment.
Before generating the final response, you will:

Search the query using DuckDuckGo to gather up-to-date and relevant information.
Analyze the results to determine if the information is complete.
Enhance the response by integrating the best available knowledge before providing the final answer.which is not a summary but well explained'''
# This could also be a SystemMessage object
# system_message = SystemMessage(content="You are a helpful assistant. Respond only in Spanish.")

langgraph_agent_executor = create_react_agent(model, tools, prompt=system_message)


messages = langgraph_agent_executor.invoke({"messages": [("user", query)]})

In [6]:
messages['messages']

[HumanMessage(content="what should be the required materials and experiment procedure of the following lab Experiment (To verify the relationship between voltage (V), current (I), and resistance (R) in an electrical circuit, as expressed by Ohm's Law: V=IRV = IRV=IR)", additional_kwargs={}, response_metadata={}, id='b3c57be0-1673-4c6b-bf01-8f5896ed9c78'),
 AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'jacob-ebey/phi4-tools:latest', 'created_at': '2025-02-14T05:22:35.615594999Z', 'done': True, 'done_reason': 'stop', 'total_duration': 3097766571, 'load_duration': 9775914, 'prompt_eval_count': 343, 'prompt_eval_duration': 66000000, 'eval_count': 211, 'eval_duration': 3018000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-b71eaee2-ded2-4882-b231-ad22a9c18efb-0', tool_calls=[{'name': 'duckduckgo_results_json', 'args': {'query': "Ohm's Law experiment procedure and materials"}, 'id': '5bdea9ed-b984-4517-961a-d926c34428ed

In [12]:
experiment_name = [
    "To verify the relationship between voltage (V), current (I), and resistance (R) in an electrical circuit, as expressed by Ohm's Law: V=IRV = IRV=IR",
    "To apply Kirchhoff’s Voltage Law (KVL) and Kirchhoff’s Current Law (KCL) to analyze and validate simple electrical circuits.",
    "To study the behavior of resistors in series and parallel configurations, including the equivalent resistance calculation.",
    "To study the forward and reverse bias characteristics of a PN junction diode.",
    "To analyze voltage regulation using a Zener diode.",
    "To construct and analyze the performance of half-wave and full-wave rectifiers, both with and without filters.",
    "To analyze and implement wave-shaping circuits using diodes for clipping and clamping applications.",
    "To analyze the input and output characteristics of Bipolar Junction Transistors (BJTs) and Field-Effect Transistors (FETs).",
    "To design a common emitter amplifier and analyze its frequency response.",
    "To implement and analyze operational amplifier (Op-Amp) circuits: inverting, non-inverting, summing, and differentiator configurations.",
    "To verify the operation of basic logic gates: AND, OR, NOT, NAND, NOR, XOR, and XNOR",
    "To investigate the charging and discharging behavior of a capacitor in an RC circuit and understand the time constant.",
    "To investigate the resonance behavior of an LC circuit and measure the resonant frequency.",
    "To calibrate an oscilloscope for accurate measurements of voltage and time.",
    "To determine the turns ratio of a transformer and verify the relationship between the primary and secondary voltages.",
    "To construct and analyze a bridge rectifier circuit and compare its performance with a half-wave rectifier.",
    "To study the frequency response of a low-pass filter and determine its cutoff frequency.",
    "To measure the input and output impedances of an operational amplifier (Op-Amp) in a given configuration.",
    "To generate and analyze Lissajous figures using an oscilloscope by applying two sinusoidal signals with different frequencies.",
    "To study the magnetic field produced by a solenoid and verify the relationship between current and magnetic field strength."
]
system_message = '''You are a Lab Assistant designed to assist with scientific experiments. Your task is to provide:

Experiment Materials: A list of required materials and equipment.
Experiment Steps: A detailed step-by-step guide for conducting the experiment.
Safety Procedures: Essential precautions to ensure a safe experiment.
Before generating the final response, you will:

Search the query using DuckDuckGo to gather up-to-date and relevant information.
Analyze the results to determine if the information is complete.
Enhance the response by integrating the best available knowledge before providing the final answer.which is not a summary but well explained'''

In [17]:
from langgraph.prebuilt import create_react_agent
def run_graph(exp:str,model=model,tools=tools,system_message=system_message):
    query = f"what should be the required materials and experiment procedure of the following lab Experiment ({exp})?"
    langgraph_agent_executor = create_react_agent(model, tools, prompt=system_message)
    while True:
        try:
            messages = langgraph_agent_executor.invoke({"messages": [("user", query)]})
            return messages
        except Exception as e:
            print(e)

In [16]:
import json
from tqdm import tqdm
message_len_list = []
for i in tqdm(range(10),desc="sample No"):
    fname = f"deepseekReAct_Final_{i}"
    for exp in tqdm(experiment_name,desc="experiment"):
        # query = f"what should be the required materials and experiment procedure of the following lab Experiment ({exp})?"
        # langgraph_agent_executor = create_react_agent(model, tools, prompt=system_message)
        
        # messages = langgraph_agent_executor.invoke({"messages": [("user", query)]})
        messages = run_graph(exp,model=model,tools=tools,system_message=system_message)
        # print(messages)
        message_len_list.append(len(messages['messages']))
        try:
            with open("testing_phi4/"+fname+".json","r",encoding='utf-8') as F:
                existing_data = json.load(F)
        except (FileNotFoundError, json.JSONDecodeError):
            existing_data = {"experiments": []}  # Default structure if file is empty or missing
        json_output = {exp:messages['messages'][-1].content}
        existing_data["experiments"].append(json_output)
        with open("testing_phi4/"+fname+".json","w",encoding='utf-8') as F:
            json.dump(existing_data,F,indent=4)
        

experiment:   0%|          | 0/20 [00:12<?, ?it/s]
sample No:   0%|          | 0/10 [00:12<?, ?it/s]


KeyboardInterrupt: 

In [18]:
message = run_graph("To prepare a butt joint with mild steel strip using MAG& MMAW technique")

In [22]:
from IPython.display import Markdown, display

content = message['messages'][-1].content
display(Markdown(content))

To prepare a butt joint with mild steel strip using Metal Active Gas (MAG) and Manual Metal Arc Welding (MMAW) techniques, you'll need specific materials and follow detailed procedures. Here's a comprehensive guide:

### Experiment Materials

#### For MAG Welding:
- **Mild Steel Strips**: Ensure they are clean and free from rust or oil.
- **Wire Feed Welder**: Suitable for MAG welding.
- **Shielded Metal Arc (SMA) Wire**: Typically ER70S-6 for mild steel.
- **Gas Cylinder**: Argon-CO2 mix, commonly 75% argon and 25% CO2.
- **Welding Helmet with Auto-Darkening Feature**
- **Clamps or Vise**: To hold the workpieces in place.
- **Wire Feeder and Contact Tip**: Compatible with your welder.
- **Ground Clamp**

#### For MMAW Welding:
- **Mild Steel Strips**: Same as above, ensure cleanliness.
- **Stick Electrodes**: E7018 is commonly used for mild steel.
- **Welding Helmet**
- **Clamps or Vise**
- **Wire Brush and Grinder**: For cleaning the joint area.

### Experiment Steps

#### MAG Welding Procedure:
1. **Preparation**:
   - Clean the mild steel strips using a wire brush to remove any rust, paint, or oil.
   - Align the edges of the steel strips to form a butt joint.
   - Clamp the workpieces securely in place.

2. **Setup**:
   - Install the SMA wire into the welder and connect it to the wire feeder.
   - Attach the gas cylinder to the welder and set the flow rate according to the manufacturer's specifications (usually around 20-25 CFH).
   - Connect the ground clamp to a clean, unpainted part of the workpiece.

3. **Welding**:
   - Put on your welding helmet with an auto-darkening filter.
   - Set the welder parameters: voltage and wire feed speed as per the electrode manufacturer's recommendations.
   - Start welding by moving the torch along the joint at a consistent speed, maintaining a 15-20 degree angle to the workpiece.

4. **Post-Welding**:
   - Allow the welded joint to cool naturally.
   - Inspect the weld for defects such as porosity or undercutting.

#### MMAW Welding Procedure:
1. **Preparation**:
   - Clean the mild steel strips thoroughly with a wire brush.
   - Align and clamp the edges of the steel strips to form a butt joint.

2. **Setup**:
   - Select the appropriate stick electrode (E7018) for mild steel.
   - Strike an arc by lightly tapping the electrode on the workpiece, then maintain a short arc length as you weld.

3. **Welding**:
   - Wear your welding helmet and start welding along the joint.
   - Use a weaving motion if necessary to ensure full penetration of the joint.
   - Maintain a consistent travel speed and electrode angle (about 15 degrees).

4. **Post-Welding**:
   - Let the weld cool naturally.
   - Clean any slag from the weld surface using a chipping hammer or grinder.

### Safety Procedures

- **Personal Protective Equipment (PPE)**: Always wear a welding helmet, gloves, and flame-resistant clothing.
- **Ventilation**: Ensure proper ventilation to avoid inhaling fumes. Use exhaust fans if necessary.
- **Fire Safety**: Keep a fire extinguisher nearby and remove any flammable materials from the vicinity.
- **Electrical Safety**: Inspect all cables and equipment for damage before use.
- **Eye Protection**: Never look directly at the welding arc without proper eye protection.

By following these guidelines, you can safely and effectively prepare a butt joint using both MAG and MMAW techniques.

In [23]:
message['messages']

[HumanMessage(content='what should be the required materials and experiment procedure of the following lab Experiment (To prepare a butt joint with mild steel strip using MAG& MMAW technique)?', additional_kwargs={}, response_metadata={}, id='408ed4ac-73e8-4517-a1cf-91911e8667e0'),
 AIMessage(content='', additional_kwargs={}, response_metadata={'model': 'jacob-ebey/phi4-tools:latest', 'created_at': '2025-02-18T07:02:21.396538352Z', 'done': True, 'done_reason': 'stop', 'total_duration': 2697404778, 'load_duration': 9806199, 'prompt_eval_count': 318, 'prompt_eval_duration': 34000000, 'eval_count': 186, 'eval_duration': 2650000000, 'message': Message(role='assistant', content='', images=None, tool_calls=None)}, id='run-4e0c260f-4c86-4117-a746-b308815a2456-0', tool_calls=[{'name': 'duckduckgo_results_json', 'args': {'query': 'Materials and procedure for preparing a butt joint with mild steel using MAG & MMAW'}, 'id': '288766ea-97cf-4411-a280-a4cfc016bf57', 'type': 'tool_call'}], usage_meta

In [None]:
!pip install crawl4ai


Collecting crawl4ai
  Downloading Crawl4AI-0.4.248-py3-none-any.whl.metadata (29 kB)
Collecting aiosqlite~=0.20 (from crawl4ai)
  Downloading aiosqlite-0.21.0-py3-none-any.whl.metadata (4.3 kB)
Collecting litellm>=1.53.1 (from crawl4ai)
  Downloading litellm-1.61.8-py3-none-any.whl.metadata (37 kB)
Collecting pillow~=10.4 (from crawl4ai)
  Using cached pillow-10.4.0-cp312-cp312-win_amd64.whl.metadata (9.3 kB)
Collecting playwright>=1.49.0 (from crawl4ai)
  Downloading playwright-1.50.0-py3-none-win_amd64.whl.metadata (3.5 kB)
Collecting tf-playwright-stealth>=1.1.0 (from crawl4ai)
  Downloading tf_playwright_stealth-1.1.1-py3-none-any.whl.metadata (2.6 kB)
Collecting xxhash~=3.4 (from crawl4ai)
  Using cached xxhash-3.5.0-cp312-cp312-win_amd64.whl.metadata (13 kB)
Collecting rank-bm25~=0.2 (from crawl4ai)
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Collecting aiofiles>=24.1.0 (from crawl4ai)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting

  You can safely remove it manually.

[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [34]:
!crawl4ai-doctor

[INIT].... → Running Crawl4AI health check...


[INIT].... → Crawl4AI 0.4.248
[TEST].... ℹ Testing crawling capabilities...
[EXPORT].. ℹ Exporting PDF and taking screenshot took 1.65s
[FETCH]... ↓ https://crawl4ai.com... | Status: True | Time: 6.84s
[SCRAPE].. ◆ Processed https://crawl4ai.com... | Time: 42ms
[COMPLETE] ● https://crawl4ai.com... | Status: True | Total: 6.88s
[COMPLETE] ● ✅ Crawling test passed!


In [37]:
import asyncio
import nest_asyncio
nest_asyncio.apply()
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())

In [55]:
import subprocess

# Define input arguments
script_path = "scrapped_crawler.py"
output_file = "crawl_output.txt"
webpage_url = "https://www.millerwelds.com/resources/article-library/understanding-the-basics-of-mig-welding-for-mild-steel"
model_name = "ollama/jacob-ebey/phi4-tools:latest"
base_url = "http://192.168.23.138:11439"
system_prompt = "Extract only the relevant main body content from the page."
schema_file = "schema.json"  # This file should contain the schema in JSON format

# Run the script as a subprocess with arguments
import subprocess
import sys

# Define the virtual environment's Python path
venv_python = ".venv/Scripts/python" if sys.platform == "win32" else ".venv/bin/python"

# Run the script using the virtual environment's Python
result = subprocess.run(
    [
        venv_python, script_path, output_file, webpage_url,
        "--model", model_name,
        "--base_url", base_url,
        "--system_prompt", system_prompt,
        "--schema_file", schema_file
    ],
    
    capture_output=True,
    text=True,
    encoding="utf-8"  # Explicitly set UTF-8 encoding
)

print("Subprocess Output:", result.stdout)
print("Subprocess Errors:", result.stderr)


# Print any errors from the subprocess (if any)
print("Subprocess stderr:", result.stderr)

# Read and display the content of the output file
with open(output_file, "r", encoding="utf-8") as f:
    print("Captured Output:\n", f.read())


Subprocess Output: 
--- Extracting Structured Data from https://www.millerwelds.com/resources/article-library/understanding-the-basics-of-mig-welding-for-mild-steel using ollama/jacob-ebey/phi4-tools:latest ---
[INIT].... → Crawl4AI 0.4.248
[FETCH]... ↓ https://www.millerwelds.com/resources/article-libr... | Status: True | Time: 12.07s
[SCRAPE].. ◆ Processed https://www.millerwelds.com/resources/article-libr... | Time: 170ms
[EXTRACT]. ■ Completed for https://www.millerwelds.com/resources/article-libr... | Time: 22.537717799999882s
[COMPLETE] ● https://www.millerwelds.com/resources/article-libr... | Status: True | Total: 34.78s

Subprocess Errors: 
Subprocess stderr: 
Captured Output:
 [
    {
        "title": "How to MIG Weld Mild Steel",
        "content": "MIG welding mild steel is a popular method for joining metal pieces due to its speed and ease of use. This guide covers essential steps and tips for effective MIG welding on mild steel.\n\n**Preparation:**\n- **Clean the Metal:** 

In [52]:
import json
from pydantic import BaseModel, Field

class ScrappedData(BaseModel):
    title: str = Field(..., description="The title of the webpage or the blog")
    content: str = Field(..., description="Entire Main content of the webpage which is relevant to the title")

# Get the JSON schema
schema_json = json.dumps(ScrappedData.model_json_schema(), indent=4)

# Save to file
with open("schema.json", "w", encoding="utf-8") as f:
    f.write(schema_json)

print("Schema saved successfully to schema.json")


Schema saved successfully to schema.json


In [32]:
import asyncio
import nest_asyncio
nest_asyncio.apply()
import os
import json
# import asyncio
from pydantic import BaseModel, Field
from crawl4ai import AsyncWebCrawler, CrawlerRunConfig
from crawl4ai.extraction_strategy import LLMExtractionStrategy
from typing import Dict
from crawl4ai import BrowserConfig,CacheMode

class OpenAIModelFee(BaseModel):
    model_name: str = Field(..., description="Name of the OpenAI model.")
    input_fee: str = Field(..., description="Fee for input token for the OpenAI model.")
    output_fee: str = Field(
        ..., description="Fee for output token for the OpenAI model."
    )

class ScrappedData(BaseModel):
    title: str = Field(...,description="The title of the webpage or the blog")
    content: str = Field(...,description="Entire Main content of the webpage which is relevant to the title")

async def extract_structured_data_using_llm(
    provider: str, api_token: str = None, extra_headers: Dict[str, str] = None
):
    print(f"\n--- Extracting Structured Data with {provider} ---")

    if api_token is None and provider != "ollama":
        print(f"API token is required for {provider}. Skipping this example.")
        return

    browser_config = BrowserConfig(headless=True)

    extra_args = {"temperature": 0, "top_p": 0.9, "max_tokens": 2000}
    if extra_headers:
        extra_args["extra_headers"] = extra_headers

    crawler_config = CrawlerRunConfig(
        cache_mode=CacheMode.BYPASS,
        word_count_threshold=1,
        page_timeout=80000,
        extraction_strategy=LLMExtractionStrategy(
            provider=provider,
            api_token=api_token,
            schema=ScrappedData.model_json_schema(),
            extraction_type="schema",
            instruction="""From the crawled content, extract all the Main content or the body content which any user would read and leave none""",
            extra_args=extra_args,
            base_url="http://192.168.23.138:11439"
        ),
    )

    async with AsyncWebCrawler(config=browser_config) as crawler:
        result = await crawler.arun(
            url="https://www.millerwelds.com/resources/article-library/understanding-the-basics-of-mig-welding-for-mild-steel", config=crawler_config
        )
        print(result.extracted_content)

if __name__ == "__main__":
    # Use ollama with llama3.3
    asyncio.run(
        extract_structured_data_using_llm(
            provider="ollama/jacob-ebey/phi4-tools:latest", api_token="no-token"
        )
    )

    # asyncio.run(
    #     extract_structured_data_using_llm(
    #         provider="openai/gpt-4o", api_token=os.getenv("OPENAI_API_KEY")
    #     )
    # )


--- Extracting Structured Data with ollama/jacob-ebey/phi4-tools:latest ---


Task exception was never retrieved
future: <Task finished name='Task-6' coro=<Connection.run() done, defined at d:\python\agents\.venv\Lib\site-packages\playwright\_impl\_connection.py:272> exception=NotImplementedError()>
Traceback (most recent call last):
  File "C:\Python312\Lib\asyncio\tasks.py", line 314, in __step_run_and_handle_result
    result = coro.send(None)
             ^^^^^^^^^^^^^^^
  File "d:\python\agents\.venv\Lib\site-packages\playwright\_impl\_connection.py", line 279, in run
    await self._transport.connect()
  File "d:\python\agents\.venv\Lib\site-packages\playwright\_impl\_transport.py", line 133, in connect
    raise exc
  File "d:\python\agents\.venv\Lib\site-packages\playwright\_impl\_transport.py", line 120, in connect
    self._proc = await asyncio.create_subprocess_exec(
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python312\Lib\asyncio\subprocess.py", line 224, in create_subprocess_exec
    transport, protocol = await loop.subprocess

NotImplementedError: 

In [2]:
import requests
import json
import time
import sys
import base64
import os
from typing import Dict, Any


class Crawl4AiTester:
    def __init__(self, base_url: str = "http://192.168.23.138:11235", api_token: str = None):
        self.base_url = base_url
        self.api_token = (
            api_token or os.getenv("CRAWL4AI_API_TOKEN") or "test_api_code"
        )  # Check environment variable as fallback
        self.headers = (
            {"Authorization": f"Bearer {self.api_token}"} if self.api_token else {}
        )

    def submit_and_wait(
        self, request_data: Dict[str, Any], timeout: int = 300
    ) -> Dict[str, Any]:
        # Submit crawl job
        response = requests.post(
            f"{self.base_url}/crawl", json=request_data, headers=self.headers
        )
        if response.status_code == 403:
            raise Exception("API token is invalid or missing")
        task_id = response.json()["task_id"]
        print(f"Task ID: {task_id}")

        # Poll for result
        start_time = time.time()
        while True:
            if time.time() - start_time > timeout:
                raise TimeoutError(
                    f"Task {task_id} did not complete within {timeout} seconds"
                )

            result = requests.get(
                f"{self.base_url}/task/{task_id}", headers=self.headers
            )
            status = result.json()

            if status["status"] == "failed":
                print("Task failed:", status.get("error"))
                raise Exception(f"Task failed: {status.get('error')}")

            if status["status"] == "completed":
                return status

            time.sleep(2)

    def submit_sync(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
        response = requests.post(
            f"{self.base_url}/crawl_sync",
            json=request_data,
            headers=self.headers,
            timeout=60,
        )
        if response.status_code == 408:
            raise TimeoutError("Task did not complete within server timeout")
        response.raise_for_status()
        return response.json()

    def crawl_direct(self, request_data: Dict[str, Any]) -> Dict[str, Any]:
        """Directly crawl without using task queue"""
        response = requests.post(
            f"{self.base_url}/crawl_direct", json=request_data, headers=self.headers
        )
        response.raise_for_status()
        return response.json()


def test_docker_deployment(version="basic"):
    tester = Crawl4AiTester(
        base_url="http://192.168.23.138:11235",
        # base_url="https://api.crawl4ai.com" # just for example
        # api_token="test" # just for example
    )
    print(f"Testing Crawl4AI Docker {version} version")

    # Health check with timeout and retry
    max_retries = 5
    for i in range(max_retries):
        try:
            health = requests.get(f"{tester.base_url}/health", timeout=10)
            print("Health check:", health.json())
            break
        except requests.exceptions.RequestException:
            if i == max_retries - 1:
                print(f"Failed to connect after {max_retries} attempts")
                sys.exit(1)
            print(f"Waiting for service to start (attempt {i+1}/{max_retries})...")
            time.sleep(5)

    # Test cases based on version
    # test_basic_crawl_direct(tester)
    # test_basic_crawl(tester)
    # test_basic_crawl(tester)
    # test_basic_crawl_sync(tester)

    # if version in ["full", "transformer"]:
    #     test_cosine_extraction(tester)

    # test_js_execution(tester)
    # test_css_selector(tester)
    # test_structured_extraction(tester)
    # test_llm_extraction(tester)
    test_llm_with_ollama(tester)
    test_screenshot(tester)


def test_basic_crawl(tester: Crawl4AiTester):
    print("\n=== Testing Basic Crawl ===")
    request = {
        "urls": "https://www.nbcnews.com/business",
        "priority": 10,
        "session_id": "test",
    }

    result = tester.submit_and_wait(request)
    print(f"Basic crawl result length: {len(result['result']['markdown'])}")
    assert result["result"]["success"]
    assert len(result["result"]["markdown"]) > 0


def test_basic_crawl_sync(tester: Crawl4AiTester):
    print("\n=== Testing Basic Crawl (Sync) ===")
    request = {
        "urls": "https://www.nbcnews.com/business",
        "priority": 10,
        "session_id": "test",
    }

    result = tester.submit_sync(request)
    print(f"Basic crawl result length: {len(result['result']['markdown'])}")
    assert result["status"] == "completed"
    assert result["result"]["success"]
    assert len(result["result"]["markdown"]) > 0


def test_basic_crawl_direct(tester: Crawl4AiTester):
    print("\n=== Testing Basic Crawl (Direct) ===")
    request = {
        "urls": "https://www.nbcnews.com/business",
        "priority": 10,
        # "session_id": "test"
        "cache_mode": "bypass",  # or "enabled", "disabled", "read_only", "write_only"
    }

    result = tester.crawl_direct(request)
    print(f"Basic crawl result length: {len(result['result']['markdown'])}")
    assert result["result"]["success"]
    assert len(result["result"]["markdown"]) > 0


def test_js_execution(tester: Crawl4AiTester):
    print("\n=== Testing JS Execution ===")
    request = {
        "urls": "https://www.nbcnews.com/business",
        "priority": 8,
        "js_code": [
            "const loadMoreButton = Array.from(document.querySelectorAll('button')).find(button => button.textContent.includes('Load More')); loadMoreButton && loadMoreButton.click();"
        ],
        "wait_for": "article.tease-card:nth-child(10)",
        "crawler_params": {"headless": True},
    }

    result = tester.submit_and_wait(request)
    print(f"JS execution result length: {len(result['result']['markdown'])}")
    assert result["result"]["success"]


def test_css_selector(tester: Crawl4AiTester):
    print("\n=== Testing CSS Selector ===")
    request = {
        "urls": "https://www.nbcnews.com/business",
        "priority": 7,
        "css_selector": ".wide-tease-item__description",
        "crawler_params": {"headless": True},
        "extra": {"word_count_threshold": 10},
    }

    result = tester.submit_and_wait(request)
    print(f"CSS selector result length: {len(result['result']['markdown'])}")
    assert result["result"]["success"]


def test_structured_extraction(tester: Crawl4AiTester):
    print("\n=== Testing Structured Extraction ===")
    schema = {
        "name": "Coinbase Crypto Prices",
        "baseSelector": ".cds-tableRow-t45thuk",
        "fields": [
            {
                "name": "crypto",
                "selector": "td:nth-child(1) h2",
                "type": "text",
            },
            {
                "name": "symbol",
                "selector": "td:nth-child(1) p",
                "type": "text",
            },
            {
                "name": "price",
                "selector": "td:nth-child(2)",
                "type": "text",
            },
        ],
    }

    request = {
        "urls": "https://www.coinbase.com/explore",
        "priority": 9,
        "extraction_config": {"type": "json_css", "params": {"schema": schema}},
    }

    result = tester.submit_and_wait(request)
    extracted = json.loads(result["result"]["extracted_content"])
    print(f"Extracted {len(extracted)} items")
    print("Sample item:", json.dumps(extracted[0], indent=2))
    assert result["result"]["success"]
    assert len(extracted) > 0


def test_llm_extraction(tester: Crawl4AiTester):
    print("\n=== Testing LLM Extraction ===")
    schema = {
        "type": "object",
        "properties": {
            "model_name": {
                "type": "string",
                "description": "Name of the OpenAI model.",
            },
            "input_fee": {
                "type": "string",
                "description": "Fee for input token for the OpenAI model.",
            },
            "output_fee": {
                "type": "string",
                "description": "Fee for output token for the OpenAI model.",
            },
        },
        "required": ["model_name", "input_fee", "output_fee"],
    }

    request = {
        "urls": "https://openai.com/api/pricing",
        "priority": 8,
        "extraction_config": {
            "type": "llm",
            "params": {
                "provider": "openai/gpt-4o-mini",
                "api_token": os.getenv("OPENAI_API_KEY"),
                "schema": schema,
                "extraction_type": "schema",
                "instruction": """From the crawled content, extract all mentioned model names along with their fees for input and output tokens.""",
            },
        },
        "crawler_params": {"word_count_threshold": 1},
    }

    try:
        result = tester.submit_and_wait(request)
        extracted = json.loads(result["result"]["extracted_content"])
        print(f"Extracted {len(extracted)} model pricing entries")
        print("Sample entry:", json.dumps(extracted[0], indent=2))
        assert result["result"]["success"]
    except Exception as e:
        print(f"LLM extraction test failed (might be due to missing API key): {str(e)}")


def test_llm_with_ollama(tester: Crawl4AiTester):
    print("\n=== Testing LLM with Ollama ===")
    schema = {
        "type": "object",
        "properties": {
            "article_title": {
                "type": "string",
                "description": "The main title of the news article",
            },
            "summary": {
                "type": "string",
                "description": "A brief summary of the article content",
            },
            "main_topics": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Main topics or themes discussed in the article",
            },
        },
    }

    request = {
        "urls": "https://www.nbcnews.com/business",
        "priority": 8,
        "extraction_config": {
            "type": "llm",
            "params": {
                "provider": "ollama/jacob-ebey/phi4-tools:latest",
                "base_url":"http://192.168.23.138:11439",
                "schema": schema,
                "extraction_type": "schema",
                "instruction": "Extract the main article information including title, summary, and main topics.",
            },
        },
        "extra": {"word_count_threshold": 1},
        "crawler_params": {"verbose": True},
    }

    try:
        result = tester.submit_and_wait(request)
        extracted = json.loads(result["result"]["extracted_content"])
        print("Extracted content:", json.dumps(extracted, indent=2))
        assert result["result"]["success"]
    except Exception as e:
        print(f"Ollama extraction test failed: {str(e)}")


def test_cosine_extraction(tester: Crawl4AiTester):
    print("\n=== Testing Cosine Extraction ===")
    request = {
        "urls": "https://www.nbcnews.com/business",
        "priority": 8,
        "extraction_config": {
            "type": "cosine",
            "params": {
                "semantic_filter": "business finance economy",
                "word_count_threshold": 10,
                "max_dist": 0.2,
                "top_k": 3,
            },
        },
    }

    try:
        result = tester.submit_and_wait(request)
        extracted = json.loads(result["result"]["extracted_content"])
        print(f"Extracted {len(extracted)} text clusters")
        print("First cluster tags:", extracted[0]["tags"])
        assert result["result"]["success"]
    except Exception as e:
        print(f"Cosine extraction test failed: {str(e)}")


def test_screenshot(tester: Crawl4AiTester):
    print("\n=== Testing Screenshot ===")
    request = {
        "urls": "https://www.nbcnews.com/business",
        "priority": 5,
        "screenshot": True,
        "crawler_params": {"headless": True},
    }

    result = tester.submit_and_wait(request)
    print("Screenshot captured:", bool(result["result"]["screenshot"]))

    if result["result"]["screenshot"]:
        # Save screenshot
        screenshot_data = base64.b64decode(result["result"]["screenshot"])
        with open("test_screenshot.jpg", "wb") as f:
            f.write(screenshot_data)
        print("Screenshot saved as test_screenshot.jpg")

    assert result["result"]["success"]


if __name__ == "__main__":
    version = sys.argv[1] if len(sys.argv) > 1 else "basic"
    # version = "full"
    test_docker_deployment(version)

Testing Crawl4AI Docker --f=c:\Users\rahul_78wxtz2\AppData\Roaming\jupyter\runtime\kernel-v36b5daab69732ebe7ef7f2a6db2136205ca1dba60.json version
Health check: {'status': 'healthy', 'available_slots': 9, 'memory_usage': 8.2, 'cpu_usage': 0.1}

=== Testing LLM with Ollama ===
Task ID: e18ad9a8-b32a-49e0-bed8-f3abd1997a16
Extracted content: [
  {
    "article_title": "Biden's Supreme Court Pick: A Moderate with a History of Bipartisan Support",
    "summary": "President Joe Biden has nominated Judge Ketanji Brown Jackson for the U.S. Supreme Court, emphasizing her qualifications and bipartisan support. The nomination is seen as a strategic move to maintain balance on the court while addressing diversity.",
    "main_topics": [
      "Supreme Court Nomination",
      "Bipartisan Support",
      "Judicial Qualifications",
      "Diversity in Judiciary"
    ],
    "error": false
  },
  {
    "article_title": "U.S. Supreme Court: A New Era of Judicial Influence",
    "summary": "The U.S. Sup