# Fixed HTML Slicing Pipeline

This notebook implements a working HTML slicing pipeline that:
- Uses LangChain and the existing `generate_slices.md` prompt
- Passes Pydantic model schema as input to avoid template conflicts
- Uses direct LLM calls to avoid ChatPromptTemplate + HTML issues
- Maintains compatibility with existing code structure

## Key Fixes Applied:
1. **Direct LLM invocation** instead of ChatPromptTemplate with HTML
2. **Schema injection** into prompt to maintain structure
3. **Robust JSON parsing** with fallback handling
4. **Conservative changes** to existing codebase


In [1]:
import os
import json
from typing import List
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage, SystemMessage
from anthropic._exceptions import OverloadedError


In [None]:
# Existing Pydantic models (unchanged)
class Slice(BaseModel):
    """A contiguous, inclusive range of 0-based line numbers that contains
    content relevant to describing a business/service provider."""
    first_line: int = Field(..., ge=0, description="0-based inclusive start line")
    last_line: int = Field(..., ge=0, description="0-based inclusive end line")

class SliceSet(BaseModel):
    """Top-level container returned by the model."""
    slices: List[Slice] = Field(default_factory=list)

# Existing utility function (updated for tests/ subdirectory)
def load_prompt(prompt_name):
    """Load prompt from prompts directory"""
    with open(f"../prompts/{prompt_name}.md", "r") as f:
        return f.read()


In [None]:
def call_llm_local(model="gpt-oss:20b", prompt_content=None, temperature=0, keep_alive="30m", num_predict=2048):
    """Fixed LLM call that avoids ChatPromptTemplate issues with HTML content."""
    if prompt_content is None:
        raise ValueError("Prompt content is required")
    
    # Use direct LLM call instead of ChatPromptTemplate
    llm = ChatOllama(
        model=model,
        temperature=temperature,
        keep_alive=keep_alive,
        num_predict=num_predict,
        format="json"  # Encourage JSON output
    )
    
    # Direct invocation with HumanMessage
    response = llm.invoke([HumanMessage(content=prompt_content)])
    
    return response


In [None]:
def generate_html_slices_fixed(html_content: str, use_local_llm: bool = True) -> SliceSet:
    """Fixed HTML slicing function that uses the original generate_slices.md prompt
    but avoids ChatPromptTemplate issues by injecting schema and using direct calls."""

    prompt = load_prompt("generate_slices")
    json_schema = SliceSet.model_json_schema()
    prompt = prompt.format(html_content=html_content, output_schema=json_schema)
    
    try:
        # Use the fixed LLM call approach
        if use_local_llm:
            response = call_llm_local(
                model="gpt-oss:20b",
                prompt_content=prompt,
                temperature=0
            )
        
        if len(response.content.strip()) > 0:
            content = response.content.strip()
            
            # Handle different response formats (existing logic)
            if content.startswith('```json'):
                start_idx = content.find('\n', 7) + 1
                end_idx = content.rfind('```')
                if start_idx > 7 and end_idx > start_idx:
                    content = content[start_idx:end_idx].strip()
            elif content.startswith('```'):
                start_idx = content.find('\n', 3) + 1
                end_idx = content.rfind('```')
                if start_idx > 3 and end_idx > start_idx:
                    content = content[start_idx:end_idx].strip()
            
            # Parse JSON with existing robust logic
            try:
                parsed_json = json.loads(content)
                
                # Validate and create SliceSet
                html_lines = len(html_content.split('\n'))
                valid_slices = []
                
                for slice_data in parsed_json.get('slices', []):
                    first_line = slice_data.get('first_line', 0)
                    last_line = slice_data.get('last_line', 0)
                    
                    # Ensure valid range
                    first_line = max(0, min(first_line, html_lines - 1))
                    last_line = max(first_line, min(last_line, html_lines - 1))
                    
                    valid_slices.append(Slice(first_line=first_line, last_line=last_line))
                
                if valid_slices:
                    print(f"‚úÖ Successfully generated {len(valid_slices)} slices")
                    return SliceSet(slices=valid_slices)
                
            except json.JSONDecodeError as json_err:
                print(f"‚ö†Ô∏è  JSON parsing failed: {json_err}")
                print(f"Raw content preview: {content[:200]}...")
                
                # Try manual JSON extraction (existing logic)
                start_idx = content.find('{')
                if start_idx != -1:
                    brace_count = 0
                    end_idx = -1
                    for i in range(start_idx, len(content)):
                        if content[i] == '{':
                            brace_count += 1
                        elif content[i] == '}':
                            brace_count -= 1
                            if brace_count == 0:
                                end_idx = i + 1
                                break
                    
                    if end_idx != -1:
                        json_content = content[start_idx:end_idx]
                        try:
                            parsed_json = json.loads(json_content)
                            slice_set = SliceSet(**parsed_json)
                            print(f"‚úÖ Manual JSON extraction successful: {len(slice_set.slices)} slices")
                            return slice_set
                        except:
                            pass
                
    except Exception as e:
        print(f"‚ùå LLM call failed: {e}")
        # Safe fallback
        html_lines = len(html_content.split('\n'))
        return SliceSet(slices=[Slice(first_line=0, last_line=min(html_lines - 1, 20))])


In [5]:
# Existing utility functions (unchanged for compatibility)
def get_slices(html_content: str, slice_set: SliceSet) -> str:
    """Extract the relevant slices from HTML content based on SliceSet output."""
    if not slice_set.slices:
        return ""
    
    html_lines = html_content.split('\n')
    extracted_slices = []
    
    for slice_obj in slice_set.slices:
        start_line = max(0, slice_obj.first_line)
        end_line = min(len(html_lines) - 1, slice_obj.last_line)
        
        if start_line <= end_line:
            slice_content = '\n'.join(html_lines[start_line:end_line + 1])
            extracted_slices.append(slice_content)
            extracted_slices.append(f"\n<!-- SLICE {slice_obj.first_line}-{slice_obj.last_line} -->\n")
    
    return '\n'.join(extracted_slices)

def get_slices_with_metadata(html_content: str, slice_set: SliceSet) -> dict:
    """Extract slices with additional metadata for debugging."""
    if not slice_set.slices:
        return {
            'sliced_content': '',
            'slice_info': [],
            'stats': {'total_slices': 0, 'total_lines_extracted': 0, 'original_lines': len(html_content.split('\n'))}
        }
    
    html_lines = html_content.split('\n')
    extracted_slices = []
    slice_info = []
    total_lines_extracted = 0
    
    for i, slice_obj in enumerate(slice_set.slices):
        start_line = max(0, slice_obj.first_line)
        end_line = min(len(html_lines) - 1, slice_obj.last_line)
        
        if start_line <= end_line:
            slice_content = '\n'.join(html_lines[start_line:end_line + 1])
            lines_in_slice = end_line - start_line + 1
            total_lines_extracted += lines_in_slice
            
            extracted_slices.append(slice_content)
            extracted_slices.append(f"\n<!-- SLICE {i+1}: lines {slice_obj.first_line}-{slice_obj.last_line} ({lines_in_slice} lines) -->\n")
            
            slice_info.append({
                'slice_number': i + 1,
                'first_line': slice_obj.first_line,
                'last_line': slice_obj.last_line,
                'lines_count': lines_in_slice,
                'preview': slice_content[:200] + '...' if len(slice_content) > 200 else slice_content
            })
    
    return {
        'sliced_content': '\n'.join(extracted_slices),
        'slice_info': slice_info,
        'stats': {
            'total_slices': len(slice_set.slices),
            'total_lines_extracted': total_lines_extracted,
            'original_lines': len(html_lines),
            'compression_ratio': round((1 - total_lines_extracted / len(html_lines)) * 100, 2) if len(html_lines) > 0 else 0
        }
    }


## Test the Fixed Pipeline

Let's test the fixed implementation with realistic HTML content:


In [6]:
# Test with realistic business HTML
test_html = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>TechConsult Pro - Digital Transformation Experts</title>
    <meta name="description" content="Leading digital transformation consultancy helping businesses modernize their operations and technology stack.">
    <link rel="stylesheet" href="styles.css">
</head>
<body>
    <header class="main-header">
        <div class="container">
            <img src="logo.png" alt="TechConsult Pro Logo" class="logo">
            <h1>TechConsult Pro</h1>
            <p class="tagline">Transforming Businesses Through Technology</p>
        </div>
    </header>
    
    <nav class="main-nav">
        <ul>
            <li><a href="#home">Home</a></li>
            <li><a href="#about">About</a></li>
            <li><a href="#services">Services</a></li>
            <li><a href="#contact">Contact</a></li>
        </ul>
    </nav>
    
    <main>
        <section id="about" class="about-section">
            <div class="container">
                <h2>About TechConsult Pro</h2>
                <p>With over 15 years of experience in digital transformation, we help enterprises navigate the complex landscape of modern technology. Our team of certified experts specializes in cloud migration, AI implementation, and digital strategy.</p>
                <p>We've successfully transformed over 200 businesses across various industries, from startups to Fortune 500 companies.</p>
            </div>
        </section>
        
        <section id="services" class="services-section">
            <div class="container">
                <h2>Our Services</h2>
                <div class="service-grid">
                    <div class="service-item">
                        <h3>Cloud Migration</h3>
                        <p>Seamless migration to AWS, Azure, or Google Cloud with zero downtime.</p>
                        <p class="price">Starting at $15,000</p>
                    </div>
                    <div class="service-item">
                        <h3>AI & Machine Learning</h3>
                        <p>Custom AI solutions to automate processes and gain insights from your data.</p>
                        <p class="price">Starting at $25,000</p>
                    </div>
                    <div class="service-item">
                        <h3>Digital Strategy Consulting</h3>
                        <p>Comprehensive digital roadmaps tailored to your business objectives.</p>
                        <p class="price">Starting at $8,000</p>
                    </div>
                </div>
            </div>
        </section>
        
        <section id="contact" class="contact-section">
            <div class="container">
                <h2>Get in Touch</h2>
                <div class="contact-info">
                    <p><strong>Address:</strong> 1200 Tech Plaza, Suite 400, San Francisco, CA 94105</p>
                    <p><strong>Phone:</strong> +1 (555) 123-4567</p>
                    <p><strong>Email:</strong> contact@techconsultpro.com</p>
                    <p><strong>Business Hours:</strong> Mon-Fri 9:00 AM - 6:00 PM PST</p>
                </div>
                <div class="social-links">
                    <a href="https://linkedin.com/company/techconsultpro">LinkedIn</a>
                    <a href="https://twitter.com/techconsultpro">Twitter</a>
                </div>
            </div>
        </section>
    </main>
    
    <footer>
        <p>&copy; 2024 TechConsult Pro. All rights reserved.</p>
    </footer>
    
    <script src="analytics.js"></script>
    <script>
        // Contact form handling
        document.addEventListener('DOMContentLoaded', function() {
            console.log('Page loaded');
        });
    </script>
</body>
</html>"""

print("Testing fixed HTML slicing pipeline...")
print(f"Input HTML: {len(test_html)} characters, {len(test_html.split(chr(10)))} lines")
print("=" * 60)


Testing fixed HTML slicing pipeline...
Input HTML: 3826 characters, 89 lines


In [7]:
# Run the fixed slicing pipeline
try:
    slice_result = generate_html_slices_fixed(test_html, use_local_llm=True)
    
    print(f"\n‚úÖ SLICING SUCCESSFUL!")
    print(f"Generated {len(slice_result.slices)} slices:")
    
    # Get detailed results
    detailed_result = get_slices_with_metadata(test_html, slice_result)
    
    print("\nüìä Statistics:")
    for key, value in detailed_result['stats'].items():
        print(f"  {key}: {value}")
    
    print("\nüìã Slice Details:")
    for slice_info in detailed_result['slice_info']:
        print(f"  Slice {slice_info['slice_number']}: lines {slice_info['first_line']}-{slice_info['last_line']} ({slice_info['lines_count']} lines)")
        print(f"    Preview: {slice_info['preview'][:150]}...")
        print()
    
    print("\nüìÑ Extracted Content (first 1000 chars):")
    print("-" * 50)
    extracted_content = get_slices(test_html, slice_result)
    print(extracted_content[:1000] + "..." if len(extracted_content) > 1000 else extracted_content)
    print("-" * 50)
    
except Exception as e:
    print(f"‚ùå Pipeline failed: {e}")
    import traceback
    traceback.print_exc()



‚úÖ SLICING SUCCESSFUL!
‚ùå Pipeline failed: 'NoneType' object has no attribute 'slices'


Traceback (most recent call last):
  File "/tmp/ipykernel_90669/3737143613.py", line 6, in <module>
    print(f"Generated {len(slice_result.slices)} slices:")
                           ^^^^^^^^^^^^^^^^^^^
AttributeError: 'NoneType' object has no attribute 'slices'


In [8]:
# Debug: Prompt the LLM directly with the HTML content and print raw response
html_content = test_html  # reuse the test HTML from above

prompt = load_prompt("generate_slices")
json_schema = SliceSet.model_json_schema()
prompt = prompt.format(html_content=html_content, output_schema=json_schema)

print("Sending prompt to LLM with HTML content and schema...")
try:
    response = call_llm_local(model="gpt-oss:20b", prompt_content=prompt, temperature=0)
    print("\nRaw LLM response:\n")
    print(response.content if hasattr(response, "content") else response)
except Exception as e:
    print(f"LLM call error: {e}")


Sending prompt to LLM with HTML content and schema...

Raw LLM response:




In [9]:
# Deep diagnostics: verify Ollama is reachable and models are available
import os, time, json, traceback
import requests

# 1) Check Ollama base URL and health
base_url = os.getenv('OLLAMA_BASE_URL', 'http://localhost:11434')
print(f"Base URL: {base_url}")
try:
    r = requests.get(f"{base_url}/api/tags", timeout=3)
    print(f"/api/tags status: {r.status_code}")
    tags = r.json() if r.ok else {}
    print(f"Tags keys: {list(tags.keys())}")
except Exception as e:
    print(f"Failed to reach Ollama: {e}")
    tags = {}

# 2) Collect model names from tags + running processes
model_names = []
try:
    from itertools import chain
    import ollama
    client = ollama.Client(host=base_url)
    # from /api/tags
    tag_models = [m.get('name') or m.get('model') or m.get('tag') for m in (tags.get('models') or [])]
    # from ps (running models)
    try:
        ps = client.ps()
        ps_models = [m.get('name') or m.get('model') for m in ps.get('models', [])]
    except Exception:
        ps = {}
        ps_models = []
    # from client.list()
    try:
        listed = client.list()
        list_models = []
        for m in listed.get('models', []):
            if isinstance(m, dict):
                list_models.append(m.get('name') or m.get('model'))
            else:
                # object-like fallback
                list_models.append(getattr(m, 'name', None) or getattr(m, 'model', None))
    except Exception:
        list_models = []
    model_names = [n for n in chain(tag_models, ps_models, list_models) if isinstance(n, str) and n]
    # Ensure uniqueness, preserve order
    seen = set()
    model_names = [x for x in model_names if not (x in seen or seen.add(x))]
    print(f"Discovered models: {model_names}")
except Exception as e:
    print(f"Model discovery failed: {e}")

# 3) Pick a model, prefer gpt-oss:20b if present, else first discovered, else fallback
preferred = 'gpt-oss:20b'
selected_model = preferred if preferred in model_names else (model_names[0] if model_names else preferred)
print(f"Selected model: {selected_model}")

# 4) Minimal chat test
try:
    import ollama
    client = ollama.Client(host=base_url)
    t0 = time.time()
    resp = client.chat(model=selected_model, messages=[{"role":"user", "content":"Say 'ready'"}], options={"temperature":0})
    dt = time.time() - t0
    content = resp.get('message', {}).get('content') if isinstance(resp, dict) else getattr(getattr(resp, 'message', {}), 'content', '')
    print(f"Minimal chat took {dt:.2f}s, content: {repr(content)[:200]}")
except Exception as e:
    print(f"Minimal chat failed: {e}")

# 5) Minimal generate test
try:
    t0 = time.time()
    gen = client.generate(model=selected_model, prompt="Hello", options={"temperature":0})
    dt = time.time() - t0
    txt = gen.get('response', '') if isinstance(gen, dict) else getattr(gen, 'response', '')
    print(f"Generate took {dt:.2f}s, response: {repr(txt)[:200]}")
except Exception as e:
    print(f"Generate failed: {e}")

# 6) Re-run ChatOllama with base_url and introspect the response object
try:
    from langchain_ollama import ChatOllama
    from langchain_core.messages import HumanMessage
    llm = ChatOllama(model=selected_model, base_url=base_url, temperature=0)
    t0 = time.time()
    resp2 = llm.invoke([HumanMessage(content="Say 'ok'")])
    dt = time.time() - t0
    print(f"ChatOllama ping took {dt:.2f}s")
    print("Type:", type(resp2))
    try:
        print("Content:", repr(getattr(resp2, 'content', None)))
        print("Additional kwargs:", getattr(resp2, 'additional_kwargs', None))
        print("Response metadata:", getattr(resp2, 'response_metadata', None))
    except Exception:
        traceback.print_exc()
except Exception as e:
    print(f"ChatOllama ping failed: {e}")


Base URL: http://localhost:11434
/api/tags status: 200
Tags keys: ['models']
Discovered models: ['gpt-oss:20b']
Selected model: gpt-oss:20b
Minimal chat took 0.40s, content: 'ready'
Generate took 0.38s, response: 'Hello! How can I help you today?'
ChatOllama ping took 0.46s
Type: <class 'langchain_core.messages.ai.AIMessage'>
Content: 'ok'
Additional kwargs: {}
Response metadata: {'model': 'gpt-oss:20b', 'created_at': '2025-09-09T08:24:41.002132432Z', 'done': True, 'done_reason': 'stop', 'total_duration': 461649784, 'load_duration': 70430807, 'prompt_eval_count': 71, 'prompt_eval_duration': 28757062, 'eval_count': 42, 'eval_duration': 361889666, 'model_name': 'gpt-oss:20b'}


In [10]:
import os
import json

# Improved base URL and model selection

def _get_ollama_base_url() -> str:
    return os.getenv("OLLAMA_BASE_URL", "http://localhost:11434")


def _select_model(preferred: list[str] | None = None) -> str:
    preferred = preferred or ["gpt-oss:20b", "codellama:34b", "llama3.1:8b-instruct", "llama3.2:3b-instruct"]
    try:
        import ollama
        client = ollama.Client(host=_get_ollama_base_url())
        info = client.list()
        names = [m.get("name") for m in info.get("models", []) if m.get("name")]
        for cand in preferred:
            if cand in names:
                return cand
        if names:
            return names[0]
    except Exception:
        pass
    return preferred[0]


# Redefine call_llm_local with base_url and fallback
from langchain_ollama import ChatOllama
from langchain_core.messages import HumanMessage

class _ContentWrapper:
    def __init__(self, content: str):
        self.content = content
        self.response_metadata = {"source": "ollama.direct"}
        self.additional_kwargs = {}


def call_llm_local(model: str | None = None, prompt_content: str | None = None, temperature: float = 0, keep_alive: str = "30m", num_predict: int = 2048):
    if prompt_content is None:
        raise ValueError("Prompt content is required")
    base_url = _get_ollama_base_url()
    model = model or _select_model()

    try:
        llm = ChatOllama(
            model=model,
            base_url=base_url,
            temperature=temperature,
            keep_alive=keep_alive,
            num_predict=num_predict,
            format="json",
        )
        resp = llm.invoke([HumanMessage(content=prompt_content)])
        content = (getattr(resp, "content", None) or "").strip()
        if content:
            return resp
    except Exception:
        pass

    # Fallback: direct Ollama client
    try:
        import ollama
        client = ollama.Client(host=base_url)
        chat = client.chat(model=model, messages=[{"role": "user", "content": prompt_content}], options={"temperature": temperature})
        if isinstance(chat, dict):
            content = (chat.get("message", {}).get("content") or "").strip()
        else:
            content = (getattr(getattr(chat, "message", {}), "content", None) or "").strip()
        return _ContentWrapper(content)
    except Exception as e:
        return _ContentWrapper("")


# Redefine generate_html_slices_fixed to always return a SliceSet

def generate_html_slices_fixed(html_content: str, use_local_llm: bool = True) -> SliceSet:
    prompt = load_prompt("generate_slices")
    json_schema = SliceSet.model_json_schema()
    prompt = prompt.format(html_content=html_content, output_schema=json_schema)

    try:
        response = None
        if use_local_llm:
            response = call_llm_local(model=_select_model(), prompt_content=prompt, temperature=0)
        raw = (getattr(response, "content", None) or "").strip()
        if not raw:
            print("‚ö†Ô∏è Empty LLM content. Check diagnostics above.")
            return SliceSet(slices=[])

        content = raw
        if content.startswith('```json'):
            start_idx = content.find('\n', 7) + 1
            end_idx = content.rfind('```')
            if start_idx > 7 and end_idx > start_idx:
                content = content[start_idx:end_idx].strip()
        elif content.startswith('```'):
            start_idx = content.find('\n', 3) + 1
            end_idx = content.rfind('```')
            if start_idx > 3 and end_idx > start_idx:
                content = content[start_idx:end_idx].strip()

        try:
            parsed_json = json.loads(content)
            html_lines = len(html_content.split('\n'))
            valid_slices = []
            for slice_data in parsed_json.get('slices', []):
                first_line = max(0, min(int(slice_data.get('first_line', 0)), html_lines - 1))
                last_line = max(first_line, min(int(slice_data.get('last_line', 0)), html_lines - 1))
                valid_slices.append(Slice(first_line=first_line, last_line=last_line))
            print(f"‚úÖ Parsed {len(valid_slices)} slices")
            return SliceSet(slices=valid_slices)
        except json.JSONDecodeError as json_err:
            print(f"‚ö†Ô∏è JSON parsing failed: {json_err}")
            start_idx = content.find('{')
            if start_idx != -1:
                brace_count = 0
                end_idx = -1
                for i in range(start_idx, len(content)):
                    if content[i] == '{':
                        brace_count += 1
                    elif content[i] == '}':
                        brace_count -= 1
                        if brace_count == 0:
                            end_idx = i + 1
                            break
                if end_idx != -1:
                    try:
                        parsed_json = json.loads(content[start_idx:end_idx])
                        slice_set = SliceSet(**parsed_json)
                        print(f"‚úÖ Manual JSON extraction: {len(slice_set.slices)} slices")
                        return slice_set
                    except Exception:
                        pass
    except Exception as e:
        print(f"‚ùå LLM call failed: {e}")
        html_lines = len(html_content.split('\n'))
        return SliceSet(slices=[Slice(first_line=0, last_line=min(html_lines - 1, 20))])

    # Final safe return if nothing matched
    return SliceSet(slices=[]) 

print(f"Using Ollama at: {_get_ollama_base_url()} | model: {_select_model()}")


Using Ollama at: http://localhost:11434 | model: gpt-oss:20b
