In [5]:
from pathlib import Path
from docling.document_converter import DocumentConverter


class PDFParser:
    """
    ‚úÖ PDF Parser using Docling
    Converts PDF into Docling Document object.
    No Markdown export here.
    """

    def __init__(self):
        self.converter = DocumentConverter()
        self.doc = None

    def parse_pdf(self, pdf_path: str):
        """
        Parse PDF into Docling Document object.

        Args:
            pdf_path (str): Path to input PDF

        Returns:
            bool: True if parse successful, False otherwise
        """
        try:
            result = self.converter.convert(pdf_path)
            self.doc = result.document
            return True
        except Exception as e:
            print(f"‚ùå Failed to parse PDF: {e}")
            return False

    def analyze_cleaning(self):
        """
        Optional: placeholder for any post-processing or cleaning.
        """
        if self.doc is None:
            print("‚ö†Ô∏è No document to analyze.")
            return
        # Example: count pages
        page_count = getattr(self.doc, 'page_count', 0)
        print(f"üìÑ Document has {page_count} pages.")

    def print_report(self):
        """Print basic document info"""
        if self.doc is None:
            print("‚ö†Ô∏è No document parsed yet.")
            return
        print("üìä Docling Document Report")
        print(f"   Title: {getattr(self.doc, 'title', 'Unknown')}")
        print(f"   Pages: {getattr(self.doc, 'page_count', 'Unknown')}")
        print(f"   Sections: {len(getattr(self.doc, 'sections', []))}")


In [6]:
import json
import textwrap
from typing import List
from dataclasses import dataclass
from pathlib import Path
from docling.chunking import HybridChunker
from docling_core.transforms.chunker.tokenizer.huggingface import HuggingFaceTokenizer
from transformers import AutoTokenizer


@dataclass
class RAGChunk:
    content: str
    metadata: dict
    token_count: int


class DoclingHybridChunker:
    """
    ‚úÖ Hierarchical + Hybrid Chunker
    Tables are chunked as a whole
    """

    def __init__(self, max_tokens: int = 1024, overlap_tokens: int = 128):
        embed_model = "NousResearch/Meta-Llama-3-8B-Instruct"
        tokenizer = AutoTokenizer.from_pretrained(embed_model)

        self.hf_tokenizer = HuggingFaceTokenizer(
            tokenizer=tokenizer,
            max_tokens=max_tokens,
            overlap_tokens=overlap_tokens
        )

        self.chunker = HybridChunker(
            tokenizer=self.hf_tokenizer,
            merge_peers=True,
            descriptive_titles=True
        )
        self.chunks: List[RAGChunk] = []

    def chunk_document(self, docling_doc) -> List[RAGChunk]:
        chunk_iter = self.chunker.chunk(dl_doc=docling_doc)
        chunk_id = 0

        for chunk in chunk_iter:
            content = chunk.text.strip() if hasattr(chunk, 'text') else ""
            if len(content) < 50 and not getattr(chunk, 'tables', []):
                continue

            # Tables as whole chunks
            if getattr(chunk, 'tables', []):
                for table_idx, table in enumerate(chunk.tables):
                    table_content = str(table)
                    metadata = {
                        'chunk_id': f'hybrid_{chunk_id:04d}_table_{table_idx}',
                        'page_no': getattr(chunk, 'page_number', None) or getattr(chunk, 'page_no', 1),
                        'hierarchy_level': getattr(chunk, 'hierarchy_level', 0),
                        'doc_items_count': getattr(chunk, 'num_items', 1),
                        'has_tables': True,
                        'token_count_original': len(table_content.split()),
                        'title': getattr(chunk, 'title', None)
                    }
                    token_count = len(self.hf_tokenizer.tokenizer.encode(table_content))
                    self.chunks.append(RAGChunk(
                        content=table_content,
                        metadata=metadata,
                        token_count=token_count
                    ))
                    chunk_id += 1

            # Text chunk
            if content:
                metadata = {
                    'chunk_id': f'hybrid_{chunk_id:04d}',
                    'page_no': getattr(chunk, 'page_number', None) or getattr(chunk, 'page_no', 1),
                    'hierarchy_level': getattr(chunk, 'hierarchy_level', 0),
                    'doc_items_count': getattr(chunk, 'num_items', 1),
                    'has_tables': len(getattr(chunk, 'tables', [])) > 0,
                    'token_count_original': getattr(chunk, 'token_count', len(content.split())),
                    'title': getattr(chunk, 'title', None)
                }
                token_count = len(self.hf_tokenizer.tokenizer.encode(content))
                self.chunks.append(RAGChunk(
                    content=content,
                    metadata=metadata,
                    token_count=token_count
                ))
                chunk_id += 1

        print(f"‚úÖ HYBRID CHUNKER: {len(self.chunks)} semantic chunks created!")
        print(f"   üìä Chunks per page: avg {len(self.chunks)/getattr(docling_doc, 'page_count', 1):.1f}")
        return self.chunks

    def print_samples(self, n=3):
        print("\n" + "="*80)
        print("üè∑Ô∏è  DOC LING HYBRID CHUNKS (Hierarchical + Token-aware)")
        print("="*80)
        for i, chunk in enumerate(self.chunks[:n]):
            print(f"\n{i+1}. [{chunk.metadata['chunk_id']}] {chunk.token_count} tokens")
            print(f"   üìÑ Page: {chunk.metadata['page_no']} | Level: {chunk.metadata['hierarchy_level']}")
            print(f"   üìã Tables: {chunk.metadata['has_tables']} | Items: {chunk.metadata['doc_items_count']}")
            print("-" * 70)
            wrapped = textwrap.fill(chunk.content[:400], width=90)
            print(wrapped)
            print()

    def save_rag_chunks(self, output_file: str):
        Path(output_file).parent.mkdir(parents=True, exist_ok=True)
        with open(output_file, 'w', encoding='utf-8') as f:
            for chunk in self.chunks:
                f.write(json.dumps({
                    'content': chunk.content,
                    'metadata': chunk.metadata,
                    'token_count': chunk.token_count
                }, ensure_ascii=False) + '\n')
        print(f"üíæ Saved {len(self.chunks)} RAG chunks ‚Üí {output_file}")


In [7]:
# from pdf_parser import PDFParser
# from hybrid_chunker import DoclingHybridChunker

def run_pipeline(pdf_path: str, rag_output_path: str):
    """
    Full PDF ‚Üí Docling ‚Üí Hybrid Chunking ‚Üí RAG JSONL pipeline

    Args:
        pdf_path (str): Path to input PDF
        rag_output_path (str): Path to save RAG-ready JSONL
    """
    print("üîÑ PHASE 1: Docling PDF Parsing...")
    parser = PDFParser()
    if not parser.parse_pdf(pdf_path):
        print("‚ùå Parsing failed. Exiting pipeline.")
        return

    parser.analyze_cleaning()
    parser.print_report()

    print("\nüî™ PHASE 2: Hybrid Hierarchical Chunking...")
    chunker = DoclingHybridChunker(max_tokens=1024, overlap_tokens=128)
    chunks = chunker.chunk_document(parser.doc)

    chunker.print_samples(n=3)
    chunker.save_rag_chunks(rag_output_path)

    print(f"\nüéâ PIPELINE COMPLETE! {len(chunks)} HIGH-QUALITY CHUNKS READY!")
    print("   ‚úÖ Ready for LanceDB embedding ‚Üí RAG queries")
    return chunks


if __name__ == "__main__":
    pdf_path = r"C:\Users\birok\Python\LLMOPs\docling-tutorials\data\R0r4e.pdf"
    rag_output_path = r"./chunks/R0r4e_hybrid_tables.jsonl"

    run_pipeline(pdf_path, rag_output_path)


2026-01-08 21:25:35,903 - INFO - detected formats: [<InputFormat.PDF: 'pdf'>]
2026-01-08 21:25:35,922 - INFO - Going to convert document batch...
2026-01-08 21:25:35,923 - INFO - Initializing pipeline for StandardPdfPipeline with options hash e15bc6f248154cc62f8db15ef18a8ab7
2026-01-08 21:25:35,933 - INFO - Loading plugin 'docling_defaults'
2026-01-08 21:25:35,936 - INFO - Registered picture descriptions: ['vlm', 'api']
2026-01-08 21:25:35,943 - INFO - Loading plugin 'docling_defaults'
2026-01-08 21:25:35,946 - INFO - Registered ocr engines: ['auto', 'easyocr', 'ocrmac', 'rapidocr', 'tesserocr', 'tesseract']
2026-01-08 21:25:35,947 - INFO - rapidocr cannot be used because onnxruntime is not installed.
2026-01-08 21:25:35,948 - INFO - easyocr cannot be used because it is not installed.
2026-01-08 21:25:36,049 - INFO - Accelerator device: 'cpu'
[32m[INFO] 2026-01-08 21:25:36,059 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2026-01-08 21:25:36,063 [RapidOCR] device_con

üîÑ PHASE 1: Docling PDF Parsing...


[32m[INFO] 2026-01-08 21:25:36,182 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2026-01-08 21:25:36,183 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2026-01-08 21:25:36,185 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\birok\Python\LLMOPs\docling-tutorials\docling-venv\Lib\site-packages\rapidocr\models\ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2026-01-08 21:25:36,185 [RapidOCR] main.py:50: Using C:\Users\birok\Python\LLMOPs\docling-tutorials\docling-venv\Lib\site-packages\rapidocr\models\ch_ptocr_mobile_v2.0_cls_infer.pth[0m
[32m[INFO] 2026-01-08 21:25:36,231 [RapidOCR] base.py:22: Using engine_name: torch[0m
[32m[INFO] 2026-01-08 21:25:36,232 [RapidOCR] device_config.py:50: Using CPU device[0m
[32m[INFO] 2026-01-08 21:25:36,247 [RapidOCR] download_file.py:60: File exists and is valid: C:\Users\birok\Python\LLMOPs\docling-tutorials\docling-venv\Lib\site-packages\rapidocr\models\ch_PP-OCRv4_rec_infer.pth[0m
[3

üìÑ Document has 0 pages.
üìä Docling Document Report
   Title: Unknown
   Pages: Unknown
   Sections: 0

üî™ PHASE 2: Hybrid Hierarchical Chunking...
‚úÖ HYBRID CHUNKER: 42 semantic chunks created!
   üìä Chunks per page: avg 42.0

üè∑Ô∏è  DOC LING HYBRID CHUNKS (Hierarchical + Token-aware)

1. [hybrid_0000] 76 tokens
   üìÑ Page: 1 | Level: 0
   üìã Tables: False | Items: 1
----------------------------------------------------------------------
Concerning the Adoption of Harmonized Technical United Nations Regulations for Wheeled
Vehicles, Equipment and Parts which can be Fitted and/or be Used on Wheeled Vehicles and
the Conditions for Reciprocal Recognition of Approvals Granted on the Basis of these
United Nations Regulations * (Revision 3, including the amendments which entered into
force on 14 September 2017) _________


2. [hybrid_0001] 49 tokens
   üìÑ Page: 1 | Level: 0
   üìã Tables: False | Items: 1
---------------------------------------------------------------------

In [9]:
"""
LanceDB RAG Store
Embedding ‚Üí LanceDB ingestion ‚Üí Vector search

- LanceDB >= 0.5
- SentenceTransformers embeddings
- JSONL hybrid chunks input (table-aware)
"""

from __future__ import annotations

import json
from dataclasses import dataclass
from pathlib import Path
from typing import List

import lancedb
import pyarrow as pa
import pandas as pd
from sentence_transformers import SentenceTransformer


# ---------------------------------------------------------------------
# Data model
# ---------------------------------------------------------------------

@dataclass
class LanceDBChunk:
    id: str
    vector: List[float]
    content: str
    metadata: str
    token_count: int
    page_no: int | None
    has_tables: bool


# ---------------------------------------------------------------------
# LanceDB RAG Store
# ---------------------------------------------------------------------

class LanceDBRAGStore:
    """
    Hybrid RAG Vector Store using LanceDB

    - Embedding: SentenceTransformers
    - Storage: LanceDB
    - Input: JSONL chunks (from DoclingHybridChunker)
    """

    def __init__(
        self,
        db_path: str = "./lancedb_rag",
        table_name: str = "rag_hybrid_chunks",
        embed_model: str = "BAAI/bge-base-en-v1.5",
    ):
        self.db_path = Path(db_path)
        self.db_path.mkdir(parents=True, exist_ok=True)

        self.embedder = SentenceTransformer(embed_model)
        self.embedding_dim = self.embedder.get_sentence_embedding_dimension()

        self.db = lancedb.connect(str(self.db_path))
        self.table_name = table_name
        self.table = None

    # -----------------------------------------------------------------
    # Table creation
    # -----------------------------------------------------------------

    def create_table(self, overwrite: bool = True):
        if overwrite and self.table_name in self.db.list_tables():
            self.db.drop_table(self.table_name)

        schema = pa.schema([
            ("id", pa.string()),
            ("vector", pa.list_(pa.float32(), self.embedding_dim)),
            ("content", pa.string()),
            ("metadata", pa.string()),
            ("token_count", pa.int32()),
            ("page_no", pa.int32()),
            ("has_tables", pa.bool_()),
        ])

        mode = "overwrite" if overwrite else "create"

        self.table = self.db.create_table(
            self.table_name,
            schema=schema,
            mode=mode,  # <-- updated
        )

        print(f"‚úÖ Created LanceDB table '{self.table_name}' with mode='{mode}'")
        print(f"   Vector dim: {self.embedding_dim}")


    # -----------------------------------------------------------------
    # Load chunks
    # -----------------------------------------------------------------

    def load_chunks_from_jsonl(self, jsonl_path: str) -> List[LanceDBChunk]:
        chunks: List[LanceDBChunk] = []

        with open(jsonl_path, "r", encoding="utf-8") as f:
            for idx, line in enumerate(f):
                record = json.loads(line)
                metadata = record.get("metadata", {})

                chunks.append(
                    LanceDBChunk(
                        id=f"chunk_{idx:06d}",
                        vector=[],
                        content=record["content"],
                        metadata=json.dumps(metadata),
                        token_count=record.get("token_count", 0),
                        page_no=metadata.get("page_no", 0),
                        has_tables=metadata.get("has_tables", False),
                    )
                )

        print(f"‚úÖ Loaded {len(chunks)} chunks from JSONL")
        return chunks

    # -----------------------------------------------------------------
    # Embedding
    # -----------------------------------------------------------------

    def embed_chunks(self, chunks: List[LanceDBChunk]) -> List[LanceDBChunk]:
        texts = [c.content for c in chunks]

        embeddings = self.embedder.encode(
            texts,
            normalize_embeddings=True,
            show_progress_bar=True,
        )

        for chunk, vector in zip(chunks, embeddings):
            chunk.vector = vector.tolist()

        print(f"‚úÖ Embedded {len(chunks)} chunks")
        return chunks

    # -----------------------------------------------------------------
    # Storage
    # -----------------------------------------------------------------

    def store_chunks(self, chunks: List[LanceDBChunk]):
        df = pd.DataFrame([{
            "id": c.id,
            "vector": c.vector,
            "content": c.content,
            "metadata": c.metadata,
            "token_count": c.token_count,
            "page_no": c.page_no or 0,
            "has_tables": c.has_tables,
        } for c in chunks])

        self.table.add(df)
        print(f"‚úÖ Stored {len(chunks)} chunks in LanceDB")

    # -----------------------------------------------------------------
    # End-to-end ingestion
    # -----------------------------------------------------------------

    def ingest(self, jsonl_path: str):
        self.create_table(overwrite=True)
        chunks = self.load_chunks_from_jsonl(jsonl_path)
        chunks = self.embed_chunks(chunks)
        self.store_chunks(chunks)

        return len(chunks)

    # -----------------------------------------------------------------
    # Query
    # -----------------------------------------------------------------

    def query(self, query_text: str, k: int = 5):
        query_vec = self.embedder.encode(
            query_text,
            normalize_embeddings=True,
        )

        results = (
            self.table.search(query_vec)
            .limit(k)
            .to_pandas()
        )

        print(f"\nüîç QUERY: {query_text}")
        print("=" * 80)

        for _, row in results.iterrows():
            score = 1 - row["_distance"]
            print(f"üÜî {row['id']} | Score: {score:.3f}")
            print(f"üìÑ Page {row['page_no']} | {row['token_count']} tokens")
            print(f"üìù {row['content'][:160]}...")
            print("-" * 80)

        return results

    # -----------------------------------------------------------------
    # Stats
    # -----------------------------------------------------------------

    def stats(self):
        df = self.table.to_pandas()

        print("\nüìä LANCE DB STATS")
        print(f"   Total chunks: {len(df)}")
        print(f"   Avg tokens: {df['token_count'].mean():.0f}")
        print(f"   Chunks with tables: {df['has_tables'].sum()}")


# ---------------------------------------------------------------------
# Main
# ---------------------------------------------------------------------

def main():
    store = LanceDBRAGStore()

    chunks_file = "./chunks/R0r4e_hybrid_tables.jsonl"  # Updated for table-aware chunks
    count = store.ingest(chunks_file)

    print(f"\nüéâ Successfully stored {count} chunks")

    store.query("Give me the regulation scope")
    store.stats()


if __name__ == "__main__":
    main()


2026-01-08 21:30:54,460 - INFO - Use pytorch device_name: cpu
2026-01-08 21:30:54,461 - INFO - Load pretrained SentenceTransformer: BAAI/bge-base-en-v1.5


‚úÖ Created LanceDB table 'rag_hybrid_chunks' with mode='overwrite'
   Vector dim: 768
‚úÖ Loaded 42 chunks from JSONL


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 2/2 [00:09<00:00,  4.88s/it]


‚úÖ Embedded 42 chunks
‚úÖ Stored 42 chunks in LanceDB

üéâ Successfully stored 42 chunks


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 35.21it/s]


üîç QUERY: Give me the regulation scope
üÜî chunk_000004 | Score: 0.418
üìÑ Page 1 | 602 tokens
üìù Regulation,  = . Regulation, Page = . 1.,  = Scope.................................................................................................................
--------------------------------------------------------------------------------
üÜî chunk_000002 | Score: 0.275
üìÑ Page 1 | 34 tokens
üìù This document is meant purely as documentation tool. The authentic and legal binding text is: ECE/TRANS/WP.29/2021/84.
_________...
--------------------------------------------------------------------------------
üÜî chunk_000033 | Score: 0.267
üìÑ Page 1 | 243 tokens
üìù Supply the information required by the following table in respect of the applicable subjects for this vehicle in Annex 4. All relevant approvals for each subjec...
--------------------------------------------------------------------------------
üÜî chunk_000005 | Score: 0.238
üìÑ Page 1 | 32 tokens
üìù - 1.1




In [10]:
"""
‚úÖ FIXED LanceDB ‚Üí ChatGroq RAG (Connect & QA Only)
WORKS WITH YOUR EXISTING DATABASE!
"""

from pathlib import Path
import lancedb
from sentence_transformers import SentenceTransformer
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate


class LanceDBChatGroq:
    """Connect to existing LanceDB ‚Üí ChatGroq RAG"""

    def __init__(self, db_path: str = "./lancedb_rag", table_name: str = "rag_hybrid_chunks"):
        self.db_path = Path(db_path)
        self.table_name = table_name

        # Load existing LanceDB table
        self.db = lancedb.connect(str(self.db_path))
        self.table = self.db.open_table(self.table_name)

        # ‚úÖ Match the embedding model used during ingestion
        self.embedder = SentenceTransformer("BAAI/bge-base-en-v1.5")

        # ‚úÖ ChatGroq LLM
        self.llm = ChatGroq(
            model="llama-3.3-70b-versatile",
            temperature=0.1
        )

        print(f"‚úÖ Connected to LanceDB table '{self.table_name}'")
        print(f"‚úÖ Embedder: BAAI/bge-base-en-v1.5")
        print(f"‚úÖ ChatGroq: llama-3.3-70b-versatile")

    # -----------------------------------------------------------------
    # Retrieval
    # -----------------------------------------------------------------

    def retrieve(self, query: str, k: int = 4):
        """Vector search using stored embeddings"""
        query_emb = self.embedder.encode([query])[0]
        results = self.table.search(query_emb).limit(k).to_pandas()

        context_docs = []
        for _, row in results.iterrows():
            doc = {
                "page_no": row["page_no"],
                "content": row["content"],
                "score": 1 - row["_distance"]
            }
            context_docs.append(doc)

        return context_docs

    def format_context(self, docs):
        """Format retrieved chunks for the LLM"""
        context = ""
        for i, doc in enumerate(docs, 1):
            context += f"\n\n--- SOURCE {i} (Page {doc['page_no']}, Score: {doc['score']:.3f}) ---\n"
            context += doc["content"]
        return context

    # -----------------------------------------------------------------
    # Response generation
    # -----------------------------------------------------------------

    def generate_response(self, question: str, context: str):
        """Generate answer with ChatGroq"""
        prompt_template = """
You are a helpful assistant specialized in regulation documents.

IMPORTANT RULES:
1. Use ONLY the provided context below
2. Cite specific page numbers when possible
3. If answer not in context, say "Not found in document"
4. Be precise and professional

CONTEXT:
{context}

QUESTION: {question}

ANSWER (include page citations):
"""
        prompt = ChatPromptTemplate.from_template(prompt_template)
        chain = prompt | self.llm

        response = chain.invoke({
            "context": context,
            "question": question
        })

        return response.content

    # -----------------------------------------------------------------
    # Full RAG query
    # -----------------------------------------------------------------

    def query(self, question: str, k: int = 4):
        """Complete RAG pipeline: retrieve ‚Üí LLM answer"""
        print(f"\nüîç Q: {question}")
        print("=" * 80)

        # Retrieve relevant chunks
        docs = self.retrieve(question, k=k)
        context = self.format_context(docs)

        # Generate answer
        answer = self.generate_response(question, context)

        print(f"ü§ñ A: {answer}")
        print(f"\nüìÑ SOURCES ({len(docs)} found):")
        for i, doc in enumerate(docs, 1):
            print(f"  {i}. Page {doc['page_no']} (Score: {doc['score']:.3f})")
        print("-" * 80)

        return answer, docs


# ---------------------------------------------------------------------
# RUN INTERACTIVE CHAT
# ---------------------------------------------------------------------

if __name__ == "__main__":
    rag_chat = LanceDBChatGroq("./lancedb_rag")

    print("\nüéâ RAG CHAT READY!")

    # Optional: test query
    rag_chat.query("What is the scope of this regulation?")

    print("\nüî• Interactive chat (type 'quit' to exit):")
    while True:
        question = input("\n‚ùì Ask: ").strip()
        if question.lower() in ["quit", "exit", "q"]:
            print("üëã Exiting chat.")
            break
        rag_chat.query(question)


  from pydantic.v1.fields import FieldInfo as FieldInfoV1
2026-01-08 21:33:05,874 - INFO - Use pytorch device_name: cpu
2026-01-08 21:33:05,876 - INFO - Load pretrained SentenceTransformer: BAAI/bge-base-en-v1.5


‚úÖ Connected to LanceDB table 'rag_hybrid_chunks'
‚úÖ Embedder: BAAI/bge-base-en-v1.5
‚úÖ ChatGroq: llama-3.3-70b-versatile

üéâ RAG CHAT READY!

üîç Q: What is the scope of this regulation?


Batches: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 1/1 [00:00<00:00, 16.52it/s]
2026-01-08 21:33:11,297 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


ü§ñ A: The scope of this regulation is found on Page 1 and further detailed on Page 4 of SOURCE 1. According to SOURCE 1 (Page 1), the regulation outlines its structure, including the scope, which is elaborated on Page 4. Additionally, SOURCE 2 (Page 1) specifies that this regulation applies to vehicles of category M1 and outlines requirements for the type approval of a whole vehicle.

üìÑ SOURCES (4 found):
  1. Page 1 (Score: 0.359)
  2. Page 1 (Score: 0.311)
  3. Page 1 (Score: 0.264)
  4. Page 1 (Score: 0.248)
--------------------------------------------------------------------------------

üî• Interactive chat (type 'quit' to exit):
üëã Exiting chat.
