docker exec rag-mcq-airflow airflow dags unpause pdf_ingestion_dag

docker exec rag-mcq-airflow airflow dags trigger pdf_ingestion_dag

In [None]:
import os

# Override the connection settings so the Postgres host is localhost
# instead of "postgres".
os.environ['POSTGRES_HOST'] = 'localhost'

In [None]:
!curl -X DELETE "http://localhost:9200/mcq-documents" -u admin:admin

### Bước 0

In [None]:
import torch
# torch.cuda.empty_cache()
# Print whether PyTorch reports a usable CUDA device.
print(torch.cuda.is_available())

In [None]:
from src.models.document import Base
from src.db.session import engine

In [None]:
from sqlalchemy import text

# DESTRUCTIVE: wipes the database so the notebook can start from scratch.
# The anonymous PL/pgSQL block lists every table in the `public` schema
# (pg_tables) and issues DROP TABLE IF EXISTS ... CASCADE for each one;
# quote_ident() quotes the table name before it is spliced into the
# dynamic statement.
with engine.connect() as conn:
    conn.execute(text("""
        DO
        $$
        DECLARE
            r RECORD;
        BEGIN
            FOR r IN (SELECT tablename FROM pg_tables WHERE schemaname = 'public') LOOP
                EXECUTE 'DROP TABLE IF EXISTS ' || quote_ident(r.tablename) || ' CASCADE';
            END LOOP;
        END
        $$;
    """))
    # Explicit commit on the connection after executing the block.
    conn.commit()

print("‚úÖ Dropped all tables successfully ‚Äî fresh start!")

In [None]:
# Create the tables registered on Base.metadata using the shared engine.
Base.metadata.create_all(bind=engine)

### Bước 1

In [None]:
from pathlib import Path

# Base URLs for the API and Airflow (local ports).
API_BASE_URL = "http://localhost:8000"
AIRFLOW_URL = "http://localhost:8080"

# Hosts & ports of the services
POSTGRES_HOST = "localhost"
POSTGRES_PORT = 5432          # or 5433 to connect to airflow-db instead
REDIS_HOST = "localhost"
REDIS_PORT = 6379
OPENSEARCH_HOST = "http://localhost:9200"
OLLAMA_HOST = "http://localhost:11434"

# RAG configuration
TOP_K = 5
USE_HYBRID = True
TIMEOUT = 60

In [None]:
from src.config import get_settings

# Load the project settings and take the PDF input directory from them.
settings = get_settings()
pdf_dir = Path(settings.data.pdf_dir)

# Collect the *.pdf files directly inside pdf_dir (non-recursive glob) and
# keep their paths as plain strings.
pdf_files = list(pdf_dir.glob("*.pdf"))
pdf_paths = [str(f) for f in pdf_files]

print(f"Found {len(pdf_paths)} PDF files:")
pdf_paths  # last expression of the cell, so the notebook displays the list

### Bước 2

In [None]:
def sanitize_metadata(metadata: dict):
    """Return a copy of ``metadata`` with callable values replaced by their results.

    Every callable value is invoked with no arguments and its return value is
    stored under the same key; if the call raises, ``None`` is stored instead.
    Non-callable values are copied through unchanged. Key order is preserved.
    """
    def _resolve(value):
        # Plain values pass straight through.
        if not callable(value):
            return value
        # Call the method if needed; a failing call degrades to None.
        try:
            return value()
        except Exception:
            return None

    return {key: _resolve(value) for key, value in metadata.items()}

In [None]:
import sys
import os
import locale
import io

def calculate_optimal_workers() -> int:
    """Return a worker count derived from the machine's CPU count.

    Takes 70% of the CPUs reported by ``os.cpu_count()`` (truncated to an
    int), with a floor of 3 workers. An unavailable CPU count is treated as 1.
    """
    cpus = os.cpu_count()
    if not cpus:
        cpus = 1
    scaled = int(cpus * 0.7)
    return scaled if scaled > 3 else 3

# Auto-calculate the worker count for this machine
optimal_workers = calculate_optimal_workers()

# Manual override, left disabled:
# optimal_workers = 7
print(optimal_workers)

In [None]:
from src.db.session import get_db_context
from src.models.document import Document
from src.services.factories import make_pdf_parser
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
import time

def _section_title(sections, index: int = 3) -> str:
    """Return the title of ``sections[index]``, or "" when it is unavailable."""
    try:
        title = sections[index]["title"]
    except (IndexError, KeyError, TypeError):
        # Fewer sections than expected, the `{}` default, or a malformed entry.
        return ""
    return title if isinstance(title, str) else ""


def _parse_and_save_single_pdf(args: tuple) -> tuple:
    """
    Parse a single PDF and save it to the database (for parallel execution).

    The document title is built from the PDF's stem plus the title of the
    fourth parsed section (index 3). When that section does not exist the
    title falls back to the stem alone instead of failing the whole document.

    Args:
        args: Tuple of (pdf_path, processed_count, total_count). Only
            ``pdf_path`` is used; the two counters are accepted so the task
            tuples built by the caller can be passed through unchanged.

    Returns:
        tuple: (doc_id, success, error_message). ``error_message`` is None on
        success, "Already exists" when a row with the same doc_id is already
        stored, and the exception text for any other failure.
    """
    pdf_path, _processed_count, _total_count = args
    doc_id = Path(pdf_path).stem

    try:
        parser = make_pdf_parser()

        # Parse PDF (original note: this step writes the PDF out as markdown)
        parsed = parser.parse_pdf(pdf_path)

        # Check if parsing failed
        if parsed is None:
            return (doc_id, False, "Parsing returned None")

        safe_metadata = sanitize_metadata(parsed["metadata"])
        sections = parsed.get("sections", {})
        raw_text = parsed.get("full_text", "")

        # Index 3 is where the document heading was observed in sample
        # output; it is not guaranteed, hence the empty-string fallback.
        heading = _section_title(sections)
        header_line = f"{doc_id} - {heading}" if heading else doc_id

        # Create document object
        document = Document(
            doc_id=doc_id,
            filename=safe_metadata.get("file_name", ""),
            file_path=pdf_path,
            # NOTE(review): title joins doc_id and heading with no separator,
            # while full_text uses " - "; kept as-is — confirm which is intended.
            title=doc_id + heading,
            full_text=header_line + '\n' + raw_text,
            raw_content=raw_text,
            page_count=safe_metadata.get("page_count", 0),
            sections=sections,
            tables=parsed.get("tables", {}),
            doc_metadata=safe_metadata,
            source_folder=Path(pdf_path).parent.name,
            processing_status="completed",
        )

        # Save to DB
        with get_db_context() as db:
            # Skip documents that were already ingested.
            existing = db.query(Document).filter(Document.doc_id == doc_id).first()
            if existing:
                return (doc_id, False, "Already exists")

            db.add(document)
            db.commit()

        return (doc_id, True, None)

    except Exception as e:
        # Worker boundary: report the failure to the caller instead of
        # letting one bad PDF abort the whole batch.
        return (doc_id, False, str(e))


def extract_pdfs_debug(pdf_paths: list, max_workers: int = 4):
    """
    Extract and save PDFs to the database with parallel processing.

    Each path is handed to ``_parse_and_save_single_pdf`` on a thread pool;
    progress is printed as tasks finish and a summary is printed at the end.

    Args:
        pdf_paths: List of PDF file paths to process
        max_workers: Number of parallel workers (default: 4)

    Returns:
        dict with:
            processed:  number of PDFs saved successfully
            errors:     number of PDFs that were NOT saved; this still
                        includes already-existing documents so existing
                        callers see the same value as before
            skipped:    how many of those were "Already exists" skips
            doc_ids:    doc ids of the PDFs that were saved
            total_time: wall-clock duration in seconds
    """
    processed = 0
    failures = 0   # real failures only
    skipped = 0    # documents that were already in the database
    doc_ids = []
    failed_docs = []  # (doc_id, error) for real failures

    total = len(pdf_paths)

    print(f"\n{'='*80}")
    print("üöÄ Starting parallel PDF extraction")
    print(f"üìÅ Total PDFs to process: {total}")
    print(f"‚öôÔ∏è  Workers: {max_workers}")
    print(f"{'='*80}\n")

    start_time = time.time()

    # Prepare task arguments in the (pdf_path, position, total) shape the
    # worker expects.
    tasks = [(pdf_path, i + 1, total) for i, pdf_path in enumerate(pdf_paths)]
    print('PATH:', tasks)

    # Process PDFs in parallel
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {
            executor.submit(_parse_and_save_single_pdf, task): task[0]
            for task in tasks
        }

        completed_count = 0
        for future in as_completed(futures):
            pdf_path = futures[future]
            doc_id = Path(pdf_path).stem

            completed_count += 1
            progress = (completed_count / total) * 100
            prefix = f"[{completed_count}/{total}] ({progress:.1f}%)"

            try:
                # as_completed() only yields finished futures, so result()
                # returns immediately; a timeout here would never fire.
                doc_id_result, success, error = future.result()
            except Exception as e:
                failures += 1
                failed_docs.append((doc_id, str(e)))
                print(f"{prefix} ‚è±Ô∏è  {doc_id} - Timeout/Error: {str(e)[:50]}")
                continue

            if success:
                doc_ids.append(doc_id_result)
                processed += 1
                print(f"{prefix} ‚úÖ {doc_id_result}")
            elif error == "Already exists":
                # A skip, not a failure: the document is already stored.
                skipped += 1
                print(f"{prefix} ‚è© {doc_id_result} - Already exists")
            else:
                failures += 1
                failed_docs.append((doc_id_result, str(error)))
                print(f"{prefix} ‚ùå {doc_id_result} - {str(error)[:50]}")

    total_time = time.time() - start_time
    # Guard against a zero-length interval (empty input / coarse clock).
    speed = total / total_time if total_time > 0 else 0.0

    # Print summary
    print(f"\n{'='*80}")
    print("üìä EXTRACTION SUMMARY")
    print(f"{'='*80}")
    print(f"‚úÖ Successfully processed: {processed}")
    print(f"‚è© Already existed: {skipped}")
    print(f"‚ùå Errors: {failures}")
    print(f"‚è±Ô∏è  Total time: {total_time:.1f}s")
    print(f"‚ö° Speed: {speed:.2f} PDF/s")

    if failed_docs:
        print(f"\n‚ö†Ô∏è  Failed documents:")
        for failed_id, error in failed_docs:
            print(f"  ‚Ä¢ {failed_id}: {error[:80]}")

    print(f"{'='*80}\n")

    return {
        "processed": processed,
        "errors": failures + skipped,
        "skipped": skipped,
        "doc_ids": doc_ids,
        "total_time": total_time,
    }

In [None]:
# Run the extraction over every discovered PDF with 7 worker threads.
# NOTE(review): the worker count is hard-coded here; the `optimal_workers`
# value computed in an earlier cell is not used — confirm that is intended.
extract_result = extract_pdfs_debug(pdf_paths, max_workers=7)

### Bước 3

In [None]:
from pathlib import Path
from src.db.session import get_db_context
from src.models.document import Document, DocumentChunk
from src.services.factories import make_text_chunker
import re

def chunk_documents_from_markdown(markdown_dir: str = 'private_test_output_html'):
    """
    Chunk documents directly from markdown files in a folder.

    For every ``*.md`` file the first ``## `` header is used as the title
    (falling back to the file stem), the text is chunked, and the chunks are
    stored as ``DocumentChunk`` rows linked to a ``Document`` row that is
    looked up by ``doc_id`` or created on the fly.

    Each document is written inside its own SAVEPOINT, so a failure on one
    file rolls back only that file's rows; everything else is committed once
    at the end.

    Args:
        markdown_dir: Directory containing markdown files to chunk

    Returns:
        Dictionary with chunking statistics: ``processed`` (documents stored),
        ``chunks_created`` and ``chunk_ids`` (only from documents that were
        stored successfully).
    """
    # Get all markdown files (sorted so runs are reproducible)
    md_path = Path(markdown_dir)
    md_files = sorted(md_path.glob('*.md'))

    if not md_files:
        print(f"‚ùå No markdown files found in {markdown_dir}")
        return {"processed": 0, "chunks_created": 0, "chunk_ids": []}

    print(f"‚Ñπ Found {len(md_files)} markdown files to chunk")

    # Built only once there is work to do.
    chunker = make_text_chunker()
    title_pattern = re.compile(r'^##\s+(.+)$', re.MULTILINE)
    # Record the folder actually read instead of a hard-coded name.
    source_folder = md_path.resolve().name

    total_chunks = 0
    processed = 0
    chunk_ids = []

    with get_db_context() as db:
        for md_file in md_files:
            doc_id = md_file.stem  # e.g., "Public_427" from "Public_427.md"

            try:
                # Read markdown file
                content = md_file.read_text(encoding='utf-8')

                # Extract title from markdown (first ## header)
                title_match = title_pattern.search(content)
                title = title_match.group(1).strip() if title_match else doc_id

                # Prepare document data
                doc_data = {
                    "title": title,
                    "full_text": content,
                    "sections": [],
                }

                # Chunk the document
                chunks = chunker.chunk_document(doc_data, doc_id)

                if not chunks:
                    print(f"‚ö† No chunks created for document {doc_id}")
                    continue

                doc_chunk_ids = []
                # SAVEPOINT: if anything below raises, only this document's
                # pending rows are rolled back and the session stays usable.
                with db.begin_nested():
                    # Check if document exists in DB
                    document = db.query(Document).filter(Document.doc_id == doc_id).first()

                    # Create or use existing document
                    if not document:
                        document = Document(
                            doc_id=doc_id,
                            filename=md_file.name,
                            file_path=str(md_file),
                            title=title,
                            full_text=content,
                            raw_content=content,
                            # NOTE(review): this is the line count, used as a
                            # rough stand-in for the page count.
                            page_count=len(content.split('\n')),
                            sections={},
                            tables={},
                            doc_metadata={"source": "markdown"},
                            source_folder=source_folder,
                            processing_status="completed",
                        )
                        db.add(document)
                        db.flush()  # Get the ID without committing

                    # Add chunks to database
                    for chunk in chunks:
                        db_chunk = DocumentChunk(
                            chunk_id=chunk["chunk_id"],
                            document_id=document.id,
                            document_file_name=document.doc_id,
                            document_title=document.title,
                            chunk_text=document.title + "\n" + chunk["chunk_text"],
                            chunk_index=chunk["chunk_index"],
                            section_name=chunk.get("section_name", "markdown"),
                            chunk_type=chunk.get("chunk_type", "text"),
                            word_count=chunk.get("word_count", 0),
                            char_count=chunk.get("char_count", 0),
                            chunk_metadata=chunk,
                            embedding_status="pending",
                            indexed_in_opensearch="pending",
                        )
                        db.add(db_chunk)
                        doc_chunk_ids.append(chunk["chunk_id"])

            except Exception as e:
                print(f"‚ùå Error chunking document {doc_id}: {e}")
                continue

            # Only count the document once its savepoint was released.
            chunk_ids.extend(doc_chunk_ids)
            processed += 1
            total_chunks += len(chunks)
            print(f"‚úÖ Chunked document {doc_id}: {len(chunks)} chunks")

        # Commit all changes at the end
        db.commit()

    print(f"\nüìä Summary: processed={processed}, chunks_created={total_chunks}, chunk_ids={len(chunk_ids)}")
    return {"processed": processed, "chunks_created": total_chunks, "chunk_ids": chunk_ids}

In [None]:
# Chunk every markdown file in the function's default directory and show
# the returned statistics dict.
chunk_result = chunk_documents_from_markdown()
print(chunk_result)

### TODO: Extract Title của file PDF. Làm sao để 

In [113]:
sections = [{"title":"1. B√ÄN DI CHU ·ªò T","level":3,"content":""},{"title":"C√†i ƒë·∫∑ t B√†n di chu ·ªô t","level":3,"content":"-ƒê·ªÉ ƒëi·ªÅ u ch ·ªânh c√†i ƒë·∫∑ t v√† c ·ª≠ ch ·ªâ c ·ªß a b√†n di chu ·ªô t, ho ·∫∑c ƒë·ªÉ t ·∫Ø t b√†n di chu ·ªô t:\n\n-Nh ·∫≠p touchpad settings (c√†i ƒë·∫∑ t b√†n di chu ·ªô t) v√†o √¥ t√¨m ki ·∫ø m tr√™n thanh t√°c v ·ª• , sau ƒë√≥ nh·∫• n enter.\n\n-Ch ·ªç n m ·ªôt c√†i ƒë·∫∑ t.\n\n"},{"title":"ƒê·ªÉ b ·∫≠ t b√†n di chu ·ªô t","level":3,"content":"-Nh ·∫≠p touchpad settings (c√†i ƒë·∫∑ t b√†n di chu ·ªô t) v√†o √¥ t√¨m ki ·∫ø m tr√™n thanh t√°c v ·ª• , sau ƒë√≥ nh·∫• n enter.\n\n-S ·ª≠ d ·ª• ng chu ·ªô t ngo√†i nh ·∫• p v√†o n√∫t touchpad (b√†n di chu ·ªô t).\n\n-ho ·∫∑ c -\n\nNh ·∫• n ph√≠m Tab nhi ·ªÅ u l ·∫ßn ƒë·∫ø n khi con tr ·ªè n ·∫± m tr√™n n√∫t touchpad (b√†n di chu ·ªô t). Sau ƒë√≥, nh·∫•n ph√≠m c√°ch ƒë·ªÉ ch ·ªç n n√∫t.\n\n"},{"title":"VIETTEL AI RACE M√î T ·∫¢ V√ôNG B√ÄN PH√çM M√ÅY T√çNH","level":3,"content":""},{"title":"HP","level":3,"content":"TD655\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE M√î T ·∫¢ V√ôNG B√ÄN PH√çM M√ÅY T√çNH HP","level":3,"content":"TD655\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"2. ƒê√àN","level":3,"content":""},{"title":"VIETTEL AI RACE M√î T ·∫¢ V√ôNG B√ÄN PH√çM M√ÅY T√çNH HP","level":3,"content":"TD655\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"3. N√öT","level":3,"content":"TD655\n\n"},{"title":"4. 
PH√çM ƒê·∫∂ C BI ·ªÜ T","level":3,"content":""},{"title":"VIETTEL AI RACE M√î T ·∫¢ V√ôNG B√ÄN PH√çM M√ÅY T√çNH","level":3,"content":""},{"title":"HP","level":3,"content":"ƒë·ªô Ng ·ªß (ch ·ªâ m ·ªô t s ·ªë s ·∫£ n ph ·∫© m).\n\nKhi m√°y t√≠nh ƒëang ·ªü tr ·∫° ng th√°i Ng ·ªß ƒë√¥ng, nh·∫•n nhanh n√∫t n√†y ƒë·ªÉ tho√°t ch ·∫ø ƒë·ªô Ng ·ªß ƒë√¥ng.\n\nQUAN TR ·ªå NG: Nh ·∫• n v√† gi ·ªØ n√∫t ngu ·ªì n s ·∫Ω l√†m m ·∫• t nhfing th√¥ng tin ch∆∞a l∆∞u.\n\nN ·∫ø u m√°y t√≠nh ng ·ª´ ng ph ·∫£ n h ·ªì i v√† quy tr√¨nh t ·∫Ø t m√°y v√¥ hi ·ªá u, nh ·∫• n v√† nh ·∫• n gifi n√∫t ngu ·ªì n trong √≠t nh ·∫•t 10 gi√¢y ƒë·ªÉ t ·∫Ø t m√°y t√≠nh.\n\nƒê·ªÉ t√¨m hi ·ªÉ u th√™m v ·ªÅ c√°c c√†i ƒë·∫∑ t ngu ·ªìn ƒëi·ªá n, h√£y xem c√°c t√πy ch ·ªç n ngu ·ªìn ƒëi·ªá n c ·ªß a b ·∫° n:\n\nNh ·∫• p chu ·ªô t ph ·∫£ i v√†o bi ·ªÉu t∆∞·ª£ ng Power (Ngu ·ªì n) , r ·ªìi sau ƒë√≥ ch ·ªç n Power Options (T√πy ch ·ªç n Ngu ·ªìn ƒëi·ªá n).\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE M√î T ·∫¢ V√ôNG B√ÄN PH√çM M√ÅY T√çNH HP","level":3,"content":"TD655\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE M√î T ·∫¢ V√ôNG B√ÄN PH√çM M√ÅY T√çNH HP","level":3,"content":"TD655\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"5. PH√çM THAO T√ÅC","level":3,"content":"C√°c ph√≠m thao t√°c s ·∫Ω th ·ª± c thi c√°c ch ·ª©c nƒÉng h·ªá th ·ªë ng sfi d ·ª•ng th∆∞·ªùng xuy√™n nh∆∞ ƒë∆∞·ª£c ƒë·ªãnh nghƒ©a b·ªü i c√°c k√Ω hi ·ªá u bi ·ªÉu t∆∞·ª£ ng tr√™n c√°c ph√≠m ch ·ª©c nƒÉng t ·ª´ f1 ƒë·∫ø n f12. C√°c ph√≠m thao t√°c n√†y s ·∫Ω kh√°c nhau t√πy theo m√°y t√≠nh.\n\nƒê·ªÉ s ·ª≠ d ·ª• ng m ·ªô t ph√≠m thao t√°c, nh ·∫• n fn, r ·ªì i nh ·∫• n m ·ªôt trong c√°c ph√≠m ƒë∆∞·ª£ c li ·ªá t k√™ trong b ·∫£ ng sau.\n\nTD655\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE M√î T ·∫¢ V√ôNG B√ÄN PH√çM M√ÅY T√çNH","level":3,"content":""},{"title":"HP","level":3,"content":""}]
print(sections[3]['title'])

sections = [{"title":"Introduction","level":1,"content":"TD004\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"H∆∞·ªõ ng d ·∫´ n t ·ª± l ·∫Øp ƒêi·ªá n M ·∫∑ t Tr ·ªù i","level":3,"content":"Tr∆∞·ªõ c khi l ·∫Øp ƒë·∫∑t, ng∆∞·ªù i  thi  c√¥ng  s ·∫Ω x√°c nh ·∫≠n ƒë·ªã a h√¨nh, v ·ªã tr√≠,  h∆∞·ªõ ng v√† g√≥c nghi√™ng theo b ·∫£ n thi ·∫ø t k ·∫ø v ·ªõi ƒë·ªã a h√¨nh th ·ª± c t ·∫ø m ·ªô t l ·∫ß n n ·ªØa. Sau khi x√°c ƒë·ªã nh v ·ªã tr√≠ l ·∫Øp ƒë·∫∑ t t ·ª´ ng b ·ªô ph ·∫≠ n s ·∫Ω b ·∫Ø t tay v√†o thi c√¥ng.\n\n"},{"title":"1 C√°c b∆∞·ªõ c l ·∫Øp ƒêi·ªá n M ·∫∑ t tr ·ªù i","level":3,"content":""},{"title":"1.1 L ·∫Øp ƒë·∫∑t khung gi√° ƒë·ª° gi√†n pin","level":3,"content":"M ·ª•c ƒë√≠ch: Khung gi√° ƒë·ª° gi ·ªØ c ·ªë ƒë·ªã nh c√°c t ·∫• m pin m ·∫∑ t tr ·ªùi, ƒë·∫£ m b ·∫£o ƒë·ªô nghi√™ng v√† h∆∞·ªõ ng l ·∫Øp ƒë·∫∑ t t ·ªëi ∆∞u ƒë·ªÉ thu ƒë∆∞·ª£c l∆∞·ª£ ng √°nh s√°ng nhi ·ªÅ u nh ·∫• t.\n\n"},{"title":"¬∑ Ph∆∞∆°ng √°n l·∫Øp ƒë·∫∑ t:","level":3,"content":"o √Åp m√°i: L ·∫Ø p tr ·ª± c ti ·∫ø p tr√™n m√°i nh√†, chi ph√≠ th ·∫• p, ti ·∫ø t ki ·ªá m di ·ªá n t√≠ch.\n\no Gi√° ƒë·ª° ƒë·ªô c l ·∫≠ p: L ·∫Ø p tr√™n khung gi√†n b ·∫±ng th√©p/nh√¥m ƒë·∫∑ t ·ªü s√¢n, v∆∞·ªù n ho ·∫∑ c m√°i b ·∫±ng. 
Ph∆∞∆°ng √°n n√†y t·ªën k√©m h∆°n do ph·∫£ i thi c√¥ng khung, x√† g ·ªì v√† y√™u c ·∫ß u k ·∫ø t c ·∫• u ch ·ªã u t ·∫£ i v ·ªØ ng ch ·∫Ø c, ch ·ªë ng gi√≥ b√£o.\n\nL∆∞u √Ω: C ·∫ß n kh ·∫£o s√°t ƒë·ªãa h√¨nh, ƒë·ªô d ·ªë c m√°i, v ·∫≠ t li ·ªáu m√°i (t√¥n, b√™ t√¥ng, ng√≥i‚Ä¶) ƒë·ªÉ ch ·ªç n ph ·ª• ki ·ªá n li√™n k ·∫ø t ph√π h ·ª£ p.\n\n"},{"title":"1.2 G ·∫Ø n t ·∫•m pin l√™n gi√° ƒë·ª°","level":3,"content":"M ·ª•c ƒë√≠ch: C ·ªë ƒë·ªã nh c√°c t ·∫• m pin v√†o khung, t ·∫°o ƒëi·ªÅ u ki ·ªá n cho vi ·ªá c h ·∫• p th ·ª• √°nh s√°ng m ·∫∑ t tr ·ªù i hi ·ªá u qu ·∫£ .\n\n"},{"title":"¬∑ Kho ·∫£ ng c√°ch k ·ªπ thu ·∫≠ t:","level":3,"content":"o Gi ·ªØ a c√°c t ·∫• m pin: 10mm ƒë·ªÉ tr√°nh gi√£n n ·ªü nhi ·ªá t v√† thu ·∫≠ n ti ·ªá n cho tho√°t n∆∞·ªõ c.\n\no Gi ·ªØ a t ·∫• m pin v√† m√°i: ‚â•100mm ƒë·ªÉ t ·∫°o kh√¥ng gian l∆∞u th√¥ng gi√≥, gi√∫p t·∫£ n nhi ·ªát v√† tƒÉng tu·ªï i th ·ªç pin.\n\nL∆∞u √Ω: S ·ª≠ d ·ª• ng bulong, k ·∫π p pin v√† ph ·ª• ki ·ªán ch√≠nh h√£ng ƒë·ªÉ ƒë·∫£ m b ·∫£ o an to√†n, tr√°nh rung l ·∫Ø c khi c√≥ gi√≥ m ·∫° nh.\n\n"},{"title":"1.3 K ·∫ø t n ·ªë i t ·∫• m pin","level":3,"content":"M ·ª•c ƒë√≠ch: Li√™n k ·∫ø t c√°c t ·∫• m pin th√†nh m ·∫°ch ƒëi·ªán (string) ƒë·ªÉ t ·∫°o ƒëi·ªá n √°p v√† d√≤ng ƒëi·ªá n ph√π h ·ª£ p cho inverter.\n\n"},{"title":"¬∑ C√°ch th ·ª± c hi ·ªá n:","level":3,"content":"o K ·∫ø t n ·ªë i n ·ªë i ti ·∫ø p c√°c t ·∫• m pin b ·∫± ng d√¢y chuy√™n d ·ª• ng (MC4).\n\n1\n\nTD004\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"H∆∞·ªõ ng d ·∫´ n t ·ª± l ·∫Øp ƒêi·ªá n M ·∫∑ t Tr ·ªù i","level":3,"content":"o T√πy theo thi ·∫ø t k ·∫ø , c√°c string s ·∫Ω ƒë∆∞·ª£ c gom v ·ªÅ h ·ªôp combiner box tr∆∞·ªõ c khi ƒë∆∞a v√†o inverter.\n\n"},{"title":"¬∑ L∆∞u √Ω:","level":3,"content":"o B ·ªë tr√≠ string theo h∆∞·ªõng m√°i ƒë·ªÉ gi ·∫£ m t ·ªï n th ·∫• t do che b√≥ng.\n\no D√¢y d ·∫´ n c ·∫ß n c√≥ ti ·∫ø t di ·ªán ƒë·ªß l ·ªõ n, ch ·ªë ng tia UV v√† ch ·ªë ng th ·∫• m.\n\n"},{"title":"1.4 L 
·∫Øp ƒë·∫∑ t bi ·∫ø n t ·∫ß n (Inverter)","level":3,"content":"M ·ª•c ƒë√≠ch: Bi ·∫øn ƒë·ªïi d√≤ng ƒëi·ªá n DC t ·ª´ t ·∫• m pin th√†nh AC s ·ª≠ d ·ª• ng cho thi ·∫ø t b ·ªã ƒëi·ªán trong gia ƒë√¨nh/doanh nghi·ªá p.\n\n"},{"title":"¬∑ Y√™u c ·∫ß u v ·ªã tr√≠:","level":3,"content":"o Cao r√°o, tho√°ng m√°t, tr√°nh m∆∞a tr·ª± c ti ·∫ø p, ·∫©m ∆∞·ªõ t, nhi ·ªát ƒë·ªô cao.\n\no C√≥ kho ·∫£ ng tr ·ªëng ƒë·ªÉ d ·ªÖ d√†ng quan s√°t m√†n h√¨nh hi ·ªÉ n th ·ªã v√† thao t√°c khi c ·∫ß n thi ·∫ø t.\n\nL∆∞u √Ω: Tr√°nh l ·∫Ø p g ·∫ß n b ·∫ø p, b√¨nh gas ho ·∫∑ c khu v ·ª±c c√≥ nguy c∆° ch√°y n·ªï .\n\n"},{"title":"1.5 L ·∫Øp ƒë·∫∑ t thi ·∫ø t b ·ªã ch ·ªëng ph√°t ng∆∞·ª£c l∆∞·ªõ i","level":3,"content":"M ·ª•c ƒë√≠ch: ƒê·∫£ m b ·∫£ o h ·ªá th ·ªë ng ch ·ªâ b√°m t ·∫£i v√† kh√¥ng ph√°t ƒëi·ªán ng∆∞·ª£c l√™n l∆∞·ªõ i ƒëi·ªá n qu ·ªë c gia (n ·∫øu kh√¥ng ƒë∆∞·ª£ c ph√©p).\n\n"},{"title":"¬∑ C√°ch th ·ª± c hi ·ªá n:","level":3,"content":"o G ·∫Ø n CT (Current Transformer) ƒë·ªÉ ƒëo d√≤ng ƒëi·ªá n.\n\no K ·∫ø t n ·ªë i Meter ƒë·ªÉ g ·ª≠ i t√≠n hi ·ªá u v ·ªÅ inverter.\n\nL∆∞u √Ω: B∆∞·ªõ c n√†y r ·∫• t quan tr ·ªçng ƒë·ªÉ h ·ªá th ·ªë ng v ·∫≠ n h√†nh an to√†n, tr√°nh vi ph ·∫° m quy ƒë·ªã nh c ·ªßa ng√†nh ƒëi·ªá n.\n\n"},{"title":"1.6 C ·∫• p ngu ·ªìn PV, AC, pin l∆∞u tr·ªØ v ·ªõ i bi ·∫ø n t ·∫ß n","level":3,"content":"M ·ª•c ƒë√≠ch: Kh ·ªüi ƒë·ªô ng h ·ªá th ·ªë ng v√† ki ·ªÉ m tra kh ·∫£ nƒÉng ho·∫°t ƒë·ªô ng c ·ªß a c√°c th√†nh ph ·∫ß n.\n\n"},{"title":"¬∑ Tr√¨nh t ·ª± th ·ª± c hi ·ªá n:","level":3,"content":"ƒê·∫£ m b ·∫£ o t ·∫• t c ·∫£ CB v√† c√¥ng t ·∫Ø c ·ªü tr ·∫° ng th√°i OFF .\n\nB ·∫≠ t c√¥ng t ·∫Ø c ON tr√™n inverter .\n\n2\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"H∆∞·ªõ ng d ·∫´ n t ·ª± l ·∫Øp ƒêi·ªá n M ·∫∑ t Tr ·ªù i","level":3,"content":"TD004\n\nL ·∫ß n ban h√†nh: 1\n\nB ·∫≠ t CB DC (t ·ª´ pin v ·ªÅ inverter), ch ·ªù kho ·∫£ ng 60 gi√¢y ƒë·ªÉ inverter nh ·∫≠ n t√≠n hi ·ªá u.\n\nB ·∫≠ t CB AC (t ·ª´ inverter ra t ·∫£i/l∆∞·ªõ i).\n\nN ·∫øu 
c√≥ pin l∆∞u tr·ªØ , b ·∫≠ t CB Battery cu ·ªë i c√πng.\n\nL∆∞u √Ω: Lu√¥n ƒë·ªÉ kho ·∫£ ng ngh ·ªâ 1 -2 ph√∫t gi ·ªØ a c√°c l ·∫ß n c ·∫• p ngu ·ªì n ƒë·ªÉ tr√°nh s ·ªë c ƒëi·ªá n cho thi ·∫ø t b ·ªã .\n\n"},{"title":"1.7 K ·∫ø t n ·ªë i Wifi v ·ªõ i bi ·∫ø n t ·∫ß n","level":3,"content":"M ·ª•c ƒë√≠ch: Gi√∫p gi√°m s√°t s ·∫£n l∆∞·ª£ ng v√† t√¨nh tr ·∫° ng ho ·∫°t ƒë·ªô ng c ·ªß a h ·ªá th ·ªë ng qua ƒëi·ªá n tho ·∫° i.\n\n"},{"title":"¬∑ C√°ch th ·ª± c hi ·ªá n:","level":3,"content":"o T√πy theo h√£ng inverter (Huawei, Sungrow, Growatt, Solis‚Ä¶), c√†i ƒë·∫∑ t App chuy√™n d ·ª• ng.\n\no K ·∫ø t n ·ªëi Wifi theo h∆∞·ªõ ng d ·∫´ n t ·ª´ nh√† s ·∫£ n xu ·∫• t.\n\nL∆∞u √Ω: C√°c nh√† ph√¢n ph ·ªëi (nh∆∞ DHC Solar) th∆∞·ªù ng h ·ªó tr ·ª£ k ·ªπ thu ·∫≠ t t ·ª´ xa trong b∆∞·ªõ c n√†y.\n\n"},{"title":"1.8 V ·∫≠ n h√†nh, b ·∫£ o tr√¨","level":3,"content":"Theo d√µi v ·∫≠ n h√†nh:\n\no Qua App, ng∆∞·ªù i d√πng c√≥ th ·ªÉ bi ·∫ø t: s ·∫£n l∆∞·ª£ng ƒëi·ªá n, c√¥ng su ·∫• t ti√™u th ·ª•, ƒëi·ªá n b√π l∆∞·ªõ i, ph ·∫ßn trƒÉm pin l∆∞u tr·ªØ , c ·∫£ nh b√°o l ·ªó i.\n\n"},{"title":"¬∑ B ·∫£o tr√¨ ƒë·ªã nh k ·ª≥ :","level":3,"content":"o V ·ªá sinh b ·ªÅ m ·∫∑ t t ·∫• m pin b ·∫±ng n∆∞·ªõ c s ·∫°ch, khƒÉn m·ªÅ m ho ·∫∑ c d ·ª• ng c ·ª• chuy√™n d ·ª• ng (tr√°nh d√πng h√≥a ch ·∫• t m ·∫° nh).\n\no C ·∫Ø t t ·ªâ a c√¢y c ·ªë i, d ·ªç n l√° r ·ª•ng ƒë·ªÉ kh√¥ng che b√≥ng t ·∫• m pin.\n\no Ki ·ªÉ m tra d√¢y d ·∫´n, CB, inverter ƒë·ªã nh k ·ª≥ 6 -12 th√°ng/l ·∫ß n.\n\n"},{"title":"2 L∆∞u √Ω khi t ·ª± l ·∫Øp ƒëi·ªán nƒÉng l∆∞·ª£ ng m ·∫∑ t tr ·ªù i","level":3,"content":"L∆∞u √Ω 1 : Ch ·ªâ c ·∫ß n m ·ªô t t ·∫• m pin m ·∫∑ t tr ·ªù i b ·ªã b√≥ng r√¢m che b√≥ng c√≥ th ·ªÉ khi ·∫ø n c ·∫£ h ·ªá th ·ªë ng ƒëi·ªá n m ·∫∑ t  tr ·ªù i  gi ·∫£ m hi ·ªá u su ·∫•t.  
ƒê·ªì ng th ·ªù i  c ·∫ßn ƒë·∫£ m b ·∫£ o r ·∫± ng n ·∫øu m√πa thay ƒë·ªïi th√¨ c≈©ng kh√¥ng xu ·∫• t hi ·ªán b√≥ng r√¢m trong t∆∞∆°ng lai.\n\n3\n\nTD004\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"H∆∞·ªõ ng d ·∫´ n t ·ª± l ·∫Øp ƒêi·ªá n M ·∫∑ t Tr ·ªù i","level":3,"content":"L ·∫ß n ban h√†nh: 1\n\nL∆∞u √Ω 2 : N ·∫ø u di ·ªá n t√≠ch m√°i r ·ªô ng khi l ·∫Øp ƒë·∫∑ t c√°c t ·∫• m pin m ·∫∑ t tr ·ªù i n√™n l ·∫Ø p 1 v√≠ tr ·ªã nh ·∫• t ƒë·ªã nh, ch ·ª´ a ra m ·ªô t  kho ·∫£ng kh√¥ng gian ƒë√°ng k·ªÉ ƒë·ªÉ c√≥  th ·ªÉ b ·ªï sung th√™m nhi ·ªÅ u t ·∫• m pin nƒÉng l∆∞·ª£ ng m ·∫∑ t tr ·ªùi trong t∆∞∆°ng lai.\n\nL∆∞u √Ω 3 : C ·∫ß n c√≥ l ·ªëi ƒëi l·∫°i ƒë·ªÉ c√≥ th ·ªÉ v ·ªá sinh t ·∫• m pin, b ·∫£ o tr√¨, b ·∫£o d∆∞·ª°ng ƒë·ªã nh k ·ª≥ .\n\nL∆∞u √Ω 4 : ƒê·∫£ m b ·∫£ o c√°c d√¢y d ·∫´ n c ·∫ßn ƒë∆∞·ª£c c√°ch ƒëi·ªá n v√† ch ·ªë ng th ·∫•m. ƒê·ªÉ tr√°nh ƒëi·ªá n gi ·∫≠ t ta c≈©ng c·∫ß n k ·∫ø t n ·ªë i d√¢y ti ·∫øp ƒë·∫• t.\n\nL∆∞u √Ω 5 : Khung gi√° ƒë·ª° t ·∫• m pin theo thi ·∫ø t k ·∫ø k ·ªπ thu ·∫≠ t. 
Kh√¥ng n√™n t ·ª± ch ·∫ø c√°c lo ·∫° i k ·∫π p, gi√° ƒë·ª° kh√¥ng ƒë√∫ng k·ªπ thu ·∫≠ t.\n\nL∆∞u √Ω 6 : H√£y th ·ª± c hi ·ªán theo c√°c h∆∞·ªõ ng d ·∫´n ƒë∆∞·ª£c ƒëi k√®m thi·∫ø t b ·ªã c ·ªß a nh√† s ·∫£ n xu ·∫• t.\n\n"},{"title":"3 B ·∫£ng so s√°nh ƒêi·ªá n m ·∫∑ t tr ·ªùi v√† ƒêi·ªán l∆∞·ªõi nh√† n∆∞·ªõ c","level":3,"content":"4\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"H∆∞·ªõ ng d ·∫´ n t ·ª± l ·∫Øp ƒêi·ªá n M ·∫∑ t Tr ·ªù i","level":3,"content":"TD004\n\nL ·∫ß n ban h√†nh: 1\n\nTi√™u ch√≠\n\n"},{"title":"ƒêi·ªá n m ·∫∑ t tr ·ªù i","level":3,"content":""},{"title":"ƒêi·ªán l∆∞·ªõi nh√† n∆∞·ªõ c (EVN)","level":3,"content":"ti ·ªá n v√† ·ªïn ƒë·ªãnh h∆°n trong ng·∫Ø n h ·∫° n, song chi ph√≠ ph ·ª• thu ·ªô c v√†o gi√° ƒëi·ªá n v√† √≠t mang t√≠nh ch ·ªß ƒë·ªô ng.\n\n5\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"H∆∞·ªõ ng d ·∫´ n t ·ª± l ·∫Øp ƒêi·ªá n M ·∫∑ t Tr ·ªù i","level":3,"content":"TD004\n\nL ·∫ß n ban h√†nh: 1\n\nTi√™u ch√≠\n\nƒêi·ªá n m ·∫∑ t tr ·ªù i\n\nƒêi·ªán l∆∞·ªõi nh√† n∆∞·ªõ\n\nc (EVN)\n\n6\n\n"}]
print(sections[3]['title'])

sections = [{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\nC·∫§U H√åNH V√Ä TH√îNG S·ªê K·ª∏ THU·∫¨T CHI TI·∫æT C·ª¶A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢C HUY·∫æT ƒê·ªòNG V√Ä M√ÅY H·∫† TH√ÇN NHI·ªÜT CH·ªà HUY\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ 
C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† 
TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà 
HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY","level":3,"content":"TD636\n\nL ·∫ß n ban h√†nh: 1\n\n"}]
print(sections[3]['title'])

# for section in sections:
#     print(f"Original: {section['title']}")

# sections = [{"title":"VIETTEL AI RACE M√ÄN H√åNH BI·ªÇU ƒê·ªí S·∫¢N XU·∫§T C·ª¶A","level":3,"content":"TD662\n\n"},{"title":"M√ÄN H√åNH C·∫¢M ·ª®NG E-MULTI","level":3,"content":"L·∫ßn ban h√†nh: 1\n\n"},{"title":"C√°c th√†nh ph·∫ßn tr√™n m√†n h√¨nh Bi·ªÉu ƒë·ªì s·∫£n xu·∫•t","level":3,"content":""},{"title":"C√°c th√†nh ph·∫ßn tr√™n m√†n h√¨nh","level":3,"content":""},{"title":"M√¥ t·∫£","level":3,"content":"M√†n h√¨nh n√†y hi·ªÉn th·ªã bi·ªÉu ƒë·ªì c·ªßa m·ªôt bi·∫øn ƒë∆∞·ª£c ch·ªçn. T√™n bi·∫øn v√† c√°c gi√° tr·ªã hi·ªán t·∫°i ƒë∆∞·ª£c tr√¨nh b√†y b√™n d∆∞·ªõi bi·ªÉu ƒë·ªì. C√≥ th·ªÉ s·ª≠ d·ª•ng c√°c ch·ª©c nƒÉng sau ƒë√¢y:\n\nƒê·ªãnh nghƒ©a bi·ªÉu ƒë·ªì tham chi·∫øu\n\nHi·ªÉn th·ªã bi·ªÉu ƒë·ªì xu h∆∞·ªõng cu·ªëi c√πng\n\nTheo d√µi b·∫±ng mi·ªÅn dung sai ƒë√£ ch·ªçn ƒêi·ªÉm chuy·ªÉn ti·∫øp (l√† ƒëi·ªÉm m√† t·∫°i ƒë√≥ h·ªá th·ªëng thay ƒë·ªïi t·ª´ √°p su·∫•t phun sang √°p su·∫•t gi·ªØ) hi·ªÉn th·ªã d∆∞·ªõi d·∫°ng ƒë∆∞·ªùng d·ªçc m√†u ng·ªçc lam. Ph·∫°m vi ƒëi·ªÉm chuy·ªÉn ti·∫øp hi·ªÉn th·ªã d∆∞·ªõi d·∫°ng thanh m√†u tr·∫Øng ·ªü c·∫°nh tr√™n c·ªßa s∆° ƒë·ªì.\n\nTrong quy tr√¨nh thi·∫øt l·∫≠p ƒë√∫ng c√°ch, thanh n√†y s·∫Ω r·∫•t h·∫πp. Gi√° tr·ªã trung b√¨nh c·ªßa t·∫•t c·∫£ c√°c ƒëi·ªÉm chuy·ªÉn ti·∫øp hi·ªÉn th·ªã d∆∞·ªõi d·∫°ng ƒë∆∞·ªùng m√†u ƒëen trong thanh m√†u tr·∫Øng. 
C√≥ th·ªÉ ƒë·∫∑t ch·∫ø ƒë·ªô hi·ªÉn th·ªã l√†:\n\nTh·ªùi gian (bi·ªÉu ƒë·ªì y/t)\n\nV·ªã tr√≠ (bi·ªÉu ƒë·ªì y/x)\n\nPh√¢n t√°ch (d·∫°ng h·ªón h·ª£p, c·∫£ hai lo·∫°i bi·ªÉu ƒë·ªì)\n\nTD662\n\nL·∫ßn ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE M√ÄN H√åNH BI·ªÇU ƒê·ªí S·∫¢N XU·∫§T C·ª¶A","level":3,"content":""},{"title":"M√ÄN H√åNH C·∫¢M ·ª®NG E-MULTI","level":3,"content":""},{"title":"¬∑ C√°c n√∫t menu ph√≠a d∆∞·ªõi","level":3,"content":""},{"title":"VIETTEL AI RACE M√ÄN H√åNH BI·ªÇU ƒê·ªí S·∫¢N XU·∫§T C·ª¶A M√ÄN H√åNH C·∫¢M ·ª®NG E-MULTI","level":3,"content":"TD662\n\nL·∫ßn ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE M√ÄN H√åNH BI·ªÇU ƒê·ªí S·∫¢N XU·∫§T C·ª¶A M√ÄN H√åNH C·∫¢M ·ª®NG E-MULTI","level":3,"content":"TD662\n\nL·∫ßn ban h√†nh: 1\n\nTD662\n\nL·∫ßn ban h√†nh: 1\n\n"},{"title":"VIETTEL AI RACE","level":3,"content":""},{"title":"M√ÄN H√åNH BI·ªÇU ƒê·ªí S·∫¢N XU·∫§T C·ª¶A M√ÄN H√åNH C·∫¢M ·ª®NG E-MULTI","level":3,"content":""}]
# for section in sections:
#     print(f"Original: {section['title']}")

VIETTEL AI RACE M√î T ·∫¢ V√ôNG B√ÄN PH√çM M√ÅY T√çNH
1 C√°c b∆∞·ªõ c l ·∫Øp ƒêi·ªá n M ·∫∑ t tr ·ªù i
C ·∫§ U H√åNH V√Ä TH√îNG S ·ªê K ·ª∏ THU ·∫¨ T CHI TI ·∫æ T C ·ª¶ A M√ÅY SI√äU √ÇM ICU ƒêO ƒê∆Ø·ª¢ C HUY ·∫æT ƒê·ªò NG V√Ä M√ÅY H ·∫† TH√ÇN NHI ·ªÜ T CH ·ªà HUY


### Bước 4

In [None]:
import os
from opensearchpy import OpenSearch
from opensearchpy.helpers import bulk
from src.services.factories import make_embeddings_service

# --- Connection settings, each overridable through an environment variable ---
OPENSEARCH_URL = os.getenv("OPENSEARCH_URL", "http://localhost:9200")
OPENSEARCH_INDEX = os.getenv("OPENSEARCH_INDEX", "mcq-documents")
EMBEDDING_DIM = int(os.getenv("EMBEDDING_DIM", "768"))

# --- OpenSearch client ---
client = OpenSearch(OPENSEARCH_URL)

# --- Remove a previous index of the same name; 400/404 responses are ignored ---
client.indices.delete(index=OPENSEARCH_INDEX, ignore=[400, 404])

# --- Assemble the k-NN index definition piece by piece, then create the index ---
# HNSW graph configuration used by the vector field.
hnsw_method = {
    "name": "hnsw",
    "space_type": "l2",
    "engine": "nmslib",
    "parameters": {
        "ef_construction": 128,
        "m": 24,
    },
}

# Field mappings; the vector field uses OpenSearch's `knn_vector` type with a
# `dimension` key (not `dense_vector` / `dims`).
field_mappings = {
    "chunk_id": {"type": "keyword"},
    "document_id": {"type": "keyword"},
    "chunk_text": {"type": "text"},
    "embedding": {
        "type": "knn_vector",
        "dimension": EMBEDDING_DIM,
        "method": hnsw_method,
    },
    "chunk_index": {"type": "integer"},
    "section_name": {"type": "keyword"},
    "chunk_type": {"type": "keyword"},
}

# Index-level settings: turn k-NN on for this index.
index_settings = {
    "index": {
        "knn": True,
        "knn.algo_param.ef_search": 100,
    },
}

client.indices.create(
    index=OPENSEARCH_INDEX,
    body={
        "settings": index_settings,
        "mappings": {"properties": field_mappings},
    },
)

print(f"‚úÖ Index '{OPENSEARCH_INDEX}' created successfully!")

In [None]:
# --- H√†m generate embeddings v√† bulk index ---
async def generate_embeddings_and_index(chunk_ids: list) -> dict:
    """Embed the selected chunks, store the vectors, and bulk-index them.

    The chunks whose ``chunk_id`` is in ``chunk_ids`` are loaded, their
    ``chunk_text`` is embedded in one call, each chunk row is updated
    (``embedding_status``, ``embedding``, ``chunk_metadata["embedding"]``),
    the session is committed, and one document per chunk is written to the
    ``OPENSEARCH_INDEX`` index through the bulk helper.

    Args:
        chunk_ids: Chunk identifiers to process.

    Returns:
        ``{"processed": <chunks loaded>, "indexed": <bulk success count>}``;
        both are 0 when no matching chunk exists.
    """
    # Nothing to look up: skip building the embeddings service and the query.
    if not chunk_ids:
        print("No chunks found")
        return {"processed": 0, "indexed": 0}

    embeddings_service = make_embeddings_service()

    with get_db_context() as db:
        chunks = db.query(DocumentChunk).filter(DocumentChunk.chunk_id.in_(chunk_ids)).all()
        if not chunks:
            print("No chunks found")
            return {"processed": 0, "indexed": 0}

        texts = [chunk.chunk_text for chunk in chunks]
        embeddings = await embeddings_service.embed_texts(texts)

        bulk_ops = []
        for chunk, embedding in zip(chunks, embeddings):
            # Convert once and reuse instead of calling .tolist() twice.
            vector = embedding.tolist()

            chunk.embedding_status = "completed"
            chunk.embedding = vector
            # Assign a NEW dict rather than mutating the existing one in place:
            # an in-place change to a JSON column value is only noticed by the
            # ORM when the column is wrapped in a mutation-tracking type, so the
            # original in-place write could be dropped silently on commit.
            chunk.chunk_metadata = {
                **(chunk.chunk_metadata or {}),
                "embedding": list(vector),
            }

            # NOTE(review): whether this attribute access issues an extra query
            # per chunk depends on how the relationship is loaded — confirm.
            doc = chunk.document

            bulk_ops.append({
                "_op_type": "index",
                "_index": OPENSEARCH_INDEX,
                "_id": chunk.chunk_id,
                "_source": {
                    "chunk_id": chunk.chunk_id,
                    "document_id": chunk.document_id,
                    "chunk_index": chunk.chunk_index,
                    "chunk_text": chunk.chunk_text,
                    "section_name": chunk.section_name,
                    "chunk_type": chunk.chunk_type,
                    "embedding": chunk.embedding,
                    "document_file_name": doc.filename if doc else None,
                    "document_title": doc.title if doc else None
                }
            })

        # NOTE(review): rows are committed as "completed" before the bulk call,
        # so a failing bulk request leaves them marked completed but unindexed.
        db.commit()

        success, _ = bulk(client, bulk_ops, refresh=True)
        print(f"‚úÖ Indexed {success}/{len(chunks)} chunks")

    return {"processed": len(chunks), "indexed": success}

In [None]:
embed_index_result = await generate_embeddings_and_index(chunk_result["chunk_ids"])
print(embed_index_result)

### Bước 6

In [None]:
"M·ª•c ƒë√≠ch c·ªßa t·ª´ ""lu√¥n lu√¥n"" v√† ""n√†o ƒë√≥"", theo danh s√°ch th·∫©m ƒë·ªãnh thu·∫≠t ng·ªØ trong t√†i li·ªáu Public_061, l√† h√†m √Ω t√≠nh kh√¥ng m∆° h·ªì v√† t√≠nh ki·ªÉm ch·ª©ng c·ªßa ƒë·∫∑c t·∫£?","T√≠nh nh·∫≠p nh·∫±ng, t√≠nh thi·∫øu ƒë·ªãnh l∆∞·ª£ng","T√≠nh tuy·ªát ƒë·ªëi, t√≠nh thuy·∫øt ph·ª•c","T√≠nh tuy·ªát ƒë·ªëi, t√≠nh nh·∫≠p nh·∫±ng","T√≠nh thuy·∫øt ph·ª•c, t√≠nh nh·∫≠p nh·∫±ng"

In [None]:
# Search query: one question line followed by four "- " option lines.
# The stray `",` / `"` that trailed each option (left over from a pasted
# dict literal) are removed so they are not sent as part of the query.
query = """
ƒê√¢u kh√¥ng ph·∫£i nguy√™n t·∫Øc c·ªët l√µi c·ªßa kh·∫£o s√°t ƒë·∫∑c t·∫£ m·ª©c cao theo t√†i li·ªáu Public_061?
- H√£y l√† kh√°ch h√†ng c·ªßa s·∫£n ph·∫©m
- H√£y nghi√™n c·ª©u c√°c chu·∫©n v√† h∆∞·ªõng d·∫´n hi·ªán h√†nh
- H√£y xem x√©t v√† ki·ªÉm th·ª≠ c√°c ph·∫ßn m·ªÅm t∆∞∆°ng t·ª±
- H√£y ph·ªëi h·ª£p v·ªõi ƒë·ªôi nh√≥m v√† kh√°ch h√†ng
"""

In [None]:
# Search query: one question line followed by four "- " option lines.
# The question was copied from a CSV row where quotes are escaped by doubling;
# the doubled quotes are collapsed back to single quotes here.
query = """
M·ª•c ƒë√≠ch c·ªßa t·ª´ "lu√¥n lu√¥n" v√† "n√†o ƒë√≥", theo danh s√°ch th·∫©m ƒë·ªãnh thu·∫≠t ng·ªØ trong t√†i li·ªáu Public_061, l√† h√†m √Ω t√≠nh kh√¥ng m∆° h·ªì v√† t√≠nh ki·ªÉm ch·ª©ng c·ªßa ƒë·∫∑c t·∫£?
- T√≠nh nh·∫≠p nh·∫±ng, t√≠nh thi·∫øu ƒë·ªãnh l∆∞·ª£ng
- T√≠nh tuy·ªát ƒë·ªëi, t√≠nh thuy·∫øt ph·ª•c
- T√≠nh tuy·ªát ƒë·ªëi, t√≠nh nh·∫≠p nh·∫±ng
- T√≠nh thuy·∫øt ph·ª•c, t√≠nh nh·∫≠p nh·∫±ng
"""

In [None]:
# Search query about "Logic bomb": a leading newline, the question line, then
# four "- " option lines, each terminated by a newline.
query = (
    "\n"
    "M·ªôt trong nh·ªØng ƒë·∫∑c ƒëi·ªÉm khi·∫øn Logic bomb nguy hi·ªÉm l√† g√¨?\n"
    "- Lu√¥n xu·∫•t hi·ªán d∆∞·ªõi d·∫°ng file th·ª±c thi .exe\n"
    "- D·ªÖ d√†ng ph√°t hi·ªán b·∫±ng m·∫Øt th∆∞·ªùng\n"
    "- C√≥ kh·∫£ nƒÉng ·∫©n trong m√£ ngu·ªìn ph·∫ßn m·ªÅm h·ª£p ph√°p\n"
    "- Kh√¥ng th·ªÉ k√≠ch ho·∫°t b·∫±ng s·ª± ki·ªán ng∆∞·ªùi d√πng\n"
)

In [None]:
import requests
# POST the current `query` to the search endpoint and report the HTTP status.
# `r` is kept as a notebook global so a later cell can inspect the body.
url = "http://localhost:8000/api/v1/search/"
payload = dict(query=query, top_k=30, use_hybrid=True)
r = requests.post(url=url, json=payload, timeout=30)
print(r.status_code)

In [None]:
from pprint import pprint
# Pretty-print the decoded JSON body of `r`, the response object created by the
# search request cell.
pprint(r.json())

In [None]:
# /Users/dohainam/test_rag_ask_api.py
import requests
import json
import os
from dotenv import load_dotenv

# 1. Load environment variables from a .env file, if there is one.
load_dotenv()

# API configuration: API_BASE_URL is read from the environment, falling back to
# the local default below.
API_BASE_URL = os.getenv("API_BASE_URL", "http://localhost:8000")
ASK_API_ENDPOINT = f"{API_BASE_URL}/api/v1/ask"
TIMEOUT = 300  # Request timeout in seconds; raise it if the LLM is slow.

print(f"Ki·ªÉm tra API RAG t·∫°i: {ASK_API_ENDPOINT}\n")

# 2. Sample question. Change the question, the options and (optionally)
#    source_folder to match your own data.
question_to_ask = "ƒê√¢u kh√¥ng ph·∫£i nguy√™n t·∫Øc c·ªët l√µi c·ªßa kh·∫£o s√°t ƒë·∫∑c t·∫£ m·ª©c cao theo t√†i li·ªáu Public_061?"
options_for_question = {
    "A": "H√£y l√† kh√°ch h√†ng c·ªßa s·∫£n ph·∫©m",
    "B": "H√£y nghi√™n c·ª©u c√°c chu·∫©n v√† h∆∞·ªõng d·∫´n hi·ªán h√†nh",
    "C": "H√£y xem x√©t v√† ki·ªÉm th·ª≠ c√°c ph·∫ßn m·ªÅm t∆∞∆°ng t·ª±",
    "D": "H√£y ph·ªëi h·ª£p v·ªõi ƒë·ªôi nh√≥m v√† kh√°ch h√†ng"
}
# To filter by source_folder, uncomment the next line and change its value.
# source_folder_filter = "my_research_papers"

payload = {
    "question": question_to_ask,
    "options": options_for_question,
    "top_k": 5,           # Number of chunks to retrieve from OpenSearch
    "use_hybrid": True    # Hybrid search (BM25 + vector + RRF)
}

# Add source_folder to the payload when it has been defined above.
# if 'source_folder_filter' in locals():
#     payload['source_folder'] = source_folder_filter

print("Payload g·ª≠i ƒëi:")
# Use print, not pprint: pprint shows the repr of a string (quotes, escaped
# "\n"), which defeats the indent=2 formatting of the JSON dump.
print(json.dumps(payload, indent=2, ensure_ascii=False))

# 3. Send the request to the API.
# Bind `response` before the try block so the error handler can test it even
# when requests.post() itself raises (connection refused, timeout, ...) instead
# of hitting a NameError or a stale value from an earlier run.
response = None
try:
    print(f"\nƒêang g·ª≠i y√™u c·∫ßu t·ªõi {ASK_API_ENDPOINT}...")
    response = requests.post(ASK_API_ENDPOINT, json=payload, timeout=TIMEOUT)
    response.raise_for_status()  # Raise for 4xx / 5xx status codes.

    result = response.json()

    # 4. Display the result.
    print("\n--- K·∫æT QU·∫¢ T·ª™ RAG API ---")
    print(f"C√¢u h·ªèi: {result.get('question', 'N/A')}")
    print(f"T√πy ch·ªçn ƒë√£ h·ªèi: {result.get('options', 'N/A')}")
    print(f"ƒê√°p √°n d·ª± ƒëo√°n: {result.get('predicted_option', 'N/A')}")
    print(f"VƒÉn b·∫£n tr·∫£ l·ªùi: {result.get('answer_text', 'N/A')}")
    print(f"L√Ω do: {result.get('reasoning', 'N/A')}")
    print(f"ƒê·ªô tin c·∫≠y: {result.get('confidence', 'N/A')}")
    print(f"Ch·∫ø ƒë·ªô t√¨m ki·∫øm: {result.get('search_mode', 'N/A')}")
    print(f"M√¥ h√¨nh LLM: {result.get('model', 'N/A')}")
    print(f"T·ª´ b·ªô nh·ªõ ƒë·ªám: {result.get('from_cache', False)}")
    # `or {}` also covers a "timing" key that is present but null.
    print(f"Th·ªùi gian: {(result.get('timing') or {}).get('total_ms', 'N/A')}ms")

    sources = result.get('sources', [])
    if sources:
        print("\n--- NGU·ªíN T√ÄI LI·ªÜU (Sources) ---")
        for i, source in enumerate(sources):
            # Apply the float format only to real numbers; formatting the
            # 'N/A' fallback with ':.4f' would raise ValueError.
            score = source.get('score')
            score_text = f"{score:.4f}" if isinstance(score, (int, float)) else "N/A"

            print(f"  Source {i+1}:")
            print(f"    Document ID: {source.get('document_id', 'N/A')}")
            print(f"    Chunk ID: {source.get('chunk_id', 'N/A')}")
            print(f"    Section: {source.get('section_name', 'N/A')}")
            print(f"    Score: {score_text}")
            print(f"    Preview: {source.get('preview', 'N/A')[:150]}...")  # Cap the preview length.
            print("-" * 20)
    else:
        print("\n‚ö†Ô∏è Kh√¥ng c√≥ ngu·ªìn t√†i li·ªáu n√†o ƒë∆∞·ª£c tr·∫£ v·ªÅ.")

except json.JSONDecodeError:
    # Listed before RequestException: recent requests versions raise a decode
    # error that is also a RequestException, which would otherwise shadow this
    # handler. Only response.json() raises it here, so `response` is bound.
    print(f"\n‚ùå L·ªói khi ph√¢n t√≠ch ph·∫£n h·ªìi JSON. Ph·∫£n h·ªìi th√¥:\n{response.text}")
except requests.exceptions.RequestException as e:
    print(f"\n‚ùå L·ªói khi g·ªçi API: {e}")
    if response is not None:
        print(f"M√£ tr·∫°ng th√°i HTTP: {response.status_code}")
        print(f"Ph·∫£n h·ªìi l·ªói t·ª´ server: {response.text}")
except Exception as e:
    print(f"\n‚ùå ƒê√£ x·∫£y ra l·ªói kh√¥ng mong mu·ªën: {e}")