In [6]:
import os
import threading

import gradio as gr
from unsloth import FastLanguageModel
import torch
from transformers import TextIteratorStreamer

# Configuration
max_seq_length = 512
dtype = None
load_in_4bit = True

# Location of the fine-tuned checkpoint. Set the DENTALGPT_MODEL_DIR environment
# variable to run the notebook on another machine; when it is unset the original
# local path is used, so existing setups keep working unchanged.
model_path = os.environ.get(
    "DENTALGPT_MODEL_DIR",
    r"D:\Folder_HocTap\Đồ án tốt nghiệp\Code\Thesis_FineTune_MoE_ChatBotDental\Model\Model_demo_40K\DentalGPT_demo_40k",
)

# Load the model and its tokenizer with the settings above, then switch the
# model to Unsloth's inference mode.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_path,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)
FastLanguageModel.for_inference(model)

# Instruction text (in Vietnamese) placed at the start of every prompt built by
# build_prompt_cot.
system_prompt = (
    "Hãy là một chuyên gia về nha khoa đã có nhiều năm kinh nghiệm và có thể trả lời mọi câu hỏi một cách dễ hiểu "
    "cho người Việt Nam từ chuyên sâu đến đơn giản, dễ hiểu. Hãy trả lời câu hỏi dưới đây một cách đơn giản, đầy đủ, "
    "dễ hiểu, đúng trọng tâm và đúng ngữ cảnh bằng tiếng Việt:"
)

def build_prompt_cot(user_input, cot_goal, cot_reasoning, cot_justification):
    """Assemble the chain-of-thought prompt text for one question.

    The prompt consists of a ``<|user|>`` section holding the module-level
    ``system_prompt``, a blank line, and a ``<|thought|>`` section with one
    labelled line each for the question, goal, steps and reasoning.

    Args:
        user_input: The question to ask.
        cot_goal: Text placed after the goal label.
        cot_reasoning: Text placed after the steps label.
        cot_justification: Text placed after the reasoning label.

    Returns:
        The prompt as a single string terminated by a newline.
    """
    prompt_lines = [
        "<|user|>",
        f"{system_prompt}",
        "",
        "<|thought|>",
        f"Câu hỏi: {user_input}",
        f"Mục tiêu: {cot_goal}",
        f"Bước: {cot_reasoning}",
        f"Suy luận: {cot_justification}",
    ]
    return "\n".join(prompt_lines) + "\n"

def process_stream(stream_text):
    """Extract the assistant's answer from the text streamed so far.

    Args:
        stream_text: Accumulated generated text.

    Returns:
        ``None`` while the ``<|assistant|>`` marker has not appeared. Otherwise
        the stripped text between the first marker and the next one (or the end
        of the text), with a leading "câu trả lời:" label removed when present
        (matched case-insensitively).
    """
    marker = "<|assistant|>"
    label = "câu trả lời:"

    if marker not in stream_text:
        return None

    # Index 1 is the segment right after the first marker.
    answer = stream_text.split(marker)[1].strip()
    if answer.lower().startswith(label):
        answer = answer[len(label):].strip()
    return answer

def chatbot_stream(user_input, cot_goal, cot_reasoning, cot_justification):
    """Generate a reply for one question and stream it incrementally.

    The prompt is built with ``build_prompt_cot``, tokenized, and passed to
    ``model.generate`` on a background thread; generated text is read back
    through a ``TextIteratorStreamer``.

    Args:
        user_input: The question to ask.
        cot_goal: Goal text forwarded to ``build_prompt_cot``.
        cot_reasoning: Steps text forwarded to ``build_prompt_cot``.
        cot_justification: Reasoning text forwarded to ``build_prompt_cot``.

    Yields:
        ``(partial_text, assistant_response)`` after every streamed chunk:
        ``partial_text`` is all text generated so far and
        ``assistant_response`` is what ``process_stream`` extracts from it, or
        ``""`` while it still returns ``None``.
    """
    prompt = build_prompt_cot(user_input, cot_goal, cot_reasoning, cot_justification)
    # Put the inputs on whichever device the model lives on instead of
    # assuming the literal "cuda" device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        do_sample=True,
        temperature=0.4,
        top_p=0.85,
        repetition_penalty=1.25,
        pad_token_id=tokenizer.eos_token_id,
    )

    # generate() blocks until it is finished, so it runs on a worker thread
    # while this generator consumes the streamer. The thread is a daemon so an
    # abandoned generation cannot keep the interpreter alive.
    thread = threading.Thread(target=model.generate, kwargs=generation_kwargs, daemon=True)
    thread.start()

    partial_text = ""
    for new_text in streamer:
        partial_text += new_text
        processed = process_stream(partial_text)
        assistant_response = processed if processed is not None else ""
        yield partial_text, assistant_response

    # The streamer is exhausted, so generation is finishing; reap the worker.
    thread.join()

# ---------------- GRADIO INTERFACE -------------------
with gr.Blocks(
    css="""
    @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
    * { font-family: 'Inter', sans-serif; }
    body {
        background-color: #f5f7fa;
    }
    .chatbot-box {
        border-radius: 18px;
        overflow: hidden;
        box-shadow: 0 4px 20px rgba(0,0,0,0.1);
    }
    .user-message {
        background: linear-gradient(135deg, #4f46e5, #8b5cf6);
        color: white;
        border-radius: 18px 18px 4px 18px;
        padding: 12px;
    }
    .bot-message {
        background-color: white;
        border-radius: 18px 18px 18px 4px;
        padding: 12px;
        box-shadow: 0 2px 8px rgba(0, 0, 0, 0.05);
    }
    .send-btn {
        background: linear-gradient(135deg, #4f46e5, #8b5cf6);
        color: white;
        border-radius: 9999px;
        padding: 8px 16px;
        font-weight: 600;
        transition: transform 0.2s ease;
    }
    .send-btn:hover {
        transform: scale(1.05);
    }
    """
) as demo:

    # Static page header.
    gr.HTML("""
    <header class="bg-white shadow-sm py-4 px-6 rounded-lg mb-4 flex items-center justify-between">
        <div class="flex items-center space-x-3">
            <div class="tooth-icon bg-indigo-600 w-10 h-10 rounded-lg flex items-center justify-center text-white">
                <i class="fas fa-tooth"></i>
            </div>
            <div>
                <h1 class="text-xl font-bold text-gray-800">DentalGPT</h1>
                <p class="text-xs text-gray-500">Trợ lý Nha khoa Thông minh</p>
            </div>
        </div>
    </header>
    """)

    chatbot = gr.Chatbot(label="", show_copy_button=True, elem_classes="chatbot-box", height=500)

    with gr.Row():
        with gr.Column(scale=4):
            user_input = gr.Textbox(label="", placeholder="Nhập câu hỏi nha khoa tại đây...", lines=3)
        with gr.Column(scale=1):
            submit_btn = gr.Button("Gửi 🚀", elem_classes="send-btn")

    # Hidden inputs that feed the goal / steps / reasoning slots of the prompt.
    cot_goal = gr.Textbox(visible=False)
    cot_reasoning = gr.Textbox(visible=False)
    cot_justification = gr.Textbox(visible=False)

    with gr.Accordion("🧠 Chi tiết suy luận của AI", open=False):
        reasoning_output = gr.Textbox(label="Luồng suy luận", lines=8, interactive=False)
        final_output = gr.Textbox(label="Kết quả cuối cùng", lines=6, interactive=False)

    def respond(user_input, cot_goal, cot_reasoning, cot_justification, history):
        """Stream one reply into the chat window and the two detail panels.

        Args:
            user_input: Question typed by the user.
            cot_goal: Value of the hidden goal textbox.
            cot_reasoning: Value of the hidden steps textbox.
            cot_justification: Value of the hidden reasoning textbox.
            history: Current Chatbot value (previous turns); may be ``None``.

        Yields:
            ``(chat_history, partial_text, answer)`` for every streamed chunk,
            where ``chat_history`` is the previous turns plus the current
            ``(user_input, answer)`` pair.
        """
        # Treat a missing history as empty so the concatenation below cannot
        # fail with a TypeError on None.
        history = list(history or [])
        for partial, answer in chatbot_stream(user_input, cot_goal, cot_reasoning, cot_justification):
            yield history + [(user_input, answer)], partial, answer

    submit_btn.click(
        respond,
        inputs=[user_input, cot_goal, cot_reasoning, cot_justification, chatbot],
        outputs=[chatbot, reasoning_output, final_output]
    )

# NOTE(review): share=True requests a public share link from Gradio; confirm
# that exposing the demo outside the local machine is intended.
demo.launch(share=True)

ModuleNotFoundError: No module named 'triton'

In [None]:
!pip install h2ogpte==1.6.27
sk-fghVzpccmHmCu4hM1stYXP3LM1nzggIQCziKW4FiYjCE2gkU # một bộ
sk-GrUWAITYvE3nQ1d1ANrKF7mJ9GDSsKYOif4AfvYFA53MIbED # toàn bộ

In [27]:
import os

from h2ogpte import H2OGPTE

# The collection-specific API key is read from the environment so that no
# secret is stored in the notebook; a missing variable raises KeyError.
client = H2OGPTE(
    address='https://h2ogpte.genai.h2o.ai',
    api_key=os.environ['H2OGPTE_COLLECTION_API_KEY'],
)

# Automatically connects to the collection from the
# collection-specific API key
chat_session_id = client.create_chat_session_on_default_collection()

# Query the collection
with client.connect(chat_session_id) as session:
    reply = session.query(
        """Triệu chứng liệt nửa người thì tắc MCA bao nhiêu %""",
        timeout=60,
    )
    print(reply.content)

# List the documents stored in the default collection (shown as the cell output)
default_collection = client.get_default_collection()
documents = client.list_documents_in_collection(default_collection.id, offset=0, limit=99)
documents

According to the provided document text, there is no specific information regarding the percentage of MCA (Middle Cerebral Artery) occlusion that leads to hemiplegia (liệt nửa người). The document primarily contains data related to a dental dataset, including questions, goals, reasoning, justifications, and answers related to dental research. Therefore, the requested information about MCA occlusion and hemiplegia is not available in the given context.


[DocumentInfo(id='f46fbb85-c2a8-45d3-8be6-f1d14015b624', username='caovu9523@gmail.com', name='Dental_CoT_dataset.pdf', type='PDF', size=20896, page_count=1, guardrails_settings=None, connector='Upload', uri=None, original_type='Excel', meta_data_dict={'ocr_model': 'Automatic'}, status='completed', updated_at=datetime.datetime(2025, 4, 25, 7, 40, 15, 123214, tzinfo=TzInfo(UTC)), user_source_file=None, page_ocr_model_dict=None, page_layout_dict=None)]

In [None]:
import os

from h2ogpte import H2OGPTE

# The API key is read from the environment so that no secret is stored in the
# notebook; a missing variable raises KeyError.
client = H2OGPTE(
    address='https://h2ogpte.genai.h2o.ai',
    api_key=os.environ['H2OGPTE_API_KEY'],
)

# Create a new collection
collection_id = client.create_collection(
    name='927fd1bf-e41b-46b4-b91c-a98337d32436',
    description='',
)

# Create documents
# Note: Done for demonstration purposes only (not usually needed)
with open('dunder_mifflin.txt', 'w') as f:
    f.write('There were 55 paper clips shipped, 22 to Scranton and 33 to Filmer.')

with open('initech.txt', 'w') as f:
    f.write('David Brent did not sign any contract with Initech.')

# Upload documents
# Many file types are supported: text/image/audio documents and archives
with open('dunder_mifflin.txt', 'rb') as f:
    dunder_mifflin = client.upload('Dunder Mifflin.txt', f)

with open('initech.txt', 'rb') as f:
    initech = client.upload('IniTech.txt', f)

# Ingest documents (Creates previews, chunks and embeddings)
client.ingest_uploads(collection_id, [dunder_mifflin, initech])

# Create a chat session
chat_session_id = client.create_chat_session(collection_id)

# Query the collection
with client.connect(chat_session_id) as session:
    reply = session.query(
        'How many paper clips were shipped to Scranton?',
        timeout=60,
    )
    print(reply.content)

    reply = session.query(
        'Did David Brent co-sign the contract with Initech?',
        timeout=60,
    )
    print(reply.content)

    # In case have multiple LLMs, route to LLM with best
    # price/performance below given max cost
    reply = session.query(
        'Did David Brent co-sign the contract with Initech?',
        llm='auto',
        llm_args=dict(cost_controls=dict(max_cost=1e-2)),
        timeout=60,
    )
    print(reply.content)

    # Classification
    reply = session.query(
        'Did David Brent co-sign the contract with Initech?',
        llm_args=dict(
            guided_choice=['yes', 'no', 'unclear'],
        ),
        timeout=60,
    )
    print(reply.content)

    # Create custom JSON
    reply = session.query(
        'How many paper clips were shipped to Scranton?',
        llm_args=dict(
            response_format='json_object',
            guided_json={
                '$schema': 'http://json-schema.org/draft-07/schema#',
                'type': 'object',
                'properties': {'count': {'type': 'integer'}},
                'required': [
                    'count',
                ],
            },
        ),
        timeout=60,
    )
    print(reply.content)

    # Force multimodal vision mode (requires vision-capable LLMs)
    reply = session.query(
        'How many paper clips were shipped to Scranton?',
        llm_args=dict(
            enable_vision='on',
        ),
        timeout=60,
    )
    print(reply.content)

# Summarize each document
documents = client.list_documents_in_collection(collection_id, offset=0, limit=99)
for doc in documents:
    summary = client.process_document(
        document_id=doc.id,
        pre_prompt_summary='Pay attention to the following text in order to summarize.',
        prompt_summary='Write a concise summary from the text above.',
        timeout=60,
    )
    print(summary.content)

# Chat with LLM without a collection
chat_session_id = client.create_chat_session()

with client.connect(chat_session_id) as session:
    reply = session.query(
        'Why is drinking water good for you?',
        timeout=60,
    )
    print(reply.content)

According to the information provided in the documents, there is no relevant data or context related to "KẾT QUẢ ĐIỀU TRỊ PHẨU THUẬT MỞ SỌ GIẢI ÉP BỆNH NH]NNHỒI MÁU N^O DO TẮC ĐỘNG MẠCH NhOTẠI BỆNH VIỆN ĐẠI HỌC Y H[ NỘI." The documents only contain information about David Brent not signing a contract with Initech and the shipment of paper clips to Scranton and Filmer by Dunder Mifflin.
According to the document titled "IniTech.pdf (516ef04d-c967-4e18-938c-4a8490565224)," David Brent did not sign any contract with Initech.
According to the document titled "IniTech.pdf (516ef04d-c967-4e18-938c-4a8490565224)," David Brent did not sign any contract with Initech.
{"count": 2}
According to the information provided in the documents and images, 22 paper clips were shipped to Scranton.
A total of 55 paper clips were shipped, with 22 sent to Scranton and 33 to Filmer.
David Brent did not sign a contract with Initech.
Drinking water is essential for maintaining good health and overall well-being.

In [16]:
import os

from h2ogpte import H2OGPTE

# The API key is read from the environment so that no secret is stored in the
# notebook; a missing variable raises KeyError.
client = H2OGPTE(
    address='https://h2ogpte.genai.h2o.ai',
    api_key=os.environ['H2OGPTE_API_KEY'],
)

# Create a new collection
collection_id = client.create_collection(
    name='927fd1bf-e41b-46b4-b91c-a98337d32436',
    description='',
)

In [None]:
# Start a chat session scoped to the collection referenced by `collection_id`.
chat_session_id = client.create_chat_session(collection_id)

# Send one question over the session and print the text of the reply.
with client.connect(chat_session_id) as session:
    reply = session.query("Triệu chứng liệt nửa người thì tắc MCA bao nhiêu %", timeout=60)
    print(reply.content)

Chen B, Sun Y, Wei Z, et al có thể là các tác giả của một bài báo khoa học hoặc nghiên cứu nào đó. Trong các tài liệu khoa học, tên của các tác giả thường được liệt kê theo thứ tự đóng góp của họ vào công trình nghiên cứu. Để biết thêm chi tiết về những người này và công trình nghiên cứu cụ thể mà họ đã thực hiện, bạn cần cung cấp thêm thông tin về ngữ cảnh hoặc tiêu đề của bài báo, nghiên cứu hoặc lĩnh vực mà họ đang làm việc.


In [25]:
# Fetch up to 99 documents of the collection and run each one through
# process_document, passing the same question as both the pre-prompt and the
# prompt; the content of every result is printed.
documents = client.list_documents_in_collection(collection_id, offset=0, limit=99)
for doc in documents:
    summary = client.process_document(
        document_id=doc.id,
        pre_prompt_summary="Triệu chứng liệt nửa người thì tắc MCA bao nhiêu %",
        prompt_summary="Triệu chứng liệt nửa người thì tắc MCA bao nhiêu %",
        timeout=60,
    )
    print(summary.content)