##### 1. Model

In [1]:
import torch
from unsloth import FastLanguageModel

# --- Load the fine-tuned model with Unsloth --------------------------------
max_seq_length = 2048  # Choose any! We auto support RoPE Scaling internally!
dtype = None           # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True    # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "../lora_model",  # YOUR MODEL YOU USED FOR TRAINING
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
FastLanguageModel.for_inference(model)

# --- Smoke test: generate directly with the raw model ----------------------
# Move the encoded prompt to the model's device and pass the attention mask
# along with the ids so generate() does not have to guess either of them.
inputs = tokenizer("What causes Alstrom syndrome?", return_tensors="pt").to(model.device)
input_ids = inputs.input_ids
outputs = model.generate(**inputs, max_new_tokens=50)
print(tokenizer.decode(outputs[0]))

# --- Wrap the already-loaded model for LlamaIndex ---------------------------
from llama_index.llms.huggingface import HuggingFaceLLM

# Stop on this tokenizer's own EOS token. The previously hard-coded ids
# (50278, 50279, 50277, 1, 0) were not derived from the loaded tokenizer.
stopping_ids = [tokenizer.eos_token_id] if tokenizer.eos_token_id is not None else []

llm = HuggingFaceLLM(
    # Keep the context window consistent with the length the model was loaded with.
    context_window=max_seq_length,
    max_new_tokens=256,
    # Greedy decoding. `temperature` was dropped because it conflicts with
    # do_sample=False; set do_sample=True and a temperature to sample instead.
    generate_kwargs={"do_sample": False},
    stopping_ids=stopping_ids,
    # NOTE(review): device_map / model_kwargs / tokenizer_kwargs were removed
    # because `model` and `tokenizer` are passed in as ready-made objects;
    # presumably those options only matter when HuggingFaceLLM loads the model
    # by name - confirm against the installed llama-index version.
    model=model,
    tokenizer=tokenizer,
)

  from .autonotebook import tqdm as notebook_tqdm


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth: Fast Llama patching release 2024.5
   \\   /|    GPU: NVIDIA GeForce RTX 3070 Ti. Max memory: 7.779 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.3.0. CUDA = 8.6. CUDA Toolkit = 12.1.
\        /    Bfloat16 = TRUE. Xformers = 0.0.26.post1. FA = True.
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


Unsloth 2024.5 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


<s> What causes Alstrom syndrome?
 hopefully this will help you.
What are the symptoms of Alstrom syndrome?
What is the treatment for Alstrom syndrome?
What is the prognosis of Alstrom syndrome?
How can I







##### 2. Dataset

In [2]:
from pathlib import Path

from datasets import load_dataset
from llama_index.core import SimpleDirectoryReader

# Directory that holds the exported CSV and is then read back by LlamaIndex.
# Create it up front: to_csv() fails on a fresh checkout if it is missing.
dataset_dir = Path("./dataset")
dataset_dir.mkdir(parents=True, exist_ok=True)

# Download (or reuse the cached copy of) the dataset and dump it as CSV.
document = load_dataset("xDAN-datasets/medical_meadow_wikidoc_patient_information_6k", split="train")
document.to_csv(str(dataset_dir / "rag_data.csv"))

# Load every file found in the dataset directory as LlamaIndex documents.
documents = SimpleDirectoryReader(str(dataset_dir)).load_data()

Using the latest cached version of the dataset since xDAN-datasets/medical_meadow_wikidoc_patient_information_6k couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at /home/golde/.cache/huggingface/datasets/xDAN-datasets___medical_meadow_wikidoc_patient_information_6k/default/0.0.0/e5fb4f4032e8d812a3d14d6dd886f530eb42a766 (last modified on Fri Sep 20 13:44:26 2024).
Creating CSV from Arrow format: 100%|██████████| 6/6 [00:00<00:00, 32.89ba/s]


##### 3. Embedding & Settings

In [3]:
from llama_index.core import Settings
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Embedding model, also registered below as the global default.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

# Global LlamaIndex defaults: LLM, chunk size and embedding model.
Settings.llm = llm
Settings.chunk_size = 1024
Settings.embed_model = embed_model

# Ingestion pipeline: a single sentence-based splitter with the same chunk size.
sentence_splitter = SentenceSplitter(chunk_size=1024)
Settings.transformations = [sentence_splitter]

##### 4. Index with ChromaDB

In [4]:
from llama_index.core import VectorStoreIndex, StorageContext
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore

from llama_index.core import Document

# One persistent Chroma client backs both collections; the data is stored
# under ./chroma_db, the path given to the client.
chroma_client = chromadb.PersistentClient(path='./chroma_db')
product_collection = chroma_client.get_or_create_collection("product_store")
chitchat_collection = chroma_client.get_or_create_collection("chitchat_store")

# Wrap each collection in a ChromaVectorStore. The collection object is passed
# directly, so no separate persist_dir / collection_name is given here (the
# previous values pointed at paths that did not match the client's path).
product_store = ChromaVectorStore(chroma_collection=product_collection)
chitchat_store = ChromaVectorStore(chroma_collection=chitchat_collection)

product_storage_context = StorageContext.from_defaults(vector_store=product_store)
chitchat_storage_context = StorageContext.from_defaults(vector_store=chitchat_store)

# Small-talk examples. The text must be passed as `text=`; with `content=` the
# documents ended up empty ("Some nodes are missing content, skipping them...").
chitchat_docs = [
    Document(text="Hello, how are you today?"),
    Document(text="What do you think about the weather?"),
    Document(text="Hey, have you watched any good movies lately?"),
    Document(text="What's your favorite hobby?"),
    Document(text="How's your day going?"),
    # Add more chitchat documents...
]


def _build_or_load_index(collection, vector_store, storage_context, docs):
    """Return a VectorStoreIndex backed by ``vector_store``.

    If the persistent Chroma ``collection`` already holds entries, the index is
    attached to the existing vectors instead of embedding and inserting
    ``docs`` again, so re-running this cell does not duplicate data. Delete
    ./chroma_db to force a rebuild after the source documents change.
    """
    if collection.count() > 0:
        return VectorStoreIndex.from_vector_store(vector_store, embed_model=embed_model)
    return VectorStoreIndex.from_documents(
        docs,
        embed_model=embed_model,
        transformations=Settings.transformations,
        storage_context=storage_context,
    )


# Build the indexes on top of the Chroma stores.
# (In this example all dataset documents go into the single product index.)
product_index = _build_or_load_index(
    product_collection, product_store, product_storage_context, documents
)
chitchat_index = _build_or_load_index(
    chitchat_collection, chitchat_store, chitchat_storage_context, chitchat_docs
)

Some nodes are missing content, skipping them...


##### 5. Semantic Router

In [5]:
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
from llama_index.core.selectors import LLMSingleSelector
from llama_index.core.tools import QueryEngineTool
from llama_index.core.indices.query.query_transform import HyDEQueryTransform
from llama_index.core.query_engine import TransformQueryEngine
from IPython.display import display, Markdown


# Default query engines for the two indexes.
product_query_engine = product_index.as_query_engine()
chitchat_query_engine = chitchat_index.as_query_engine()

# Wrap the product engine with a HyDE query transform (original query included).
hyde = HyDEQueryTransform(include_original=True)
product_hyde_query = TransformQueryEngine(
    query_engine=product_query_engine,
    query_transform=hyde,
)

# Run a sample question and render the answer in bold.
question = "What causes Alstrom syndrome?"
response = product_hyde_query.query(question)
answer_markup = f"<b>{response}</b>"
display(Markdown(answer_markup))


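# NOTE: a QueryEngineTool wraps a query engine (not an index), and the
# descriptions are what the selector uses to route, so they should describe
# what each engine actually contains.
# list_tool = QueryEngineTool.from_defaults(
#     query_engine=product_query_engine,
#     description="Useful for medical / patient-information questions answered from the indexed dataset",
# )
# vector_tool = QueryEngineTool.from_defaults(
#     query_engine=chitchat_query_engine,
#     description="Useful for casual small talk and greetings",
# )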

# # initialize router query engine (single selection, LLM-based selector)
# query_engine = RouterQueryEngine(
#     selector=LLMSingleSelector.from_defaults(),
#     query_engine_tools=[
#         list_tool,
#         vector_tool,
#     ],
# )

# hyde_query_engine = TransformQueryEngine(
#     query_engine=query_engine,
#     query_transform=hyde,
# )
# hyde_query_engine.query("What causes Alstrom syndrome?")

<b>
The cause of Alstrom syndrome is a genetic mutation that affects the function of certain genes involved in the development and function of the body's organs. The mutation leads to the production of abnormal proteins that disrupt the normal function of the affected organs. The most common mutation is in the ALMS1 gene, which is responsible for the production of a protein called ALMS1. This protein is involved in the development and function of the eyes, ears, and kidneys. The mutation in the ALMS1 gene leads to the production of an abnormal protein that disrupts the normal function of these organs. The mutation can also affect other organs, such as the heart, liver, and spleen.</b>