# Preparations:
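Initialize a virtual environment and install the dependencies. (The activation command below uses the Windows path; on Linux/macOS run `source .venv/bin/activate` instead.)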
```bash
    python -m venv .venv
    .venv\Scripts\activate
    pip install chromadb sentence-transformers transformers gradio
    pip install ipykernel
    python -m ipykernel install --user --name=.venv --display-name "Python (.venv)"
```

If there is a problem (for example, notebook widgets fail to load), try:
```bash
    pip install --upgrade jupyter ipywidgets
```


# Code:

In [None]:
import chromadb
from chromadb.utils import embedding_functions
from transformers import pipeline
import gradio as gr

In [None]:
# 1. Initialize ChromaDB: open the on-disk store and fetch (or create) the
#    collection that holds the FAQ entries.
db_path = "./chroma_db"
collection_name = "faq"
chroma_client = chromadb.PersistentClient(path=db_path)
collection = chroma_client.get_or_create_collection(name=collection_name)

In [None]:
# 2. Embedding model (all-MiniLM-L6-v2), used for both the stored questions
#    and incoming user queries.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embedding_func = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name=embedding_model_name,
)

In [None]:
# 3. LLM used to generate answers (Flan-T5 Base)
generation_options = {
    "model": "google/flan-t5-base",
    "device": -1,  # -1 selects the CPU, 0 selects the first GPU
    "framework": "pt",  # use PyTorch ("tf" would select TensorFlow)
    "model_kwargs": {"torch_dtype": "auto"},  # let the library pick the dtype
}
generator = pipeline("text2text-generation", **generation_options)

In [None]:
# 4. Small dataset (FAQ chatbot): each entry pairs a question with its answer.
_faq_pairs = (
    (
        "Apa itu AI?",
        "AI adalah kecerdasan buatan yang memungkinkan komputer melakukan tugas seperti manusia.",
    ),
    (
        "Apa itu Machine Learning?",
        "Machine Learning adalah cabang AI yang memungkinkan komputer belajar dari data.",
    ),
    (
        "Apa itu Chatbot?",
        "Chatbot adalah program yang dapat berinteraksi dengan manusia menggunakan bahasa alami.",
    ),
)
dataset = [
    {"question": question_text, "answer": answer_text}
    for question_text, answer_text in _faq_pairs
]

In [None]:
# Remove old records by ID so the dataset can be stored again under the same
# IDs ("0" .. len(dataset) - 1).
stale_ids = list(map(str, range(len(dataset))))
collection.delete(ids=stale_ids)

In [None]:
# 5. Store the dataset in ChromaDB.
#    The *question* is embedded and used as the search vector, while the
#    *answer* is stored as the document returned by a query.
for record_index, entry in enumerate(dataset):
    question_vectors = embedding_func([entry["question"]])
    collection.add(
        ids=[str(record_index)],
        documents=[entry["answer"]],
        embeddings=[question_vectors[0]],
    )

In [None]:
# Debugging aid: dump everything in the collection, then just the stored IDs.
all_records = collection.get()
print("Stored Data:", all_records)
print('\n')
stored_ids = collection.get()["ids"]
print("Stored IDs:", stored_ids)

In [None]:
# 6. Retrieval and answer-generation functions
def retrieve_answer(query, max_distance=0.4):
    """Return the stored document closest to *query*, or None if none is close enough.

    The query is embedded with ``embedding_func`` and the single nearest entry
    is looked up in ``collection``.

    Args:
        query: The user's question as a string.
        max_distance: Largest distance (as reported by the collection) that
            still counts as a relevant match. Tune as needed.

    Returns:
        The matched document text, or None when the collection returns no
        match or the best match is farther away than ``max_distance``.
    """
    query_embedding = embedding_func([query])[0]
    results = collection.query(query_embeddings=[query_embedding], n_results=1)

    # Results are nested per query (hence the [0][0] indexing). Check the inner
    # list too, so a query with no match cannot raise IndexError below.
    documents = results["documents"] or [[]]
    distances = results["distances"] or [[]]
    if not documents[0] or not distances[0]:
        return None  # Nothing stored / nothing matched

    best_distance = distances[0][0]
    # Pulled into a local first: reusing double quotes inside a double-quoted
    # f-string is a SyntaxError before Python 3.12.
    print(f"Distances: {best_distance}")  # Debugging
    if best_distance > max_distance:
        return None  # Closest entry is not relevant enough
    return documents[0][0]

def chat(query):
    """Answer *query* using the retrieved FAQ context and the text generator.

    Looks up a context with ``retrieve_answer``. When none is found, a fixed
    fallback message is returned; otherwise a prompt containing the context
    and the question is passed to ``generator`` and its generated text is
    returned.
    """
    retrieved = retrieve_answer(query)
    print(f"Context: {retrieved}")  # Debugging
    if retrieved is not None:
        llm_input = f"Context: {retrieved}\nQuestion: {query}\nAnswer:"
        print(f"Prompt: {llm_input}")  # Debugging
        outputs = generator(llm_input, max_length=100)
        return outputs[0]["generated_text"]
    # Default reply when the database has no relevant answer
    return "Saya tidak bisa menjawabnya."

In [None]:
# 7. Build the chatbot UI with Gradio: one text input wired to `chat`,
#    one text output.
interface_options = {
    "fn": chat,
    "inputs": "text",
    "outputs": "text",
    "title": "FAQ Chatbot",
}
demo = gr.Interface(**interface_options)

demo.launch()