In [1]:
pip install sentence-transformers



In [2]:
pip install --upgrade gradio



In [3]:
pip install scikit-learn



In [4]:
pip install openai



In [5]:
import gradio as gr
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import os
import openai


In [6]:
# Shared sentence-embedding model; the same instance encodes both the
# document chunks and the user's queries.
embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

# In-memory index of the currently loaded document: the list of text chunks
# and their embeddings (None until a document has been embedded).
document_chunks, document_embeddings = [], None

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [7]:
# Load the OpenAI API key: from Colab Secrets when running inside Google Colab,
# otherwise (or as a fallback) from the OPENAI_API_KEY environment variable.
try:
    # get_ipython() only exists inside an IPython kernel. Outside one it raises
    # NameError, which must not prevent the environment-variable lookup below.
    try:
        _in_colab = 'google.colab' in str(get_ipython())
    except NameError:
        _in_colab = False

    if _in_colab:
        from google.colab import userdata
        try:
            _api_key = userdata.get('OPENAI_API_KEY')
        except Exception as secret_error:
            # userdata.get can raise instead of returning an empty value
            # (presumably when the secret is missing or notebook access has
            # not been granted -- confirm against the Colab documentation).
            print(f"Error saat mengatur API Key OpenAI: {secret_error}")
            _api_key = None
        if not _api_key:
            # Fall back to an environment variable set earlier in the session.
            _api_key = os.getenv("OPENAI_API_KEY")
        openai.api_key = _api_key
        if not openai.api_key:
            print("PERINGATAN: API Key OpenAI tidak ditemukan di Colab Secrets. Silakan atur.")
    else:
        openai.api_key = os.getenv("OPENAI_API_KEY")
        if not openai.api_key:
            print("PERINGATAN: Variabel lingkungan OPENAI_API_KEY tidak diatur.")
except Exception as e:
    # Best-effort setup: report the problem but let the rest of the notebook run;
    # the chat functions check openai.api_key again before calling the API.
    print(f"Error saat mengatur API Key OpenAI: {e}")
    print("Pastikan Anda telah mengatur API Key OpenAI dengan benar.")

In [8]:
def _split_into_chunks(text, chunk_size=500, overlap=50):
    """Split ``text`` into character windows of ``chunk_size`` that overlap.

    Consecutive windows start ``chunk_size - overlap`` characters apart, so each
    window repeats the last ``overlap`` characters of the previous one. Splitting
    stops as soon as a window reaches the end of the text, which avoids emitting
    a final chunk that only repeats text already covered by the previous chunk.

    Args:
        text: The full document text.
        chunk_size: Maximum number of characters per chunk (must be > 0).
        overlap: Characters shared between neighbouring chunks
            (must satisfy ``0 <= overlap < chunk_size``).

    Returns:
        A list of chunk strings; empty when ``text`` is empty.

    Raises:
        ValueError: If ``chunk_size`` or ``overlap`` is out of range.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be positive")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    text_length = len(text)
    chunks = []
    start = 0
    while start < text_length:
        end = min(start + chunk_size, text_length)
        chunks.append(text[start:end])
        if end == text_length:
            # The whole text is covered; another window would contain only
            # the overlap that this chunk already includes.
            break
        start += step
    return chunks


def process_document(uploaded_file):
    """Read an uploaded text file, chunk it, and embed the chunks.

    Resets the module-level ``document_chunks`` / ``document_embeddings`` and,
    on success, replaces them with the new document's chunks and embeddings.
    Both globals are only assigned together after embedding succeeds, so a
    failure never leaves chunks without matching embeddings.

    Args:
        uploaded_file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object exposing the path through a ``.name`` attribute. ``None``
            means nothing was uploaded.

    Returns:
        A ``(status_message, "")`` tuple. The second element is always the
        empty string; the upload handler routes it to the chatbot output.
        Errors are reported in ``status_message`` rather than raised.
    """
    global document_chunks, document_embeddings
    document_chunks = []
    document_embeddings = None

    if uploaded_file is None:
        return "Silakan unggah dokumen terlebih dahulu.", ""

    try:
        # Accept a plain path as well as a file-like wrapper with `.name`.
        if isinstance(uploaded_file, (str, os.PathLike)):
            file_path = os.fspath(uploaded_file)
        else:
            file_path = uploaded_file.name

        # utf-8-sig reads ordinary UTF-8 and also drops a leading BOM, so the
        # BOM does not end up inside the first chunk.
        with open(file_path, "r", encoding="utf-8-sig") as f:
            text_content = f.read()

        # Empty or whitespace-only files carry nothing worth embedding.
        if not text_content.strip():
            return "Dokumen kosong atau gagal diproses.", ""

        chunks = _split_into_chunks(text_content, chunk_size=500, overlap=50)
        embeddings = embedding_model.encode(chunks)

        document_chunks = chunks
        document_embeddings = embeddings
        return f"Dokumen '{os.path.basename(file_path)}' berhasil diproses. Jumlah potongan teks: {len(document_chunks)}", ""
    except Exception as e:
        return f"Error memproses dokumen: {e}", ""

def get_relevant_chunks(query_text, top_n=3):
    """Return the document chunks most similar to ``query_text``.

    The query is embedded with the shared ``embedding_model`` and compared to
    the stored ``document_embeddings`` by cosine similarity.

    Args:
        query_text: The user's question.
        top_n: Maximum number of chunks to return. Values <= 0 yield an empty
            list (a plain ``[-top_n:]`` slice would return *every* chunk for 0).

    Returns:
        Up to ``top_n`` chunk strings ordered from most to least similar;
        an empty list when no document is loaded.
    """
    if document_embeddings is None or not document_chunks:
        return []
    if top_n <= 0:
        return []

    query_embedding = embedding_model.encode([query_text])
    # cosine_similarity returns one row per query; there is exactly one query.
    similarities = cosine_similarity(query_embedding, document_embeddings)[0]

    # argsort is ascending, so reverse it and keep the first top_n indices.
    ranked_indices = np.argsort(similarities)[::-1][:top_n]
    return [document_chunks[int(i)] for i in ranked_indices]

In [9]:
def ask_llm_openai(question, context_chunks):
    """Answer ``question`` with the OpenAI chat model, using only ``context_chunks``.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        context_chunks: Strings of document text; they are joined with a
            ``---`` separator line and embedded in the prompt as context.

    Returns:
        The model's reply with surrounding whitespace stripped, or an error
        message string when no API key is set or the request fails. Exceptions
        raised while calling the API are converted into returned messages.
    """
    if not openai.api_key:
        return "Error: API Key OpenAI belum diatur atau tidak valid. Mohon periksa pengaturan Anda."

    context_str = "\n\n---\n\n".join(context_chunks)

    prompt = f"""
Anda adalah asisten AI yang bertugas menjawab pertanyaan berdasarkan potongan teks yang diberikan dari sebuah dokumen.
Gunakan HANYA informasi dari teks yang diberikan di bawah ini. Jangan menggunakan pengetahuan eksternal.
Jika informasi tidak ada dalam konteks, jawab dengan jujur "Saya tidak menemukan informasi tersebut dalam dokumen yang diberikan."

Konteks Dokumen:
---
{context_str}
---

Pertanyaan Pengguna: {question}

Jawaban Anda:
    """

    system_message = {
        "role": "system",
        "content": "Anda adalah asisten AI yang membantu menjawab pertanyaan berdasarkan dokumen yang diberikan. Gunakan hanya informasi dari dokumen.",
    }
    user_message = {"role": "user", "content": prompt}

    try:
        completion = openai.chat.completions.create(
            model="gpt-4o",
            messages=[system_message, user_message],
            temperature=0.2,
        )
        reply_text = completion.choices[0].message.content
        return reply_text.strip()
    except Exception as e:
        # Checked in order, so the more specific error types are listed
        # before the general APIError entry.
        known_errors = (
            (openai.APIConnectionError, "Error koneksi ke OpenAI: "),
            (openai.RateLimitError, "Error Rate Limit OpenAI: Anda mengirim terlalu banyak permintaan. Coba lagi nanti. "),
            (openai.AuthenticationError, "Error Autentikasi OpenAI: API Key tidak valid atau salah. "),
            (openai.APIError, "Error API OpenAI lainnya: "),
        )
        for error_type, message_prefix in known_errors:
            if isinstance(e, error_type):
                return f"{message_prefix}{e}"
        return f"Terjadi error tak terduga saat menghubungi LLM: {e}"

In [10]:
def chat_with_document(user_input, history):
    """Chat callback: answer ``user_input`` from the currently loaded document.

    Args:
        user_input: The question typed by the user.
        history: Previous conversation turns supplied by the chat UI; not used.

    Returns:
        The LLM's answer as a string, or a guidance/error message when no
        document is loaded, the API key is missing, or no chunks were retrieved.
    """
    document_ready = document_embeddings is not None and bool(document_chunks)
    if not document_ready:
        # With no document loaded, a missing API key is reported first.
        return (
            "Mohon unggah dan proses dokumen terlebih dahulu."
            if openai.api_key
            else "Error: API Key OpenAI belum diatur. Mohon atur API Key dan unggah dokumen."
        )

    retrieved = get_relevant_chunks(user_input, top_n=5)
    if retrieved:
        return ask_llm_openai(user_input, retrieved)

    return "Tidak ada bagian relevan yang ditemukan dalam dokumen untuk pertanyaan Anda."

# Build the Gradio UI: a document uploader with a status box above a chat panel.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# Chatbot Analisis Dokumen")
    gr.Markdown("Unggah dokumen Anda (.txt), lalu ajukan pertanyaan mengenai isinya.")

    with gr.Row():
        # process_document opens the upload as UTF-8 text, so restrict the
        # uploader to .txt files to match what the label promises.
        file_upload = gr.File(label="Unggah Dokumen (.txt)", file_types=[".txt"])
        status_output = gr.Textbox(label="Status Pemrosesan Dokumen", interactive=False)

    # The Chatbot is created explicitly and handed to ChatInterface so its
    # height, avatar and message format can be configured here.
    chatbot_instance = gr.Chatbot(
        height=400,
        avatar_images=(None, "https://upload.wikimedia.org/wikipedia/commons/thumb/0/04/ChatGPT_logo.svg/1024px-ChatGPT_logo.svg.png"),
        type='messages'
    )

    chatbot_interface = gr.ChatInterface(
        fn=chat_with_document,
        title="Chat Analisis Dokumen",
        chatbot=chatbot_instance,
        textbox=gr.Textbox(placeholder="Ketik pertanyaanmu di sini...", container=False, scale=7)
    )

    # On upload, process the file; the handler's two return values go to the
    # status box and to the chat panel respectively.
    file_upload.upload(fn=process_document, inputs=file_upload, outputs=[status_output, chatbot_interface.chatbot])


# share=True requests a temporary public URL in addition to the local server.
demo.launch(share=True)



Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://29395ec5186db2566a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [11]:
# Remove the API key from the process environment when running in Colab.
# get_ipython() is undefined outside an IPython kernel, so treat NameError
# as "not in Colab" instead of letting the cell fail.
try:
    _in_colab = 'google.colab' in str(get_ipython())
except NameError:
    _in_colab = False

if _in_colab:
    # pop() with a default is a no-op when the variable is not set.
    os.environ.pop('OPENAI_API_KEY', None)