In [2]:
import os
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain.prompts import PromptTemplate

if not os.getenv("OPENAI_API_KEY"):
    raise ValueError("OPENAI_API_KEY tidak ditemukan. Pastikan sudah diatur di file .env.")

llm = ChatOpenAI(
    model="gpt-5-nano", 
    temperature=0.7 # Atur kreativitas model
)

  from .autonotebook import tqdm as notebook_tqdm
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.


In [4]:
from langchain_core.documents import Document

documents = [
    Document(
        page_content="Dogs are great companions, known for their loyalty and friendliness.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Cats are independent pets that often enjoy their own space.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata={"source": "fish-pets-doc"},
    ),
    Document(
        page_content="Parrots are intelligent birds capable of mimicking human speech.",
        metadata={"source": "bird-pets-doc"},
    ),
    Document(
        page_content="Rabbits are social animals that need plenty of space to hop around.",
        metadata={"source": "mammal-pets-doc"},
    ),
]


In [5]:
from langchain_openai import OpenAIEmbeddings

embedding_model = OpenAIEmbeddings(
    model="text-embedding-3-small"
)

In [6]:
teks = 'Hallo nama saya farhan'

query_vector=embedding_model.embed_query(teks)

print(f"Dimensi Vektor (Panjang List): {len(query_vector)}")
print(f"Contoh Vektor (10 elemen pertama): {query_vector[:10]}")

Dimensi Vektor (Panjang List): 1536
Contoh Vektor (10 elemen pertama): [-0.037101149559020996, -0.04677020013332367, -0.04082001745700836, 0.021733487024903297, 0.01770836114883423, -0.05013905465602875, -0.06396448612213135, 0.022510074079036713, -0.034891705960035324, -0.027388349175453186]


In [11]:
from langchain_chroma import Chroma

vectorstore = Chroma.from_documents(documents, embedding= embedding_model)
vectorstore

<langchain_chroma.vectorstores.Chroma at 0x1b0c4b01810>

In [12]:
vectorstore.similarity_search("dog")

[Document(id='26995114-e8a3-4016-85a2-d0f800230714', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(id='e29e571b-f27e-45d9-84d4-f8b70e697bef', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(id='a6859816-adc0-4073-a2c8-512a7b0e5a58', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(id='6af95bc8-c26c-49c0-aa8e-14aaba56d52e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')]

In [13]:
# Async query
await vectorstore.asimilarity_search("dog")

[Document(id='26995114-e8a3-4016-85a2-d0f800230714', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(id='e29e571b-f27e-45d9-84d4-f8b70e697bef', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(id='a6859816-adc0-4073-a2c8-512a7b0e5a58', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(id='6af95bc8-c26c-49c0-aa8e-14aaba56d52e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')]

In [16]:
vectorstore.similarity_search_with_score("cat")

[(Document(id='a6859816-adc0-4073-a2c8-512a7b0e5a58', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  1.2405281066894531),
 (Document(id='6af95bc8-c26c-49c0-aa8e-14aaba56d52e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  1.2405281066894531),
 (Document(id='26995114-e8a3-4016-85a2-d0f800230714', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
  1.550061821937561),
 (Document(id='e29e571b-f27e-45d9-84d4-f8b70e697bef', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
  1.550061821937561)]

In [17]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)
retriever.batch(["cat", "dog"])


[[Document(id='6af95bc8-c26c-49c0-aa8e-14aaba56d52e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')],
 [Document(id='e29e571b-f27e-45d9-84d4-f8b70e697bef', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')]]

In [19]:
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k":1}
)

retriever.batch(["cat", "Parrot"])

[[Document(id='6af95bc8-c26c-49c0-aa8e-14aaba56d52e', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')],
 [Document(id='683ac6bb-0a62-43b0-ad13-4684a4d2f77a', metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.')]]

In [22]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message= """
Jawab pertanyaan ini berdasarkan hanya dari context

{question}

Context: {context}
"""

prompt = ChatPromptTemplate.from_messages(
    [
        "human", message
    ]
)

rag_chain = {"context":retriever, "question": RunnablePassthrough()} | prompt | llm

response = rag_chain.invoke(
    "beri tahu aku tentang Dog"
)

response

AIMessage(content='Anjing adalah teman yang hebat, dikenal karena kesetiaan dan keramahan.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 410, 'prompt_tokens': 86, 'total_tokens': 496, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 384, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-5-nano-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CVudh1arKlIPWtKSYvJ67PcmswKIF', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--042d5f08-7ee3-497f-b058-10922137edc3-0', usage_metadata={'input_tokens': 86, 'output_tokens': 410, 'total_tokens': 496, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 384}})

In [23]:
response.content

'Anjing adalah teman yang hebat, dikenal karena kesetiaan dan keramahan.'

In [24]:
rag_chain.invoke("beri tahu aku tentang ikan")

AIMessage(content='Menurut konteks, ikan yang dibahas adalah goldfish. Goldfish adalah hewan peliharaan yang populer untuk pemula dan perawatannya relatif sederhana.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 425, 'prompt_tokens': 83, 'total_tokens': 508, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 384, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-5-nano-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CVueRxqx7u5gTt6T4b5UIkiPnlPzK', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='run--c46e175b-7161-4550-96c4-baac8d2dd26e-0', usage_metadata={'input_tokens': 83, 'output_tokens': 425, 'total_tokens': 508, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 384}})

In [25]:
try:
    semua_isi = vectorstore._collection.get(
        limit=5, 
        include=["metadatas", "documents"] # Minta Chroma menyertakan metadata dan teks
    )

    print("\n--- ISI CHROMA (5 DATA PERTAMA) ---")
    
    for i, doc in enumerate(semua_isi['documents']):
        metadata = semua_isi['metadatas'][i]
        
        print(f"\n[{i+1}] Teks Asli:")
        print(f"  {doc[:100]}...") # Tampilkan 100 karakter pertama
        
        print(f"Metadata:")
        print(f"  {metadata}")

except Exception as e:
    print(f"Error saat mengambil data dari Chroma: {e}")
    print("Pastikan objek 'db' Anda adalah instance dari Chroma yang sudah terisi.")


--- ISI CHROMA (5 DATA PERTAMA) ---

[1] Teks Asli:
  Dogs are great companions, known for their loyalty and friendliness....
Metadata:
  {'source': 'mammal-pets-doc'}

[2] Teks Asli:
  Cats are independent pets that often enjoy their own space....
Metadata:
  {'source': 'mammal-pets-doc'}

[3] Teks Asli:
  Goldfish are popular pets for beginners, requiring relatively simple care....
Metadata:
  {'source': 'fish-pets-doc'}

[4] Teks Asli:
  Parrots are intelligent birds capable of mimicking human speech....
Metadata:
  {'source': 'bird-pets-doc'}

[5] Teks Asli:
  Rabbits are social animals that need plenty of space to hop around....
Metadata:
  {'source': 'mammal-pets-doc'}
