<a href="https://colab.research.google.com/github/DikaWasHere/simpanan-koding/blob/master/copy_of_llm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install or upgrade (-U), quietly (-q), the third-party packages this notebook uses.
!pip install -U -q google-generativeai langchain langchain-google-genai langchain_community pypdf chromadb

In [None]:
from google.colab import userdata
from langchain_text_splitters import NLTKTextSplitter
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.vectorstores import Typesense
from IPython.display import Markdown as md

In [None]:
import nltk
# Download the 'punkt_tab' NLTK resource.
# Presumably required by NLTKTextSplitter for sentence tokenization — TODO confirm.
nltk.download('punkt_tab')

## Download file dan Load (PDF)

In [None]:
# Download the source PDF and save it locally as coba.pdf (the file the loader cell opens).
!curl -o coba.pdf https://eprints.widyayuwana.ac.id/id/eprint/115/1/Adelina%20Damayanti%20%28Paper%20UTS%20Kewarganegaraan%29.pdf

In [None]:
# Load the downloaded PDF into LangChain documents.
# PyPDFLoader is imported from langchain_community, the package this notebook
# already uses for its vector stores. The legacy `langchain.document_loaders`
# path is deprecated and absent from newer `langchain` releases, which the
# `pip install -U` cell pulls in.
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader('coba.pdf')
# load_and_split() returns a list of documents; `pages` is indexed and counted
# in the following cells and chunked further later on.
pages = loader.load_and_split()

In [None]:
# Render the text of the second loaded document (index 1) as Markdown.
md(pages[1].page_content)

In [None]:
# Number of documents returned by load_and_split().
len(pages)

## LLM

In [None]:
# Fetch the value stored under the name 'chatbot' through google.colab's
# `userdata`; it is used as the Google API key here and for the Google
# embedding client later on.
api_key = userdata.get('chatbot')
# Gemini chat model used as the final generation step of the RAG chain.
chat_model= ChatGoogleGenerativeAI(google_api_key=api_key,
                                   model='gemini-2.5-flash',
                                   temperature=0.9)

## Chunking 42:30

In [None]:
# Small demonstration of NLTKTextSplitter on a short hand-written string:
# print the total character count, split the text, then print each chunk
# together with its length.
simple_docs = '''halo nama saya sardi irfansyah. saya lahir di jakarta. saya irfan. tinggal di jakarta'''
print('total karakter:', len(simple_docs))

text_splitter = NLTKTextSplitter(
    separator='\n\n',
    chunk_size=67,
    chunk_overlap=10,
)
chunks = text_splitter.split_text(simple_docs)

# Chunk numbers shown to the reader start at 1.
for number, piece in enumerate(chunks, start=1):
    print(f'panjang chunk {number}: {len(piece)}')
    print(f'chunk {number}:')
    print(piece)
    print('-' * 50)

In [None]:
# Split the loaded PDF documents into chunks with chunk_size=500 and
# chunk_overlap=100. This rebinds `text_splitter` and `chunks` from the
# demonstration cell above.
text_splitter= NLTKTextSplitter(chunk_size=500,
                             chunk_overlap=100)

chunks= text_splitter.split_documents(pages)
# Report how many chunks were produced.
print(len(chunks))

In [None]:
# Inspect the text of the fourth chunk (index 3).
chunks[3].page_content

## Embedding

In [None]:
# Google Generative AI embedding client, created with the same API key as the chat model.
# NOTE: `embedding_model` is rebound to a Hugging Face model in a later cell, and
# that later object is the one passed to Chroma.
embedding_model = GoogleGenerativeAIEmbeddings(google_api_key=api_key,model= "models/embedding-001")

In [None]:
# Display the embedding model object as the cell output.
embedding_model

## Vector Database

In [None]:
from langchain_community.embeddings import HuggingFaceEmbeddings

# Initialize a free embedding model from Hugging Face; other models can be
# found on the Hugging Face Hub.
# This rebinds `embedding_model`, replacing the Google embedding client created
# in an earlier cell.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

print("Hugging Face embedding model initialized.")

In [None]:
# Recreate the Chroma database with the Hugging Face embedding model
db_connection = Chroma.from_documents(chunks, embedding_model, persist_directory='./Chroma.db_')

In [None]:
# Expose the vector store as a retriever; search_kwargs asks for k=14 results per query.
retriever = db_connection.as_retriever(search_kwargs={"k": 14})
# Show the concrete retriever class for inspection.
print(type(retriever))

In [None]:
# Sanity-check retrieval with a sample question, then render the text of the
# first returned document as Markdown.
check_response = retriever.invoke('jelaskan Undang-Undang Dasar mengenai Hak dan Kewajiban Warga Negara?')
md(check_response[0].page_content)

In [None]:
def format_docs(docs):
    """Merge the text of several documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line (two newline characters). An empty input yields ``""``.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

## Prompt

In [None]:
from langchain_core.messages import SystemMessage
from langchain_core.prompts import ChatPromptTemplate, HumanMessagePromptTemplate

In [None]:
# Prompt for the RAG chain: a fixed system message followed by a human message
# template with two placeholders, {context} and {question}, which the chain
# fills in before the prompt is sent to the chat model.
chat_template = ChatPromptTemplate.from_messages([
    SystemMessage(content='Anda adalah AI pintar yang dapat menjawab pertanyaan sesuai konteks yang diberikan.'),
    HumanMessagePromptTemplate.from_template('''Jawab pertanyaan berikut berdasarkan konteks.
    konteks: {context}
    pertanyaan: {question}
    jawaban: ''')
])


In [None]:
from langchain_core.output_parsers import StrOutputParser
# Last stage of the RAG chain; the chain's result is printed as text in the
# invocation cells.
output_parser = StrOutputParser()

In [None]:
from langchain_core.runnables import RunnablePassthrough
# RAG pipeline: the input question is routed into the two prompt variables,
# then the filled prompt goes to the chat model and on to the output parser.
rag_chain = (
    {
        # Retrieve documents for the question and join their text via format_docs.
        "context": retriever | format_docs,
        # Pass the original question through unchanged.
        "question": RunnablePassthrough()
    }
    | chat_template
    | chat_model
    | output_parser
)

In [None]:
# NOTE: this re-defines format_docs, which an earlier cell already defines with
# the same behavior; the duplicate is redundant and is kept only so that this
# cell still runs on its own.
def format_docs(docs):
    """Merge the text of several documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line (two newline characters). An empty input yields ``""``.
    """
    page_texts = [document.page_content for document in docs]
    return "\n\n".join(page_texts)

In [None]:
# Join the text of ALL chunks (the whole split document, not only retrieved
# ones) and render it as Markdown; the output can be long.
formatted_docs = format_docs(chunks)
md(formatted_docs)

In [None]:
# Run the full RAG chain on a sample question and print the parsed answer.
response = rag_chain.invoke("jelaskan Undang-Undang Dasar mengenai Hak dan Kewajiban Warga Negara?")
print(response)

In [None]:
# Run the RAG chain on a second question; `response` is overwritten with the new answer.
response = rag_chain.invoke("apa itu Upaya Pewujudan Kewajiban sebagai Warga Negara?")
print(response)