In [22]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings


In [23]:
import ollama

In [24]:
# Load data
# The input location can be overridden with the WELL_REMARK_PATH environment
# variable so the notebook is not tied to one machine; when the variable is
# unset, the original local path is used.
import os
from pathlib import Path

DATA_PATH = Path(
    os.environ.get(
        "WELL_REMARK_PATH",
        "/Users/rianrachmanto/miniforge3/project/RAG_Drill_Report/data/well_remark.txt",
    )
)

# Fail early with an explicit message instead of a loader-internal error.
if not DATA_PATH.is_file():
    raise FileNotFoundError(f"Well remark file not found: {DATA_PATH}")

loader = TextLoader(str(DATA_PATH))
docs = loader.load()

In [25]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the loaded documents into chunks that are embedded individually.
# The splitter is built without arguments, so the library defaults for
# chunk size and overlap apply.
text_splitter = RecursiveCharacterTextSplitter()
documents = text_splitter.split_documents(documents=docs)


In [26]:
from langchain_community.embeddings import OllamaEmbeddings

In [27]:
from langchain_community.vectorstores import FAISS

In [28]:
# Embed every chunk with the "bge-m3" model served by Ollama and index the
# resulting vectors in an in-memory FAISS store.
embedding_model = OllamaEmbeddings(model="bge-m3", show_progress=True)
db = FAISS.from_documents(documents, embedding_model)



OllamaEmbeddings: 100%|██████████| 1121/1121 [00:59<00:00, 18.79it/s]


In [29]:
# Expose the vector store as a retriever returning the 4 most similar chunks.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)

In [30]:
from langchain_community.chat_models import ChatOllama
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [31]:
# Configure the local chat model served through Ollama.
local_model = "llama3.2"

# Generation halts as soon as any of these marker strings is produced.
stop_tokens = ["<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>"]

llm = ChatOllama(
    model=local_model,
    num_predict=400,
    stop=stop_tokens,
)

# Set up the RAG prompt:
# The template holds only the instruction, the question and the retrieved
# context. The Llama 3 header/EOT markers are intentionally NOT written here:
# ChatOllama sends chat messages to Ollama, which applies the model's own chat
# template, so hand-written markers would wrap the prompt a second time.
# The stray double quote after "context" in the instruction is also removed.
prompt_template = """Answer the following question based only on the provided context.
Question: {question}
Context: {context}"""

# from_template builds a single human message; the {question} and {context}
# input variables are inferred from the template text.
prompt = ChatPromptTemplate.from_template(prompt_template)

def format_docs(docs):
    """Concatenate the text of the given documents.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string with each document's ``page_content`` separated by a
        blank line; an empty string when ``docs`` is empty.
    """
    separator = "\n\n"
    contents = [item.page_content for item in docs]
    return separator.join(contents)

# Assemble the RAG pipeline:
#   1. the incoming question is sent to the retriever, whose documents are
#      flattened to text by format_docs, and is also passed through unchanged;
#   2. both values fill the prompt;
#   3. the model answers and the reply is reduced to a plain string.
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

# Query the chain (to test it, ask a question that is relevant to your data).
# A longer variant of the same query, kept for reference:
#   List date of running tools that encountered an obstruction, and also please
#   include if there is any sand encountered during running
question = "List date of running tools that encountered an obstruction"

# Echo the question first so it appears above the answer in the cell output.
print(question)
answer = rag_chain.invoke(question)
print(answer)

List date of running tools that encountered an obstruction


OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 25.53it/s]


Based on the provided context, the following dates correspond to running tools that encountered an obstruction:

1. 01-Jun-2002 15:30 - Found plugged (pipe rusty) inside 1/4" orifice at 1.187" Flow Release External P/T.
   The tool was run in multiple times before finding the plug.

2. 01-Jun-2002 18:30 - Plugged (pipe rusty) inside 1/4" orifice at 1.187" Flow Release External P/T.
   Again, the issue was found after running the tool multiple times.

3. 10-Mar-2000 10:00 - Tool could not move due to high pressure and stuck in place at a depth of 7150'.
   The attempt to recover the tool was unsuccessful.

4. 30-Dec-2012 4:00 - High inclination, RMT tools couldn't pass through at a depth of 3100'.
   After adding a roller to the toolstring, it was successfully run in again.
