In [1]:
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains import create_retrieval_chain
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings


In [2]:
import ollama

In [3]:
# Load data
import os
from pathlib import Path

# Location of the drilling-report remarks file. The default is the original
# author's local path; set the REMARK_PATH environment variable to run this
# notebook on another machine without editing the cell.
DEFAULT_REMARK_PATH = "/Users/rianrachmanto/miniforge3/project/RAG_Drill_Report/data/remark.txt"
remark_path = Path(os.environ.get("REMARK_PATH", DEFAULT_REMARK_PATH))

# Fail early with an explicit message rather than an error raised from inside the loader.
if not remark_path.is_file():
    raise FileNotFoundError(
        f"Drilling report not found at {remark_path}; set REMARK_PATH to its location."
    )

# TextLoader is handed a plain string, exactly as before.
loader = TextLoader(str(remark_path))
docs = loader.load()

In [4]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Split the loaded report into chunks; sizing is kept in one place so it is easy to tune.
splitter_settings = {"chunk_size": 2048, "chunk_overlap": 30}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# `documents` holds the chunked pieces that get embedded and indexed later.
documents = text_splitter.split_documents(docs)


In [5]:
from langchain_community.embeddings import OllamaEmbeddings

In [6]:
from langchain_community.vectorstores import FAISS

In [7]:
# Embed every chunk with the Ollama-served embedding model and index the vectors in FAISS.
embedding_model = OllamaEmbeddings(model="nomic-embed-text", show_progress=True)
db = FAISS.from_documents(documents, embedding_model)

# Retriever over the index: similarity search with k=4 results per query.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4},
)


OllamaEmbeddings: 100%|██████████| 129/129 [00:17<00:00,  7.41it/s]


In [8]:
from langchain_community.chat_models import ChatOllama
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [9]:
# Configure the chat model served locally by Ollama.
local_model = "llama3.1"

# Llama 3 control tokens passed as stop sequences.
llama3_stop_tokens = ["<|start_header_id|>", "<|end_header_id|>", "<|eot_id|>"]

llm = ChatOllama(
    model=local_model,
    num_predict=400,
    stop=llama3_stop_tokens,
)

# Set up the RAG prompt.
# The prompt is sent through ChatOllama, i.e. as a chat message; Ollama applies the
# model's own chat template (role headers / end-of-turn markers) to each message.
# The template therefore must NOT contain hand-written Llama 3 control tokens:
# they would be wrapped a second time, duplicating the turn markers.
prompt_template = """
You are a drilling engineer and an analyst. Your task is reading the drilling report,
withing the context of the drilling report, answer the question based on the context of the document.
All you need to answer what you can find in the document. If you can't find the answer, you can say "I don't know".
Question: {question}
Context: {context}
"""

# {question} is the raw user query; {context} is the text of the retrieved chunks.
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

def format_docs(docs):
    """Concatenate the text of the given documents into one context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in iteration order, separated by a blank
        line; an empty string when ``docs`` is empty.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

# Retrieval step: fetch the chunks for the query and flatten them to a single string.
retrieve_context = retriever | format_docs

# The incoming query feeds both prompt variables: it is passed through unchanged
# as the question and is also used to retrieve the context.
chain_inputs = {"context": retrieve_context, "question": RunnablePassthrough()}

# Full pipeline: build inputs -> fill the prompt -> call the model -> plain-string answer.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()

# Query the chain (obviously, to test this you must ask a question relevant to your data).
question = "List all the problem during drilling?"
print(question)

# Run retrieval + generation once, then show the answer.
answer = rag_chain.invoke(question)
print(answer)

List all the problem during drilling?


OllamaEmbeddings: 100%|██████████| 1/1 [00:00<00:00, 40.34it/s]


Based on the drilling report, the following problems were encountered during drilling:

1. Continued slip and cut.
2. Troubleshoot issue with stuck lock pin on dead line anchor.
3. Slip and cut drill line.
4. Hung off block.
5. Trouble shot communication to tractor (Production Electronics Cartridge)
6. Trouble shot communication to tractor (Production Electronics Cartridge)
7. High current observed on tractor during RIH from 4400m to 4570m.

Note that these problems were either reported or encountered during the drilling operation, and may not be an exhaustive list of all potential issues.
