In [23]:
from langchain_community.document_loaders import WebBaseLoader
import os
from dotenv import load_dotenv

In [24]:
# Load key/value pairs from a local .env file into the process environment.
# The value displayed below the cell is load_dotenv's return value
# (presumably True when a .env file was found and parsed — confirm in the python-dotenv docs).
load_dotenv()

True

In [25]:
# LangSmith tracing settings this notebook expects in the process environment
# (normally supplied by the .env file loaded in the previous cell).
LANGSMITH_ENV_VARS = ("LANGCHAIN_TRACING_V2", "LANGCHAIN_PROJECT", "LANGCHAIN_API_KEY")

# os.environ only accepts str values, so assigning the None that os.getenv
# returns for an unset variable raises TypeError. Copy only the values that
# exist and report the missing names instead of crashing the cell.
missing_langsmith_vars = []
for name in LANGSMITH_ENV_VARS:
    value = os.getenv(name)
    if value is None:
        missing_langsmith_vars.append(name)
    else:
        os.environ[name] = value

# Only variable names are printed, never their values, so no secret is leaked.
if missing_langsmith_vars:
    print(f"LangSmith tracing variables not set: {', '.join(missing_langsmith_vars)}")

In [26]:
# Article used as the knowledge source for the retrieval pipeline below.
article_url = "https://magazine.sebastianraschka.com/p/practical-tips-for-finetuning-llms"
loader = WebBaseLoader(article_url)

In [27]:
# Fetch the article through the loader; later cells read the text of the first
# result via docs[0].page_content.
docs = loader.load()

In [28]:
# Sanity check: length of the first loaded document's text, measured before it
# is stripped in the next cell.
len(docs[0].page_content)

23213

In [29]:
# Trim leading and trailing whitespace from the scraped text before chunking.
# The document is updated in place; re-running this cell is harmless.
first_doc = docs[0]
first_doc.page_content = first_doc.page_content.strip()

In [30]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [31]:
# Chunking parameters for the splitter, named so they are easy to tune.
chunk_size = 2048
chunk_overlap = 64

splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)

In [32]:
# Split the loaded article into chunks using the splitter configured above;
# the resulting chunks are what gets embedded and indexed later.
documents = splitter.split_documents(docs)

In [33]:
from langchain_ollama import OllamaEmbeddings, ChatOllama

In [34]:
# Ollama models: one to embed the chunks, one to generate the answers.
embedding_model_name = "mxbai-embed-large"
chat_model_name = "llama3.1"

embeddings = OllamaEmbeddings(model=embedding_model_name)
llm = ChatOllama(model=chat_model_name)

In [35]:
from langchain_chroma import Chroma

In [36]:
# Embed every chunk with the embedding model and index it in a Chroma store.
vector_store_db = Chroma.from_documents(
    embedding=embeddings,
    documents=documents,
)

In [74]:
# The number of results must be passed through `search_kwargs`. A bare `k=1`
# keyword is not a retriever option: it is dropped, and the retriever falls
# back to its default result count.
# NOTE: the variable name keeps its original spelling because later cells
# reference `retriver`.
retriver = vector_store_db.as_retriever(search_kwargs={"k": 1})

#### `create_stuff_documents_chain` is a helper function that "stuffs" all of the input documents into the prompt. It also handles formatting the docs as strings.

In [45]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

In [90]:
# Prompt for the question-answering step. `{input}` receives the user question
# and `{context}` receives the retrieved documents.
# NOTE(review): "contex" in the template looks like a typo for "context"; it is
# left untouched here because it is part of the text sent to the model.
qa_template = """
Answer the user input question based only on the provided contex. Make sure to keep the answer concise.

<question>
{input}
</question>

<context>
{context}
</context>
"""

prompt = ChatPromptTemplate.from_template(qa_template)

In [91]:
# Chain that formats the supplied documents into the prompt's {context} slot
# and passes the filled prompt to the chat model.
document_chain = create_stuff_documents_chain(
    prompt=prompt,
    llm=llm,
)

In [92]:
# Display the chain's repr to inspect its stages (input formatting, prompt,
# chat model, string output parser).
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), config={'run_name': 'format_inputs'})
| ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\nAnswer the user input question based only on the provided contex. Make sure to keep the answer concise.\n\n<question>\n{input}\n</question>\n\n<context>\n{context}\n</context>\n'))])
| ChatOllama(model='llama3.1')
| StrOutputParser(), config={'run_name': 'stuff_documents_chain'})

In [62]:
from langchain.chains import create_retrieval_chain

In [93]:
# Preview only: build the retrieval chain and display its repr without keeping
# a reference. A later cell builds the same chain again and assigns it.
create_retrieval_chain(retriver,document_chain)

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x10b9abf10>), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), config={'run_name': 'format_inputs'})
            | ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\nAnswer the user input question based only on the provided contex. Make sure to keep the answer concise.\n\n<question>\n{input}\n</question>\n\n<context>\n{context}\n</context>\n'))])
            | ChatOllama(model='llama3.1')
            | StrOutputParser(), config={'run_name': 'stuff_documents_chain

In [94]:
# Full RAG pipeline: retrieve documents for the question, then answer from them.
retrival_chain = create_retrieval_chain(
    retriever=retriver,
    combine_docs_chain=document_chain,
)

In [96]:
# The chain expects the question under the "input" key.
question_payload = {"input": "Does LoRA Need to Be Enabled for All Layers ?"}
response = retrival_chain.invoke(question_payload)

In [97]:
# Show only the generated answer; the chain's result is a mapping that stores
# the answer under the 'answer' key.
response['answer']

'No. LoRA needs to be enabled across all layers to maximize model performance.'

<img src="Screenshot 2024-08-16 at 2.10.27 PM.png" alt="Screenshot 2024-08-16 at 2.10.27 PM">