In [1]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
import ollama
import logging
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferWindowMemory
from langchain_community.document_loaders import (
    PyPDFLoader,
    WebBaseLoader,
    UnstructuredExcelLoader,
    UnstructuredImageLoader,
    UnstructuredPowerPointLoader,
)

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:

def pdf_loader(file):
    try:
        return PyPDFLoader(file).load_and_split()
    except Exception as e:
        logging.error("An error occurred: %s", e, exc_info=True)


def ppt_loader(file):
    try:
        return UnstructuredPowerPointLoader(file).load_and_split()
    except Exception as e:
        logging.error("An error occurred: %s", e, exc_info=True)


def excel_loader(file):
    try:
        return UnstructuredExcelLoader(file).load_and_split()
    except Exception as e:
        logging.error("An error occurred: %s", e, exc_info=True)


def img_loader(file):
    try:
        return UnstructuredImageLoader(file).load()
    except Exception as e:
        logging.error("An error occurred: %s", e, exc_info=True)


def url_loader(url):
    try:
        return WebBaseLoader(url).load_and_split()
    except Exception as e:
        logging.error("An error occurred: %s", e, exc_info=True)


def chunks_embeddings(data):
    try:
        print(data)
        chunks = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=200
        ).split_documents(data)
        embeddings = OllamaEmbeddings(model="nomic-embed-text")
        vector_store = Chroma.from_documents(documents=chunks, embedding=embeddings)
        return vector_store
    except Exception as e:
        logging.error("An error occured in chunk_embedding fn: %s", e, exc_info=True)


import logging
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferWindowMemory
import ollama

def ollama_llm(question, context):
    try:
        formatted_prompt = f"Question: {question}\n\nContext: {context}"
        memory = ConversationBufferWindowMemory(k=3)
        
        def get_response(prompt):
            response = ollama.chat(
                model="llama3",
                messages=[{"role": "user", "content": prompt}]
            )
            return response["message"]["content"]
        
        conversation = ConversationChain(
            llm=get_response, memory=memory, verbose=True
        )
        
        result = conversation.predict(input=formatted_prompt)
        
        return result
    except Exception as e:
        logging.error("An error occurred in ollama_llm function: %s", e, exc_info=True)


def combine_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)




In [3]:
def rag_chain(question, retriever):
    print("Rag_chain function called with question:", question)
    retrieved_docs = retriever.invoke(question)
    formatted_context = combine_docs(retrieved_docs)
    return ollama_llm(question, formatted_context)


file = pdf_loader('fizza.pdf')
print(file)
chunks = chunks_embeddings(file)
ret=chunks.as_retriever()
print("this is ret",ret)


rag = rag_chain(question="what i asked first",retriever=chunks.as_retriever())
print(rag)

[Document(metadata={'source': 'fizza.pdf', 'page': 0}, page_content='Electronically verified report. No signature required. Lab reports should be interpreted by a physician in correlation with clinical and radiologic findings.Department of HematologyReporting DateTime: 14-Jun-2024 11:50Collection DateTime: 14-Jun-2024 08:01\nBlood C/E (Complete, CBC)\nTest Reference Value Unit70003-14-06\n14-Jun-2024 08:01\nHb 11.5  -  14.5 g/dl 7.9\nTotal RBC 4  -  6 x10^12/l 3.4\nHCT 36  -  46 % 25\nMCV 75  -  95 fl 74\nMCH 26  -  32 pg 23\nRDW CV % 11.5  -  14.5 % 18.1\nMCHC 30  -  35 g/dl 32\nPlatelet Count 150  -  400 x10^9/l 218\nMPV fl 7  -  11 fl 10.9\nWBC Count (TLC) 4  -  12 x10^9/l 5.5\nNeutrophils 32.0  -  62.0 % 74.0\nLymphocytes 46.0  -  55.0 % 17.0\nMonocytes Less Than 5.0 % 8.0\nEosinophils Less Than 3.0 % 1.0\nAbs. Neutrophils 2.00  -  7.00 x10^9/l 4.08\nAbs. Lymphocytes 1.00  -  3.00 x10^9/l 0.88\nAbs. Monocytes 0.20  -  1.00 x10^9/l 0.46\nAbs. Eosinophils 0.02  -  0.50 x10^9/l 0.06\n

Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2
  warn_deprecated(
ERROR:root:An error occurred in ollama_llm function: 2 validation errors for ConversationChain
llm
  instance of Runnable expected (type=type_error.arbitrary_type; expected_arbitrary_type=Runnable)
llm
  instance of Runnable expected (type=type_error.arbitrary_type; expected_arbitrary_type=Runnable)
Traceback (most recent call last):
  File "/tmp/ipykernel_17588/3682013685.py", line 66, in ollama_llm
    conversation = ConversationChain(
                   ^^^^^^^^^^^^^^^^^^
  File "/workspace/.pyenv_mirror/user/current/lib/python3.12/site-packages/langchain_core/_api/deprecation.py", line 203, in warn_if_direct_instance
    return wrapped(self, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/workspace/.pyenv_mirror/user/current/lib/python3.12/site-packages/langchain_core/_api/deprecation.py", line 203, in warn_if_direct_instance
    return wrapped(

this is ret tags=['Chroma', 'OllamaEmbeddings'] vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x7f887e4ca2a0>
Rag_chain function called with question: what i asked first
None
