### Required Libraries

In [1]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.llms import Ollama
from langchain.prompts import PromptTemplate
from operator import itemgetter
from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain_community.document_loaders import DirectoryLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders.pdf import PyMuPDFLoader

### Defining the models

In [2]:
Model = "llama3"; 
Embedding_Model = "mxbai-embed-large"
model = Ollama(model=Model)

### Loading data and splitting into chunks

In [3]:
loader = DirectoryLoader(path="testDataMD",glob="*.txt")
documents = loader.load()

text_splitter = CharacterTextSplitter(separator="##", chunk_size=1000, chunk_overlap=0)
docs2 = text_splitter.split_documents(documents)

Created a chunk of size 1161, which is longer than the specified 1000
Created a chunk of size 2033, which is longer than the specified 1000
Created a chunk of size 2603, which is longer than the specified 1000
Created a chunk of size 1782, which is longer than the specified 1000
Created a chunk of size 5099, which is longer than the specified 1000
Created a chunk of size 2110, which is longer than the specified 1000
Created a chunk of size 1398, which is longer than the specified 1000
Created a chunk of size 3672, which is longer than the specified 1000
Created a chunk of size 1463, which is longer than the specified 1000
Created a chunk of size 1654, which is longer than the specified 1000
Created a chunk of size 2628, which is longer than the specified 1000
Created a chunk of size 1706, which is longer than the specified 1000
Created a chunk of size 1773, which is longer than the specified 1000
Created a chunk of size 2232, which is longer than the specified 1000
Created a chunk of s

### Creating the vectorstore

In [4]:
vectors = FAISS.from_documents(docs2, OllamaEmbeddings(model = Model))
retriever2 = vectors.as_retriever()

### Defining the RAG Model

In [5]:
# A function to combine retrieved documents into a string 

def combineDocs(docs):
    context = "\n\n".join(f'Document [ Metadata : \n{doc.metadata} \nPage Content : \n{doc.page_content} ]' for doc in docs)
    return context

#### Method 1 (Complicated internet answer)

In [6]:
def create_model(vectorDB):
    
    # This is where the conversation will be stored
    memory = ConversationBufferMemory()

    # Creating a conversation model for ConvChain

    def rag_retrieval(question, memory):

        # Retrieve docs from vector DB
        docs = vectorDB.similarity_search(question, k = 4)
        context = combineDocs(docs)

        conversation = memory.load_memory_variables({}).get('history', '')

        # Add context to the prompt
        prompt = f'Answer the question based on the context below and the previous conversations. If you cannot\nanswer the question, reply "Oof that\'s a tough one, i don\'t really know this"\n\nPrevious Conversation : \n{conversation}\n\nContext : \n{context}\n\nQuestion from user: {question}'

        response = model.invoke(prompt)

        memory.add_user_message(question)
        memory.add_bot_message(response)

        return response
    
    chain = ConversationChain(memory = memory, conversation_model = rag_retrieval)

    return chain

#### Method 2 (Self)

In [53]:
chatHistory = "Conversation So Far : \n"

template = """
The following is a friendly conversation between a human and an AI. 
The AI is talkative and provides lots of specific details from it's knowledge base or from the previous conversation depending on what the question is. 
If the AI does not know the answer to a question, it truthfully says it does not know.


Conversation History : \n{conversation}

AI's Knowledge base : {context}

Human's Question : {question}

AI : 

"""

prompt = PromptTemplate.from_template(template)

def chat(question): 
    
    global chatHistory
    
    # Retrieve docs from vector DB
    docs = vectors.similarity_search(question, k = 4)
    contextString = combineDocs(docs)
    
    query = prompt.format(conversation = chatHistory, context = contextString, question = question)

    response = model.invoke(query)
    
    chatHistory = chatHistory + "\nHuman's question : " + question + "\nAI : " + response

    return response
