Importing the dependencies

In [2]:
import os
import warnings
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
from langchain_ollama import OllamaEmbeddings
import faiss
from langchain_community.vectorstores import FAISS 
from langchain_community.docstore.in_memory import InMemoryDocstore

In [4]:
from langchain_ollama import ChatOllama 
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough 
from langchain_core.prompts import ChatPromptTemplate

from langchain import hub

IMPORTING LOCAL LLM

In [5]:
llm = ChatOllama(model='llama3.2:3b', base_url='http://localhost:11434')
llm1 = ChatOllama(model='llama3.2:3b', base_url='http://localhost:11434')
llm2 = ChatOllama(model='deepseek-r1:1.5b', base_url='http://localhost:11434')

EMBEDDING

In [7]:
embeddings = OllamaEmbeddings(model='nomic-embed-text', base_url='http://localhost:11434')

# db_name = r"D:\NLP\LLM\Langchain and Ollama\09. Vector Stores and Retrievals\health_supplements"
# vector_store = FAISS.load_local(db_name, embeddings, allow_dangerous_deserialization=True)

Chain Routing between two LLMS differentiating between Reasoning and Questioning Prompt

In [24]:
prompt = """Given the input below, classify it as either `Reason` if it involves reasoning, explanations, justifications, opinions, or causal statements,
            or `Question` if it is a direct inquiry seeking factual or specific information.
            
            If the input contains a mix of reasoning and a question, classify it as `Reason`.

            Input: {user_input}
            Classification:"""


template = ChatPromptTemplate.from_template(prompt)

chain = template | llm | StrOutputParser()

In [10]:
reason_prompt = """
                You are an expert in analyzing and responding to reasoning-based inputs.
                Your task is to acknowledge the reasoning, provide a concise and relevant response, 
                and encourage further discussion if needed.
                             
                Input: {user_input}
                Answer:"""

reason_template = ChatPromptTemplate.from_template(reason_prompt)
reason_chain =  reason_template | llm2 | StrOutputParser()

In [11]:
question_prompt = """
                You are an expert in answering direct questions concisely and accurately.
                Your task is to provide a clear, well-structured, and informative response to the user's question.
                
                Question: {user_input}
                Answer:"""

question_template = ChatPromptTemplate.from_template(question_prompt)
question_chain = question_template | llm1 | StrOutputParser()

In [54]:
def rout(info):
    question_type = info['question_type'].strip().lower()
    if question_type == "reason":
        print("Routing to: Reason Chain (Using Deepseek R1)")  # Debug print
        return reason_chain
    else:
        print("Routing to: Question Chain (Using Llama3.2)")  # Debug print
        return question_chain

In [13]:
from langchain_core.runnables import RunnableLambda

In [19]:
# full_chain = {"question_type": chain, 'user_input': lambda x: x['user_input']} | RunnableLambda(rout)

In [20]:
# full_chain

{
  question_type: ChatPromptTemplate(input_variables=['user_input'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['user_input'], input_types={}, partial_variables={}, template='Given the input below, classify it as either `Reason` if it involves reasoning, explanations, or justifications, \n            or `Question` if it is a direct question seeking information.\n            \n            Input: {user_input}\n            Classification:'), additional_kwargs={})])
                 | ChatOllama(model='llama3.2:3b', base_url='http://localhost:11434')
                 | StrOutputParser(),
  user_input: RunnableLambda(lambda x: x['user_input'])
}
| RunnableLambda(rout)

In [23]:
# user_input= "Cricket is a popular sport because it requires teamwork, strategy, and skill. Do you agree?"  

# output = full_chain.invoke({'user_input': user_input})
# print(output)

Routing to: Question Chain (Using llm1)
Yes, I agree. Cricket's popularity can be attributed to its unique combination of individual skill, tactical complexity, and reliance on effective team coordination. The intricacies of the game, including the various playing formats (e.g., Test matches, One-Day Internationals, Twenty20), make it appealing to players and spectators alike due to the need for a balance between physical prowess and mental acuity.


Making Vector Base

Making Chunks of the wikipedia_context data and using RecursiveCharacterTextSplitter

In [38]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document

# Load Wikipedia text from file
with open("/Users/abhishek/Desktop/ML/GenAI/wikipedia_content.txt", "r", encoding="utf-8") as file:
    wikipedia_text = file.read()

# Wrap text in a Document object
docs = [Document(page_content=wikipedia_text)]

# Initialize text splitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=700, chunk_overlap=200)

# Split the document into chunks
chunks = text_splitter.split_documents(docs)


In [39]:
len(chunks)

71

In [32]:
vector = embeddings.embed_query("Hello World")

In [40]:
len(vector)
index = faiss.IndexFlatL2(len(vector))
index.ntotal, index.d

(0, 768)

INDEXING OF THE SCRAPPED DATA INTO CHUNKS

In [41]:
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)

In [42]:
vector_store.index.ntotal, vector_store.index.d

(0, 768)

In [43]:
ids = vector_store.add_documents(documents=chunks)

In [44]:
len(ids), vector_store.index.ntotal

(71, 71)

In [45]:
### Retreival
question = "What is a transformer?"
docs = vector_store.search(query=question, k=5, search_type="similarity")

In [46]:
docs

[Document(id='fb988468-ee5f-4a92-93f4-412e4d0d0c8a', metadata={}, page_content='In 2017, the Transformer network enabled advancements in generative models compared to older Long-Short Term Memory models,[38] leading to the first generative pre-trained transformer (GPT), known as GPT-1, in 2018.[39] This was followed in 2019 by GPT-2 which demonstrated the ability to generalize unsupervised to many different tasks as a Foundation model.[40]'),
 Document(id='5eac05f2-73e6-4939-a164-79f88aefc1f3', metadata={}, page_content='Improvements in transformer-based deep neural networks, particularly large language models (LLMs), enabled an AI boom of generative AI systems in the 2020s. These include chatbots such as ChatGPT, Copilot, Gemini, and LLaMA; text-to-image artificial intelligence image generation systems such as Stable Diffusion, Midjourney, and DALL-E; and text-to-video AI generators such as Sora.[9][10][11][12] Companies such as OpenAI, Anthropic, Microsoft, Google, and Baidu as well 

In [47]:
def retrieve(question):
    retrieved_docs = vector_store.search(query=question, k=5, search_type="similarity")
    return "\n\n".join([doc.page_content for doc in retrieved_docs]) 

In [55]:
# ====== Create Full RAG Chain ======
full_chain = {
    "question_type": chain,
    "context": lambda x: retrieve(x['user_input']),  # Retrieve relevant documents
    "user_input": lambda x: x['user_input']
} | RunnableLambda(rout)

In [56]:
user_input = "Explain why transformers are better than RNNs."
output = full_chain.invoke({'user_input': user_input})

print(output)

Routing to: Question Chain (Using Llama3.2)
Transformers have several advantages over Recurrent Neural Networks (RNNs):

1. **Parallelization**: Transformers can process sequences in parallel, leveraging multiple GPU cores or even entire machines to speed up inference time. In contrast, RNNs are inherently sequential and require more computational resources for longer sequences.

2. **Long-range dependencies**: Transformers can capture long-range dependencies more effectively than RNNs due to their attention mechanism, which allows the model to weigh the importance of different parts of the input sequence relative to each other.

3. **Scalability**: Transformers can handle larger sequence lengths and more complex data sets than RNNs, making them a popular choice for natural language processing tasks like machine translation and text classification.

4. **Efficiency in terms of training time**: While transformers are computationally expensive during training due to the need for large me