In [1]:
pip install langchain

Collecting langchain
  Obtaining dependency information for langchain from https://files.pythonhosted.org/packages/72/e1/5b6da1b7feba08791e6351c323b0a6f6f4c0d553bc2fd9a9d8c4872373df/langchain-0.0.350-py3-none-any.whl.metadata
  Downloading langchain-0.0.350-py3-none-any.whl.metadata (13 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Obtaining dependency information for jsonpatch<2.0,>=1.33 from https://files.pythonhosted.org/packages/73/07/02e16ed01e04a374e644b575638ec7987ae846d25ad97bcc9945a3ee4b0e/jsonpatch-1.33-py2.py3-none-any.whl.metadata
  Downloading jsonpatch-1.33-py2.py3-none-any.whl.metadata (3.0 kB)
Collecting langchain-community<0.1,>=0.0.2 (from langchain)
  Obtaining dependency information for langchain-community<0.1,>=0.0.2 from https://files.pythonhosted.org/packages/f2/46/060f2f56d03a4db099ec9213e58ec971aec5d6922e5e16e8f29e840c1505/langchain_community-0.0.3-py3-none-any.whl.metadata
  Downloading langchain_community-0.0.3-py3-none-any.whl.metadata (7.0 kB)
Coll

In [2]:
pip install faiss-gpu

Collecting faiss-gpu
  Downloading faiss_gpu-1.7.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (85.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: faiss-gpu
Successfully installed faiss-gpu-1.7.2
Note: you may need to restart the kernel to use updated packages.


In [3]:
from langchain.document_loaders import PyPDFLoader

loader = PyPDFLoader("/kaggle/input/rag-data/Kevin Gurney - An introduction to neural networks-UCL Press (1997).pdf")
pages = loader.load_and_split()

In [4]:
# Define our text splitter
from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=64, separator=".")
docs = text_splitter.split_documents(pages)

In [5]:
import os

os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"

In [6]:
from langchain.embeddings import HuggingFaceInferenceAPIEmbeddings

embeddings = HuggingFaceInferenceAPIEmbeddings(
    api_key=os.environ["HUGGINGFACEHUB_API_TOKEN"], model_name="sentence-transformers/all-MiniLM-l6-v2"
)

### FAISS vector database used.
### for deployment case; use other database such as Cromadb, Pinecone, ElasticSearch ...

In [7]:
from langchain.vectorstores import FAISS

faiss_index = FAISS.from_documents(docs, embeddings)

r_docs = faiss_index.similarity_search("What is nepse?", k=5)

r_docs

### No system level prompt required for Mistral

In [8]:
from langchain.prompts import ChatPromptTemplate

chat_template = ChatPromptTemplate.from_messages(
    [
        ("human", "You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say this exact sentence: The question you are asking does not fall into our Knowledge base. Use about 200 words and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"),
    ]
)

In [9]:
chat_template

ChatPromptTemplate(input_variables=['context', 'question'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, say this exact sentence: The question you are asking does not fall into our Knowledge base. Use about 200 words and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"))])

### Using huggingfacehub computation for inference
### Use llm locally for relability

#### Below, the text generation seems to stop at mid, such cases are not seen when using llm locally.

In [10]:
from langchain.llms import HuggingFaceHub, HuggingFacePipeline

repo_id = 'mistralai/Mistral-7B-Instruct-v0.1'

llm = HuggingFaceHub(
    repo_id=repo_id, 
    model_kwargs={"temperature": 0.5, "max_length": 1024}
)



### Memory used to save previous conversations

In [11]:
from langchain.memory import ConversationSummaryMemory

memory = ConversationSummaryMemory(
    llm=llm, memory_key="chat_history", return_messages=True
)

### Pipeline chain for RAG
### Integrating llm, vector database, prompt, and memory

In [12]:
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=faiss_index.as_retriever(search_kwargs={"k": 3}),
    chain_type_kwargs={"prompt": chat_template},
    memory=memory
)

In [13]:
question = "What is a Neural Network and how does it work?"
qa_chain({"query": question})

{'query': 'What is a Neural Network and how does it work?',
 'chat_history': [SystemMessage(content='')],
 'result': ' A neural network is an interconnected assembly of simple processing elements, known as nodes, whose functionality is loosely based on the animal neuron. Neural networks are often used for statistical analysis and data modeling, in which their role is perceived as an alternative to standard nonlinear regression or cluster analysis techniques. They are typically used in problems that may be couched in terms of classification or forecasting, such as image and speech recognition.'}

### Checking memory

In [14]:
question = "Tell me more in detail"
qa_chain({"query": question})

{'query': 'Tell me more in detail',
 'chat_history': [SystemMessage(content='\nThe human asks what a Neural Network is and how it works. The AI explains that it is an interconnected assembly of simple processing elements, known as nodes, whose functionality is loosely based on the animal neuron. Neural networks are often used for statistical analysis and data modeling, in which their role is perceived as an alternative to standard nonlinear regression or cluster analysis techniques. They are typically used in problems that may be couched in terms of classification or forecasting, such')],
 'result': ' The von Neumann machine is a model of modern, general-purpose computers that describes their operation in a way that highlights their relation with the symbolic paradigm. It consists of a series of cycles that involve fetching instructions and data from memory, executing the instruction, and storing the result back in memory. This model was initially developed to understand the nature of 

In [15]:
question = "What are the different Neural Network architectures"
qa_chain({"query": question})

{'query': 'What are the different Neural Network architectures',
 'chat_history': [SystemMessage(content='\nThe human asks what a Neural Network is and how it works. The AI explains that it is an interconnected assembly of simple processing elements, known as nodes, whose functionality is loosely based on the animal neuron. Neural networks are often used for statistical analysis and data modeling, in which their role is perceived as an alternative to standard nonlinear regression or cluster analysis techniques. They are typically used in problems that may be couched in terms of classification or forecasting, such')],
 'result': ' Neural network architectures can be classified into three main types: recurrent, feedforward, and competitive. If a network is not one of these three types, it can be broken down into components of these forms. Recurrent networks have hidden units and may have reciprocal connections that are symmetric. The connectivity in recurrent networks is node to node, wh

In [16]:
question = "What is a Convolution?"
qa_chain({"query": question})

{'query': 'What is a Convolution?',
 'chat_history': [SystemMessage(content='\nThe human asks what a Neural Network is and how it works. The AI explains that it is an interconnected assembly of simple processing elements, known as nodes, whose functionality is loosely based on the animal neuron. Neural networks are often used for statistical analysis and data modeling, in which their role is perceived as an alternative to standard nonlinear regression or cluster analysis techniques. They are typically used in problems that may be couched in terms of classification or forecasting, such')],
 'result': ' A Convolution is a mathematical operation used in neural networks that models the basic "integrate-and-fire" mechanism of real neurons. It performs a weighted sum of its inputs and outputs a "1" if this sum exceeds a threshold, and a "0" otherwise. The Convolution can be thought of as a special case of the Perceptron, with a trivial set of A-units, each consisting of a single direct conne

In [17]:
question = "How is convolution neural network different from general Neural Networks?"
qa_chain({"query": question})

{'query': 'How is convolution neural network different from general Neural Networks?',
 'chat_history': [SystemMessage(content='\nThe human asks what a Neural Network is and how it works. The AI explains that it is an interconnected assembly of simple processing elements, known as nodes, whose functionality is loosely based on the animal neuron. Neural networks are often used for statistical analysis and data modeling, in which their role is perceived as an alternative to standard nonlinear regression or cluster analysis techniques. They are typically used in problems that may be couched in terms of classification or forecasting, such')],
 'result': ' A convolution neural network (CNN) is a specific type of neural network that is designed to process data with a grid-like topology, such as images. Unlike general neural networks, CNNs have a specialized architecture that includes convolutional layers, pooling layers, and fully connected layers. Convolutional layers use filters to extract

### Out of Context Data

In [18]:
question = "Tell me about Elon Musk's contribution in Neural Networks"
qa_chain({"query": question})

{'query': "Tell me about Elon Musk's contribution in Neural Networks",
 'chat_history': [SystemMessage(content='\nThe human asks what a Neural Network is and how it works. The AI explains that it is an interconnected assembly of simple processing elements, known as nodes, whose functionality is loosely based on the animal neuron. Neural networks are often used for statistical analysis and data modeling, in which their role is perceived as an alternative to standard nonlinear regression or cluster analysis techniques. They are typically used in problems that may be couched in terms of classification or forecasting, such')],
 'result': " Elon Musk's contribution in Neural Networks is not mentioned in the provided context."}