In [None]:
# Basic RAG (retrieval-augmented generation) over a PDF document, using the phi3 model served through Ollama

In [None]:
#Installing various packages

!pip install langchain
!pip install langchain-core
!pip install langchain-community
!pip install gpt4all
!pip install pypdf
!pip install chromadb
!pip install colab-xterm

In [4]:
# Load the colab-xterm extension (installed above) before using the %xterm magic
%load_ext colabxterm

In [None]:
# Open a terminal inside Colab. Use it to install and run Ollama and to run the
# phi3 model before executing the cells that follow.
# NOTE(review): typical commands are `ollama serve` and `ollama pull phi3` — confirm against the Ollama docs.
%xterm

In [25]:
# We are going to use Phi3 as the LLM, accessed through LangChain's Ollama wrapper
# NOTE(review): presumably requires Ollama to be running with the phi3 model available — confirm

from langchain_community.llms import Ollama
llm = Ollama(model="phi3")

In [9]:
# Get the embedding model that is used to embed both the user input and the documents.

def get_embeddings():
  """Build the embedding model used for both documents and user queries.

  Returns:
    A ``GPT4AllEmbeddings`` instance configured with the
    ``all-MiniLM-L6-v2.gguf2.f16.gguf`` model.
  """
  # Imported inside the function, as in the rest of this notebook's helpers.
  from langchain_community.embeddings import GPT4AllEmbeddings

  model_file = "all-MiniLM-L6-v2.gguf2.f16.gguf"
  return GPT4AllEmbeddings(model_name=model_file)

In [10]:
# Get the document of interest for splitting and indexing

# PyPDFLoader lives in langchain_community (the same package the other cells
# import from); `langchain.document_loaders` is the deprecated location.
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader(r"/content/research_paper_1.pdf")
pages = loader.load()
print(len(pages)) #This should match the number of pages in the document

75


In [11]:
# Split the document and get the chunks

def chunk(docs, chunk_size=1000, chunk_overlap=150):
  """Split documents into overlapping text chunks for indexing.

  Args:
    docs: Documents to split (handed to the splitter's ``split_documents``).
    chunk_size: Maximum chunk length, measured with ``len``.
    chunk_overlap: Length of the overlap between neighbouring chunks,
      measured with ``len``.

  Returns:
    The chunked documents produced by the splitter.
  """
  try:
    from langchain_text_splitters import RecursiveCharacterTextSplitter
  except ImportError:  # older LangChain releases expose the splitter here
    from langchain.text_splitter import RecursiveCharacterTextSplitter

  text_splitter = RecursiveCharacterTextSplitter(
      # Separators are tried in order: paragraphs, lines, sentence ends,
      # words, then single characters.
      # The sentence separator is a regex lookbehind, so it is written as a raw
      # string and the splitter is told to interpret separators as regexes;
      # without that flag it is matched as literal text and never fires.
      separators=["\n\n", "\n", r"(?<=\. )", " ", ""],
      is_separator_regex=True,
      chunk_size=chunk_size,
      chunk_overlap=chunk_overlap,
      length_function=len,
  )
  return text_splitter.split_documents(docs)

In [12]:
# Split the loaded pages into chunks and report how many chunks were produced
chunks_of_pages = chunk(pages)
print(len(chunks_of_pages))

313


In [13]:
#Create a database to store the index and documents

def create_chromadb(chunks, persist_directory):
  """Embed the chunks and store them in a Chroma vector database.

  Args:
    chunks: Documents to embed and index.
    persist_directory: Directory handed to Chroma as ``persist_directory``.

  Returns:
    The ``Chroma`` vector store built from ``chunks``.
  """
  # Use the langchain_community path, matching the other imports in this
  # notebook; `langchain.vectorstores` is the deprecated location.
  from langchain_community.vectorstores import Chroma

  return Chroma.from_documents(
      documents=chunks,
      embedding=get_embeddings(),
      persist_directory=persist_directory,
  )

In [14]:
#Pass the directory of the folder in which to store the database
# NOTE(review): if this directory already holds data from an earlier run, the
# count printed below may exceed the number of chunks — confirm before re-running.

persist_directory = "/content/chroma"
embedded_database = create_chromadb(chunks_of_pages, persist_directory)
# `_collection` is a private attribute of the vector store, used here only as a sanity check.
print(embedded_database._collection.count()) #This should be equal to the number of chunks created earlier

Downloading: 100%|██████████| 45.9M/45.9M [00:00<00:00, 126MiB/s]
Verifying: 100%|██████████| 45.9M/45.9M [00:00<00:00, 556MiB/s]


313


In [15]:
# Function to join the page content of the documents into one string, separated by a "---" divider line

def get_context(docs):
  """Concatenate the ``page_content`` of each document into one string.

  Args:
    docs: Iterable of objects exposing a ``page_content`` attribute.

  Returns:
    The page contents in order, with a ``---`` divider (surrounded by blank
    lines) between consecutive documents.
  """
  divider = "\n\n---\n\n"
  page_texts = [document.page_content for document in docs]
  return divider.join(page_texts)

In [16]:
#Create a prompt template which takes in the content of documents as context and user query.

from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
# PromptTemplate is imported from langchain_core like the two names above;
# `langchain.prompts` is the deprecated location.
from langchain_core.prompts import PromptTemplate

# {context} is filled with the retrieved document text and {question} with the
# user query. The stray opening quote and the typos of the earlier wording are
# removed because this text is sent verbatim to the model.
PROMPT_TEMPLATE = """You are a smart AI assistant. You have access to this information {context}. Based ONLY on the given information answer the question {question}"""
prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=PROMPT_TEMPLATE,
)

In [17]:
#Get the earlier created database as a retriever
# Uses similarity search and requests k=5 results per query.

retriever = embedded_database.as_retriever(search_type= "similarity", search_kwargs= {'k': 5})

In [18]:
#Create a chain which takes in the user query and will produce the output

rag_chain = (
    # 'context': the query goes to the retriever and get_context joins the
    # returned documents into one string.
    # 'question': the query itself, forwarded by RunnablePassthrough.
    {'context': retriever | get_context, 'question': RunnablePassthrough()}
    | prompt  # fills the prompt template's 'context' and 'question' variables
    | llm  # the Ollama phi3 model created earlier
    | StrOutputParser()  # parses the model output into a string
)

In [27]:
# Ask a question about the indexed document by invoking the chain with the query string
question = "what is SuperGLUE?"
res = rag_chain.invoke(question)

In [29]:
# Display the chain's answer to the first question
res

"SuperGLUE is a standardized collection of datasets designed for evaluating and comparing performance across various NLP tasks, including Natural Language Inference (NLI), Winograd Schema Challenge (WSC), Multi-RC, SAT, WUP (Winograd Syntax Challenge), MNLI (Multi-Genre Natural Language Inference), MultiNLI (a large crowd-sourced dataset for NLI tasks), and others. It aims to provide a common framework for assessing the capabilities of different models like GPT-3, BERT variants, and their fine-tuned versions on these diverse challenges in natural language processing. The performance trends shown in Figure 3.8 indicate that model size and the number of context examples have a positive impact on SuperGLUE task performances, with few-shot learning capabilities also being significant as demonstrated by GPT-3's results."

In [32]:
# Second query against the same chain; `question` is rebound to the new text
question = "what is zero shot learner?"
res2 = rag_chain.invoke(question)

In [33]:
# Display the chain's answer to the second question
res2

'A zero-shot learner, in the context of natural language processing and machine learning, refers to a model that can perform tasks or understand concepts without having been explicitly trained (fine-tuned) with examples for those specific tasks. It relies solely on its pre-training knowledge acquired from a broad distribution of related tasks or data. This approach is most analogous to humans communicating instructions to the model using natural language descriptions, expecting it to apply general understanding and reasoning skills without direct task demonstrations. For instance, in Figure 2.1 mentioned in your text, zero-shot learning involves providing the model with a natural language instruction describing the task (and no examples) at test time. This method offers maximum convenience and robustness by avoiding spurious correlations but is challenging due to its reliance on generalizing from pre-training data without any gradient updates or fine-tuning during testing.\n\nZero-shot