In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'my-pdf:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4435700%2F7616361%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240213%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240213T172735Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Da0b0d0d35a1a466eea86e7daa08c18f4305c265fec67bfb14f824aa199b1a1806b8b80a793d1840d04d29c9bcbcd03cf4420106264861c05606b6a680c925ef0cbd9827dba15fbef8fa7fb9c488e18e31e20aa964a34a6dfe29af558e11bbb72e7758d6c1c721ab41ad95fd8db8f22b9efd7e05906865c7c1f6e996c0994f9998600093b7ac68a050387e1dff802e4be916f8e6d8c53883e2b102c82e360a986f248d697e76109ab59bbef4a2bf6b3e3e1de3edd6705b1b343c4b5245fcc6c054fa63f2d78f4aafda86574e042c7e34b3bbec88fee7ec292310111b084773ac17a7f117922b3cbb655d25b2258c7513a533a5733987acd734cf7e92ca5566072'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading my-pdf, 9700761 bytes compressed
Downloaded and uncompressed: my-pdf
Data source import complete.


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/my-pdf/ML_book.pdf


# Display a list of GPU devices
In google colab change runtime type to GPU

In [None]:
# Verify wheather GPU is available or not
! nvidia-smi -L

GPU 0: Tesla T4 (UUID: GPU-ca388afa-b8ae-c95a-7376-8b88f5f1e0a0)


# Installing Libraries

In [None]:
! pip install langchain
! pip install chromadb
! pip install pypdf
! pip install tiktoken
!pip install huggingface_hub
! pip install InstructorEmbedding
! pip install sentence_transformers==2.2.2

! pip install bitsandbytes
! pip install transformers
!pip install sentence_transformers
! pip install accelerate
! pip install faiss-gpu

Collecting langchain
  Downloading langchain-0.1.7-py3-none-any.whl (815 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m815.9/815.9 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.4-py3-none-any.whl (28 kB)
Collecting jsonpatch<2.0,>=1.33 (from langchain)
  Downloading jsonpatch-1.33-py2.py3-none-any.whl (12 kB)
Collecting langchain-community<0.1,>=0.0.20 (from langchain)
  Downloading langchain_community-0.0.20-py3-none-any.whl (1.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.7/1.7 MB[0m [31m25.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-core<0.2,>=0.1.22 (from langchain)
  Downloading langchain_core-0.1.23-py3-none-any.whl (241 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m241.2/241.2 kB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langsmith<0.1,>=0.0.83 (from langchain)
  Downloading langsmith-

# Importing the libraries/packages

In [None]:
import os
import langchain
import time
import textwrap

# loaders
from langchain.document_loaders import PyPDFLoader
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader

# retrievers
from langchain.chains import RetrievalQA

# models
from langchain.llms import HuggingFacePipeline
from langchain.embeddings import HuggingFaceInstructEmbeddings

# langchain embedding
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings import OpenAIEmbeddings

# splits
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.text_splitter import SentenceTransformersTokenTextSplitter

# prompts
from langchain import PromptTemplate, LLMChain

# vector stores
from langchain.vectorstores import FAISS

# pytorch and transformer
import torch
import transformers
from transformers import RobertaTokenizer,AutoConfig
from transformers import AutoModelForCausalLM,AutoTokenizer, pipeline

## Other important packages ##
import chromadb
from langchain.vectorstores import Chroma
from chromadb.utils import embedding_functions
from langchain.embeddings import SentenceTransformerEmbeddings
from pypdf import PdfReader
from langchain_community.llms import LlamaCpp
from langchain.llms import CTransformers
from chromadb.utils.embedding_functions import SentenceTransformerEmbeddingFunction

print("All libraries has imported successfully.")

All libraries has imported successfully.


# Configuration class (CFG)

In [None]:
class CFG:
    # LLMs
    model_name = 'BERT'
    num_beams=1,
    repetition_penalty = 1.15

    # Path
    PDFs_path = '/content/ML_book.pdf'

    # splitting
    split_chunk_size = 800
    split_overlap = 0

    # similar passages
    k = 3

# Model selecting Function
we can pass multiple model inside this function and select the model according to the usage.

In [None]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch
def get_model(model = CFG.model_name):

    print('\nDownloading model: ', model, '\n\n')

    if model == 'BERT':

        model_name = 'gpt2'  # we can replace this with a more specific variant if needed
        tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        model = GPT2LMHeadModel.from_pretrained(model_name)

        max_len=1024

    else:
        print("Not implemented model (tokenizer and backbone)")

    return tokenizer, model, max_len


In [None]:
tokenizer, model, max_len = get_model(model = CFG.model_name)


Downloading model:  BERT 




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
# Evaluating our model

model.eval()

GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)

# Pipelines
Hugging Face pipeline The term "pipeline" is often used to describe the architecture or workflow of a model or system that processes input data through a series of stages, with each stage performing a specific operation.Various tasks are often broken down into subtasks, and a pipeline structure helps organize and execute these tasks in a coordinated manner.

In [None]:
pipe = pipeline(
    task = "text-generation",
    model = model,
    tokenizer = tokenizer,
    pad_token_id = tokenizer.eos_token_id,
    max_length = max_len,
   # temperature = CFG.temperature,
  #  top_p = CFG.top_p,
    repetition_penalty = CFG.repetition_penalty
)

llm = HuggingFacePipeline(pipeline = pipe)

In [None]:
print(llm)

[1mHuggingFacePipeline[0m
Params: {'model_id': 'gpt2', 'model_kwargs': None, 'pipeline_kwargs': None}


# Langchain
LangChain is an open source orchestration framework for the development of applications using large language models (LLMs). Available in both Python- and Javascript-based libraries, LangChain’s tools and APIs simplify the process of building LLM-driven applications like chatbots and virtual agents.

In [None]:
# Loading the document ,it may contains multiple pdf's but here i have passed only one (ML_book.pdf)

pdf_loader = PyPDFLoader(
    CFG.PDFs_path,
)

# Use the below code if there are multiple pdfs
# loader = DirectoryLoader(
#     CFG.PDFs_path,
#     glob="./*.pdf",
#     loader_cls=PyPDFLoader,
#     show_progress=True,
#     use_multithreading=True
# )

documents = pdf_loader.load()

In [None]:
# Counting the total number of pages in the book
print("Total number of pages is: ",len(documents))

Total number of pages is:  234


In [None]:
documents[9].page_content

'2 0 Preface\nStructure of the Book\nIntroductionDensity EstimationGraphical ModelsKernelsOptimizationConditional DensitiesConditional Random FieldsLinear ModelsStructured EstimationDuality and EstimationMomentMethodsReinforcement Learning\nIntroductionDensity EstimationGraphical ModelsKernelsOptimizationConditional DensitiesConditional Random FieldsLinear ModelsStructured EstimationDuality and EstimationMomentMethodsReinforcement Learning\nIntroductionDensity EstimationGraphical ModelsKernelsOptimizationConditional DensitiesConditional Random FieldsLinear ModelsStructured EstimationDuality and EstimationMomentMethodsReinforcement Learning\nCanberra, August 2008'

# API token

In [None]:
import os
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "HUGGINGFACEHUB_API_TOKEN"

# To set-up "HUGGINGFACEHUB_API_TOKEN" we need to login Huggingface.com and get our unique "HUGGINGFACEHUB_API_TOKEN"

print("This process is done")

This process is done


# Text splitter
Text are splitted or broken down into chunks for furthur storing these chunks in the vector database.

In [None]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = CFG.split_chunk_size,
    chunk_overlap = CFG.split_overlap
)

texts = text_splitter.split_documents(documents)

print(f'We have created {len(texts)} chunks from {len(documents)} pages')

We have created 616 chunks from 234 pages


# Embeddings
"Embedding" refers to the process of representing data, often categorical or discrete data, in a continuous vector space. This is commonly used in natural language processing (NLP) and other areas where the input data may have a discrete nature, such as categorical variables or words.

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings()

.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

# Loading Vector Databse

In [None]:
from langchain.vectorstores import FAISS

vectordb = FAISS.from_documents(texts, embeddings)

In [None]:
# we have just loaded the vector store and  now query the database using similarity search.

query = "what is machine learning"
docs = vectordb.similarity_search(query)
print(docs)

[Document(page_content='Search  Advanced Search  Preferences Web    Scholar   Results 1 - 10 of about 10,500,000 for machine learning. (0.06 seconds) Machine learning - Wikipedia, the free encyclopediaAs a broad subfield of artificial intelligence, machine learning is concerned with the designand development of algorithms and techniques that allow ...en.wikipedia.org/wiki/Machine_learning - 43k - Cached - Similar pagesMachine Learning textbookMachine Learning is the study of computer algorithms that improve automatically throughexperience. Applications range from datamining programs that ...www.cs.cmu.edu/~tom/mlbook.html - 4k - Cached - Similar pagesmachine learningwww.aaai.org/AITopics/html/machine.html - Similar pagesMachine LearningA list of links to papers and other resources on machine', metadata={'source': '/content/ML_book.pdf', 'page': 11}), Document(page_content="Similar pagesAmazon.com: Machine Learning: Tom M. Mitchell: BooksAmazon.com: Machine Learning: Tom M. Mitchell: Bo

In [None]:
# Testing whether vector DB was loaded or not
vectordb.similarity_search('Linear Regression')

[Document(page_content='when attempting to classify webpages, ymight be a permutation, when\nattempting to match objects, to perform collaborative ﬁltering, or to rank\ndocuments in a retrieval setting. Equally well, ymight be an annotation of\na text, when performing named entity recognition. Each of those problems\nhas its own properties in terms of the set of ywhich we might consider\nadmissible, or how to search this space. We will discuss a number of those\nproblems in Chapter ??.\nRegression is another prototypical application. Here the goal is to esti-\nmate a real-valued variable y∈Rgiven a pattern x(see e.g. Figure 1.7). For\ninstance, we might want to estimate the value of a stock the next day, the\nyield of a semiconductor fab given the current process, the iron content of', metadata={'source': '/content/ML_book.pdf', 'page': 18}),
 Document(page_content='Introduction to Machine Learning\nAlex Smola and S.V.N. Vishwanathan\nYahoo! Labs\nSanta Clara\n–and–\nDepartments of Sta

In [None]:
# Testing MMR (Max Marginal Relevance) search
question = "What is machine learnng"
vectordb.max_marginal_relevance_search(question, k = CFG.k)

[Document(page_content='Search  Advanced Search  Preferences Web    Scholar   Results 1 - 10 of about 10,500,000 for machine learning. (0.06 seconds) Machine learning - Wikipedia, the free encyclopediaAs a broad subfield of artificial intelligence, machine learning is concerned with the designand development of algorithms and techniques that allow ...en.wikipedia.org/wiki/Machine_learning - 43k - Cached - Similar pagesMachine Learning textbookMachine Learning is the study of computer algorithms that improve automatically throughexperience. Applications range from datamining programs that ...www.cs.cmu.edu/~tom/mlbook.html - 4k - Cached - Similar pagesmachine learningwww.aaai.org/AITopics/html/machine.html - Similar pagesMachine LearningA list of links to papers and other resources on machine', metadata={'source': '/content/ML_book.pdf', 'page': 11}),
 Document(page_content='10 1 Introduction\nFig. 1.6. Left: binary classiﬁcation. Right: 3-class classiﬁcation. Note that in the\nlatter c

In [None]:
from langchain.chains.question_answering import load_qa_chain
from langchain import HuggingFaceHub

# Prompt Template
When working with LLMs, we typically provide a prompt or input text to generate a desired output. The structure of the prompt can vary based on the task you want the model to perform. Here's a general template for a prompt:

In [None]:
prompt_template = """
If you don't know the answer of any particular question asked by
user then just say that you don't know.
Answer in the same language the question was asked.
{context}

Question: {question}
Answer:"""



PROMPT = PromptTemplate(
    template = prompt_template,
    input_variables = ["context", "question"]
)

In [None]:
llm_chain = LLMChain(prompt=PROMPT, llm=llm)
llm_chain

LLMChain(prompt=PromptTemplate(input_variables=['context', 'question'], template="\nIf you don't know the answer of any particular question asked by\nuser then just say that you don't know.\nAnswer in the same language the question was asked.\n{context}\n\nQuestion: {question}\nAnswer:"), llm=HuggingFacePipeline(pipeline=<transformers.pipelines.text_generation.TextGenerationPipeline object at 0x77fc489bf3a0>))

In [None]:
retriever = vectordb.as_retriever(search_kwargs = {"k": CFG.k, "search_type" : "similarity"})

qa_chain = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type = "stuff", # map_reduce, map_rerank, stuff, refine
    retriever = retriever,
    chain_type_kwargs = {"prompt": PROMPT},
    return_source_documents = True,
    verbose = False
)

# Formating LLM Response

In [None]:
def wrap_text_preserve_newlines(text, width=500):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')

    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]

    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)

    return wrapped_text
def process_llm_response(llm_response):
    ans = wrap_text_preserve_newlines(llm_response['result'])

    sources_used = ' \n'.join(
        [
            source.metadata['source'].split('/')[-1][:-4] + ' - page: ' + str(source.metadata['page'])
            for source in llm_response['source_documents']
        ]
    )

    ans = ans + '\n\nSources: \n' + sources_used
    return ans

# Creating llm_ans funtion
This function will formate the response in well mannerd form.

In [None]:
def llm_ans(query):
    start = time.time()

    llm_response = qa_chain.invoke(query)
    ans = process_llm_response(llm_response)

    end = time.time()

    time_elapsed = int(round(end - start, 0))
    time_elapsed_str = f'\n\nTime elapsed: {time_elapsed} s'
    return ans + time_elapsed_str

In [None]:
# Checking model_max_length and config.max_position_embedding

print(tokenizer.model_max_length)
print(model.config.max_position_embeddings)

1024
1024


# Testing llm_ans function
This function may take few minutes to run

In [None]:
# query = "what is machine learning"
# print(llm_ans(query))

# RAG Technique
Advanced RAG (Retrieval-Augmented Generation) In the context of natural language processing, RAG refers to a model architecture that combines retrieval-based methods with generative models. It is often used for tasks like open-domain question answering, where a retriever selects relevant passages, and a generator creates a coherent response.

In [None]:
def load_doc(path):
    reader = PdfReader(path)
    pdf_texts = [p.extract_text().strip() for p in reader.pages]

    # Filter the empty strings will have only those pages that have the text
    pdf_texts = [text for text in pdf_texts if text]

    return pdf_texts

# Function for dividing documents into chunks

In [None]:
def chunks(path_of_doc):

    #we will use the path of doc to pass it to load_doc
    pdf_texts= load_doc(path_of_doc)
    character_splitter = RecursiveCharacterTextSplitter(

        separators=["\n\n", "\n", ". ", " ", ""],
        chunk_size=2000,
        chunk_overlap=20
    )

    character_split_texts = character_splitter.split_text('\n\n'.join(pdf_texts))

    #we want every token to have 256 characters
    token_splitter = SentenceTransformersTokenTextSplitter(chunk_overlap=0, tokens_per_chunk=256)
    token_split_texts = []
    for text in character_split_texts:
    #splitting the text and storing in list
        token_split_texts += token_splitter.split_text(text)

    return token_split_texts

# Embedding Function
used Embedding function as sentenceteanformerembedding function.

In [None]:
def embedding_function():
    embedding_function = SentenceTransformerEmbeddingFunction()
    return embedding_function

# Function for putting chunks into vector database

In [None]:
def doc_put_in_vectordb(path_of_doc):
    token_split_texts= chunks(path_of_doc)
    embedding_func= embedding_function()

    #making chromadb client object good for testing
    chroma_client = chromadb.Client()

    #making the collection of chroma database
    chroma_collection = chroma_client.create_collection("Doc-Collection", embedding_function=embedding_func)

    ids = [str(i) for i in range(len(token_split_texts))]

    chroma_collection.add(ids=ids, documents=token_split_texts)
    return chroma_collection

In [None]:
def retrieve_doc(query, chroma_collection):
    #we will pass the document path to doc_put_in_vectordb() so that can ingest it in vector db after chunking and loading
    # chroma_collection= doc_put_in_vectordb(path_to_doc)

    #lets get the 5 relevant results
    results = chroma_collection.query(query_texts=[query], n_results=5)

    #[0] means give the result of the first query, right now we have only 1 query
    retrieved_documents = results['documents'][0]

    return retrieved_documents

# RAG Function

In [None]:
def rag(query, retrieved_documents, model, tokenizer):

    #we will join the retrieved documents (that are the relavant documents) into one variable
    messages = f"""
          You are very supportive expert of Machine Learning and AI.
          Your users are asking questions about information contained in an Machine Learning and Deep Learning Book."
          Answer the user's question using only this relevant information. Try to build a good answer uing this information.


        User's Question: {query}. \n Information from the book of Machine Learning and Deep Learning: {retrieved_documents}"
    """
    pipe = pipeline("text-generation",
                        model=model,
                        tokenizer=tokenizer)

    llm = HuggingFacePipeline(pipeline=pipe)
    # checking again that our model is working fine--->Asking LLM model the same question we asked our document
    content= llm(prompt=messages)

    return content

# Using Hypothetical answer from LLM

In [None]:
# 1. Using the Hypothetical Answer from LLM:

def hypothetical_answer(query, model, tokenizer):
    messages = f"""
        You are a very supportive expert Machine Learning and AI.
        Provide an example answer to the given question,
        that might be found in a documents like from Book related to machine Learning and Deep Learning.

        The Question about which you have to give example answer is: {query}
    """
    pipe = pipeline("text-generation",
                    model=model,
                    tokenizer=tokenizer)

    llm = HuggingFacePipeline(pipeline=pipe)
    # checking again that our model is working fine--->Asking LLM model the same question we asked our document
    content= llm(prompt=query)

    #joining the original query and hypothetical answer
    joint_query = f"{query} {content}"

    return joint_query

In [None]:
original_query = "what is multi Regression?"
joint_query = hypothetical_answer(original_query, model, tokenizer)
print(joint_query)

  warn_deprecated(
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


what is multi Regression?  -- Mike


In the current context, you might be wondering that regressive regressive regression is not a good option. Well, what would the first thing look like if you were to give up one or two reg


In [None]:
path_to_doc= "/content/ML_book.pdf"
chroma_collection= doc_put_in_vectordb(path_to_doc)

.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

train_script.py:   0%|          | 0.00/13.2k [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

# Using Relative Docs and passing it to RAG
Getting the relative documents using the joint query

In [None]:
retrieved_documents= retrieve_doc(joint_query, chroma_collection)
retrieved_text= ""
for doc in retrieved_documents:
  retrieved_text+=doc

In [None]:
print(retrieved_text)

ywhich we might consider admissible, or how to search this space. we will discuss a number of those problems in chapter??. regression is another prototypical application. here the goal is to esti - mate a real - valued variable y∈rgiven a pattern x ( see e. g. figure 1. 7 ). for instance, we might want to estimate the value of a stock the next day, the yield of a semiconductor fab given the current process, the iron content of ore given mass spectroscopy measurements, or the heart rate of an athlete, given accelerometer data. one of the key issues in which regression problems [UNK] from each other is the choice of a loss. for instance, when estimating stock values our loss for a put option will be decidedly one - sided. on the other hand, a hobby athlete might only care that our estimate of the heart rate matches the actual on average.7. 3 support vector regression 185 lation can be written in a uniﬁed fashion as follows min w, b, ξ +, ξ−1 [UNK] + c [UNK] i = 1l + ( ξ + i ) + l− ( ξ− i

# Joining the retrieved documents into one variable

In [None]:
messages = f"""
  You're a knowledgeable expert in Machine Learning and Deep Learning, assisting users with
  questions from a dedicated book on these topics. Answer user queries precisely using
  information from the book,whether they seek

  User's Question: {original_query}. \n Information from the book of Machine Learning and Deep Learning: {retrieved_text}"
"""
pipe = pipeline("text-generation",
                model=model,
                tokenizer=tokenizer)

llm = HuggingFacePipeline(pipeline=pipe)
# checking again that our model is working fine--->Asking LLM model the same question we asked our document
# content= llm(prompt=messages)

In [None]:
def generate_multiple_queries(query, model, tokenizer):
    messages = f"""
        You're a knowledgeable expert in Machine Learning and Deep Learning, assisting users with
  questions from a dedicated book on these topics. Answer user queries precisely using
  information from the book,whether they seek

               The question about which you have to generate the question is {query}"""

    pipe = pipeline("text-generation",
                    model=model,
                    tokenizer=tokenizer)

    llm = HuggingFacePipeline(pipeline=pipe)
    content= llm(prompt=messages)
    content = content.split("\n")
    return content

In [None]:
original_query = "What is deep learning?"
augmented_queries = generate_multiple_queries(original_query, model, tokenizer)

for query in augmented_queries:
    print(query)

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


 The


In [None]:
queries = [original_query] + augmented_queries
retrieved_documents= retrieve_doc(queries, chroma_collection)
retrieved_text= ""
for doc in retrieved_documents:
  retrieved_text+=doc

In [None]:
print(retrieved_text)

15k - cached - similar pagesmachine learning - artificial intelligence ( incl. robotics... machine learning - artificial intelligence. machine learning is an international forum forresearch on computational approaches to learning. www. springer. com / computer / artificial / journal / 10994 - 39k - cached - similar pagesmachine learning ( theory ) graduating students in statistics appear to be at a substantial handicap compared tograduating students in machine learning, despite being in substantially... hunch. net / - 94k - cached - similar pagesamazon. com : machine learning : tom m. mitchell : booksamazon. com : machine learning : tom m. mitchell : books. www. amazon. com / machine - learning - tom - m - mitchell / dp / 0070428077 - 210k - cached - similar pagesmachine learning journalmachine learning publishes articles on the mechanisms through which intelligent systemsimprove their performance over time( 21 ) $ 39. 66 › explore similar items : books ( 50 ) editorial reviewsbook des

# Cross Encoder Re-Ranking

In [None]:
from sentence_transformers import CrossEncoder
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

config.json:   0%|          | 0.00/794 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/316 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [None]:
pairs = [[query, doc] for doc in retrieved_documents]
scores = cross_encoder.predict(pairs)
print("Scores:")
for score in scores:
    print(score)

Scores:
-5.7282147
-5.5705304
-5.6301236
-5.328144
-6.36014


In [None]:
retrieved_documents[1]

'( 21 ) $ 39. 66 › explore similar items : books ( 50 ) editorial reviewsbook descriptionthis exciting addition to the mcgraw - hill series in computer science focuses on the concepts and techniques that contribute to the rapidlychanging field of machine learning - - including probability and statistics, artificial intelligence, and neural networks - - unifying them all in a logicaland coherent manner. machine learning serves as a useful reference tool for software developers and researchers, as well as an outstanding textfor college students. - - this text refers to the hardcover edition. book infopresents the key algorithms and theory that form the core of machine learning. discusses such theoretical issues as how does learningperformance vary with the number of training examples presented? and which learning algorithms are most appropriate for various types oflearning tasks? dlc : computer algorithms. - - this text refers to the hardcover edition. product detailspaperback : 352 page

# Creating UI(User Interface) using Gradio
At the moment this part only works on Google Colab. Gradio and Kaggle started having compatibility issues recently.

In [None]:
import locale
locale.getpreferredencoding = lambda: "UTF-8"

In [None]:
# install gradio library to create User interface

! pip install -U gradio

Collecting gradio
  Downloading gradio-4.18.0-py3-none-any.whl (16.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.8/16.8 MB[0m [31m41.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==0.10.0 (from gradio)
  Downloading gradio_client-0.10.0-py3-none-any.whl (307 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.7/307.7 kB[0m [31m32.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx (from gradio)
  Downloading httpx-0.26.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.9/75.9 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.9.13-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (138 kB)

# Check if GPU is available and supported

In [None]:
import torch

# Check if CUDA (GPU support) is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

Device: cuda


# Import gradio and checking its version

In [None]:
import gradio as gr
print(gr.__version__)

4.18.0


# Run the below cell for UI based chatbot
This cell may take few minutes to load the UI(User interfce)

In [None]:
def predict(message, history):

    output = str(llm_ans(message)).replace("\n", "<br/>")
    return output

demo = gr.ChatInterface(
    predict,
    title = f' Open-Source LLM for our chatbot'
)

demo.queue().launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://4ba3bff83ce4066e86.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


