In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'wikipedia-crypto-articles:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F4264382%2F7344152%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240605%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240605T132852Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Daeba2962a379009d70c32b40633038a3fb9ab0f1c40337847e699c8ba8355f5e88d7ac38f524d970f5361360124e3fc3eb72d0b029db7bdea31088f6d2c6c9302f670955d5a8ad748e25c10328c49ac0cad28d8c4adf0f2b4b89df95d8300967e48991aae625d9adff8c7abbca742d3003fee15ea5d2e7f646daae146420d9d82ca08656f213d01d55a5207dbb47e2dd6c13b1334c5ec991d19d45b4b8f59c46ed3eeeeccda9e7cde2a7ecfa38cdb13ff06e4eede3658bae2b6dd188c7b0d1cdec6144e678f495e93b57f31902774e984419945fabda8c8541f36ec3e71a0298e245c8a5a1d3ead2478a61e1850b6caf5d3445619e93c0f68e3588be0d6265b1,mistral/pytorch/7b-instruct-v0.1-hf/1:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-models-data%2F3900%2F5112%2Fbundle%2Farchive.tar.gz%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240605%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240605T132853Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D0a9afb25b3c04467b999bd76d6aed3340940d14e154a31617bbc1a5adfd4a3164127120500abbeb7b043e9902323104c84ee48593891c38836416829065e7a0a234b1bc23cdd3783abfaec7d4290bbb73b794056d4e1d27795d4315cd299c9e8a3d3d47ebed37eac4c8e828ab4894b1ffabd56d71e11a3dc58eccf5436e7f2f15411eb874bec23854c766d803c08ee9612ca781934b49eb36d9cc822b4cb447ec9ada32faa285cfffe675caa0b6b5fd6f1347509c41e0eb983014866a986ac05b43c7619d351a05c3c18636abb287493122a77a7a1f71518fe08cccfa83fa525ece6ece0af5d61ed4fe18b0272eae5eb23866337b4b9b932e18272eb1b953433'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every data source listed in DATA_SOURCE_MAPPING and unpack it under
# KAGGLE_INPUT_PATH/<directory>. A source that fails to download is reported
# and skipped so the remaining sources are still processed.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry is "<directory>:<percent-encoded URL>". Split on the first
    # ':' only, so an entry still parses if the URL part contains a literal ':'.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            # The Content-Length header may be absent; in that case only the
            # running byte count is shown instead of a progress bar.
            content_length = fileres.headers['content-length']
            total_length = int(content_length) if content_length else 0
            print(f'Downloading {directory}, {content_length} bytes compressed')
            dl = 0
            # iter(callable, sentinel) keeps reading chunks until read()
            # returns b'' (end of stream).
            for data in iter(lambda: fileres.read(CHUNK_SIZE), b''):
                dl += len(data)
                tfile.write(data)
                if total_length > 0:
                    done = min(50, int(50 * dl / total_length))
                    sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                else:
                    sys.stdout.write(f"\r{dl} bytes downloaded")
                sys.stdout.flush()
            # Push buffered bytes to disk and rewind before re-reading the
            # archive: the tar branch reopens the file by name through a
            # second handle, which would not see unflushed data.
            tfile.flush()
            tfile.seek(0)
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name so the imported `tarfile`
                # module is not rebound.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # HTTPError is a subclass of OSError, so it must be handled first.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


<div style="font-family: 'Roboto', Arial, sans-serif; text-align: center;">
    <img src="https://i.imgur.com/sicKkOA.png" width="500" height="500">
    <div style="font-size: 42px; font-weight: bold; color: #353935; text-shadow: 1px 2px 3px rgba(0,0,0,0.1);">
        <b>Retrieval Augmented Generation (RAG)<br> with Mistral 7b</b>
    </div>
    <hr style="border: none;
               border-top: 1.25px solid #E0E0E0;
               width: 75%;
               margin-top: 20px;
               margin-bottom: 20px;
               margin-left: auto;
               margin-right: auto;">
</div>

<br><br><br>
<div style="font-family: 'Roboto', Arial, sans-serif; text-align: left; font-size: 14px; letter-spacing: 1.5px; margin-top: 25px; margin-bottom: 25px;font-weight: bold;">
    Table of Contents
</div>
<br>

- [Introduction](#intro)<br><br>
- [Data Inspection](#eda)<br><br>
- [Storing Data](#storing)<br><br>
- [Loading Mistral 7b Model](#model)<br><br>
- [Querying Model](#queries)<br><br>
- [Conclusion](#conclusion)<br><br>

<div id = 'intro'
     style="font-size: 42px; font-weight: bold; color: #353935; text-shadow: 1px 2px 3px rgba(0,0,0,0.1);">
        <b>Introduction</b>
    <hr style="border: none;
               border-top: 1.25px solid #E0E0E0;
               width: 100%;
               margin-top: 20px;
               margin-bottom: 20px;
               margin-left: 0px;
               margin-right: auto;">
</div>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">Large language models are amazing tools that can help humans obtain answers to questions, summarize extensive texts, translate documents from one language to another, and help us code, among others.</p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">However, LLMs have one major issue: <b>hallucinations</b>. Hallucinations happen when an LLM spits out random facts from its training data, even if it may have no real connection to the user's prompt. Large language models have a hard time saying <i>"I don't know"</i> to questions they don't have an answer to.</p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;"><b>Retrieval-augmented generation (RAG)</b> is an AI framework that has two main objectives: to improve the quality of generated responses by connecting the model to an external source of knowledge, and to ensure that users have access to the model's sources so they can fact-check its answers for accuracy.</p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">With RAG, we can also ensure that the large language model has access to <b>proprietary data</b> by connecting it to custom sources of data from where it can retrieve information.</p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">The image below provides a clear understanding of how RAG works. First, a user asks the LLM a question. Before reaching the model, the question reaches a <b>retriever</b>. This retriever is responsible for looking up and retrieving, from the <b>knowledge base</b>, the documents that are relevant to answering the question. The question, plus the relevant documents, is then sent to the LLM, which can generate an answer grounded in the documents it received.</p>

<center>
    <img src = "https://assets-global.website-files.com/63f3993d10c2a062a4c9f13c/64593ba041a4ff8dfef73f30_1*LYApKuxzzmvFECqwYk61wg.png">
<p style = "font-size: 16px;
            font-family: 'Roboto', sans-serif;
            text-align: center;
            margin-top: 10px;">Source: <a href = "https://www.ml6.eu/blogpost/leveraging-llms-on-your-domain-specific-knowledge-base">ml6.eu</a></p>
</center>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">In this notebook, we will implement a retrieval-augmented generation system for an LLM using the <a href="https://www.kaggle.com/datasets/lusfernandotorres/wikipedia-crypto-articles">Wikipedia Crypto Articles</a>, a dataset that I uploaded a few days ago here on Kaggle.</p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">Before getting our hands dirty with code, let's install some relevant packages. These are:</p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">• <b><a href = "https://www.trychroma.com/">Chromadb</a>:</b> An open-source embedding database that allows us to plug LLMs to knowledge bases. It allows us to store and query embeddings and their metadata.</p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">• <b><a href = "https://www.langchain.com/">LangChain</a>:</b> A framework that allows us to develop several applications powered by LLMs.</p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">• <b><a href = "https://pypi.org/project/sentence-transformers/">Sentence Transformers</a>:</b> A framework that provides an easy method to compute dense vector representations for sentences, paragraphs, and images by leveraging pre-trained transformer models.</p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">• <b><a href = "https://github.com/TimDettmers/bitsandbytes">bitsandbytes</a>:</b> A library designed to optimize the training and deployment of large models through 4-bit quantization of the model's weights, reducing memory footprint and enhancing memory efficiency.</p>

In [None]:
# ydata-profiling: automated EDA; provides the ProfileReport class imported below
!pip install ydata-profiling

In [None]:
# chromadb: embedding database used through LangChain's Chroma vector store
!pip install chromadb

In [None]:
# LangChain: loaders, text splitter, embeddings wrapper and RetrievalQA chain used below
!pip install langchain

In [None]:
# Sentence Transformers: supplies the all-MiniLM-L6-v2 embedding model loaded later
!pip install sentence_transformers

In [None]:
# bitsandbytes: needed for the 4-bit quantized model loading configured later
!pip install bitsandbytes

In [None]:
# Importing libs

# Data Handling
import pandas as pd
import numpy as np

# Auto EDA
from ydata_profiling import ProfileReport


# Torch and Transformers
import torch
from torch import bfloat16
import transformers
from transformers import AutoTokenizer

# LangChain
from langchain.llms import HuggingFacePipeline
from langchain.document_loaders import DataFrameLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.vectorstores import Chroma

# Hiding warnings
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Checking if GPU is available and selecting the torch device accordingly
if torch.cuda.is_available():
    # Query name and memory for the SAME device (the current CUDA device),
    # so the two reported values cannot refer to different GPUs on a
    # multi-GPU machine.
    gpu_index = torch.cuda.current_device()
    gpu_name = torch.cuda.get_device_name(gpu_index)
    total_memory = torch.cuda.get_device_properties(gpu_index).total_memory
    total_memory_gb = total_memory / (1024**3)  # bytes -> GiB
    print("GPU is available. \nUsing GPU")
    print("\nGPU Name:", gpu_name)
    print(f"Total GPU Memory: {total_memory_gb:.2f} GB")

    device = torch.device('cuda')
else:
    print("GPU is not available. \nUsing CPU")
    device = torch.device('cpu')

<div id = 'eda'
     style="font-size: 42px; font-weight: bold; color: #353935; text-shadow: 1px 2px 3px rgba(0,0,0,0.1);">
        <b>Data Inspection</b>
    <hr style="border: none;
               border-top: 1.25px solid #E0E0E0;
               width: 100%;
               margin-top: 20px;
               margin-bottom: 20px;
               margin-left: 0px;
               margin-right: auto;">
</div>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">As I have previously mentioned, we are going to use the <a href="https://www.kaggle.com/datasets/lusfernandotorres/wikipedia-crypto-articles">Wikipedia Crypto Articles</a> dataset as a knowledge source base for the model.</p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">We will use YData Profiling, an auto-visualization tool, to extract some info from the dataset with just a few lines of code.</p>

In [None]:
# Reading the Wikipedia Crypto Articles CSV from the Kaggle input directory
csv_path = '/kaggle/input/wikipedia-crypto-articles/Wikipedia Crypto Articles.csv'
df = pd.read_csv(csv_path)

# Building the ydata-profiling report object for the dataframe
report = ProfileReport(df, title='Wikipedia Crypto Articles')

In [None]:
report # Bare expression on the cell's last line: the notebook renders the profiling report

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">The dataset consists of two columns, <code>title</code> and <code>article</code>. We have nine entries with empty articles. We will remove these rows.</p>

In [None]:
# Row count before cleaning
rows_before = len(df)
print('Dataframe Length:', rows_before, 'rows')

# dropna() with no arguments discards every row that has a missing value
# in any column
df = df.dropna()

# Row count after cleaning
rows_after = len(df)
print('Length After Dropping Empty Values:', rows_after, 'rows')

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">Let's take a look at the content of the dataframe. I will print the title and the content of the text in <code>article</code> for the last entry.</p>

In [None]:
# Showing the title and the full article text of the last row of the dataframe
last_title = df['title'].iloc[-1]
last_article = df['article'].iloc[-1]

print('Title:', last_title)
print('\n\n\n')  # visual gap between the title and the article body
print(last_article)

<div id = 'storing'
     style="font-size: 42px; font-weight: bold; color: #353935; text-shadow: 1px 2px 3px rgba(0,0,0,0.1);">
        <b>Storing Data</b>
    <hr style="border: none;
               border-top: 1.25px solid #E0E0E0;
               width: 100%;
               margin-top: 20px;
               margin-bottom: 20px;
               margin-left: 0px;
               margin-right: auto;">
</div>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">LangChain has tools called <i>Document Loaders</i> that allow us to load several types of data from a source as a <code>document</code>. A <code>document</code> contains text and associated metadata. We will use the <code>DataFrameLoader</code> class to load the data from a pandas DataFrame.</p>

In [None]:
# Wrapping the dataframe in a LangChain DataFrameLoader.
# page_content_column="title" makes each row's title the document text; the
# later inspection cells read the article body from the document metadata.
# NOTE(review): because of this, the text that is split, embedded and passed
# to the LLM downstream appears to be the title, not the article body.
# Confirm this is intended. Switching to "article" would also require updating
# the cells that read metadata['article'].
articles = DataFrameLoader(df, page_content_column="title")

In [None]:
# Materializing the loader's output; `document` is what gets passed to the
# text splitter below (presumably a list with one Document per dataframe row;
# verify against the DataFrameLoader docs)
document = articles.load()

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">Before creating the embeddings from the document, we have to split it into smaller chunks. We do this for several reasons. </p>
<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">First, embedding models may have a maximum token limit, and splitting the data ensures each chunk fits within these limits. </p>
<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">Second, smaller chunks are more memory-efficient, which reduces computational costs. </p>
<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">Third, embedding smaller and coherent segments of text may lead to higher accuracy and more meaningful representations. </p>
<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">We will use LangChain's <code>RecursiveCharacterTextSplitter</code> to split our data.</p>

In [None]:
# Splitting the loaded documents into smaller chunks: chunk_size caps the
# length of each chunk and chunk_overlap is the amount shared between
# neighbouring chunks (presumably measured in characters; confirm against the
# splitter's length function).
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=20)

# `splitted_texts` is the input to Chroma.from_documents further down
splitted_texts = splitter.split_documents(document)

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">We will use <code>HuggingFaceEmbeddings</code> to load a model from SentenceTransformers 🤗. More specifically, we will load the <code>sentence-transformers/all-MiniLM-L6-v2</code> model, which maps sentences and paragraphs to a 384-dimensional dense vector space. </p>

In [None]:
# Loading the sentence-transformers/all-MiniLM-L6-v2 model through LangChain's
# HuggingFaceEmbeddings wrapper; this object is handed to Chroma.from_documents
# below to compute the embeddings
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">With <code>Chroma</code>, we will create an indexed database of the embedded data. We use the <code>.from_documents( )</code> method from the Chroma class and input the chunks of text, <code>splitted_texts</code>, the embedding model, <code>embedding_model</code>, and we finally specify a directory where the indexed database will be stored.</p>

In [None]:
# Creating an indexed database: the chunks in `splitted_texts` are embedded
# with `embedding_model` and stored in a Chroma vector store that uses the
# 'chroma_db' directory for persistence
chroma_database = Chroma.from_documents(
    splitted_texts,
    embedding_model,
    persist_directory='chroma_db',
)

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">You can see below that <code>chroma_database</code> is a vector store.</p>

In [None]:
# Visualizing the database: a bare expression on the cell's last line, so the
# notebook displays the object's representation
chroma_database

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">We use vector stores to store the embedded data. When we ask the model about something, the chain will embed this query and use it to retrieve the embedding vectors from the vector store according to the "similarities" between them and the embedded query. A vector store is simply responsible for storing the embedded data and performing a vector search for high-quality answers to our questions. </p>

<center>
    <img src = "https://python.langchain.com/assets/images/vector_stores-125d1675d58cfb46ce9054c9019fea72.jpg">
<p style = "font-size: 16px;
            font-family: 'Roboto', sans-serif;
            text-align: center;
            margin-top: 10px;">Source: <a href = "https://python.langchain.com/docs/modules/data_connection/vectorstores/">Vector stores on LangChain</a></p>
</center>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">Below, we define a retriever. Retrievers are responsible for retrieving the documents from the vector store based on a given query. They accept a string query as input and return a list of <code>document</code>s as output. </p>

In [None]:
# Defining a retriever on top of the vector store; no arguments are passed, so
# the library defaults apply. It is later given to RetrievalQA.from_chain_type
retriever = chroma_database.as_retriever()

In [None]:
# Visualizing the retriever: a bare expression on the cell's last line, so the
# notebook displays the object's representation
retriever

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">You can see above that the <code>retriever</code> object is an instance of the <code>VectorStoreRetriever</code>, and it is linked to the vector store <code>chroma_database</code>.</p>

<div id = 'model'
     style="font-size: 42px; font-weight: bold; color: #353935; text-shadow: 1px 2px 3px rgba(0,0,0,0.1);">
        <b>Loading Mistral 7b Model</b>
    <hr style="border: none;
               border-top: 1.25px solid #E0E0E0;
               width: 100%;
               margin-top: 20px;
               margin-bottom: 20px;
               margin-left: 0px;
               margin-right: auto;">
</div>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">We can load the large language model with the <code>HuggingFacePipeline</code> class, which allows us to access over 120,000 open-source models publicly available on Huggingface.co. The model we will load is Mistral 7B Instruct v0.1 (<code>mistralai/Mistral-7B-Instruct-v0.1</code> on Hugging Face, loaded here from the local Kaggle model directory) for the <code>text-generation</code> task. </p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">The Mistral 7B is an open-source 7.3B parameter model that outperforms Meta's Llama 2 13B on all benchmarks. It is one of the most powerful open-source models available in January 2024.</p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">Since this is a large model, we are going to use the <code>bitsandbytes</code> library to create the <code>quantization_config</code> variable that is going to load the model in a 4-bit quantized format and enable double quantization. We are also setting the compute data type to bfloat16. These settings optimize the model's size and performance, avoiding memory usage issues.</p>

In [None]:
# Quantization settings applied when the LLM is loaded below:
# 4-bit weights, 'nf4' quantization type, double quantization enabled,
# and bfloat16 as the compute dtype
quantization_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16,
)

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">We finally load the model in the <code>llm</code> variable. With the <code>model_kwargs</code> dictionary, we define a few behaviors for the model. For instance, <code>temperature</code> is a parameter that ranges from 0.0 to 1.0, and it defines how <i>"creative"</i> the model can be. Lower values make responses more predictable. <code>max_length</code> defines the maximum length of generated outputs, and <code>quantization_config</code> applies the previously defined quantization configuration to optimize the model.</p>

In [None]:
# Loading the Mistral 7b instruct model from the local Kaggle model directory
# as a LangChain text-generation pipeline, using the quantization settings
# defined in `quantization_config`.
# NOTE(review): 'temperature' and 'max_length' are passed through model_kwargs
# (model loading) rather than pipeline_kwargs (generation). Verify against the
# installed LangChain/transformers versions that they actually influence
# generation.
llm = HuggingFacePipeline.from_model_id(
    model_id='/kaggle/input/mistral/pytorch/7b-instruct-v0.1-hf/1',
    task='text-generation',
    model_kwargs={
        'temperature': .3,
        'max_length': 1024,
        'quantization_config': quantization_config,
    },
    device_map="auto",
)

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">In LangChain, we have modules called <b>Chains</b>,  a sequence of calls to an LLM. One of those chains is the <code>RetrievalQA</code>, which fetches relevant documents from an indexed database and then passes those documents into the LLM to generate a response. Let's define a Q&amp;A chain.</p>

In [None]:
# Building the question-answering chain: `retriever` supplies the documents
# and `llm` produces the answer; the 'stuff' chain type is used and verbose
# logging is turned off
QnA = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type='stuff',
    retriever=retriever,
    verbose=False,
)

<div id = 'queries'
     style="font-size: 42px; font-weight: bold; color: #353935; text-shadow: 1px 2px 3px rgba(0,0,0,0.1);">
        <b>Querying Model</b>
    <hr style="border: none;
               border-top: 1.25px solid #E0E0E0;
               width: 100%;
               margin-top: 20px;
               margin-bottom: 20px;
               margin-left: 0px;
               margin-right: auto;">
</div>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">Below, I am going to define the <code>get_answers</code> function, which takes in the <code>QnA</code> chain we created above and a query, which is the question we ask the LLM.</p>

In [None]:
# Helper that sends a query through the QnA chain and prints the outcome
def get_answers(QnA, query):
    """Run ``query`` through ``QnA`` and print the query and its answer.

    Args:
        QnA: Object exposing ``run(query)``; in this notebook, the
            RetrievalQA chain.
        query: Question text handed to ``QnA.run``.

    Returns:
        None. The query and the answer are printed, each preceded by a
        label wrapped in ANSI bold escape codes.
    """
    # The chain is invoked before anything is printed, so a failure in
    # run() produces no partial output.
    response = QnA.run(query)
    query_line = f"\033[1mQuery:\033[0m {query}\n"
    print(query_line)
    print(f"\033[1mAnswer:\033[0m ", response)

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">We can now ask the model questions! Let's start with a few examples.</p>

In [None]:
# Example 1: who created Bitcoin, and when
query = """Who created the Bitcoin? When was it created?"""
get_answers(QnA, query)

In [None]:
# Example 2: the biggest cryptocurrency scam
query = """What was the biggest scam in the history of cryptocurrencies?"""
get_answers(QnA, query)

In [None]:
# Example 3: a future price, to see how the model handles a question about the future
query = """How much will one Bitcoin cost in 2030?"""
get_answers(QnA, query)

In [None]:
# Example 4: a request for a list of relevant people
query = """Cite the names of five relevant people in crypto?"""
get_answers(QnA, query)

In [None]:
# Example 5: exchanges for buying crypto
query = """What exchanges can I use to buy crypto?"""
get_answers(QnA, query)

In [None]:
# Example 6: who conceived Ethereum.
# NOTE: `query` stays bound to this question afterwards and is reused by the
# first similarity_search cell further down.
query = """Who conceived Ethereum?"""
get_answers(QnA, query)

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">The model answers correctly when asked who created the Bitcoin and when it was created. I honestly don't know if the Squid Game scam was the biggest scam, but it was a scam that happened around 2021, and you can <a href ="https://en.wikipedia.org/wiki/2021_Squid_Game_cryptocurrency_scam">read about it on Wikipedia</a>. </p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">When asked about the price of one Bitcoin in 2030, the model answers with <b>"I don't know"</b>, which is good. We don't want the model to make guesses about information it doesn't have access to.</p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">When asked about the names of five relevant people in crypto,  the model repeats the name of <i>Andreas Antonopoulos</i> a few times, and even misspells his surname. There is definitely room for improvement here, but still, the model correctly named people who are related to cryptocurrencies.</p>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">In the code below, we will use <code>query</code> to retrieve documents from our vector store and display the number of retrieved documents and the source of these documents, which is the title of the article as displayed on Wikipedia. I will also print the first 350 characters of the article text from the document's metadata so you can read a bit of it.</p>

In [None]:
# Obtaining the source and documents searched: run the similarity search
# directly against the vector store for the current value of `query`
docs = chroma_database.similarity_search(query)
print(f'Query: {query}')
print(f'Retrieved documents: {len(docs)}')
for doc in docs:
    # Read the Document attributes directly instead of going through
    # doc.to_json()['kwargs'], which depends on LangChain's serialization
    # layout. page_content holds the article title and the article body is
    # stored under metadata['article'].
    print("\nSource (Article Title):", doc.page_content)
    # Only the first 350 characters of the article are shown
    print("\nText", doc.metadata['article'][:350] + ". . .")
    print('\n\n\n')

In [None]:
# Trying a different query against the vector store
query = """What exchanges can I use to buy crypto?"""
docs = chroma_database.similarity_search(query)
print(f'Query: {query}')
print(f'Retrieved documents: {len(docs)}')
for doc in docs:
    # Read the Document attributes directly instead of going through
    # doc.to_json()['kwargs'], which depends on LangChain's serialization
    # layout. page_content holds the article title and the article body is
    # stored under metadata['article'].
    print("\nSource (Article Title):", doc.page_content)
    # Only the first 350 characters of the article are shown
    print("\nText", doc.metadata['article'][:350] + ". . .")
    print('\n\n\n')

<div id = 'conclusion'
     style="font-size: 42px; font-weight: bold; color: #353935; text-shadow: 1px 2px 3px rgba(0,0,0,0.1);">
        <b>Conclusion</b>
    <hr style="border: none;
               border-top: 1.25px solid #E0E0E0;
               width: 100%;
               margin-top: 20px;
               margin-bottom: 20px;
               margin-left: 0px;
               margin-right: auto;">
</div>

<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">In this notebook, we have explored the <b>Retrieval-Augmented Generation (RAG)</b> as a process for improving the outputs of LLMs by giving them access to external data through an indexed vectorized database for retrieval of information.</p>
<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">We have used the <a href ="https://www.kaggle.com/models/mistral-ai/mistral/frameworks/PyTorch/variations/7b-instruct-v0.1-hf/versions/1"><b>Mistral 7b model</b></a>, one of the most efficient open-source models of January 2024. For testing, we used the <a href ="https://www.kaggle.com/datasets/lusfernandotorres/wikipedia-crypto-articles"><b>Wikipedia Crypto Articles</b></a>, a dataset I have created by obtaining cryptocurrency-related articles from Wikipedia. </p>
<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">I hope this notebook was convenient in helping you develop a deeper understanding of what RAG is. If you liked the content here, feel free to leave your upvote and feedback. I always love to read your comments and your insights into the subject.</p>
<p style="font-family: 'Roboto', Arial, sans-serif; font-size: 20px; color: #353935;">Thank you very much!</p>

<hr style="border: 0;
           height: 1px;
           border-top: 0.85px solid #b2b2b2;">
           
<div style="text-align: left;
            color: #8d8d8d;
            padding-left: 15px;
            font-size: 14.25px;">
    Luis Fernando Torres, 2024<br><br>
    Let's connect!🔗<br>
    <a href="https://www.linkedin.com/in/luuisotorres/">LinkedIn</a> • <a href="https://medium.com/@luuisotorres">Medium</a> • <a href = "https://huggingface.co/luisotorres">Hugging Face</a><br><br>
</div>
<div style="text-align: center;
            margin-top: 50px;
            color: #8d8d8d;
            padding-left: 15px;
            font-size: 14.25px;"><b>Like my content? Feel free to <a href="https://www.buymeacoffee.com/luuisotorres">Buy Me a Coffee ☕</a></b>
</div>
<div style="text-align: center;
            margin-top: 80px;
            color: #8d8d8d;
            padding-left: 15px;
            font-size: 14.25px;"><b>  <a href = "https://luuisotorres.github.io/">https://luuisotorres.github.io/</a> </b>
</div>