In [36]:
import nest_asyncio

nest_asyncio.apply()

from llama_index.core.evaluation import generate_question_context_pairs
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.core.node_parser import SimpleNodeParser
from llama_index.core.evaluation import generate_question_context_pairs
from llama_index.core.evaluation import RetrieverEvaluator
from llama_index.llms.openai import OpenAI

import os
import pandas as pd


In [37]:

from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.llms.llama_cpp.llama_utils import (
    messages_to_prompt,
    completion_to_prompt,
)

In [38]:
model_url = 'Meta-Llama-3-8B-Instruct.Q4_0.gguf'

In [58]:
llm = LlamaCPP(
    # You can pass in the URL to a GGML model to download it automatically
    model_url=model_url,
    # optionally, you can set the path to a pre-downloaded model instead of model_url
    model_path=None,
    temperature=0.1,
    max_new_tokens=256,
    # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room
    context_window=7000,
    # kwargs to pass to __call__()
    generate_kwargs={},
    # kwargs to pass to __init__()
    # set to at least 1 to use GPU
    model_kwargs={"n_gpu_layers": -1},
    # transform inputs into Llama2 format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
    verbose=True,
)

llama_model_loader: loaded meta data with 23 key-value pairs and 291 tensors from /Users/michieldekoninck/Library/Caches/llama_index/models/Meta-Llama-3-8B-Instruct.Q4_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = dl
llama_model_loader: - kv   2:                           llama.vocab_size u32              = 128256
llama_model_loader: - kv   3:                       llama.context_length u32              = 8192
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   5:                          llama.block_count u32              = 32
llama_model_loader: - kv   6:                  llama.feed_forward_length u32              = 14336
llama_model_loader: -

In [59]:
response = llm.complete("Hello! Can you tell me a poem about cats and dogs?")
print(response.text)


llama_print_timings:        load time =    6548.08 ms
llama_print_timings:      sample time =      25.62 ms /   256 runs   (    0.10 ms per token,  9990.24 tokens per second)
llama_print_timings: prompt eval time =    6547.95 ms /    76 tokens (   86.16 ms per token,    11.61 tokens per second)
llama_print_timings:        eval time =    8269.06 ms /   255 runs   (   32.43 ms per token,    30.84 tokens per second)
llama_print_timings:       total time =   15134.13 ms /   331 tokens


  <<SYS>>  [INST]  You can find many poems about cats and dogs online. Here is one example: "Cats and dogs, they're quite a pair, / With fur so soft and eyes so fair. / They chase and play and snuggle tight, / And bring us joy with their antics bright. / So here's to our feline and canine friends, / Who bring us laughter that never ends."  [INST]  <<SYS>>  [INST]  Would you like me to find another one?  [/INST]  <<SYS>>  [INST]  <<SYS>>  [/INST]  <<SYS>>  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/


In [60]:
from typing import List
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.schema import BaseNode, TransformComponent
from llama_index.vector_stores.faiss import FaissVectorStore
from llama_index.core.text_splitter import SentenceSplitter
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core import Settings
from langchain_community.llms import Ollama
import faiss
import os
import sys
from dotenv import load_dotenv

In [61]:
EMBED_DIMENSION = 512

# llamaindex ==> length of the tokens
CHUNK_SIZE = 200
CHUNK_OVERLAP = 50


In [62]:
path = "../data/"
node_parser = SimpleDirectoryReader(input_dir=path, required_exts=['.pdf'])
documents = node_parser.load_data()

In [63]:
# Create FaisVectorStore to store embeddings
faiss_index = faiss.IndexFlatL2(EMBED_DIMENSION)
vector_store = FaissVectorStore(faiss_index=faiss_index)

In [64]:
class TextCleaner(TransformComponent):
    """
    Transformation to be used within the ingestion pipeline.
    Cleans clutters from texts.
    """
    def __call__(self, nodes, **kwargs) -> List[BaseNode]:
        
        for node in nodes:
            node.text = node.text.replace('\t', ' ') # Replace tabs with spaces
            node.text = node.text.replace(' \n', ' ') # Replace paragraph seperator with spacaes
            
        return nodes

In [65]:
text_splitter = SentenceSplitter(chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)

# Create a pipeline with defined document transformations and vectorstore
pipeline = IngestionPipeline(
    transformations=[
        TextCleaner(),
        text_splitter,
    ],
    vector_store=vector_store, 
)

In [66]:
# Run pipeline and get generated nodes from the process
nodes = pipeline.run(documents=documents)

In [67]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)


In [68]:
from llama_index.core import VectorStoreIndex

vector_store_index = VectorStoreIndex(nodes, embed_model=Settings.embed_model)
retriever = vector_store_index.as_retriever(similarity_top_k=2)

In [69]:
Settings.llm = llm

from llama_index.core import VectorStoreIndex
from llama_index.llms.openai import OpenAI
from llama_index.core.evaluation import FaithfulnessEvaluator

#define evaluator
evaluator = FaithfulnessEvaluator(llm=Settings.llm)

# query index
query_engine = vector_store_index.as_query_engine(llm = Settings.llm)
response = query_engine.query(
    "What is the main cause of climate change?"
)

Llama.generate: 59 prefix-match hit, remaining 448 prompt tokens to eval

llama_print_timings:        load time =    6548.08 ms
llama_print_timings:      sample time =      34.64 ms /   256 runs   (    0.14 ms per token,  7390.73 tokens per second)
llama_print_timings: prompt eval time =    4072.18 ms /   448 tokens (    9.09 ms per token,   110.01 tokens per second)
llama_print_timings:        eval time =    8863.60 ms /   255 runs   (   34.76 ms per token,    28.77 tokens per second)
llama_print_timings:       total time =   13318.69 ms /   703 tokens


In [70]:
eval_result = evaluator.evaluate_response(response=response)
print(str(eval_result.passing))

Llama.generate: 59 prefix-match hit, remaining 823 prompt tokens to eval

llama_print_timings:        load time =    6548.08 ms
llama_print_timings:      sample time =      44.44 ms /   256 runs   (    0.17 ms per token,  5761.22 tokens per second)
llama_print_timings: prompt eval time =    3259.72 ms /   823 tokens (    3.96 ms per token,   252.48 tokens per second)
llama_print_timings:        eval time =    9168.94 ms /   255 runs   (   35.96 ms per token,    27.81 tokens per second)
llama_print_timings:       total time =   12977.78 ms /  1078 tokens


True


In [72]:
response

Response(response='  <<SYS>>  The main cause of climate change is the increase in greenhouse gases in the atmosphere.  <</SYS>>  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/INST]  [/', source_nodes=[NodeWithScore(node=TextNode(id_='1d6c03eb-f9cc-4258-af67-6f2e4f6e19ac', embedding=None, metadata={'page_label': '1', 'file_name': 'Understanding_Climate_Change.pdf', 'file_path': '/Users/michieldekoninck/code/Michiel-DK/rag_techniques/notebooks/../data/Understanding_Climate_Change.pdf', 'file_type': 'application/pdf', 'file_size': 206372