## Libraries

In [1]:
from llama_index.llms.llama_cpp import LlamaCPP
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core.response.notebook_utils import display_source_node
from llama_index.core.retrievers import RecursiveRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core import PromptTemplate

from llama_index.core.callbacks import (
    CallbackManager,
    LlamaDebugHandler,
)

KeyboardInterrupt: 

In [2]:
import nest_asyncio
nest_asyncio.apply()

In [3]:
#import llama_index.core
#llama_index.core.set_global_handler("simple")

Loading LLM

In [5]:

#from llama_index.core import set_global_tokenizer
#from transformers import AutoTokenizer

#set_global_tokenizer(
    #AutoTokenizer.from_pretrained("mistralai/mistral-7b-instruct-v0.2").encode
#)

In [3]:

llm = LlamaCPP(
    model_path="model\stablelm-zephyr-3b.Q5_K_M.gguf",
    temperature=0.5,
    max_new_tokens=1024,
    context_window=4096,
    #generate_kwargs={"stop":['<|endoftext|>']},
    model_kwargs={"n_gpu_layers": -1},  # if compiled to use GPU
    verbose=True,
)

llama_model_loader: loaded meta data with 21 key-value pairs and 356 tensors from model\stablelm-zephyr-3b.Q5_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = stablelm
llama_model_loader: - kv   1:                               general.name str              = source
llama_model_loader: - kv   2:                    stablelm.context_length u32              = 4096
llama_model_loader: - kv   3:                  stablelm.embedding_length u32              = 2560
llama_model_loader: - kv   4:                       stablelm.block_count u32              = 32
llama_model_loader: - kv   5:               stablelm.feed_forward_length u32              = 6912
llama_model_loader: - kv   6:              stablelm.rope.dimension_count u32              = 20
llama_model_loader: - kv   7:              stablelm.attention.head_count u3

In [4]:
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([llama_debug])

In [5]:
llm.complete("What is the Capital of Malaysia?")


llama_print_timings:        load time =    2236.79 ms
llama_print_timings:      sample time =     137.09 ms /   395 runs   (    0.35 ms per token,  2881.26 tokens per second)
llama_print_timings: prompt eval time =    2236.71 ms /     8 tokens (  279.59 ms per token,     3.58 tokens per second)
llama_print_timings:        eval time =    7523.06 ms /   394 runs   (   19.09 ms per token,    52.37 tokens per second)
llama_print_timings:       total time =   12017.82 ms /   402 tokens


CompletionResponse(text="\nThe capital city of Malaysia is known as Kuala Lumpur.\nKuala Lumpur, often referred to by its abbreviation KL or KLMP, is a culturally rich and historically significant metropolis situated at the center of Malaysia's political landscape. The city is renowned for its unique blend of traditional Malay culture and modern influences, making it an interesting destination for tourists from all over the world.\nEstablished in 1874 as a British trading colony, Kuala Lumpur has evolved into a bustling urban center with a population of over 5 million people. The city is divided into two main parts by the legendary River of Life (Sultan Salahuddin Abdul Aziz Road), known locally as Jalan Tun Razak.\nOne side of the city, known as the Old Town or Medan Merdeka, features historic landmarks such as the Sultan's Palace, the Islamic Museum, and the Hindu Temple. Meanwhile, the other half of Kuala Lumpur, referred to as the New Town or Damansara, is characterized by modern s

In [6]:
Settings.llm = llm

## Creating Ingestion Pipelne

Loading Local Embedding Model

In [7]:
embed_model = HuggingFaceEmbedding(
    model_name="Snowflake/snowflake-arctic-embed-s"
)
Settings.embed_model = embed_model

You try to use a model that was created with version 2.7.0.dev0, however, your version is 2.7.0. This might cause unexpected behavior or errors. In that case, try to update to the latest version.





Loading Documents

In [9]:
documents = SimpleDirectoryReader(input_dir='papers').load_data()

In [10]:
import chromadb
from llama_index.core import Document
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core import StorageContext

# save/load to disk
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("CEM5011")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)


In [11]:
from llama_index.core.node_parser import SentenceSplitter

pipeline = IngestionPipeline(
    transformations=[
        SentenceSplitter(chunk_size=1000, chunk_overlap=200),
    ],
    vector_store=vector_store,
)


In [12]:
pipeline.run(documents=documents)

[TextNode(id_='8f54ccbc-6a4d-47d3-afe0-8ba075b4e280', embedding=None, metadata={'page_label': 'C1', 'file_name': 'Bianchi, David W - Blue chip kids_ what every child (and parent) should know about money, investing, and the stock market-John Wiley & Sons (2015) (1).pdf', 'file_path': 'e:\\Github Repositories\\RAGTests\\papers\\Bianchi, David W - Blue chip kids_ what every child (and parent) should know about money, investing, and the stock market-John Wiley & Sons (2015) (1).pdf', 'file_type': 'application/pdf', 'file_size': 1926480, 'creation_date': '2024-07-22', 'last_modified_date': '2024-07-01'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='7e071113-b787-466f-b85f-a3db8da6fb29', node_type=<ObjectType.

In [13]:
from llama_index.postprocessor.flag_embedding_reranker import (
    FlagEmbeddingReranker,
)

rerank = FlagEmbeddingReranker(model="mixedbread-ai/mxbai-embed-large-v1", top_n=5)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at mixedbread-ai/mxbai-embed-large-v1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [14]:
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context,callback_manager=callback_manager,similarity_top_k=10, node_postprocessors=[rerank]
)

**********
Trace: index_construction
**********


In [15]:
base_retriever = index.as_retriever(similarity_top_k=5)
retrievals = base_retriever.retrieve(
    "What is the book about?"
)

**********
Trace: query
    |_retrieve -> 0.242055 seconds
**********


In [16]:
for n in retrievals:
    display_source_node(n, source_length=1500)

**Node ID:** 41ce82a6-a439-41c7-99a3-a2952b501503<br>**Similarity:** 0.7228255480621316<br>**Text:** The authors also wish to thank all th e reviewers for their kind 
endorsements.  
CCR wishes to thank all  the fo llowing colleagues for their insights, 
ideas and contributions to this writing and to CCR’s work in general  
(alphabetically by last name):  
John Abele, the Alfond family (Barbara, Bill, Justin, Kat, Ted), Lee  
Batchelor, Michele Bruniges, Annamaria Diniz, P at Farenga, Eron Gjoni, 
Brendan Griffen, Danny Hillis, Jim Koshland, Siva Kumari, Rose Luckin, 
Leticia  Lyle, Rick Miller, Henri Moser, Attilio Oliva, Greg Powell, Robert 
Randall, Todd Rose, Cathy and  Harry Rubin, Courtney Sale -Ross, Bror 
Saxberg, Andreas Sc hleicher,  Morgan Silver -Greenberg, Ray Stata, and all 
the foundations that support  us. 
With sincere gratitude to all external sources; their 
contribution is used for nonprofit education work 
under the Fair Use doctrine of copyright laws.<br>

**Node ID:** 3f340c77-43b5-47ee-b50e-9ce94bef3c5c<br>**Similarity:** 0.6785080352889851<br>**Text:** AI in Education —A Tentative Summary  164 
The Social Consequences of AI in Education  169 
Appendix 1  181 
Connections Between Topics and Concepts  181 
Evolution of Content  182 
Cross -Cutting Themes  185 
Appendix 2  193 
What is AI?  193 
AI Today  202 
AI Techniques  207 
AI Techniques and Terminology  210 
About CCR  224 
Redesigning Education Standards  224 
About the Authors  227<br>

**Node ID:** 5817c06f-dca9-4df4-9ead-f929ac4fe0b8<br>**Similarity:** 0.6773226788573453<br>**Text:** Introduction: The Context  
Artificial intelligence (AI) is arguably  the driving technological force  of 
the first half of this century, and will transform virtually every industry, if 
not human endeavors at large.1 Businesses and governments worldwide 
are pouring enormous sums of money into a very wide array of 
implementations, and dozens of start -ups ar e being funded to the tune of 
billions of dollars . 
Funding of AI startup companies worldwide, from 2013 to 2017  
(in million s of U.S. dollars). Source : Statista2 
It would be naive to think that AI will not have an impact on 
education —au contraire , the possibilities there are profound yet , for the 
time being, overhyped as well.  This book attempt s to provide the right 
balance between reality and hype (per the Gartne r diagram that follows ), 
between true potential and wild extrapolations. Every new  technology 
undergoes a period of intense growth of reputation and expectations, 
followed by a precipitous fall when it inevitably fails to live up to the 
expectations, after  which there is a slower growth as the technology is 
developed and integrated into our lives.  As visualized in the Gartner 
diagram, each technology can be said to reside somewhere on the curve 
                                                 
1 Possibly matched only by biotechnology.  
2 https://www.statista. com/statistics/621468/worldwide -artificial -intelligence -startup -company -funding -by-
year<br>

## Retrieval

In [17]:
query_engine = index.as_query_engine(streaming=True,similarity_top_k=7)

Setting Prompts


In [18]:
qa_prompt_template_str = """<|user|>Context information is below.
---------------------
{context_str}
---------------------
Given the context information and not prior knowledge, answer the query based on the context given ONLY else you will be penalized. Provide your answer in an easy to read and understand format.
Further elaborate your answer by finding examples or information within the context if possible.
Query: {query_str}
Answer:
<|endoftext|>
<|assistant|>
"""

qa_prompt_template = PromptTemplate(qa_prompt_template_str)
query_engine.update_prompts(
    {"response_synthesizer:text_qa_template":qa_prompt_template}
)

In [28]:
from IPython.display import Markdown, display
response = query_engine.query("When was the first known case of COVID-19 discovered, give me the exact date and time?")
display(Markdown(f"<b>{response.print_response_stream()}</b>"))

Llama.generate: prefix-match hit


The first known case of COVID-19, also referred to as SARS-CoV-2, is believed to have been discovered in late December 2019. The exact date varies depending on the source, but it generally ranges between late December 2019 and early January 2020. A detailed timeline of the discovery can be found from various sources:

1. World Health Organization (WHO): According to the WHO, the virus was first identified on December 31, 2019, in Wuhan, China. The organization stated that this was based on samples collected from a group of people presenting with pneumonia.
2. Centers for Disease Control and Prevention (CDC): The CDC also confirmed the first case of COVID-19 in the United States on January


llama_print_timings:        load time =    1842.46 ms
llama_print_timings:      sample time =      46.87 ms /   146 runs   (    0.32 ms per token,  3114.80 tokens per second)
llama_print_timings: prompt eval time =    4978.05 ms /  2954 tokens (    1.69 ms per token,   593.40 tokens per second)
llama_print_timings:        eval time =    5055.08 ms /   145 runs   (   34.86 ms per token,    28.68 tokens per second)
llama_print_timings:       total time =   10926.38 ms /  3099 tokens


<b>None</b>

Evaluation

In [2]:
from llama_index.core.llama_dataset import download_llama_dataset
from llama_index.core.llama_pack import download_llama_pack
from llama_index.core import VectorStoreIndex

# download and install dependencies for benchmark dataset
rag_dataset, documents = download_llama_dataset(
    "EvaluatingLlmSurveyPaperDataset", "./data"
)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import pandas as pd
df = rag_dataset.to_pandas()
df.to_csv('rag_ds.csv')

In [41]:
prediction_dataset = await rag_dataset.amake_predictions_with(
    predictor=query_engine, batch_size=100, show_progress=True
)

Batch processing of predictions:   0%|          | 0/100 [00:00<?, ?it/s]Llama.generate: prefix-match hit

llama_print_timings:        load time =    1842.46 ms
llama_print_timings:      sample time =      27.95 ms /    84 runs   (    0.33 ms per token,  3005.15 tokens per second)
llama_print_timings: prompt eval time =    7116.51 ms /  3425 tokens (    2.08 ms per token,   481.28 tokens per second)
llama_print_timings:        eval time =    3071.47 ms /    83 runs   (   37.01 ms per token,    27.02 tokens per second)
llama_print_timings:       total time =   10648.16 ms /  3508 tokens
Llama.generate: prefix-match hit

llama_print_timings:        load time =    1842.46 ms
llama_print_timings:      sample time =      61.25 ms /   196 runs   (    0.31 ms per token,  3200.05 tokens per second)
llama_print_timings: prompt eval time =    6369.16 ms /  3307 tokens (    1.93 ms per token,   519.22 tokens per second)
llama_print_timings:        eval time =    7179.63 ms /   195 runs   (   36.82