# LlamaIndex Familiarisation

### Imports and config

In [14]:
#import os
#os.environ["OPENAI_API_KEY"] = 'YOUR_API_KEY'

## Load data and build an index

In [15]:
# pip install llama-index 
from llama_index import VectorStoreIndex, SimpleDirectoryReader

documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)

## Query your data <a id='query'></a>

In [20]:
query_engine = index.as_query_engine()
response = query_engine.query("What did the author do growing up?")
print(response)

DEBUG:openai._base_client:Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': <function Embeddings.create.<locals>.parser at 0x0000022BE56BF740>, 'json_data': {'input': ['What did the author do growing up?'], 'model': <OpenAIEmbeddingModeModel.TEXT_EMBED_ADA_002: 'text-embedding-ada-002'>, 'encoding_format': 'base64'}}
Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': <function Embeddings.create.<locals>.parser at 0x0000022BE56BF740>, 'json_data': {'input': ['What did the author do growing up?'], 'model': <OpenAIEmbeddingModeModel.TEXT_EMBED_ADA_002: 'text-embedding-ada-002'>, 'encoding_format': 'base64'}}
Request options: {'method': 'post', 'url': '/embeddings', 'files': None, 'post_parser': <function Embeddings.create.<locals>.parser at 0x0000022BE56BF740>, 'json_data': {'input': ['What did the author do growing up?'], 'model': <OpenAIEmbeddingModeModel.TEXT_EMBED_ADA_002: 'text-embedding-ada-002'>, 'encoding_f

## Viewing Queries and Events Using Logging

In [21]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

## Save Embedings to disk

In [22]:
index.storage_context.persist()

DEBUG:fsspec.local:open file: d:/MoroccanAI/Hackathon1/RAGs/storage/docstore.json
open file: d:/MoroccanAI/Hackathon1/RAGs/storage/docstore.json
open file: d:/MoroccanAI/Hackathon1/RAGs/storage/docstore.json
open file: d:/MoroccanAI/Hackathon1/RAGs/storage/docstore.json
DEBUG:fsspec.local:open file: d:/MoroccanAI/Hackathon1/RAGs/storage/index_store.json
open file: d:/MoroccanAI/Hackathon1/RAGs/storage/index_store.json
open file: d:/MoroccanAI/Hackathon1/RAGs/storage/index_store.json
open file: d:/MoroccanAI/Hackathon1/RAGs/storage/index_store.json
DEBUG:fsspec.local:open file: d:/MoroccanAI/Hackathon1/RAGs/storage/graph_store.json
open file: d:/MoroccanAI/Hackathon1/RAGs/storage/graph_store.json
open file: d:/MoroccanAI/Hackathon1/RAGs/storage/graph_store.json
open file: d:/MoroccanAI/Hackathon1/RAGs/storage/graph_store.json
DEBUG:fsspec.local:open file: d:/MoroccanAI/Hackathon1/RAGs/storage/default__vector_store.json
open file: d:/MoroccanAI/Hackathon1/RAGs/storage/default__vector_sto

## Modify the [Query](#query)

In [4]:
import os.path
from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    StorageContext,
    load_index_from_storage,
)

# check if storage already exists
if not os.path.exists("./storage"):
    # load the documents and create the index
    documents = SimpleDirectoryReader("data").load_data()
    index = VectorStoreIndex.from_documents(documents)
    # store it for later
    index.storage_context.persist()
else:
    # load the existing index
    storage_context = StorageContext.from_defaults(persist_dir="./storage")
    index = load_index_from_storage(storage_context)

# either way we can now query the index
query_engine = index.as_query_engine()
response = query_engine.query("Did i drink water ?")
print(response)

I cannot answer the query as there is no information provided in the context about whether or not you drank water.


# Customize your own solution

## ServiceContext

#### .from_defaults

chunk_size = X  ==> each chunk/node would have X bytes

In [2]:
from llama_index import ServiceContext

service_context = ServiceContext.from_defaults(chunk_size=1000)

print(service_context)

ServiceContext(llm_predictor=LLMPredictor(system_prompt=None, query_wrapper_prompt=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>), prompt_helper=PromptHelper(context_window=4096, num_output=256, chunk_overlap_ratio=0.1, chunk_size_limit=None, separator=' '), embed_model=OpenAIEmbedding(model_name='text-embedding-ada-002', embed_batch_size=10, callback_manager=<llama_index.callbacks.base.CallbackManager object at 0x00000253B2704FD0>, additional_kwargs={}, api_key='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', api_base='https://api.openai.com/v1', api_version='', max_retries=10, timeout=60.0, default_headers=None, reuse_client=True), transformations=[SentenceSplitter(include_metadata=True, include_prev_next_rel=True, callback_manager=<llama_index.callbacks.base.CallbackManager object at 0x00000253B2704FD0>, chunk_size=1000, chunk_overlap=200, separator=' ', paragraph_separator='\n\n\n', secondary_chunking_regex='[^,.;。？！]+[,.;。？！]?')], llama_logger=<llama_i