In [None]:
!docker run --name redis -d -p 6379:6379 redis


In [None]:
!docker ps  


In [None]:
import logging
import sys

# Send INFO-and-above records to stdout through exactly one root handler.
# force=True replaces any handlers already on the root logger, so the setup
# takes effect even where a handler was pre-installed, and re-running this cell
# never stacks handlers (a second stdout handler would print each record twice).
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [None]:
%pip install llama-index-storage-docstore-redis
%pip install llama-index-storage-index-store-redis
%pip install llama-index-llms-openai
!pip install llama-index


In [None]:
from llama_index.core import SimpleDirectoryReader

# Alternative input file: '../data/paul_graham_essay3.txt'
# Load the quarterly-report PDF; the reader is built first, then asked for
# its documents.
pdf_reader = SimpleDirectoryReader(input_files=["../data/2022 Q3 AAPL.pdf"])
documents = pdf_reader.load_data()




In [None]:
from llama_index.core.node_parser import SentenceSplitter
# Split the loaded documents into nodes using a SentenceSplitter built with
# its default settings.
nodes = SentenceSplitter().get_nodes_from_documents(documents=documents)


In [None]:
import os
from llama_index.storage.docstore.redis import RedisDocumentStore
from llama_index.storage.index_store.redis import RedisIndexStore
from llama_index.core import StorageContext

# Redis connection settings, overridable through environment variables.
REDIS_HOST = os.getenv("REDIS_HOST", "127.0.0.1")
# os.getenv returns a str whenever the variable is set, so convert explicitly:
# the port is then always an int, whether or not REDIS_PORT is defined.
REDIS_PORT = int(os.getenv("REDIS_PORT", "6379"))

# Storage context whose document store and index store both live in Redis,
# sharing the "llama_index" namespace.
storage_context = StorageContext.from_defaults(
    docstore=RedisDocumentStore.from_host_and_port(
        host=REDIS_HOST, port=REDIS_PORT, namespace="llama_index"
    ),
    index_store=RedisIndexStore.from_host_and_port(
        host=REDIS_HOST, port=REDIS_PORT, namespace="llama_index"
    ),
)


In [None]:
# Store every parsed node in the Redis-backed document store, then report how
# many entries the store holds.
storage_context.docstore.add_documents(docs=nodes)
print(f"Total nodes in Redis DocumentStore: {len(storage_context.docstore.docs)}")


In [None]:
#os.environ["OPENAI_API_KEY"] = getpass.getpass("open ai api key: ")
from llama_index.core import Settings
from llama_index.llms.ollama import  Ollama
# Use a model served by the local Ollama endpoint as the global LLM.
Settings.llm = Ollama(
    model="llama3.2:latest",
    base_url="http://localhost:11434",
    temperature=0.1,
)
# NOTE(review): `nodes` was already produced by an explicitly constructed
# SentenceSplitter, so this presumably only affects later Settings-driven
# parsing - confirm.
Settings.chunk_size = 1024



In [None]:
from llama_index.embeddings.ollama import OllamaEmbedding

# Embedding client for the Ollama server.
#   model_name  - replace with your desired embedding model.
#   base_url    - Ollama must be running at this endpoint.
#   mirostat    - Mirostat is a technique for controlling perplexity and
#                 balancing the text generation process in LLMs.
#                 NOTE(review): presumably irrelevant for embeddings - confirm.
ollama_embedding = OllamaEmbedding(
    model_name="nomic-embed-text:latest",
    base_url="http://localhost:11434",
    ollama_additional_kwargs={"mirostat": 0},
    embed_batch_size=1024,
    num_workers=1,
)

In [None]:
from llama_index.core import SummaryIndex
# Build a summary index over the parsed nodes, registered in the shared
# storage context.
summary_index = SummaryIndex(nodes=nodes, storage_context=storage_context)


In [None]:
from llama_index.core import VectorStoreIndex
# Build a vector index over the same nodes, embedding them with the Ollama
# embedding model rather than the globally configured default.
vector_index = VectorStoreIndex(
    nodes=nodes,
    storage_context=storage_context,
    embed_model=ollama_embedding,
)


In [None]:
from llama_index.core import SimpleKeywordTableIndex
# Build a keyword-table index over the same nodes in the shared storage
# context.
keyword_table_index = SimpleKeywordTableIndex(
    nodes=nodes,
    storage_context=storage_context,
)


In [None]:
# Persist the storage context to ./redis_storage.
# NOTE(review): the docstore and index store are Redis-backed, so presumably
# only the remaining in-memory stores (e.g. the default vector store) are
# written to this directory - confirm.
storage_context.persist("./redis_storage")


In [None]:
# Remember each index's id so the indices can be looked up again when they are
# reloaded from storage.
list_id, vector_id, keyword_id = (
    summary_index.index_id,
    vector_index.index_id,
    keyword_table_index.index_id,
)


In [None]:
from llama_index.core import load_index_from_storage

# Rebuild the storage context the way a fresh process would. The docstore and
# index store are read from Redis; persist_dir points at the directory written
# by storage_context.persist() so the persisted vector store is loaded as well.
# Without persist_dir, from_defaults creates a new, empty in-memory vector
# store and the reloaded vector index has no embeddings to search.
storage_context = StorageContext.from_defaults(
    docstore=RedisDocumentStore.from_host_and_port(
        host=REDIS_HOST, port=REDIS_PORT, namespace="llama_index"
    ),
    index_store=RedisIndexStore.from_host_and_port(
        host=REDIS_HOST, port=REDIS_PORT, namespace="llama_index"
    ),
    persist_dir="./redis_storage",
)

# Load each index by the id recorded when it was built. The vector index is
# given the same embedding model it was built with, so query embeddings match
# the stored ones.
summary_index_loaded = load_index_from_storage(
    storage_context=storage_context, index_id=list_id
)
vector_index_loaded = load_index_from_storage(
    storage_context=storage_context,
    index_id=vector_id,
    embed_model=ollama_embedding,
)
keyword_table_index_loaded = load_index_from_storage(
    storage_context=storage_context, index_id=keyword_id
)


In [None]:
# Query the reloaded summary index and print its answer.
query_engine = summary_index_loaded.as_query_engine()
summary_index_response = query_engine.query(
    "What is a summary of this document?"
)
print(summary_index_response)


In [None]:

# Query the reloaded vector index and print its answer.
query_engine = vector_index_loaded.as_query_engine()
vector_index_response = query_engine.query(
    "What this document is about?"
)
print(vector_index_response)

In [None]:
# Query the reloaded keyword-table index and print its answer.
query_engine = keyword_table_index_loaded.as_query_engine()
keyword_table_response = query_engine.query(
    "Which company result document is this?"
)
print(keyword_table_response)
