In [1]:
from llama_index.core import VectorStoreIndex
from llama_index.core.node_parser import SentenceSplitter
from llama_index.vector_stores.postgres import PGVectorStore
from llama_index.core import SimpleDirectoryReader
from llama_index.core.storage import StorageContext

In [2]:
# Load everything SimpleDirectoryReader finds under ../data.
docs = SimpleDirectoryReader("../data").load_data()

# Split the loaded documents into nodes (chunk_size=512, chunk_overlap=100),
# with the progress bar disabled.
splitter = SentenceSplitter(chunk_size=512, chunk_overlap=100)
nodes = splitter.get_nodes_from_documents(docs, show_progress=False)

In [5]:
# Sanity check: inspect the class of the first item returned by the reader.
type(docs[0])

llama_index.core.schema.Document

In [2]:
import os

# Connection settings for the Postgres/pgvector store. Every value can be overridden
# through an environment variable so real credentials never need to be saved in the
# notebook; the fallbacks are the original local-development values, so behaviour is
# unchanged when no variable is set.
vector_store = PGVectorStore.from_params(
    database=os.environ.get("PGVECTOR_DATABASE", "pgvector_db"),
    host=os.environ.get("PGVECTOR_HOST", "localhost"),
    password=os.environ.get("PGVECTOR_PASSWORD", "password"),
    port=int(os.environ.get("PGVECTOR_PORT", "5432")),
    user=os.environ.get("PGVECTOR_USER", "myuser"),
    # NOTE(review): presumably has to match the vector size produced by the
    # embedding model used in the indexing/retrieval cells — confirm.
    embed_dim=768,
)

In [4]:
from llama_index.embeddings.ollama import OllamaEmbedding

# Wrap the Postgres vector store in a storage context, then build the index over
# `nodes` using the Ollama "nomic-embed-text" embedding model.
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(
    nodes,
    storage_context=storage_context,
    embed_model=OllamaEmbedding(model_name="nomic-embed-text"),
)

In [5]:
# Imported in this cell as well so the cell can be run without the index-building cell.
from llama_index.embeddings.ollama import OllamaEmbedding

storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Create an index view over the existing vector store and expose it as a retriever
# that returns the 2 most similar results per query.
# NOTE(review): from_vector_store may build its own storage context from
# `vector_store`, which would make the explicit argument redundant — confirm
# against the installed llama-index version.
retriever = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=OllamaEmbedding(model_name="nomic-embed-text"),
    storage_context=storage_context,
).as_retriever(similarity_top_k=2)

In [6]:
from llama_index.llms.ollama import Ollama
# LLM handle for the "mistral:7b" model via the Ollama integration; reused by the
# router retriever and the chat engine below.
llm = Ollama(model="mistral:7b")

In [7]:
from llama_index.core.tools import RetrieverTool

# Expose the vector-store retriever as a named tool; the name and description are
# the metadata attached to the tool.
retriever_tool = RetrieverTool.from_defaults(
    name="CompanyDocsRetriever",
    description="Tool for retrieving relevant documents",
    retriever=retriever,
)

In [8]:
from llama_index.core.retrievers import RouterRetriever

# Router over the available retriever tools (currently just one), driven by `llm`
# and configured with select_multi=True.
router_retriever = RouterRetriever.from_defaults(
    retriever_tools=[retriever_tool],
    llm=llm,
    select_multi=True,
)

In [9]:
from llama_index.core.chat_engine.condense_plus_context import CondensePlusContextChatEngine

import sys
import os

# Make the parent of the current working directory importable so the project-local
# `configuration` package resolves. The membership check keeps the cell idempotent:
# re-running it no longer appends a duplicate entry to sys.path each time.
project_root = os.path.abspath(os.path.join(os.getcwd(), '..'))
if project_root not in sys.path:
    sys.path.append(project_root)
from configuration.nodeprocessor import DefaultNodePostProcessor

# Chat engine that retrieves through the router retriever and answers with `llm`,
# starting from an empty chat history.
chat_engine = CondensePlusContextChatEngine.from_defaults(
    retriever=router_retriever,
    llm=llm,
    chat_history=[],
    system_prompt="You are a helpful assistant answering questions based on company documents.",
    # Custom post-processing is currently disabled; uncomment to enable it.
    # node_postprocessors=[DefaultNodePostProcessor()]
)

In [10]:
# Ask the chat engine a question in streaming mode; the reply text is read
# afterwards from `response.response_gen`.
response = chat_engine.stream_chat("Tell me about the deep singer paper")

In [11]:
# Drain the streamed chunks into one string. str.join avoids repeated string
# concatenation and does not leave a stray loop variable in the notebook namespace.
# NOTE(review): response_gen is presumably a one-shot generator, so re-running this
# cell without re-running the stream_chat cell would likely yield "" — confirm.
accumulated_response = "".join(response.response_gen)

In [12]:
# Display the complete reply text collected from the stream.
accumulated_response

' The "DeepSinger: Singing Voice Synthesis with Data Mined From the Web" is a research paper presented at the 26th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD ’20) in 2020.\n\nThe primary focus of this paper is Singing Voice Synthesis (SVS), which is a technology that generates singing voices from lyrics. This field has gained significant attention due to the advancements in deep neural networks. The authors compare SVS to Text-to-Speech (TTS) systems, stating that while TTS allows machines to speak, SVS enables them to sing as well. They mention that singing voices have more complex prosody than speaking voices, making SVS more challenging than TTS.\n\nThe paper discusses previous works on SVS, including lyrics-to-singing alignment, parametric synthesis, acoustic modeling, and adversarial synthesis. However, these systems often require high-quality singing recordings as training data and strict alignments between lyrics and singing audio for accurate singing mode