In [1]:
%pip install -q --upgrade llama-index-llms-together llama-index llama-index-embeddings-jinaai llama-index-vector-stores-chroma docx2txt transformers torch "numpy<2" gTTS datasets sentencepiece backports.lzma pylzma


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


### What is Retrieval Augmented Generation (RAG)?
RAG is a technique that enhances language models by combining them with a retrieval system. It allows the model to access and utilize external knowledge when generating responses.

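The process typically involves:
1. Indexing a large corpus of documents
2. Retrieving the passages most relevant to the user's query
3. Passing those passages to the language model as context when it generates the answer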

In [None]:
import os
from getpass import getpass

# Keep real keys out of the notebook source: reuse a key that is already set
# in the environment, and only prompt (with hidden input) when it is missing
# or blank. The previous version overwrote both variables with "".
for _key_name in ("TOGETHER_API_KEY", "JINAAI_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f"Enter {_key_name}: ")

### Call LLM

In [3]:
from llama_index.llms.together import TogetherLLM

# Together-hosted model client; the API key is read from the environment.
llm = TogetherLLM(
    model="meta-llama/Llama-Vision-Free",
    api_key=os.environ["TOGETHER_API_KEY"],
)

# Single completion request as a quick check that the client works.
llm_response = llm.complete("Tell me a joke")
print(llm_response)

  from .autonotebook import tqdm as notebook_tqdm


Here's one:

What do you call a fake noodle?

An impasta.


### LLM Messages

In [4]:
from llama_index.core.llms import ChatMessage

# Conversation as (role, content) pairs: a system turn that sets the persona,
# followed by the user's question.
messages = [
    ChatMessage(role=role, content=content)
    for role, content in (
        ("system", "You are a pirate with a colorful personality"),
        ("user", "What is your name"),
    )
]

# Send the whole conversation in one chat call and show the reply.
resp = llm.chat(messages)
print(resp)

assistant: Me hearty! Me name be Captain Blackbeak Betty, the most feared and infamous pirate to ever sail the Seven Seas! Me be a swashbucklin' scallywag with a heart o' gold and a spirit o' adventure. Me ship, the "Maverick's Revenge", be me home, and me trusty cutlass, "Betsy", be me best mate.

Now, what be bringin' ye to these fair waters? Are ye lookin' to join me crew and sail the seas in search o' treasure and glory? Or be ye just lookin' for a bit o' pirate-y fun and games? Either way, I be happy to have ye aboard!


In [5]:
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext
from IPython.display import Markdown, display
import chromadb

# Load the contents of ./docs/ into `documents`.
documents = SimpleDirectoryReader(input_dir="./docs/").load_data()

# Number of loaded document objects.
print(len(documents))



2


## Splitter

In [6]:
from llama_index.core import PromptHelper
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings

# Global node parser: sentence-based splitting with chunk_size=300 and
# chunk_overlap=30.
Settings.node_parser = SentenceSplitter(
    chunk_size=300,
    chunk_overlap=30,
)

### Embeddings

In [7]:
from llama_index.embeddings.jinaai import JinaEmbedding

# Jina embedding client; the API key is read from the environment.
embed_model = JinaEmbedding(
    model="jina-embeddings-v3",
    api_key=os.environ["JINAAI_API_KEY"],
)

## Set Settings

In [8]:
# Register the LLM and the embedding model as the global defaults.
Settings.llm = llm
Settings.embed_model = embed_model

# Output budget and context window used when prompts are assembled.
Settings.num_output = 512
Settings.context_window = 4096

# `Settings` has no `chunk_overlap_ratio` field, so assigning it on `Settings`
# only creates an attribute nothing reads. The ratio is a `PromptHelper`
# option, so set it on the prompt helper that `Settings` manages.
Settings.prompt_helper.chunk_overlap_ratio = 0.1

### Create and persist Chroma vector store

In [9]:
# Chroma collection persisted on disk under ./chroma_db, wrapped as the
# vector store that backs the index.
db = chromadb.PersistentClient(path="./chroma_db")
chroma_collection = db.get_or_create_collection("my_collection")
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

if chroma_collection.count() > 0:
    # The collection was filled by an earlier run. Reuse the stored vectors
    # rather than embedding and inserting every chunk again, which would leave
    # duplicate entries in the persistent collection.
    # NOTE: delete ./chroma_db (or the collection) to re-index after the
    # contents of ./docs/ change.
    index = VectorStoreIndex.from_vector_store(vector_store)
else:
    # First run: embed the documents and write them into the collection.
    # The global `Settings` are applied implicitly; `settings=` is not a
    # recognised argument of `from_documents`, so it is no longer passed.
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
    )

## Chat Engine English

In [10]:
# Create a chat engine from the index (despite the variable name, this is a
# chat engine, not a query engine) and ask it a single question.
query_engine = index.as_chat_engine()
response = query_engine.chat("When was TechNova Solutions founded?")
print(response)

TechNova Solutions was founded in 2015.


In [11]:
# A new chat engine is created for this question, so it does not reuse the
# engine object from the previous cell.
query_engine = index.as_chat_engine()
response = query_engine.chat("What is the website of TechNova Solutions?")
print(response)

The website of TechNova Solutions is www.technovasolutions.com.


## Voice RAG Chatbot

In [12]:
from transformers import pipeline
import torch

# Use the first CUDA device when PyTorch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda:0"
else:
    device = "cpu"

# Display the selected device as the cell output.
device

'cpu'

### Speech to text using microphone via ffmpeg and Whisper

In [13]:
from transformers.pipelines.audio_utils import ffmpeg_microphone_live
import sys

# Speech-recognition pipeline using the openai/whisper-base.en checkpoint,
# placed on the device selected earlier.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base.en",
    device=device,
)
def transcribe(chunk_length_s=5.0, stream_chunk_s=1.0, show_partial=False):
    """Listen on the microphone and return the transcription of one utterance.

    Audio from ``ffmpeg_microphone_live`` is streamed into the notebook-level
    ``transcriber`` pipeline. Iteration stops at the first result whose
    ``"partial"`` flag is falsy.

    Args:
        chunk_length_s: Forwarded to ``ffmpeg_microphone_live`` as
            ``chunk_length_s``.
        stream_chunk_s: Forwarded to ``ffmpeg_microphone_live`` as
            ``stream_chunk_s``.
        show_partial: When True, each intermediate transcription is printed in
            place on one line. Defaults to False, which prints nothing until
            the final text is available.

    Returns:
        The ``"text"`` value of the last result received, or ``""`` if the
        pipeline produced no result at all.
    """
    sampling_rate = transcriber.feature_extractor.sampling_rate

    mic = ffmpeg_microphone_live(
        sampling_rate=sampling_rate,
        chunk_length_s=chunk_length_s,
        stream_chunk_s=stream_chunk_s,
    )

    # Fallback so the lines after the loop work even if nothing is yielded.
    item = {"text": ""}

    print("Start speaking...")
    for item in transcriber(mic, generate_kwargs={"max_new_tokens": 128}):
        if show_partial:
            # Erase the current line, then rewrite it with the latest text.
            # The erase sequence is emitted only together with the in-place
            # print; on its own it just litters the cell output with "[K".
            sys.stdout.write("\033[K")
            print(item["text"], end="\r")
        if not item["partial"][0]:
            break

    print("Question: ",item["text"])
    return item["text"]

In [20]:
transcribe()

Start speaking...




[K[K[K[K[KQuestion:   Hello, how are you?


' Hello, how are you?'

### Text to Speech using Microsoft's open-source SpeechT5 model

In [15]:
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

# Text processor for the SpeechT5 text-to-speech checkpoint.
processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")

# Text-to-speech model, loaded and then moved to the selected device.
model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
model = model.to(device)

# HiFi-GAN vocoder, loaded and then moved to the same device.
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
vocoder = vocoder.to(device)

In [16]:
from datasets import load_dataset

# Validation split of the CMU Arctic x-vector dataset.
embeddings_dataset = load_dataset(
    "Matthijs/cmu-arctic-xvectors",
    split="validation",
)

# Take the "xvector" entry of row 7306 and add a leading dimension of size 1.
speaker_embeddings = torch.unsqueeze(
    torch.tensor(embeddings_dataset[7306]["xvector"]),
    0,
)

In [17]:
def synthesise(text):
    """Turn `text` into speech and return the result as a CPU tensor.

    Relies on the notebook-level `processor`, `model`, `vocoder`,
    `speaker_embeddings` and `device`.

    Args:
        text: The text to be spoken.

    Returns:
        The output of `model.generate_speech`, moved to the CPU.
    """
    # NOTE(review): no length limit is applied to `text` here; confirm how the
    # model behaves on long answers.
    token_ids = processor(text=text, return_tensors="pt")["input_ids"]
    waveform = model.generate_speech(
        token_ids.to(device),
        speaker_embeddings.to(device),
        vocoder=vocoder,
    )
    return waveform.cpu()

In [18]:
from IPython.display import Audio

# Synthesise a short test sentence.
audio = synthesise("Hello! how are you?")

# Render an audio player for the result; 16000 is passed as the sample rate.
Audio(audio, rate=16000)

#### Question answering using both the `transcribe` and `synthesise` functions

In [None]:
# Voice round trip: record a spoken question, send its transcription to a
# chat engine built from the index, print the answer, then speak it.
query_engine = index.as_chat_engine()
response = query_engine.chat(transcribe())
print("Answer :",response)
# `response.response` is passed to the synthesiser rather than the response object.
audio = synthesise(response.response)
# Last expression of the cell: renders the audio player.
Audio(audio, rate=16000)

Start speaking...




[K[K[K[K[KQuestion:   When was technova solutions founded?
Answer : Technova Solutions was founded in 2015.


In [30]:
# Same voice round trip as the previous cell, run again for a second spoken
# question; a new chat engine is created for it.
query_engine = index.as_chat_engine()
response = query_engine.chat(transcribe())
print("Answer :",response)
# `response.response` is passed to the synthesiser rather than the response object.
audio = synthesise(response.response)
# Last expression of the cell: renders the audio player.
Audio(audio, rate=16000)

Start speaking...




[K[K[K[K[KQuestion:   What is the website of Technova Solutions?
Answer : The website of Technova Solutions is www.technovasolutions.com.
