In [None]:
from llama_index.core import VectorStoreIndex,SimpleDirectoryReader,ServiceContext
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.prompts.prompts import SimpleInputPrompt
from ctransformers import  AutoModelForCausalLM

In [None]:
# Load the contents of the ./docs directory through SimpleDirectoryReader.
documents = SimpleDirectoryReader(input_dir="./docs").load_data()

# Echo the loaded documents as this cell's output.
documents

In [None]:
import json
# Read the prompt catalogue from ../prompts.json. The encoding is pinned to
# UTF-8 (the encoding JSON text is specified to use) instead of relying on the
# platform-dependent default of open().
with open('../prompts.json', 'r', encoding='utf-8') as f:
    loader = json.load(f)

# The "Default" entry is used as the system prompt; a missing key raises KeyError.
system_prompt = loader['Default']

# Template that wraps each user query before it is sent to the model.
# NOTE(review): the original comment here described this as the default format
# supported by Llama 2, but the active template uses <|USER|>/<|ASSISTANT|>
# tags rather than [INST] tags -- confirm which format the model expects.
# Disabled alternative kept for reference:
#query_wrapper_prompt = SimpleInputPrompt("[INST] <<SYS>>{query_str}<</SYS>> \n {system_prompt}[/INST]")
query_wrapper_prompt = SimpleInputPrompt("<|USER|>{query_str}<|ASSISTANT|>")

In [None]:
import os
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Hugging Face access token; None when HF_TOKEN is not set in the environment.
HuggingFace_Api = os.getenv('HF_TOKEN')

In [None]:
import torch
# Print the name of the current CUDA device as a quick sanity check before
# loading the model onto the GPU (device_map below is hard-coded to "cuda").
print(torch.cuda.get_device_name())

# Local Hugging Face model wrapped for use by llama-index.
llm = HuggingFaceLLM(
    context_window=4096,
    max_new_tokens=256,
    # Deterministic (greedy) decoding. The previous combination of
    # temperature=0.0 with do_sample=True is rejected by transformers, because
    # a sampling temperature must be strictly positive.
    generate_kwargs={"do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
    # Tokenizer and weights are both read from the local ../meta directory.
    tokenizer_name="../meta",
    model_name="../meta",
    device_map="cuda",
    # Half-precision dtype plus 8-bit loading to reduce GPU memory usage.
    # NOTE(review): load_in_8bit presumably needs bitsandbytes installed -- confirm.
    model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
)

In [None]:
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.core import ServiceContext
from llama_index.embeddings.langchain import LangchainEmbedding

# Sentence-transformers model used to embed the document chunks and queries.
hf_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2"
)

# Adapt the LangChain embedding object to the llama-index embedding interface.
embed_model = LangchainEmbedding(hf_embeddings)

In [None]:
# Bundle the LLM, the embedding model and the chunk size into one context
# object that is passed to the index builder.
# NOTE(review): ServiceContext may be deprecated in newer llama-index
# releases -- confirm against the installed version.
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    chunk_size=1024,
)

In [None]:
# Display the configured service context as this cell's output.
service_context

In [None]:
# Build the vector index over the loaded documents, using the LLM and
# embedding model carried by the service context.
index = VectorStoreIndex.from_documents(
    documents,
    service_context=service_context,
)

# Echo the index object as this cell's output.
index

In [None]:
# Turn the index into a query engine with its default settings.
query_engine = index.as_query_engine()

# Run the question once and keep the result under both names: `response` is
# printed here and `out` is read by the text-to-speech cell further down.
response = out = query_engine.query("what is this PDF tells about?")
print(response)

In [None]:
# Hard-coded sample answer text.
# NOTE(review): `gg` is not referenced by any other visible cell -- presumably
# a saved copy of an earlier model answer; confirm whether it is still needed.
gg = "This PDF provides an introduction to Python programming language, covering basic concepts, syntax, performance, and application areas. It also provides examples to illustrate the concepts and encourage students to learn more."

In [None]:
from elevenlabs.client import ElevenLabs
# ElevenLabs API client; the key is read from ELEVENLABS_TOKEN and is None
# when that environment variable is not set.
client = ElevenLabs(
    api_key=os.getenv('ELEVENLABS_TOKEN'),
)

In [None]:
from elevenlabs import generate, play
# `out` holds the query-engine response object rather than plain text, so it
# is converted to a string before being sent for speech synthesis.
# NOTE(review): this module-level generate() call does not receive the
# `client` created in the previous cell -- confirm that the API key is
# actually picked up for this request.
audio = generate(
    text=str(out),
    voice='Rachel',
    model='eleven_multilingual_v2',
)

# Play the synthesized audio.
play(audio)