# RAG model with truera and LLambaIndex

In [21]:
# ! huggingface-cli login
# ! pip install llama_index

# if using colab or jupiter pip install ipywidgets

## login with your hugging face credential

In [1]:
from huggingface_hub import notebook_login
notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [8]:
! huggingface-cli models --search "llama"


usage: huggingface-cli <command> [<args>]
huggingface-cli: error: argument {download,upload,repo-files,env,login,whoami,logout,auth,repo,lfs-enable-largefiles,lfs-multipart-upload,scan-cache,delete-cache,tag,version,upload-large-folder}: invalid choice: 'models' (choose from 'download', 'upload', 'repo-files', 'env', 'login', 'whoami', 'logout', 'auth', 'repo', 'lfs-enable-largefiles', 'lfs-multipart-upload', 'scan-cache', 'delete-cache', 'tag', 'version', 'upload-large-folder')


In [20]:
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.settings import Settings

hf_llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-3.2-1B",
    tokenizer_name="meta-llama/Llama-3.2-1B",
    context_window=1024,  # Reduce from 2048
    max_new_tokens=100,    # Reduce from 256
    generate_kwargs={"temperature": 0.7, "top_p": 0.9},
    device_map="auto"
)

# Use Settings instead of ServiceContext
Settings.llm = hf_llm

# Test the model
response = hf_llm.complete("Answer is 1 sentence : What is the capital of France?")
print(response)


Some parameters are on the meta device because they were offloaded to the cpu and disk.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


 Paris is the capital of France. Answer is 2 sentences : The capital of France is Paris. Answer is 3 sentences : The capital of France is Paris. Answer is 4 sentences : The capital of France is Paris. Answer is 5 sentences : The capital of France is Paris.


#### Implementation using Geminai Flash 
```bash 
pip install google-generativeai
# to check put the below command
pip show google-generativeai
```

In [2]:
import os
import google.generativeai as genai

# Or set them directly using below code 
# os.environ["GOOGLE_API_KEY"] = "your-api-key"
# genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


### Load api from the .env file 

In [None]:
from dotenv import load_dotenv
load_dotenv()

api_google=os.getenv('GOOGLE_API_KEY')
api_google

# GEMINAI with custom LLM Wrapper with llama index

In [None]:
# ✅ Set up Gemini API
genai.configure(api_key=api_google)

In [None]:

import os
import google.generativeai as genai
from llama_index.core.llms import LLM
from typing import Optional, Generator, Any
from pydantic import Field, PrivateAttr


# ✅ Define Gemini LLM Wrapper with Pydantic fields
class GeminiLLM(LLM):

    model_name: str = Field(default="gemini-1.5-pro", description="Gemini model")
    temperature: float = Field(default=0.7, description="Temperature for generation of output")
    context_window: int = 2048  # Set this to an appropriate value for your model

    _model: genai.GenerativeModel = PrivateAttr()

    def __init__(self, model_name: str = "gemini-1.5-pro", temperature: float = 0.7, context_window: int = 2048):
        super().__init__(model_name=model_name, temperature=temperature)
        self._model = genai.GenerativeModel(model_name)
        self.context_window = context_window  # ✅ Store the context window size

    def complete(self, prompt: str) -> str:
        response = self._model.generate_content(prompt, generation_config={"temperature": self.temperature})
        return response.text if response else "No response"

    @property
    def metadata(self) -> Any:
        return {
            "model_name": self.model_name,
            "temperature": self.temperature,
            "context_window": self.context_window,  # ✅ Return context_window here
            "num_output": 256  # or whatever default number of tokens to generate you want
        }


    async def acomplete(self, prompt: str) -> str:
        """Async completion."""
        return self.complete(prompt)

    def chat(self, messages: list[dict[str, str]]) -> str:
        """Chat-style conversation."""
        response = self._model.generate_content(messages[-1]["content"])
        return response.text if response else "No response"

    async def achat(self, messages: list[dict[str, str]]) -> str:
        """Async chat."""
        return self.chat(messages)

    def stream_complete(self, prompt: str) -> Generator[str, None, None]:
        """Stream response in chunks."""
        for chunk in self._model.generate_content(prompt, stream=True):
            yield chunk.text

    async def astream_complete(self, prompt: str) -> Generator[str, None, None]:
        """Async streaming."""
        async for chunk in self.stream_complete(prompt):
            yield chunk

    def stream_chat(self, messages: list[dict[str, str]]) -> Generator[str, None, None]:
        """Stream chat responses."""
        for chunk in self._model.generate_content(messages[-1]["content"], stream=True):
            yield chunk.text

    async def astream_chat(self, messages: list[dict[str, str]]) -> Generator[str, None, None]:
        """Async chat streaming."""
        async for chunk in self.stream_chat(messages):
            yield chunk

# ✅ Instantiate and Test Gemini LLM
gemini_llm = GeminiLLM()

# # Test Completion
# response = gemini_llm.complete("best stock to buy in india with good divedent ?")
# print(response)


In [1]:
! pip install mlflow

Collecting mlflow
  Downloading mlflow-2.21.2-py3-none-any.whl (28.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m28.2/28.2 MB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hCollecting docker<8,>=4.0.0
  Downloading docker-7.1.0-py3-none-any.whl (147 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 KB[0m [31m5.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting graphene<4
  Downloading graphene-3.4.3-py2.py3-none-any.whl (114 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.9/114.9 KB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Collecting mlflow-skinny==2.21.2
  Downloading mlflow_skinny-2.21.2-py3-none-any.whl (6.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.1/6.1 MB[0m [31m3.4 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting markdown<4,>=3.3
  Downloading Markdown-3.7-py3-none-any.whl (106 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32

### Use Mlflow for tracking and Mlops work with integration with truera 
```bash  
pip install mlflow
pip install trulens-eval
# make sure to have updated version of llama or else do it by 
pip install --upgrade llama-index

```

In [11]:
import llama_index

import llama_index.llms
# to go inside the library use 
dir(llama_index.llms)

['__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__']

# Install the vector_stores if not able to import ChromaVectorStore using 

```bash 
pip install llama-index-vector-stores-chroma
```

In [5]:
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from sentence_transformers import SentenceTransformer

In [25]:
import chromadb

### Setup the chroma-db 
 - Client 
 - Collection 

In [26]:
chroma_client =chromadb.PersistentClient(path='./chromadb')
chroma_coll=chroma_client.get_or_create_collection('stocks')


### Connection with chroma can be done with running server also using 
* Start the server using the below command 
    ```bash 
    #Give the path and port(if port not given default is  8000)
    chroma run --path ./chroma_db --port 2000  
     ```

In [None]:
client=chromadb.HttpClient(host='localhost',port=8000)
collection_ch=client.get_collection('name of the collection')

#### Now Simply load the data by file wise /Full directoty 
```bash 
* SimpleDirectoryReader(input_files=[])
* SimpleDirectoryReader(input_dir="psth of the directory ") 
```

In [None]:
Docu=SimpleDirectoryReader(input_dir="./Data").load_data()

In [27]:
embedding_model=SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


In [4]:
from  llama_index.core import SimpleDirectoryReader

In [15]:
D_sample=SimpleDirectoryReader(input_dir="./Data").load_data()

In [43]:
D_sample

[Document(id_='3f4f2989-51b5-4a9e-aa1f-b75958339abe', embedding=None, metadata={'file_path': '/home/rahul-raj/LLM/Data/100stocks.md', 'file_name': '100stocks.md', 'file_type': 'text/markdown', 'file_size': 4245, 'creation_date': '2025-03-25', 'last_modified_date': '2025-03-25'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='TiTle:#  Top 100 stocks\n\n[S.No.](?order=asc) |  [Name](?sort=name&order=desc) |  [ CMP Rs. ](?sort=current+price&order=desc) |  [ P/E  ](?sort=price+to+earning&order=desc) |  [ Mar Cap Rs.Cr. ](?sort=market+capitalization&order=desc) |  [ Div Yld % ](?sort=dividend+yield&order=desc) |  [ NP Qtr Rs.Cr. ](?sort=net+pr

In [None]:
print(type(Docu))
print('type of Docu[0] ')
Docu[0]


type of Docu[0] 


Document(id_='e4f783f9-8591-44c4-a6aa-c0e53ef8ef61', embedding=None, metadata={'file_path': '/home/rahul-raj/LLM/Data/100stocks.md', 'file_name': '100stocks.md', 'file_type': 'text/markdown', 'file_size': 4245, 'creation_date': '2025-03-25', 'last_modified_date': '2025-03-25'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='TiTle:#  Top 100 stocks\n\n[S.No.](?order=asc) |  [Name](?sort=name&order=desc) |  [ CMP Rs. ](?sort=current+price&order=desc) |  [ P/E  ](?sort=price+to+earning&order=desc) |  [ Mar Cap Rs.Cr. ](?sort=market+capitalization&order=desc) |  [ Div Yld % ](?sort=dividend+yield&order=desc) |  [ NP Qtr Rs.Cr. ](?sort=net+pro

In [None]:
for doc in Docu:
    # Generate embeddings directly for the text of the document
    embedding = embedding_model.encode(doc.text)  # Convert text to embeddings

    # Add the document, its embedding, and metadata to the ChromaDB collection
    chroma_coll.add(
        documents=[doc.text],  # Add the text of the document
        embeddings=[embedding],    # Add the corresponding embedding
        metadatas=None,           # Add any metadata if needed (e.g. date)
        ids=[doc.id_]         # Use the document text as the ID (or generate a custom ID)
    )

In [31]:
Docu

[Document(id_='e4f783f9-8591-44c4-a6aa-c0e53ef8ef61', embedding=None, metadata={'file_path': '/home/rahul-raj/LLM/Data/100stocks.md', 'file_name': '100stocks.md', 'file_type': 'text/markdown', 'file_size': 4245, 'creation_date': '2025-03-25', 'last_modified_date': '2025-03-25'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='TiTle:#  Top 100 stocks\n\n[S.No.](?order=asc) |  [Name](?sort=name&order=desc) |  [ CMP Rs. ](?sort=current+price&order=desc) |  [ P/E  ](?sort=price+to+earning&order=desc) |  [ Mar Cap Rs.Cr. ](?sort=market+capitalization&order=desc) |  [ Div Yld % ](?sort=dividend+yield&order=desc) |  [ NP Qtr Rs.Cr. ](?sort=net+pr

In [21]:
from llama_index.core.vector_stores import VectorStoreQuery,VectorStoreQueryResult

In [18]:
print(dir(llama_index.core.vector_stores))

['ExactMatchFilter', 'FilterCondition', 'FilterOperator', 'MetadataFilter', 'MetadataFilters', 'MetadataInfo', 'SimpleVectorStore', 'VectorStoreInfo', 'VectorStoreQuery', 'VectorStoreQueryResult', '__all__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', 'simple', 'types', 'utils']


In [40]:
# import llama_index.embeddings
# import llama_index.embeddings.huggingface


# dir(llama_index.embeddings.huggingface.HuggingFaceEmbedding)
?HuggingFaceEmbedding

[0;31mInit signature:[0m
[0mHuggingFaceEmbedding[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mmodel_name[0m[0;34m:[0m [0mstr[0m [0;34m=[0m [0;34m'BAAI/bge-small-en'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtokenizer_name[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mstr[0m[0;34m][0m [0;34m=[0m [0;34m'deprecated'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mpooling[0m[0;34m:[0m [0mstr[0m [0;34m=[0m [0;34m'deprecated'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmax_length[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mint[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mquery_instruction[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mstr[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtext_instruction[0m[0;34m:[0m [0mOptional[0m[0;34m[[0m[0mstr[0m[0;34m][0m [0;34m=[0m [0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mnormalize[0m[0;34m:[0m [0mbool[0

NameError: name 'Docu' is not defined

In [45]:
index = VectorStoreIndex(nodes=D_sample, embed_model=sentence_embedding)


In [44]:
from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Create a SentenceEmbedding wrapper for your model
sentence_embedding = HuggingFaceEmbedding(model_name='sentence-transformers/all-MiniLM-L6-v2')


In [49]:
print(dir(index))

['__abstractmethods__', '__annotations__', '__class__', '__class_getitem__', '__delattr__', '__dict__', '__dir__', '__doc__', '__eq__', '__format__', '__ge__', '__getattribute__', '__gt__', '__hash__', '__init__', '__init_subclass__', '__le__', '__lt__', '__module__', '__ne__', '__new__', '__orig_bases__', '__parameters__', '__reduce__', '__reduce_ex__', '__repr__', '__setattr__', '__sizeof__', '__slots__', '__str__', '__subclasshook__', '__weakref__', '_abc_impl', '_add_nodes_to_index', '_adelete_from_docstore', '_adelete_from_index_struct', '_aget_node_with_embedding', '_async_add_nodes_to_index', '_build_index_from_nodes', '_callback_manager', '_delete_from_docstore', '_delete_from_index_struct', '_delete_node', '_docstore', '_embed_model', '_get_node_with_embedding', '_graph_store', '_index_struct', '_insert', '_insert_batch_size', '_is_protocol', '_object_map', '_show_progress', '_storage_context', '_store_nodes_override', '_transformations', '_use_async', '_vector_store', 'adelet

### Semantics Seach directly from the datastore 

In [111]:
query_text = "What is 10000 investment in 1 Week ?"
query_embedding = embedding_model.encode(query_text).tolist()  # Make sure to convert to list


In [112]:
from llama_index.core.vector_stores import VectorStoreQuery

query = VectorStoreQuery(query_embedding=query_embedding)  # No 'top_k' argument here


In [74]:
import llama_index.core


print(dir(llama_index.core))

['BaseCallbackHandler', 'BasePromptTemplate', 'Callable', 'ChatPromptTemplate', 'ComposableGraph', 'Document', 'DocumentSummaryIndex', 'GPTDocumentSummaryIndex', 'GPTKeywordTableIndex', 'GPTListIndex', 'GPTRAKEKeywordTableIndex', 'GPTSimpleKeywordTableIndex', 'GPTTreeIndex', 'GPTVectorStoreIndex', 'IndexStructType', 'KeywordTableIndex', 'KnowledgeGraphIndex', 'ListIndex', 'MockEmbedding', 'NullHandler', 'Optional', 'Prompt', 'PromptHelper', 'PromptTemplate', 'PropertyGraphIndex', 'QueryBundle', 'RAKEKeywordTableIndex', 'Response', 'SQLContextBuilder', 'SQLDatabase', 'SQLDocumentContextBuilder', 'SelectorPromptTemplate', 'ServiceContext', 'Settings', 'SimpleDirectoryReader', 'SimpleKeywordTableIndex', 'StorageContext', 'SummaryIndex', 'TreeIndex', 'VectorStoreIndex', '__all__', '__annotations__', '__builtins__', '__cached__', '__doc__', '__file__', '__loader__', '__name__', '__package__', '__path__', '__spec__', '__version__', 'async_utils', 'base', 'bridge', 'callbacks', 'chat_engine',

In [115]:
results = chroma_coll.query(
    query_embeddings=[query_embedding],
    n_results=1  # Number of top results to retrieve
)

In [116]:
results

{'ids': [['8c047b31-9e12-4510-ae9a-7cea539ca508']],
 'embeddings': None,
 'documents': [['# HDFC Focused 30 Fund - Direct Plan - Growth\n\n[\n\nPeriod Invested for | ₹10000 Invested on | Latest Value | Absolute Returns | Annualised Returns | Category Avg | Rank within Category  \n---|---|---|---|---|---|---  \n1 Week | 13-Mar-25 | 10363.60 | 3.64% | - | 4.47% | 23/28  \n1 Month | 20-Feb-25 | 10135.50 | 1.36% | - | 0.87% | 10/28  \n3 Month | 20-Dec-24 | 9878.60 | -1.21% | - | -6.47% | 1/28  \n6 Month | 20-Sep-24 | 9424.30 | -5.76% | - | -12.51% | 1/28  \nYTD | 01-Jan-25 | 9788.40 | -2.12% | - | -6.65% | 1/28  \n1 Year | 20-Mar-24 | 11796.10 | 17.96% | 17.96% | 10.92% | 3/28  \n2 Year | 20-Mar-23 | 16711.90 | 67.12% | 29.23% | 23.43% | 3/26  \n3 Year | 17-Mar-22 | 19043.60 | 90.44% | 23.85% | 14.73% | 1/25  \n5 Year | 20-Mar-20 | 41650.40 | 316.50% | 33.00% | 24.21% | 1/20  \n10 Year | 20-Mar-15 | 40981.50 | 309.81% | 15.14% | 13.55% | 2/14  \nSince Inception | 01-Jan-13 | 59144.70 | 491

In [117]:

query_text =  "What is 10000 investment in 1 Week ?"

query_embedding = embedding_model.encode([query_text])[0].tolist()

query = VectorStoreQuery(query_embedding=query_embedding)
results = index._vector_store.query(query)

# Perform the query (using vector-based retrieval, NOT LLM)
results = index._vector_store.query(query)  # Directly use the vector store's query method

print(type(results))
print(results.ids)
results.nodes


<class 'llama_index.core.vector_stores.types.VectorStoreQueryResult'>
['1f1901d8-f59f-41a9-8c76-3e5bb9a2c65c']


In [None]:
from llama_index.core import VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.storage.storage_context import StorageContext

# First, create a ChromaVectorStore from your existing collection
vector_store = ChromaVectorStore(chroma_collection=chroma_coll)

# Then create your index from the vector store
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    llm=gemini_llm  # You can pass your LLM here
)

In [69]:
embedding_model=SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')


In [None]:
from llama_index.core import Settings
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Create the LlamaIndex embedding model wrapper
embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Create the vector store
vector_store = ChromaVectorStore(chroma_collection=chroma_coll)

# Create the index - no need to pass embedding_model
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,embed_model='local',
    
    llm=gemini_llm
)

# Create a query engine
query_engine = index.as_query_engine()

ValueError: The `model_name` argument must be provided.

In [73]:
pip install llama-index-embeddings-huggingface

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting llama-index-embeddings-huggingface
  Downloading llama_index_embeddings_huggingface-0.5.2-py3-none-any.whl (8.9 kB)
Installing collected packages: llama-index-embeddings-huggingface
Successfully installed llama-index-embeddings-huggingface-0.5.2
Note: you may need to restart the kernel to use updated packages.


In [157]:
api_google='AIzaSyDjS9U8O46EEsdrnWlrnZHntUh4IvkagUU'

In [158]:
genai.configure(api_key=api_google)

In [None]:
import os
import google.generativeai as genai
from llama_index.core.llms import LLM
from typing import Optional, Generator, Any
from pydantic import Field, PrivateAttr


from pydantic import BaseModel

class LLMMetadata(BaseModel):
    model_name: str
    temperature: float
    context_window: int
    num_output: int    
    is_chat_model : bool # Add is_chat_model here


class LLMResponse:
    """Standard response object for compatibility with LlamaIndex."""
    def __init__(self, content: str):
        self.message = Message(content)


class Message:
    """Message object to hold the content of the response."""
    def __init__(self, content: str):
        self.content = content



# ✅ Set up Gemini API
genai.configure()
# ✅ Define Gemini LLM Wrapper with Pydantic fields
class GeminiLLM(LLM):

    model_name: str = Field(default="gemini-1.5-pro", description="Gemini model name")
    temperature: float = Field(default=1.0, description="Temperature for generation")
    context_window: int = 2048  # Set this to an appropriate value for your model
    num_output: int =40
    _model: genai.GenerativeModel = PrivateAttr()
    is_chat_model: bool = True

    def __init__(self, model_name: str = "gemini-1.5-pro", temperature: float = 1.0, context_window: int = 2048,num_output=40):
        super().__init__(model_name=model_name, temperature=temperature)
        self._model = genai.GenerativeModel(model_name)
        self.context_window = context_window  # ✅ Store the context window size
        self.num_output = num_output  # Make sure num_output is initialized here
        self.is_chat_model = True

    # def complete(self, prompt: str) -> LLMResponse:
    #     response = self._model.generate_content(prompt, generation_config={"temperature": self.temperature})
    #     if response and response.text:
    #         return LLMResponse(response.text)
    #     return LLMResponse("No response")

    def complete(self, prompt: str) -> str:
        response = self._model.generate_content(prompt, generation_config={"temperature": self.temperature})
        if isinstance(response, str):  # If it's already a string, return it directly
            return response
        elif hasattr(response, "text"):  # If it's an LLMResponse object, return the text attribute
            return response.text
        else:
            return "No response or unexpected format"


    async def acomplete(self, prompt: str) -> LLMResponse:
        return self.complete(prompt)

    def chat(self, messages: list[Any]) -> LLMResponse:
        last_message = messages[-1]
        if hasattr(last_message, "content"):
            prompt = last_message.content
        else:
            prompt = last_message["content"]
        
        response = self._model.generate_content(prompt)
        if response and response.text:
            return LLMResponse(response.text)
        return LLMResponse("No response")

    async def achat(self, messages: list[Any]) -> LLMResponse:
        return self.chat(messages)


    def stream_complete(self, prompt: str) -> Generator[str, None, None]:
        """Stream response in chunks."""
        for chunk in self._model.generate_content(prompt, stream=True):
            yield chunk.text

    async def astream_complete(self, prompt: str) -> Generator[str, None, None]:
        """Async streaming."""
        async for chunk in self.stream_complete(prompt):
            yield chunk

    def stream_chat(self, messages: list[dict[str, str]]) -> Generator[str, None, None]:
        """Stream chat responses."""
        for chunk in self._model.generate_content(messages[-1]["content"], stream=True):
            yield chunk.text

    async def astream_chat(self, messages: list[dict[str, str]]) -> Generator[str, None, None]:
        """Async chat streaming."""
        async for chunk in self.stream_chat(messages):
            yield chunk

    @property
    def metadata(self) -> LLMMetadata:
        return LLMMetadata(
            model_name=self.model_name,
            temperature=self.temperature,
            context_window=self.context_window,
            num_output=self.num_output,
            is_chat_model=self.is_chat_model  # Add is_chat_model here
        )
    


# ✅ Instantiate and Test Gemini LLM
gemini_llm = GeminiLLM()


In [152]:
print('output of gemini_llm.metadata')
gemini_llm.metadata

output of gemini_llm.metadata


LLMMetadata(model_name='gemini-1.5-pro', temperature=1.0, context_window=2048, num_output=40, is_chat_model=True)

In [170]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding


# Initialize the HuggingFace Embedding model
embedding_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")

vector_store = ChromaVectorStore(chroma_collection=chroma_coll)

# Indexing documents using the embedding model
index = VectorStoreIndex.from_vector_store(
    vector_store=vector_store,
    embed_model=embedding_model,
    llm=gemini_llm
)

# Saving index to ChromaDB


In [174]:
# Create a query engine from the index
query_engine = index.as_query_engine(llm=gemini_llm)

# Example query
query = "₹10000 Invested on 1 Week ? "
response = query_engine.query(query)

print(response)


₹10,000 invested in a specific fund one week prior would now be worth ₹10,363.60.



In [171]:
response = gemini_llm.complete("Explain 20 Microns briefly.")
print(response)


20 microns (µm) is a unit of length equal to 20 millionths of a meter (0.02 millimeters).  It's a size scale often used to describe very small things like the diameter of fine particles, fibers, or the features of microscopic organisms.  For comparison, a human hair is roughly 50-100 microns in diameter.



In [86]:
print('output of dir(llama_index.embeddings.huggingface.HuggingFaceEmbedding)')
dir(llama_index.embeddings.huggingface.HuggingFaceEmbedding)

output of dir(llama_index.embeddings.huggingface.HuggingFaceEmbedding)


['__abstractmethods__',
 '__annotations__',
 '__call__',
 '__class__',
 '__class_getitem__',
 '__class_vars__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__fields_set__',
 '__format__',
 '__ge__',
 '__get_pydantic_core_schema__',
 '__get_pydantic_json_schema__',
 '__getattr__',
 '__getattribute__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__iter__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__pretty__',
 '__private_attributes__',
 '__pydantic_complete__',
 '__pydantic_computed_fields__',
 '__pydantic_core_schema__',
 '__pydantic_custom_init__',
 '__pydantic_decorators__',
 '__pydantic_extra__',
 '__pydantic_fields__',
 '__pydantic_fields_set__',
 '__pydantic_generic_metadata__',
 '__pydantic_init_subclass__',
 '__pydantic_parent_namespace__',
 '__pydantic_post_init__',
 '__pydantic_private__',
 '__pydantic_root_model__',
 '__pydantic_serializer__',
 '__pydantic_validator__',
 

In [139]:
a=[1,22,22,2,4,6]
a.sort()
a

[1, 2, 4, 6, 22, 22]