# RAG model with truera and LLambaIndex

In [21]:
# ! huggingface-cli login
# ! pip install llama_index

# if using colab or jupiter pip install ipywidgets

## login with your hugging face credential

In [15]:
from huggingface_hub import notebook_login
notebook_login()


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [8]:
! huggingface-cli models --search "llama"


usage: huggingface-cli <command> [<args>]
huggingface-cli: error: argument {download,upload,repo-files,env,login,whoami,logout,auth,repo,lfs-enable-largefiles,lfs-multipart-upload,scan-cache,delete-cache,tag,version,upload-large-folder}: invalid choice: 'models' (choose from 'download', 'upload', 'repo-files', 'env', 'login', 'whoami', 'logout', 'auth', 'repo', 'lfs-enable-largefiles', 'lfs-multipart-upload', 'scan-cache', 'delete-cache', 'tag', 'version', 'upload-large-folder')


In [20]:
from llama_index.llms.huggingface import HuggingFaceLLM
from llama_index.core.settings import Settings

hf_llm = HuggingFaceLLM(
    model_name="meta-llama/Llama-3.2-1B",
    tokenizer_name="meta-llama/Llama-3.2-1B",
    context_window=1024,  # Reduce from 2048
    max_new_tokens=100,    # Reduce from 256
    generate_kwargs={"temperature": 0.7, "top_p": 0.9},
    device_map="auto"
)

# Use Settings instead of ServiceContext
Settings.llm = hf_llm

# Test the model
response = hf_llm.complete("Answer is 1 sentence : What is the capital of France?")
print(response)


Some parameters are on the meta device because they were offloaded to the cpu and disk.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


 Paris is the capital of France. Answer is 2 sentences : The capital of France is Paris. Answer is 3 sentences : The capital of France is Paris. Answer is 4 sentences : The capital of France is Paris. Answer is 5 sentences : The capital of France is Paris.


In [22]:
pip show google-generativeai

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Name: google-generativeai
Version: 0.8.4
Summary: Google Generative AI High level API client library and tools.
Home-page: https://github.com/google/generative-ai-python
Author: Google LLC
Author-email: googleapis-packages@google.com
License: Apache 2.0
Location: /home/rahul-raj/LLM/.venv/lib/python3.10/site-packages
Requires: google-ai-generativelanguage, google-api-core, google-api-python-client, google-auth, protobuf, pydantic, tqdm, typing-extensions
Required-by: langchain-google-genai
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import google.generativeai as genai

# Or set them directly using below code 
# os.environ["GOOGLE_API_KEY"] = "your-api-key"
# genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


### Load api from the .env file 

In [None]:
from dotenv import load_dotenv
load_dotenv()

api_google=os.getenv('GOOGLE_API_KEY')
api_google

# GEMINAI with custom LLM Wrapper with llama index

In [None]:

import os
import google.generativeai as genai
from llama_index.core.llms import LLM
from typing import Optional, Generator, Any
from pydantic import Field, PrivateAttr


# ✅ Set up Gemini API
genai.configure(api_google)
# ✅ Define Gemini LLM Wrapper with Pydantic fields
class GeminiLLM(LLM):
    model_name: str = Field(default="gemini-1.5-pro", description="Gemini model name")
    temperature: float = Field(default=0.7, description="Temperature for generation")

    # ✅ Private attribute for non-Pydantic fields
    _model: genai.GenerativeModel = PrivateAttr()

    def __init__(self, model_name: str = "gemini-1.5-pro", temperature: float = 0.7):
        super().__init__(model_name=model_name, temperature=temperature)
        self._model = genai.GenerativeModel(model_name)  # ✅ Now stored as a private attribute

    def complete(self, prompt: str) -> str:
        """Standard completion."""
        response = self._model.generate_content(prompt, generation_config={"temperature": self.temperature})
        return response.text if response else "No response"

    async def acomplete(self, prompt: str) -> str:
        """Async completion."""
        return self.complete(prompt)

    def chat(self, messages: list[dict[str, str]]) -> str:
        """Chat-style conversation."""
        response = self._model.generate_content(messages[-1]["content"])
        return response.text if response else "No response"

    async def achat(self, messages: list[dict[str, str]]) -> str:
        """Async chat."""
        return self.chat(messages)

    def stream_complete(self, prompt: str) -> Generator[str, None, None]:
        """Stream response in chunks."""
        for chunk in self._model.generate_content(prompt, stream=True):
            yield chunk.text

    async def astream_complete(self, prompt: str) -> Generator[str, None, None]:
        """Async streaming."""
        async for chunk in self.stream_complete(prompt):
            yield chunk

    def stream_chat(self, messages: list[dict[str, str]]) -> Generator[str, None, None]:
        """Stream chat responses."""
        for chunk in self._model.generate_content(messages[-1]["content"], stream=True):
            yield chunk.text

    async def astream_chat(self, messages: list[dict[str, str]]) -> Generator[str, None, None]:
        """Async chat streaming."""
        async for chunk in self.stream_chat(messages):
            yield chunk

    @property
    def metadata(self) -> dict[str, Any]:
        """Model metadata."""
        return {
            "model_name": self.model_name,
            "temperature": self.temperature,
        }

# ✅ Instantiate and Test Gemini LLM
gemini_llm = GeminiLLM()

# Test Completion
response = gemini_llm.complete("best stock to buy in india with good divedent ?")
print(response)


It's impossible to say definitively which stock is the "best" for dividends in India, as market conditions and company performance constantly change.  Furthermore, providing specific investment advice is beyond my capabilities as an AI and against my ethical guidelines.  However, I can give you factors to consider and some general information about historically good dividend payers in India:

**Factors to Consider When Evaluating Dividend Stocks:**

* **Dividend Yield:** This is the annual dividend per share divided by the share price.  A higher yield *can* be attractive, but be cautious, as an unusually high yield can sometimes signal financial trouble.
* **Dividend Payout Ratio:** This is the percentage of earnings paid out as dividends.  A sustainable payout ratio is generally considered to be below 70%.  A higher ratio might indicate that the company is struggling to reinvest in its growth.
* **Company Performance:** Look for companies with a consistent history of profitability and