In [22]:
# Standard library first, then third-party packages.
import warnings

from dotenv import load_dotenv
from langchain._api import LangChainDeprecationWarning

# Suppress LangChain deprecation warnings so they do not clutter cell output.
warnings.filterwarnings(action="ignore", category=LangChainDeprecationWarning)


In [23]:
from langchain_google_genai import ChatGoogleGenerativeAI

In [24]:
from openai import OpenAI
class LlamaRouter:
    """
    A simple, reusable wrapper for using Meta Llama (or any Hugging Face model)
    via the OpenAI-compatible Hugging Face Inference endpoint.

    Attributes set by ``__init__``:
        client: ``OpenAI`` client built from ``base_url`` and the token.
        model: model identifier sent with every request.
        api_key: the token value read from the environment.
    """

    def __init__(
        self,
        model: str = "meta-llama/Llama-3.1-8B-Instruct:novita",
        token_env: str = "HF_TOKEN",
        base_url: str = "https://router.huggingface.co/v1",
    ):
        """
        Read the API token from the environment and build the client.

        Args:
            model: model identifier passed to the chat-completions call.
            token_env: name of the environment variable holding the token.
            base_url: base URL handed to the ``OpenAI`` client.

        Raises:
            ValueError: if ``token_env`` is unset or empty once
                ``load_dotenv()`` has run.
        """
        # Imported locally so this class does not rely on a later notebook
        # cell having imported `os` before the first instantiation.
        import os

        load_dotenv()
        api_key = os.getenv(token_env)
        if not api_key:
            raise ValueError(f"Missing token: environment variable '{token_env}' not found.")
        self.client = OpenAI(base_url=base_url, api_key=api_key)
        self.model = model
        self.api_key = api_key

    def invoke(self, prompt: str, temperature: float = 0.7, max_tokens: int = 256) -> str:
        """
        Send a prompt string to the model and return the reply.

        Errors are reported in-band rather than raised: every failure path
        returns a string starting with ``"[Error] "``.

        Args:
            prompt: text sent as a single ``user`` message.
            temperature: forwarded unchanged to the completions call.
            max_tokens: forwarded unchanged to the completions call.

        Returns:
            The reply text with surrounding whitespace stripped, or an
            ``"[Error] ..."`` string when the prompt is not a ``str``, the
            request raises, or the response carries no usable text.
        """
        if not isinstance(prompt, str):
            return "[Error] Prompt must be a string."
        try:
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,
                max_tokens=max_tokens,
            )
            # Guard the two empty-response shapes explicitly so the caller
            # gets a meaningful message instead of a leaked IndexError /
            # AttributeError text.
            choices = completion.choices
            if not choices:
                return "[Error] Model returned no choices."
            content = choices[0].message.content
            if content is None:
                return "[Error] Model returned an empty message."
            return content.strip()
        except Exception as e:
            # Best-effort by design: keep the notebook running and surface
            # the failure text to the caller.
            return f"[Error] {e}"


In [25]:
import os
from dotenv import load_dotenv
from openai import OpenAI

class GemmaRouter:
    """
    A simple wrapper for using Google's Gemma-2-2B-IT model
    via Hugging Face's OpenAI-compatible inference API.

    Attributes set by ``__init__``:
        client: ``OpenAI`` client built from ``base_url`` and the token.
        model: model identifier sent with every request.
        api_key: the token value read from the environment.
    """

    def __init__(
        self,
        model: str = "google/gemma-2-2b-it:nebius",
        token_env: str = "HF_THIRD_TOKEN",
        base_url: str = "https://router.huggingface.co/v1",
    ):
        """
        Read the API token from the environment and build the client.

        Args:
            model: model identifier passed to the chat-completions call.
            token_env: name of the environment variable holding the token.
            base_url: base URL handed to the ``OpenAI`` client.

        Raises:
            ValueError: if ``token_env`` is unset or empty once
                ``load_dotenv()`` has run.
        """
        load_dotenv()
        api_key = os.getenv(token_env)
        if not api_key:
            raise ValueError(
                f"Missing token: environment variable '{token_env}' not found."
            )

        self.client = OpenAI(base_url=base_url, api_key=api_key)
        self.model = model
        self.api_key = api_key

    def invoke(self, prompt: str, temperature: float = 0.7, max_tokens: int = 256) -> str:
        """
        Send a prompt string to the Gemma model and return the reply.

        Errors are reported in-band rather than raised: every failure path
        returns a string starting with ``"[Error] "``.

        Args:
            prompt: text sent as a single ``user`` message.
            temperature: forwarded unchanged to the completions call.
            max_tokens: forwarded unchanged to the completions call.

        Returns:
            The reply text with surrounding whitespace stripped, or an
            ``"[Error] ..."`` string when the prompt is not a ``str``, the
            request raises, or the response carries no usable text.
        """
        if not isinstance(prompt, str):
            return "[Error] Prompt must be a string."

        try:
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=[{"role": "user", "content": prompt}],
                temperature=temperature,
                max_tokens=max_tokens,
            )
            # Handle empty responses explicitly; otherwise the caller only
            # sees the text of an IndexError / AttributeError.
            choices = completion.choices
            if not choices:
                return "[Error] Model returned no choices."
            content = choices[0].message.content
            if content is None:
                return "[Error] Model returned an empty message."
            return content.strip()

        except Exception as e:
            # Best-effort by design: report the failure as text instead of
            # raising into the notebook.
            return f"[Error] {e}"


In [26]:
import os

from langchain.schema.runnable import RunnableLambda
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model through the LangChain Google GenAI integration.
model = ChatGoogleGenerativeAI(model='gemini-2.5-flash')

# Llama routers, one per Hugging Face token environment variable.
model2 = LlamaRouter(token_env="HF_TOKEN")
model3 = LlamaRouter(token_env="HF_SECOND_TOKEN")
model4 = LlamaRouter(token_env="HF_THIRD_TOKEN")

gemma = GemmaRouter()


def _gemma_prompt_text(x):
    """
    Extract the prompt text to send to Gemma from a runnable input.

    Accepts a plain string, an object exposing ``to_string()`` (presumably
    the formatted prompt value LLMChain hands to its ``llm`` — verify against
    the installed LangChain version), or a mapping with a ``"content"`` key
    (the original behavior).
    """
    if isinstance(x, str):
        return x
    to_string = getattr(x, "to_string", None)
    if callable(to_string):
        return to_string()
    return x["content"]


# **kwargs absorbs extra call-time arguments (e.g. a bound `stop`) that
# GemmaRouter.invoke does not accept.
gemma_runnable = RunnableLambda(lambda x, **kwargs: gemma.invoke(_gemma_prompt_text(x)))


In [27]:
from langchain import LLMChain
from langchain_core.prompts import ChatPromptTemplate,MessagesPlaceholder,HumanMessagePromptTemplate
from langchain.memory import ConversationBufferMemory,FileChatMessageHistory


In [28]:
# Conversation history persisted to message.json. The memory_key must match
# the MessagesPlaceholder variable name used by the prompt ("messages");
# otherwise the history is stored under a key the prompt never reads.
memory = ConversationBufferMemory(
    chat_memory=FileChatMessageHistory("message.json"),
    memory_key="messages",
    return_messages=True,
)

In [29]:
# Prompt layout: the "messages" placeholder comes first, followed by the
# human turn rendered from the "content" input.
prompt = ChatPromptTemplate(
    messages=[
        MessagesPlaceholder(variable_name="messages"),
        HumanMessagePromptTemplate.from_template("{content}"),
    ],
    input_variables=["content"],
)


In [30]:
# Wire prompt, memory and the Gemma runnable into one chain.
# NOTE(review): `gemma_runnable` is a plain RunnableLambda, not a chat model —
# confirm it accepts the input LLMChain hands to `llm`.
chain = LLMChain(prompt=prompt, memory=memory, llm=gemma_runnable)


In [32]:
from langchain import LLMChain
from langchain.memory import ConversationBufferMemory, FileChatMessageHistory
from langchain.schema.runnable import RunnableLambda
from langchain_core.prompts import (
    ChatPromptTemplate,
    HumanMessagePromptTemplate,
    MessagesPlaceholder,
)

# Runnable that forwards the "content" field of its input to Gemma.
gemma_runnable = RunnableLambda(lambda inputs: gemma.invoke(inputs["content"]))

# File-backed chat history exposed to the prompt under the "messages" key.
memory = ConversationBufferMemory(
    memory_key="messages",
    return_messages=True,
    chat_memory=FileChatMessageHistory("message.json"),
)

# The "messages" placeholder first, then the new human turn.
prompt = ChatPromptTemplate(
    messages=[
        MessagesPlaceholder(variable_name="messages"),
        HumanMessagePromptTemplate.from_template("{content}"),
    ],
    input_variables=["content"],
)

# This chain uses `model` as its LLM; `gemma_runnable` above is not passed to it.
chain = LLMChain(prompt=prompt, memory=memory, llm=model)

response = chain.invoke({"content": "Hello how are you?"})
print(response)


{'content': 'Hello how are you?', 'messages': [HumanMessage(content='Hello how are you?', additional_kwargs={}, response_metadata={}), AIMessage(content="Hello! I'm doing well, thank you for asking.\n\nHow are you doing today? Is there anything I can help you with?", additional_kwargs={}, response_metadata={}), HumanMessage(content='Hello how are you?', additional_kwargs={}, response_metadata={}), AIMessage(content="Hello again! I'm still doing well, thank you for asking.\n\nHow are you doing this time? Is there something specific you'd like to discuss or need help with?", additional_kwargs={}, response_metadata={}), HumanMessage(content='Hello how are you?', additional_kwargs={}, response_metadata={}), AIMessage(content="Hello! I'm still doing well, thank you for asking again.\n\nAs before, I'm ready to help with whatever you need. Is everything alright on your end, or did you have something specific you wanted to discuss today?", additional_kwargs={}, response_metadata={}), HumanMess

In [35]:
# Explicit input mapping, matching the call style used for the first message.
chain.invoke({"content": "what is"})

{'content': 'what is',
 'messages': [HumanMessage(content='Hello how are you?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="Hello! I'm doing well, thank you for asking.\n\nHow are you doing today? Is there anything I can help you with?", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Hello how are you?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="Hello again! I'm still doing well, thank you for asking.\n\nHow are you doing this time? Is there something specific you'd like to discuss or need help with?", additional_kwargs={}, response_metadata={}),
  HumanMessage(content='Hello how are you?', additional_kwargs={}, response_metadata={}),
  AIMessage(content="Hello! I'm still doing well, thank you for asking again.\n\nAs before, I'm ready to help with whatever you need. Is everything alright on your end, or did you have something specific you wanted to discuss today?", additional_kwargs={}, response_metadata={}),
  HumanMe