In [2]:
import requests
import langchain

In [None]:
from typing import Any, Dict, Iterator, List, Mapping, Optional

from langchain_core.callbacks.manager import CallbackManagerForLLMRun
from langchain_core.language_models.llms import LLM
from langchain_core.outputs import GenerationChunk

class CustomLLM(LLM):
    """A toy LLM that replies with the first `n` characters of its prompt.

    The class is a minimal template for plugging a custom text-completion
    model into LangChain: it subclasses ``LLM`` and supplies ``_call``,
    ``_stream``, ``_identifying_params`` and ``_llm_type``.

    Example:

        .. code-block:: python

            model = CustomLLM(n=2)
            result = model.invoke("hello")
            results = model.batch(["hello", "world"])
    """

    n: int
    """The number of characters from the last message of the prompt to be echoed."""

    def _call(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> str:
        """Produce the completion for ``prompt`` in one shot.

        Args:
            prompt: Text to complete.
            stop: Stop sequences. This model does not support them, so any
                value other than ``None`` is rejected.
            run_manager: Callback manager for the run (unused here).
            **kwargs: Extra keyword arguments (unused here).

        Returns:
            The first ``self.n`` characters of ``prompt``.

        Raises:
            ValueError: If ``stop`` is not ``None``.
        """
        if stop is None:
            return prompt[: self.n]
        raise ValueError("stop kwargs are not permitted.")

    def _stream(
        self,
        prompt: str,
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> Iterator[GenerationChunk]:
        """Yield the completion for ``prompt`` one character at a time.

        Args:
            prompt: Text to complete.
            stop: Stop sequences (not consulted by this method).
            run_manager: Callback manager for the run; when truthy it is
                notified of every emitted character.
            **kwargs: Extra keyword arguments (unused here).

        Yields:
            One ``GenerationChunk`` per character of ``prompt[: self.n]``.
        """
        echoed = prompt[: self.n]
        for character in echoed:
            piece = GenerationChunk(text=character)
            # Notify the callback before handing the chunk to the consumer.
            if run_manager:
                run_manager.on_llm_new_token(piece.text, chunk=piece)
            yield piece

    @property
    def _identifying_params(self) -> Dict[str, Any]:
        """Return the parameters that identify this model."""
        # The model name lets LLM monitoring tools (e.g. LangSmith) apply
        # model-specific token counting and pricing rules.
        params: Dict[str, Any] = {"model_name": "CustomChatModel"}
        return params

    @property
    def _llm_type(self) -> str:
        """Return the model type label; used for logging purposes only."""
        return "custom"

In [4]:
import vertexai

from vertexai.generative_models import GenerativeModel

In [None]:


# Stream a short story from a Gemini model on Vertex AI and print each
# streamed item as it arrives.
#
# TODO(developer): Update and un-comment below line
# PROJECT_ID = "your-project-id"
# NOTE: PROJECT_ID must already be defined when the next line runs. While the
# assignment above stays commented out (and unless it is defined elsewhere in
# the session), the call below raises NameError.
vertexai.init(project=PROJECT_ID, location="us-central1")

model = GenerativeModel("gemini-1.5-flash-002")
# stream=True: the result is consumed by iterating over it below rather than
# being read as a single response object.
responses = model.generate_content(
    "Write a story about a magic backpack.", stream=True
)

# Print the text of every streamed item on its own line.
for response in responses:
    print(response.text)
# Example response:
# El
# ara wasn't looking for magic. She was looking for rent money.
# Her tiny apartment, perched precariously on the edge of Whispering Woods,
# ...

In [None]:
class Gemini:
    """Small REST client for a Gemini text-generation endpoint.

    On construction the client reads a service-account credential file and
    exchanges its content for a bearer token at ``TOKEN_URL``.
    ``call_gemini`` then posts prompts to ``url`` using that token and
    returns the concatenated generated text.
    """

    # NOTE(review): this is a plain-HTTP endpoint that receives the
    # service-account credential -- confirm whether HTTPS is available.
    TOKEN_URL = 'http://10.9.93.83:8443/google-authen'

    # Default credential file, used when no ``credential_path`` is given.
    DEFAULT_CREDENTIAL_PATH = "/Users/sirabhobs/Desktop/poc-hr-helpdesk-chatbot/credential/ktb-complaint-center-poc-d47fde693217.json"

    # Timeout (seconds) applied to every outgoing HTTP request.
    REQUEST_TIMEOUT = 300

    # Harm categories whose safety filter is switched off in each request.
    _SAFETY_CATEGORIES = (
        "HARM_CATEGORY_HATE_SPEECH",
        "HARM_CATEGORY_DANGEROUS_CONTENT",
        "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "HARM_CATEGORY_HARASSMENT",
    )

    def __init__(self, url=None, isLog=False, credential_path=None):
        """Create the client and fetch an access token.

        Args:
            url: Endpoint that ``call_gemini`` posts generation requests to.
            isLog: When true, requests and failures are reported through
                ``CustomLogging``.
            credential_path: Path of the service-account credential file.
                Defaults to ``DEFAULT_CREDENTIAL_PATH``.

        Raises:
            OSError: If the credential file cannot be read.
            requests.RequestException: If the token request fails.
            KeyError: If the token response has no ``Token`` field.
        """
        self.url = url
        self.max_output = 8192  # stored only; not sent in the request below
        self.temperature = 0
        self.is_log = isLog

        if credential_path is None:
            credential_path = self.DEFAULT_CREDENTIAL_PATH
        with open(credential_path, "r", encoding="utf-8") as fr:
            credentials = fr.read()

        self.token = self.getToken(credentials)

    @staticmethod
    def getToken(credential):
        """Exchange a service-account credential for an access token.

        Args:
            credential: Content of the service-account credential file.

        Returns:
            The value of the ``Token`` field in the token service's JSON reply.

        Raises:
            requests.RequestException: On connection failure, timeout or a
                non-success HTTP status.
            KeyError: If the reply has no ``Token`` field.
        """
        payload = {'service_account': credential}
        headers = {'Content-Type': 'application/json'}
        # Keyword arguments matter here: the third positional parameter of
        # requests.post is ``json``, not ``headers``, so passing the headers
        # positionally meant they were never sent and the body went out
        # form-encoded despite the declared JSON content type.
        # NOTE(review): confirm the token service expects a JSON body.
        response = requests.post(
            Gemini.TOKEN_URL,
            json=payload,
            headers=headers,
            timeout=Gemini.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()['Token']

    def call_gemini(self, prompt, isJsonOutput=False):
        """Send ``prompt`` to the Gemini endpoint synchronously.

        Args:
            prompt: User text to send as a single ``user`` turn.
            isJsonOutput: When true, ask the model for an
                ``application/json`` response.

        Returns:
            The generated text, or ``None`` when the request fails or the
            response does not have the expected structure.
        """
        headers = {
            "Authorization": f"Bearer {self.token}",
            "Content-Type": "application/json",
        }

        data = {
            "contents": [
                {
                    "role": "user",
                    "parts": [{"text": prompt}],
                }
            ],
            "generation_config": {
                # Send the same temperature that process_response logs.
                "temperature": self.temperature,
                "topP": 1,
                "seed": 42,
            },
            "safetySettings": [
                {"category": category, "threshold": "OFF"}
                for category in self._SAFETY_CATEGORIES
            ],
        }

        if isJsonOutput:
            data["generation_config"]["responseMimeType"] = "application/json"

        try:
            response = requests.post(
                self.url, headers=headers, json=data, timeout=self.REQUEST_TIMEOUT
            )
            return self.process_response(response, prompt)

        except (requests.RequestException, IndexError, KeyError, TypeError) as e:
            if self.is_log:
                CustomLogging.log_error(f"Request failed or unexpected response structure: {e}")
            # Best-effort API: failures are reported as None, not raised.
            return None

    def process_response(self, response, prompt):
        """Extract the generated text from the response and log the call.

        The JSON body is read as a sequence of chunks. For every candidate in
        every chunk the text of its first part is collected; candidates that
        carry no parts are skipped instead of aborting the whole response.

        Args:
            response: HTTP response object exposing ``json()`` and ``elapsed``.
            prompt: The prompt that produced the response (logged only).

        Returns:
            The concatenation of all collected, non-empty text fragments.

        Raises:
            IndexError, KeyError, TypeError: If the body is not a non-empty
                sequence of chunk dictionaries.
        """
        result = response.json()

        fragments = []
        for line in result:
            if "candidates" not in line:
                continue
            for candidate in line["candidates"]:
                parts = (candidate.get("content") or {}).get("parts") or []
                if parts:
                    fragments.append(parts[0].get("text", ""))

        # filter(None, ...) drops empty or missing text fragments.
        content = "".join(filter(None, fragments))

        # Usage figures are read from the last chunk only.
        usage_metadata = result[-1].get("usageMetadata", {})

        token_usage = {
            "promptTokenCount": usage_metadata.get("promptTokenCount", 0),
            "candidatesTokenCount": usage_metadata.get("candidatesTokenCount", 0),
            "totalTokenCount": usage_metadata.get("totalTokenCount", 0),
        }

        llm_info = {
            'prompt': prompt,
            'llmModel': result[0].get('modelVersion', ''),
            'temperature': self.temperature,
            'llmResponse': content,
            'processingTime': response.elapsed.total_seconds() * 1000,  # Convert to milliseconds
            **token_usage
        }

        if self.is_log:
            CustomLogging.log_llm_info(llm_info)

        return content