In [None]:
from transformers import pipeline
import torch, os
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, LlamaTokenizer, LlamaForCausalLM, MistralForCausalLM
import random, json
import inspect
import json
from typing import Dict, Any, Optional, Callable, List

class Agent:
    """Thin wrapper around a Hugging Face causal LM used as a chat agent.

    The model and tokenizer are loaded from a local cache directory when
    possible, otherwise downloaded (4-bit quantized) and saved locally.

    Attributes set by ``__init__``:
        model: the loaded ``AutoModelForCausalLM`` instance.
        tokenizer: the matching ``AutoTokenizer`` instance.
        name: caller-supplied agent name.
        model_name: the model identifier passed to ``__init__``; it drives
            the family detection in ``clear_response``.
        system_message: system prompt used by ``llm_create_system_prompt``
            (empty by default).
        tools: list of Python callables advertised to the model by
            ``llm_generate_response`` (empty by default).
    """

    def __init__(self, model: str, name: str):
        """Load (or download and cache) the model and tokenizer.

        Args:
            model: Hugging Face model identifier, e.g. ``"org/model"``.
            name: Name of this agent.

        The access token for gated/private models is read from the
        ``HF_TOKEN`` environment variable; when it is unset, ``None`` is
        passed and the library falls back to its own credential lookup.
        """
        # 4-bit quantization settings used when the model has to be downloaded.
        # Only real BitsAndBytesConfig parameters are passed here: the former
        # `torch_dtype`, `device_map` and `use_nested_quant` entries are not
        # parameters of this config, and the misspelled `bnb_4bit_quantw_type`
        # meant the requested "nf4" type was never applied.
        # NOTE(review): if `torch_dtype="auto"` / `device_map="auto"` were
        # meant for `from_pretrained`, pass them there - confirm intent.
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_storage=torch.uint8,
        )
        # Never hard-code credentials in source; take the token from the
        # environment instead.
        hf_token = os.environ.get("HF_TOKEN")
        save_directory = model.replace('/', '_') + '_saved_response'
        try:
            print('Trying to load the mode:', save_directory, 'from local repo')
            self.model = AutoModelForCausalLM.from_pretrained(save_directory)
            self.tokenizer = AutoTokenizer.from_pretrained(save_directory)
        except Exception as err:
            # Best-effort fallback: any failure to load the local copy leads
            # to a fresh download. `Exception` (not a bare `except`) so that
            # Ctrl-C / SystemExit are not swallowed.
            print('Local load failed:', err)
            print('The model:', model, 'is not found locally, downloading it')
            self.model = AutoModelForCausalLM.from_pretrained(
                model, quantization_config=bnb_config, token=hf_token
            )
            self.tokenizer = AutoTokenizer.from_pretrained(model, token=hf_token)
            print("Saving the model:", model, " locally")
            self.model.save_pretrained(save_directory)
            self.tokenizer.save_pretrained(save_directory)
        self.name = name
        self.model_name = model
        self.system_message = ""
        self.tools = []

    @staticmethod
    def _second_segment(text, marker):
        """Return the piece between the 1st and 2nd ``marker``, or None if absent."""
        pieces = text.split(marker)
        return pieces[1] if len(pieces) > 1 else None

    def clear_response(self, messages, response_string):
        """Strip the echoed prompt from a decoded generation.

        The model family is detected from substrings of ``self.model_name``
        (case-sensitive, checked in the order below) and a family-specific
        marker is used to locate the assistant's reply.

        Args:
            messages: The chat messages that produced the response. Only the
                ``mistralai`` branch reads them (the ``content`` of the first
                two entries).
            response_string: The full decoded model output.

        Returns:
            The extracted reply. If the model family is not recognised, or the
            expected marker is not present in ``response_string``, the input
            string is returned unchanged instead of raising ``IndexError`` or
            returning ``None``.
        """
        model_name = self.model_name

        if 'Qwen' in model_name and 'Instruct' in model_name:
            # NOTE(review): the replace also rewrites any "ssistant." inside
            # the reply itself to "%"; kept as in the original - confirm intent.
            reply = self._second_segment(
                response_string.replace('ssistant.', '%'), 'ssistant\n'
            )
            return response_string if reply is None else reply

        if 'falcon' in model_name and 'instruct' in model_name:
            reply = self._second_segment(response_string, 'ssistant:')
            return response_string if reply is None else reply.split('User')[0]

        if 'lama' in model_name and 'nstruct' in model_name:
            reply = self._second_segment(response_string, 'ssistant\n')
            return response_string if reply is None else reply.split('User')[0]

        if 'mistralai' in model_name and 'nstruct' in model_name:
            if len(messages) < 2:
                return response_string
            # Skip the two message bodies plus 2 extra characters.
            prefix_len = (
                len(messages[0]['content']) + len(messages[1]['content']) + 2
            )
            return response_string[prefix_len:]

        if 'OpenHermes' in model_name:
            reply = self._second_segment(response_string, 'ssistant\n')
            return response_string if reply is None else reply.split('User')[0]

        # Unknown model family: hand back the raw text rather than None.
        return response_string

    def llm_create_system_prompt(self, prompt):  ### for instruct models
        """Build a two-message chat: the agent's system message, then ``prompt``.

        Args:
            prompt: The user message text.

        Returns:
            A list of two ``{"role": ..., "content": ...}`` dicts.
        """
        return [
            {"role": "system", "content": self.system_message},
            {"role": "user", "content": prompt},
        ]

    def get_tool_schema(self, func: Callable) -> dict:
        """
        Generate a JSON schema for a tool function.

        Args:
            func (Callable): The function to generate a schema for.

        Returns:
            dict: A JSON schema representing the function's parameters. Every
            parameter is described as a string; the description is the first
            non-blank line of the function's docstring ("" if there is none).
        """
        properties = {
            param_name: {
                "type": "string",  # Assume string type for simplicity
                "description": f"Parameter {param_name}",
            }
            for param_name in inspect.signature(func).parameters
        }

        # Strip first so docstrings written as '"""\n    Summary...' still
        # yield their summary line instead of an empty string.
        doc = (func.__doc__ or "").strip()
        summary = doc.splitlines()[0].rstrip() if doc else ""

        return {
            "type": "function",
            "function": {
                "name": func.__name__,
                "description": summary,
                "parameters": {
                    "type": "object",
                    "properties": properties,
                },
            },
        }

    def _generate_from_text(self, text, max_new_tokens=384):
        """Tokenize ``text``, run ``model.generate`` and decode the first sequence."""
        inputs = self.tokenizer(
            text, return_tensors="pt", return_attention_mask=True
        ).to(self.model.device)
        generated_ids = self.model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
        )
        return self.tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    def llm_generate_response(self, prompt, max_new_tokens=384):  #### given we are using instruct model and it is  tools compatible (if tools are stated)
        """Generate a reply to ``prompt`` using the system message and tools.

        Args:
            prompt: The user message text.
            max_new_tokens: Upper bound on generated tokens (default 384).

        Returns:
            The decoded first generated sequence with special tokens removed.
            Presumably this still contains the prompt text, which is what
            ``clear_response`` strips - verify against the caller.
        """
        messages = self.llm_create_system_prompt(prompt)
        schema_tools = [self.get_tool_schema(tool) for tool in self.tools]
        text = self.tokenizer.apply_chat_template(
            messages,
            # Pass None (not an empty list) when there are no tools, so chat
            # templates that test `tools is not none` do not emit a tool
            # section for zero tools.
            tools=schema_tools or None,
            tokenize=False,
            add_generation_prompt=True,
        )
        return self._generate_from_text(text, max_new_tokens)

    def generate_response(self, messages, max_new_tokens=384):  # if the model is not instruct
        """Generate a reply for caller-supplied chat ``messages`` (no tools).

        Args:
            messages: Chat messages in the format accepted by the tokenizer's
                ``apply_chat_template``.
            max_new_tokens: Upper bound on generated tokens (default 384).

        Returns:
            The decoded first generated sequence with special tokens removed.
        """
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        return self._generate_from_text(text, max_new_tokens)
