In [1]:
#!pip3 install sentencepiece

In [2]:
from transformers import pipeline
import torch, os
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, LlamaTokenizer, LlamaForCausalLM, MistralForCausalLM
import random, json
import inspect
import json
from typing import Dict, Any, Optional, Callable, List




class Agent:
    def __init__(self, model,name):
        # Load Qwen model and tokenizer            
        bnb_config = BitsAndBytesConfig(
            torch_dtype="auto",
            device_map="auto",
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quantw_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,  # Changed from bfloat16 to float16
            bnb_4bit_quant_storage=torch.uint8,    # Added for storage optimization
            use_nested_quant=True,                 # Added for nested quantization
        )
        save_directory = model.replace('/','_')+'_saved_response'
        try:
            print('Trying to load the mode:',save_directory,'from local repo')

            #self.model = pipeline.load_from_pretrained(save_directory)
            self.model = AutoModelForCausalLM.from_pretrained(save_directory)
            self.tokenizer = AutoTokenizer.from_pretrained(save_directory)
        except:  
            print('The model:',model,'is not found locally, downloading it')
            #self.model = pipeline(
            #    "text-generation",
            #    model=model,
            #    model_kwargs={"torch_dtype": torch.bfloat16},
            #    device_map="auto",
            #    token="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"
            #)
            self.model = AutoModelForCausalLM.from_pretrained(
                model, quantization_config=bnb_config, token="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"
            )
            self.tokenizer = AutoTokenizer.from_pretrained(model, token="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")
            print("Saving the model:",model," locally")
            #self.model.save_pretrained(save_directory)
            self.model.save_pretrained(save_directory)
            self.tokenizer.save_pretrained(save_directory)
        self.name = name
        self.model_name = model
        self.system_message = ""
        self.tools = []
        
    def clear_response(self,messages, response_string):
        #print('agent_name',agent_name)
        if all(keyword in self.model_name for keyword in ['Qwen','Instruct']):
            #print('//////////////',response_string,'\n','//////////')
            return response_string.replace('ssistant.','%').split('ssistant\n')[1]
        if all(keyword in self.model_name for keyword in ['falcon','instruct']): 
            return response_string.split('ssistant:')[1].split('User')[0]
        if all(keyword in self.model_name for keyword in ['lama','nstruct']):
            return response_string.split('ssistant\n')[1].split('User')[0]
        if all(keyword in self.model_name for keyword in ['mistralai','nstruct']):
            return response_string[len(messages[0]['content'])+len(messages[1]['content'])+2:]
        if all(keyword in self.model_name for keyword in ['OpenHermes','OpenHermes']):
            return response_string.split('ssistant\n')[1].split('User')[0]
    
    
    def sqr_root(self, number: float)-> float:
        
        return float(number ** 0.5)
    def llm_create_system_prompt(self,prompt):
            
            return [
                dict({"role": "system", "content": self.system_message}),
                dict({"role": "user", "content": prompt}),
            ]
        
            functions = 'get_status(), self.list_files(directory)'
            '''
            return f"""
You have access to a a list of functions {functions} which returns the current system status.
When asked about the system's status, you should call this function.

Example interaction:
User: What is the current system status?
Assistant:
[Function Call: get_status()]
Result: System is operational and running smoothly
Response: The system is currently operational and running smoothly
Another Example:
User: what is listed in the directory: /home`
Assistant:
[Function Call: self.list_files(/home)]
Result: bla1, bla2,...
"""
        '''
        
    def create_system_prompt(self):
        """
        Generate a system prompt that describes available tools
        """
        system_prompt = {"role":"tool_calls","content":'tools_description'}
        system_prompt = {
        "role": "tool_calls",
        "content": [
            {
                "type": "function",
                "name": "get_status",
                "description": "Get▁the▁current▁status",
                "parameters": {"type": "object", "properties": {"opt": {"type": "string", "description": "The city and state, e.g. San Francisco, CA"}
               
                
                                                                }
                              }
            }
        ]
    }  

        return system_prompt
    
        
    def get_tool_schema(self,func: Callable) -> dict:
        """
        Generate a JSON schema for a tool function.

        Args:
            func (Callable): The function to generate a schema for.

        Returns:
            dict: A JSON schema representing the function's parameters.
        """
        import inspect
        signature = inspect.signature(func)
        parameters = {}

        for name, param in signature.parameters.items():
            parameters[name] = {
                "type": "string",  # Assume string type for simplicity
                "description": f"Parameter {name}"
            }

        return {
            "type": "function",
            "function": {
                "description": func.__doc__.split('\n')[0] if func.__doc__ else "",
                "parameters": {
                    "type": "object",
                    "properties": parameters
                }
            }
        }
        
    def llm_generate_response(self,prompt):
        
        # Generate response
        messages =  self.llm_create_system_prompt(prompt)
        schema_tools = []
        
        for tool in self.tools:
            schema_tools.append(self.get_tool_schema(tool))
            
        text = self.tokenizer.apply_chat_template(
            messages,
            tools= self.tools,
            tokenize=False,
            add_generation_prompt=True
        )

        # Generate response
        inputs = self.tokenizer(text, return_tensors="pt", return_attention_mask=True).to(self.model.device)
        generated_ids = self.model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=384,
            #pad_token_id=self.model.config.eos_token_id
        )

        # Decode response
        response = self.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        # Decode response
        
        #outputs = pipeline(
        #    messages,
        #    max_new_tokens=256,
        #)
        return response
    
    def generate_response(self, messages):
        # Prepare input
        
        text = self.tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )

        # Generate response
        inputs = self.tokenizer(text, return_tensors="pt", return_attention_mask=True).to(self.model.device)
        generated_ids = self.model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=384,
            #pad_token_id=self.model.config.eos_token_id
        )

        # Decode response
        response = self.tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        return response


In [3]:
number_setter = None
guesser = None
react_agent = None
cleaner_agent = None
llm = None
rephrase_agent = None

In [4]:
def list_files(directory:str) -> str:
    """
        List files in the specified directory.

        Args:
            directory (str): The path to the directory to list files from.

        Returns:
            str: A comma-separated string of files in the directory.
    """
    try:
        files = os.listdir(directory)
        return ', '.join(files)
    except Exception as e:
        return f"Error listing files: {str(e)}"

In [5]:
def get_status()->str:
    """
    Get the current status
    
    Args:
        {}
        
    Returns:
        The current status as a string.
    """
    
    return "my status is great"

In [6]:
import json
import re
import os

def list_files(directory):
    """
    Lists files in the specified directory.
    
    Args:
        directory (str): Path to the directory
    
    Returns:
        list: List of files in the directory
    """
    try:
        print('checking directory',directory)
        return ', '.join(os.listdir(directory))
    except Exception as e:
        return f"Error listing files: {e}"
def clean_json(json_str):
    """
    Clean the JSON string by removing trailing characters that might break parsing.
    
    Args:
        json_str (str): The potentially malformed JSON string
    
    Returns:
        str: A cleaned JSON string that can be parsed by json.loads()
    """
    # Remove the 'function_call:' prefix and any surrounding text
    json_str = re.sub(r'^.*?function_call:\s*', '', json_str, flags=re.DOTALL)
    
    # Remove escaped quotes and unescape the string
    json_str = json_str.replace('\\"', '"').strip()
    
    # Remove any trailing characters after the JSON object
    json_str = re.sub(r'}.*$', '}', json_str, flags=re.DOTALL)
    return json_str

def execute_function_call(llm_output):
    """
    Extracts and executes the function call from LLM output.
    
    Args:
        llm_output (str): The full LLM output string
    
    Returns:
        Result of the function call or None if no valid function call
    """
    
    # Use regex to find the JSON between function_call: and the next closing brace
    llm_output = llm_output.replace('parameters','arguments')
    match = re.search(r'\s*({.*})', llm_output, re.DOTALL)
    match = re.search(r'\s*({(?:\\}|[^}])*?}?})(?:,|\]|\'|\")?', llm_output, re.DOTALL)
    print('match',match, llm_output)
    if match:
        try:
            # Extract the potential JSON string
            potential_json = match.group(1)
            # Clean the JSON string
            cleaned_json = clean_json(potential_json)
            # Parse the cleaned JSON
            function_call = json.loads(potential_json)
           
            
            # Get the function name and arguments
            func_name = function_call.get('name')
            args = function_call.get('arguments', {})

            
            # Dynamically call the function
            if func_name == 'list_files':
                return 'output:'+ list_files(args.get('directory'))
            if func_name == 'get_status':
                return 'output:'+get_status()
            else:
                return f"Unknown function: {func_name}"
        
        except json.JSONDecodeError as e:
            return f"JSON Parsing Error: {e}. Original string: {potential_json}"
        except Exception as e:
            return f"Error processing function call: {e}"
    
    return None

# Example usage
sample_output = '{"name": "list_files", "parameters": {"directory": "/"}}}dlk'
result = execute_function_call(sample_output)
print(result)

match <re.Match object; span=(0, 55), match='{"name": "list_files", "arguments": {"directory":> {"name": "list_files", "arguments": {"directory": "/"}}}dlk
checking directory /
output:bin, boot, dev, etc, home, lib, lib32, lib64, libx32, media, mnt, opt, proc, root, run, sbin, srv, sys, tmp, usr, var, .dockerenv, workspace, .singularity.d


In [7]:
def react():
    global react_agent, cleaner_agent, rephrase_agent,  prompt, system_message, tools
    READER_MODEL_NAME1 = "Qwen/Qwen2.5-Coder-7B-Instruct"
    READER_MODEL_NAME2 = "tiiuae/falcon-7b-instruct"
    READER_MODEL_NAME3 = 'teknium/OpenHermes-2.5-Mistral-7B'
    READER_MODEL_NAME4 = 'meta-llama/Llama-3.2-3B-Instruct'
    READER_MODEL_NAME5 = "mistralai/Mistral-7B-Instruct-v0.3"
    READER_MODEL_NAME6 = "meta-llama/Llama-3.1-8B"
    READER_MODEL_NAME7 = "meta-llama/Llama-3.1-8B-Instruct"
    READER_MODEL_NAME8 = "meta-llama/Meta-Llama-3.1-8b-Instruct"
    READER_MODEL_NAME9 = "meta-llama/Llama-3.2-1B-Instruct"
    
    if react_agent is None:
        react_agent = Agent(READER_MODEL_NAME8,"react")
        cleaner_agent = Agent(READER_MODEL_NAME9,'cleaner')
        #rephrase_agent = Agent(READER_MODEL_NAME6,'rephrase')
        
    react_agent.system_message = system_message
    react_agent.tools = tools
    #thedir = react_agent.list_files_in_directory("/")
    
    question = prompt
    agent_response = react_agent.llm_generate_response(question)
    print('####################################')
    agent_response = list(agent_response.split(['assistant'][-1]))[-1]
    print(agent_response)
    print('####################################')
    
    cleaner_prompt=[{"role":"system","content":"You are smart text understanding expert, Extract the results from the given prompt\
     Ignore the texts that has not meaning and just extract the results"},
                   {"role":"user","content":agent_response}]
    filter_prompt = f"""
You are a precise extraction of assistant: or Assistant: replies.\
Your ONLY task is to find and return the result from the given text. 

Rules:
- Look for the single, precise explanation output
- if it is numerical  reply only with the numerical
- Ignore all surrounding text or context
- If no clear numerical result is found, respond with the found explanation
- If you find a list of dictionnaries or a list of json data after the assistant/Assistant words, \
then say the word 'Execute Functions:' and list them
- Extract all the explanation if it shows that there is no results or otherwise, extract ONLY the numerical value


"""
    cleaner_prompt = [{"role":"system","content":filter_prompt}, {"role":"user","content":agent_response}]
    #cleaner_response = cleaner_agent.generate_response(cleaner_prompt)
    #cleaned_response = cleaner_agent.clear_response(cleaner_prompt, cleaner_response)
    try:
        #print(';;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;')
        #print('agent_resposne:',type(agent_response),str(agent_response.split(['Assistant:'][-1])))
        #print(';;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;')
        #print('cleaner_response:',cleaner_response)
        print(';;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;')
        #print('cleaned_response:',cleaned_response)
        print(';;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;')
        function_call = execute_function_call(agent_response)
        if 'output:' in function_call:
            #prompt = 'the function is called and the output is:' + function_call + 'so please \
            #rephrase youre reply'
            rephrase_filter = " you are expert in english language you rephrase any text you professionally.\
                    be very brief and reply by rephrasing the given text and starty"
            
            rephrase_text = "rephrase in a bief way and do not add any infomration from your side \
            : After cheking the outside environment, the  reply is:"+function_call
            cleaner_prompt = [{"role":"system","content":rephrase_filter}, {"role":"user","content":rephrase_text}]
            print('hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh')
            rephrase_response = cleaner_agent.generate_response(cleaner_prompt)
            cleaned_response = cleaner_agent.clear_response(cleaner_prompt, rephrase_response)
            print(cleaned_response)
            
            
    except:
        print('all is not running')
      
           

In [8]:
if __name__ == "__main__":
    tools=[]
    tools = [get_status, list_files]
    system_message = "you are a bot .\
    start youre reply with Assistant:. if the question needs you to run a function, then only say the following:\
    function_call: and add the function and arguments in a json format and end your reply. \
    remember not to state any other function that are not needed.\
    and remember to not to run the function. \
    if there is no function is needed to call, then be brief in your reply and only reply directly\
  "
    prompt = "list the files in the / "
    #prompt = "get status"
    #prompt = "what files found in the /bin ?"
    react()

Unused kwargs: ['torch_dtype', 'device_map', 'bnb_4bit_quantw_type', 'use_nested_quant']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Trying to load the mode: meta-llama_Meta-Llama-3.1-8b-Instruct_saved_response from local repo


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Unused kwargs: ['torch_dtype', 'device_map', 'bnb_4bit_quantw_type', 'use_nested_quant']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Trying to load the mode: meta-llama_Llama-3.2-1B-Instruct_saved_response from local repo


Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


####################################


Assistant: {"name": "list_files", "parameters": {"directory": "/"}}
####################################
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
match <re.Match object; span=(12, 68), match=' {"name": "list_files", "arguments": {"directory"> 

Assistant: {"name": "list_files", "arguments": {"directory": "/"}}
checking directory /
hhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhhh

The command to check the outside environment is: 
```bash
bin, boot, dev, etc, home, lib, lib32, lib64, libx32, media, mnt, opt, proc, root, run, sbin, srv, sys, tmp, usr, var, dockerenv, workspace, singularity.d
```

Explanation:
- bin: Binary files
- boot: Boot loader
- dev: Device files
- etc: System information
- home: Home directory
- lib: Library files
- lib32: Library files (32-bit)
- lib64: Library files (64-bit)
- libx32: Lib