In [1]:
import os
from typing import Tuple, Dict, List

from langchain.agents import initialize_agent
from langchain.tools import StructuredTool
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI
from langchain.agents import AgentExecutor
from langchain.memory import ConversationBufferMemory
from langchain.prompts.chat import MessagesPlaceholder

from langchain.chains import LLMChain

from langchain.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate, AIMessagePromptTemplate
from langchain.schema.messages import SystemMessage

from langchain.callbacks.manager import CallbackManager
import tools_wrappers
import asyncio
from langchain.callbacks.streaming_stdout_final_only import (
    FinalStreamingStdOutCallbackHandler,
)



In [27]:
class Config:
    """
    Static settings for the OpenAI-backed chat model.

    Every value is a class-level attribute, so it can be read from the class
    itself or from an instance (``Config().model``).
    """
    # Identifier of the OpenAI chat model requested by the agent.
    model = 'gpt-3.5-turbo-16k-0613'
    # Taken from the environment; None when the variable is not set.
    OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
    temperature = 0.0
    verbose = True
    # Only report whether a key was found: cell outputs are saved with the
    # notebook, so echoing the key itself would leak the credential.
    print(f"OPENAI_API_KEY={'<redacted>' if OPENAI_API_KEY else '<missing>'}")

OPENAI_API_KEY=<redacted>


In [28]:
def setup_memory() -> Tuple[Dict, ConversationBufferMemory]:
    """
    Prepare the prompt extras and the chat history store for the agent.

    The history is kept under the key "memory" and returned as message
    objects; the same key names the placeholder added to the prompt.
    :return a tuple of (agent keyword arguments, conversation memory).
    """
    history_key = "memory"
    # The instruction text is passed through verbatim as the system message.
    prompt_text = """
        You are a helpful AI assistant who aim to train the user how to assemble a LEGO car in XR immersive system.
        Extended Reality (XR) directs to the assortment of Virtual Reality (VR), Augmented Reality (AR), and Mixed Reality (MR).
        Please make sure you complete the objective above with the following rules:
        1/ The user is a trainee who is wearing HoloLen 2 glasses and is able to see XR environments in realtime.
        2/ You are able to call Unity functions in the LEGO AR application.
        3/ You are able to obtain HoloLens 2 Sensor Streaming data via TCP.
        4/ Alert if the user ask you something outside of LEGO assembly task but do not give overconfident answers.
        Your task is to answer the user's questions and assist the user to understand how to complete LEGO assembly task in XR. 
        """
    prompt_extras = {
        "extra_prompt_messages": [MessagesPlaceholder(variable_name=history_key)],
        "system_message": SystemMessage(content=prompt_text),
    }
    conversation = ConversationBufferMemory(memory_key=history_key, return_messages=True)
    return prompt_extras, conversation

In [29]:
# In the setup_tools function, access descriptions from LegoAPIWrapper
def setup_tools() -> List[StructuredTool]:
    """
    Wrap every callable advertised by ``LegoAPIWrapper`` as a ``StructuredTool``.

    ``LegoAPIWrapper.descriptions`` is iterated as a mapping of attribute name
    to description text. Each name is looked up on the wrapper instance and
    registered under that same name, with the mapped text passed as the tool
    description.
    :return the list of tools, in the iteration order of ``descriptions``.
    """
    lego_toolkits = tools_wrappers.LegoAPIWrapper()     # async toolkits
    # NOTE(review): the comment above labels these "async toolkits"; if the
    # attributes are coroutine functions they may need to be registered via
    # the `coroutine=` argument instead of `func=` - confirm against tools_wrappers.

    # Previously `description` was unpacked but never used, so the texts kept
    # in `descriptions` never reached the tool. An empty text is turned into
    # None so the function's own docstring is used instead.
    return [
        StructuredTool.from_function(
            func=getattr(lego_toolkits, name),
            name=name,
            description=description or None,
        )
        for name, description in lego_toolkits.descriptions.items()
    ]

In [30]:
def setup_agent() -> AgentExecutor:
    """
    Assemble the OpenAI-functions agent from its parts.

    Builds the chat model from ``Config``, then the prompt extras and memory
    from ``setup_memory`` and the tool list from ``setup_tools``, and hands
    everything to ``initialize_agent``.
    :return the executor produced by ``initialize_agent``.
    """
    settings = Config()
    chat_model = ChatOpenAI(
        model=settings.model,
        temperature=settings.temperature,
        verbose=settings.verbose,
    )

    prompt_extras, history = setup_memory()
    toolset = setup_tools()

    # Executor-level options; the executor itself is created non-verbose.
    executor_options = dict(
        agent=AgentType.OPENAI_FUNCTIONS,
        verbose=False,
        agent_kwargs=prompt_extras,
        memory=history,
    )
    return initialize_agent(toolset, chat_model, **executor_options)

In [31]:
import os
from typing import Tuple, Dict, List

from langchain.agents import initialize_agent
from langchain.tools import StructuredTool
from langchain.agents import AgentType
from langchain.chat_models import ChatOpenAI
from langchain.agents import AgentExecutor
from langchain.memory import ConversationBufferMemory
from langchain.prompts.chat import MessagesPlaceholder
from langchain.llms import HuggingFacePipeline

from langchain.chains import LLMChain

from langchain.prompts import ChatPromptTemplate
from langchain.prompts import HumanMessagePromptTemplate, AIMessagePromptTemplate
from langchain.schema.messages import SystemMessage

from langchain.callbacks.manager import CallbackManager
import tools_wrappers
import asyncio
from langchain.callbacks.streaming_stdout_final_only import (
    FinalStreamingStdOutCallbackHandler,
)


import transformers
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftConfig, PeftModel


class Config:
    """
    Contains the configuration of the locally fine-tuned LLM.

    All statements below sit in the class body, so they run once, when the
    class is defined: the adapter config is read, the 4-bit quantized base
    model and the tokenizer are loaded, and the text-generation pipeline is
    built. Consumers read the result from ``generate_text``.
    """
    project = "vox-finetune"
    base_model_name = "llama-2-7b-chat"
    storage_dir = '/media/PampusData/jpei'
    data_name = 'testdata'
    checkpoint_name = 'checkpoint-500'
    # base_model_id = "meta-llama/Llama-2-7b-hf"
    base_model_id = f'{storage_dir}/transformer_data/{base_model_name}'  # local model dir: /media/PampusData/jpei/transformer_data/llama-2-7b-chat
    run_name = f'{base_model_name}-{data_name}'
    ft_model_id = f'{storage_dir}/{project}/{run_name}'
    # Path of one intermediate checkpoint inside the run directory; not used
    # below, kept so a specific checkpoint can be selected by hand.
    ft_ckpt_id = f'{ft_model_id}/{checkpoint_name}'

    # Adapter configuration; it records which base model the adapter belongs to.
    config = PeftConfig.from_pretrained(ft_model_id)

    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )

    base_model = AutoModelForCausalLM.from_pretrained(
        config.base_model_name_or_path,
        return_dict=True,
        quantization_config=bnb_config,
        device_map="auto",
        # device_map={"": 'cuda'},
        trust_remote_code=True
    )

    # Attach the fine-tuned adapter. Before this, `ft_model` was only the
    # quantized base model and the fine-tuned weights were never loaded.
    # NOTE(review): the adapter is read from `ft_model_id`, the directory the
    # PeftConfig above comes from; switch to `ft_ckpt_id` if a specific
    # checkpoint is wanted - confirm which one holds the intended weights.
    ft_model = PeftModel.from_pretrained(base_model, ft_model_id)

    tokenizer = AutoTokenizer.from_pretrained(ft_model_id, add_bos_token=True, trust_remote_code=True)

    generate_text = transformers.pipeline(
        model=ft_model, tokenizer=tokenizer,
        return_full_text=True,  # langchain expects the full text
        task='text-generation',
        # we pass model parameters here too
        # Greedy decoding gives the deterministic output that temperature=0.0
        # was meant to request; a temperature of 0.0 is rejected by
        # transformers whenever sampling is enabled.
        do_sample=False,
        max_new_tokens=512,  # max number of tokens to generate in the output
        repetition_penalty=1.1  # without this output begins repeating
    )

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [34]:
def setup_memory(
    instructions_path: str = os.path.join('/home/jpei/ARTA', 'task_instructions/LEGO_Assembly_Task_Instructions.txt'),
) -> Tuple[Dict, ConversationBufferMemory]:
    """
    Sets up the system prompt and memory for the agent.

    The system prompt is the full text of the task-instruction file.
    :param instructions_path: text file holding the task instructions; the
        default is the location this notebook was written against.
    :return a tuple with the agent keyword pairs and the conversation memory.
    :raises OSError: if the instruction file cannot be opened.
    """
    # The path is a parameter rather than derived from __file__, which is not
    # defined inside a notebook kernel.
    # `with` closes the file handle as soon as the text is read; the encoding
    # is pinned so the prompt does not depend on the machine's locale.
    with open(instructions_path, 'r', encoding='utf-8') as instructions_file:
        lego_assembly_task_instructions = instructions_file.read()

    system_message = SystemMessage(
        content=lego_assembly_task_instructions
    )
    agent_kwargs = {
        "extra_prompt_messages": [MessagesPlaceholder(variable_name="memory")],
        "system_message": system_message,
    }
    memory = ConversationBufferMemory(memory_key="memory", return_messages=True)

    return agent_kwargs, memory


# In the setup_tools function, access descriptions from LegoAPIWrapper
def setup_tools() -> List[StructuredTool]:
    """
    Wrap every callable advertised by ``LegoAPIWrapper`` as a ``StructuredTool``.

    ``LegoAPIWrapper.descriptions`` is iterated as a mapping of attribute name
    to description text. Each name is looked up on the wrapper instance and
    registered under that same name, with the mapped text passed as the tool
    description.
    :return the list of tools, in the iteration order of ``descriptions``.
    """
    lego_toolkits = tools_wrappers.LegoAPIWrapper()     # async toolkits
    # NOTE(review): the comment above labels these "async toolkits"; if the
    # attributes are coroutine functions they may need to be registered via
    # the `coroutine=` argument instead of `func=` - confirm against tools_wrappers.

    # Previously `description` was unpacked but never used, so the texts kept
    # in `descriptions` never reached the tool. An empty text is turned into
    # None so the function's own docstring is used instead.
    return [
        StructuredTool.from_function(
            func=getattr(lego_toolkits, name),
            name=name,
            description=description or None,
        )
        for name, description in lego_toolkits.descriptions.items()
    ]

def setup_agent() -> AgentExecutor:
    """
    Build the conversational ReAct agent on top of the local HuggingFace pipeline.

    Wraps ``Config.generate_text`` as the LLM, collects the system prompt and
    memory from ``setup_memory`` and the tools from ``setup_tools``, and
    passes them to ``initialize_agent``.
    :return the executor produced by ``initialize_agent``.
    """
    cfg = Config()

    llm = HuggingFacePipeline(pipeline=cfg.generate_text)

    agent_kwargs, memory = setup_memory()

    tools = setup_tools()

    # setup_memory() shapes its output for the OpenAI-functions agent. The
    # conversational chat agent reads history from "chat_history" and takes
    # its system message as a plain template string, so adapt both here.
    memory.memory_key = "chat_history"
    system_text = agent_kwargs["system_message"].content
    # The instructions are literal text, not a template: escape braces so the
    # prompt template does not treat them as input variables.
    system_text = system_text.replace("{", "{{").replace("}", "}}")

    agent = initialize_agent(
        # `tools` is a required argument; it was commented out before, which
        # made this call fail before any agent was built.
        tools=tools,
        llm=llm,
        agent="chat-conversational-react-description",
        verbose=True,
        early_stopping_method="generate",
        memory=memory,
        agent_kwargs={"system_message": system_text},
    )

    return agent


In [35]:
# Build the agent executor used by the cells below.
# NOTE(review): the recorded AttributeError ('Config' object has no attribute
# 'generate_text') suggests the HuggingFace Config cell did not finish (its
# output shows a KeyboardInterrupt), leaving the earlier OpenAI Config bound -
# confirm by restarting the kernel and running the cells in order.
agent_executor: AgentExecutor = setup_agent()

AttributeError: 'Config' object has no attribute 'generate_text'

In [33]:
# AgentCallbackHandler comes from the project-local `callbacks` module.
from callbacks import AgentCallbackHandler
# Send a single user utterance through the agent, passing a fresh handler via
# the `callbacks` argument.
# NOTE(review): this cell's execution count (33) is lower than the cells above
# it, and its recorded output is an OpenAI APIError - presumably it was last
# run against the OpenAI-backed agent; re-run in order to confirm.
agent_executor.run('Hi can you start assemble?', callbacks=[AgentCallbackHandler()])

APIError: The server had an error processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if you keep seeing this error. (Please include the request ID 9a9ba8943f2ff8c2a77904db5d713818 in your email.) {
  "error": {
    "message": "The server had an error processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if you keep seeing this error. (Please include the request ID 9a9ba8943f2ff8c2a77904db5d713818 in your email.)",
    "type": "server_error",
    "param": null,
    "code": null
  }
}
 500 {'error': {'message': 'The server had an error processing your request. Sorry about that! You can retry your request, or contact us through our help center at help.openai.com if you keep seeing this error. (Please include the request ID 9a9ba8943f2ff8c2a77904db5d713818 in your email.)', 'type': 'server_error', 'param': None, 'code': None}} {'Date': 'Wed, 22 Nov 2023 00:34:43 GMT', 'Content-Type': 'application/json', 'Content-Length': '366', 'Connection': 'keep-alive', 'access-control-allow-origin': '*', 'openai-organization': 'kaust-vfs7hx', 'openai-processing-ms': '274', 'openai-version': '2020-10-01', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'x-ratelimit-limit-requests': '5000', 'x-ratelimit-limit-tokens': '180000', 'x-ratelimit-limit-tokens_usage_based': '180000', 'x-ratelimit-remaining-requests': '4999', 'x-ratelimit-remaining-tokens': '179757', 'x-ratelimit-remaining-tokens_usage_based': '179757', 'x-ratelimit-reset-requests': '12ms', 'x-ratelimit-reset-tokens': '80ms', 'x-ratelimit-reset-tokens_usage_based': '80ms', 'x-request-id': '9a9ba8943f2ff8c2a77904db5d713818', 'CF-Cache-Status': 'DYNAMIC', 'Server': 'cloudflare', 'CF-RAY': '829d169a6c020b56-AMS', 'alt-svc': 'h3=":443"; ma=86400'}